Text.php 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Analysis
  18. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /** Zend_Search_Lucene_Analysis_Analyzer_Common */
  23. require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
  24. /**
  25. * @category Zend
  26. * @package Zend_Search_Lucene
  27. * @subpackage Analysis
  28. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  29. * @license http://framework.zend.com/license/new-bsd New BSD License
  30. */
  class Zend_Search_Lucene_Analysis_Analyzer_Common_Text extends Zend_Search_Lucene_Analysis_Analyzer_Common
  {
      /**
       * Byte offset within the input at which the next token search begins
       *
       * @var integer
       */
      private $_position;

      /**
       * Reset token stream
       *
       * Rewinds the scan position to the beginning. When input is present it
       * is transliterated to ASCII with iconv (this step is skipped on AIX)
       * and the analyzer's encoding is then recorded as ASCII.
       */
      public function reset()
      {
          $this->_position = 0;

          if ($this->_input !== null) {
              // convert input into ascii (the iconv call is bypassed when running on AIX)
              if (PHP_OS != 'AIX') {
                  $this->_input = iconv($this->_encoding, 'ASCII//TRANSLIT', $this->_input);
              }

              // The encoding is marked as ASCII whether or not the conversion ran.
              $this->_encoding = 'ASCII';
          }
      }

      /**
       * Tokenization stream API
       * Get next token
       * Returns null at the end of stream
       *
       * Scans forward from the current position for the next run of ASCII
       * letters, wraps it in a token and passes it through normalize().
       * When normalize() yields null (token skipped) the scan continues
       * with the following run of letters.
       *
       * @return Zend_Search_Lucene_Analysis_Token|null
       */
      public function nextToken()
      {
          if ($this->_input === null) {
              return null;
          }

          while (true) {
              $found = preg_match('/[a-zA-Z]+/', $this->_input, $hit, PREG_OFFSET_CAPTURE, $this->_position);

              // A falsy result covers both outcomes:
              //   a) nothing left to match (preg_match(...) === 0)
              //   b) an error occurred     (preg_match(...) === FALSE)
              if (!$found) {
                  return null;
              }

              // With PREG_OFFSET_CAPTURE the whole-match entry is array(text, offset).
              list($word, $start) = $hit[0];
              $end = $start + strlen($word);

              // Advance past this word before normalizing so a skipped token is not matched again.
              $this->_position = $end;

              $token = $this->normalize(new Zend_Search_Lucene_Analysis_Token($word, $start, $end));
              if ($token !== null) {
                  return $token;
              }
              // token was skipped by normalize(); try the next run of letters
          }
      }
  }