Text.php 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Analysis
  18. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. */
  21. /** Zend_Search_Lucene_Analysis_Analyzer_Common */
  22. require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
  23. /**
  24. * @category Zend
  25. * @package Zend_Search_Lucene
  26. * @subpackage Analysis
  27. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  28. * @license http://framework.zend.com/license/new-bsd New BSD License
  29. */
  class Zend_Search_Lucene_Analysis_Analyzer_Common_Text extends Zend_Search_Lucene_Analysis_Analyzer_Common
  {
      /**
       * Current position in a stream
       *
       * Byte offset into the input at which nextToken() starts its next search.
       * Initialised to 0 so that it is always a valid preg_match() offset.
       *
       * @var integer
       */
      private $_position = 0;

      /**
       * Reset token stream
       *
       * Rewinds the current position to the start of the input. When input is
       * set, it is converted to ASCII (transliterating where possible) so that
       * nextToken() can match tokens with a plain [a-zA-Z] character class, and
       * the stored encoding is switched to 'ASCII'.
       *
       * NOTE(review): $_input and $_encoding are not declared in this class;
       * presumably they are inherited from the parent analyzer - confirm.
       *
       * @return void
       */
      public function reset()
      {
          $this->_position = 0;

          if ($this->_input === null) {
              return;
          }

          // convert input into ascii (the conversion is skipped when PHP_OS is 'AIX')
          if (PHP_OS != 'AIX') {
              $converted = iconv($this->_encoding, 'ASCII//TRANSLIT', $this->_input);

              if ($converted === false) {
                  // iconv() returns FALSE on failure (e.g. an illegal byte sequence
                  // in the input). Storing FALSE as the input would silently drop
                  // every token, so fall back to the original bytes with each
                  // non-ASCII byte replaced by a space. ASCII words stay intact and
                  // stay separated from each other.
                  $converted = (string) preg_replace('/[^\x00-\x7F]/', ' ', $this->_input);
              }

              $this->_input = $converted;
          }

          $this->_encoding = 'ASCII';
      }

      /**
       * Tokenization stream API
       * Get next token
       * Returns null at the end of stream
       *
       * A token is a maximal run of ASCII letters ([a-zA-Z]+) found at or after
       * the current position. Each candidate is passed through normalize(); when
       * normalize() returns null the candidate is skipped and the search continues
       * with the next run of letters.
       *
       * @return Zend_Search_Lucene_Analysis_Token|null
       */
      public function nextToken()
      {
          if ($this->_input === null) {
              return null;
          }

          do {
              if (! preg_match('/[a-zA-Z]+/', $this->_input, $match, PREG_OFFSET_CAPTURE, $this->_position)) {
                  // It covers both cases a) there are no matches (preg_match(...) === 0)
                  // b) error occurred (preg_match(...) === FALSE)
                  return null;
              }

              // With PREG_OFFSET_CAPTURE each match is array(matched text, byte offset)
              $str    = $match[0][0];
              $pos    = $match[0][1];
              $endpos = $pos + strlen($str);

              // Continue the next search right after this match
              $this->_position = $endpos;

              $token = $this->normalize(new Zend_Search_Lucene_Analysis_Token($str, $pos, $endpos));
          } while ($token === null); // try again if token is skipped

          return $token;
      }
  }