2
0

Token.php 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Analysis
  18. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /**
  23. * @category Zend
  24. * @package Zend_Search_Lucene
  25. * @subpackage Analysis
  26. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  27. * @license http://framework.zend.com/license/new-bsd New BSD License
  28. */
  29. class Zend_Search_Lucene_Analysis_Token
  30. {
  31. /**
  32. * The text of the term.
  33. *
  34. * @var string
  35. */
  36. private $_termText;
  37. /**
  38. * Start in source text.
  39. *
  40. * @var integer
  41. */
  42. private $_startOffset;
  43. /**
  44. * End in source text
  45. *
  46. * @var integer
  47. */
  48. private $_endOffset;
  49. /**
  50. * The position of this token relative to the previous Token.
  51. *
  52. * The default value is one.
  53. *
  54. * Some common uses for this are:
  55. * Set it to zero to put multiple terms in the same position. This is
  56. * useful if, e.g., a word has multiple stems. Searches for phrases
  57. * including either stem will match. In this case, all but the first stem's
  58. * increment should be set to zero: the increment of the first instance
  59. * should be one. Repeating a token with an increment of zero can also be
  60. * used to boost the scores of matches on that token.
  61. *
  62. * Set it to values greater than one to inhibit exact phrase matches.
  63. * If, for example, one does not want phrases to match across removed stop
  64. * words, then one could build a stop word filter that removes stop words and
  65. * also sets the increment to the number of stop words removed before each
  66. * non-stop word. Then exact phrase queries will only match when the terms
  67. * occur with no intervening stop words.
  68. *
  69. * @var integer
  70. */
  71. private $_positionIncrement;
  72. /**
  73. * Object constructor
  74. *
  75. * @param string $text
  76. * @param integer $start
  77. * @param integer $end
  78. * @param string $type
  79. */
  80. public function __construct($text, $start, $end)
  81. {
  82. $this->_termText = $text;
  83. $this->_startOffset = $start;
  84. $this->_endOffset = $end;
  85. $this->_positionIncrement = 1;
  86. }
  87. /**
  88. * positionIncrement setter
  89. *
  90. * @param integer $positionIncrement
  91. */
  92. public function setPositionIncrement($positionIncrement)
  93. {
  94. $this->_positionIncrement = $positionIncrement;
  95. }
  96. /**
  97. * Returns the position increment of this Token.
  98. *
  99. * @return integer
  100. */
  101. public function getPositionIncrement()
  102. {
  103. return $this->_positionIncrement;
  104. }
  105. /**
  106. * Returns the Token's term text.
  107. *
  108. * @return string
  109. */
  110. public function getTermText()
  111. {
  112. return $this->_termText;
  113. }
  114. /**
  115. * Sets the Token's term text.
  116. *
  117. * @param string $text
  118. * @return this
  119. */
  120. public function setTermText($text)
  121. {
  122. $this->_termText = $text;
  123. return $this;
  124. }
  125. /**
  126. * Returns this Token's starting offset, the position of the first character
  127. * corresponding to this token in the source text.
  128. *
  129. * Note:
  130. * The difference between getEndOffset() and getStartOffset() may not be equal
  131. * to strlen(Zend_Search_Lucene_Analysis_Token::getTermText()), as the term text may have been altered
  132. * by a stemmer or some other filter.
  133. *
  134. * @return integer
  135. */
  136. public function getStartOffset()
  137. {
  138. return $this->_startOffset;
  139. }
  140. /**
  141. * Returns this Token's ending offset, one greater than the position of the
  142. * last character corresponding to this token in the source text.
  143. *
  144. * @return integer
  145. */
  146. public function getEndOffset()
  147. {
  148. return $this->_endOffset;
  149. }
  150. }