2
0

Token.php 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Analysis
  18. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. */
  21. /**
  22. * @category Zend
  23. * @package Zend_Search_Lucene
  24. * @subpackage Analysis
  25. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  26. * @license http://framework.zend.com/license/new-bsd New BSD License
  27. */
  28. class Zend_Search_Lucene_Analysis_Token
  29. {
  30. /**
  31. * The text of the term.
  32. *
  33. * @var string
  34. */
  35. private $_termText;
  36. /**
  37. * Start in source text.
  38. *
  39. * @var integer
  40. */
  41. private $_startOffset;
  42. /**
  43. * End in source text
  44. *
  45. * @var integer
  46. */
  47. private $_endOffset;
  48. /**
  49. * The position of this token relative to the previous Token.
  50. *
  51. * The default value is one.
  52. *
  53. * Some common uses for this are:
  54. * Set it to zero to put multiple terms in the same position. This is
  55. * useful if, e.g., a word has multiple stems. Searches for phrases
  56. * including either stem will match. In this case, all but the first stem's
  57. * increment should be set to zero: the increment of the first instance
  58. * should be one. Repeating a token with an increment of zero can also be
  59. * used to boost the scores of matches on that token.
  60. *
  61. * Set it to values greater than one to inhibit exact phrase matches.
  62. * If, for example, one does not want phrases to match across removed stop
  63. * words, then one could build a stop word filter that removes stop words and
  64. * also sets the increment to the number of stop words removed before each
  65. * non-stop word. Then exact phrase queries will only match when the terms
  66. * occur with no intervening stop words.
  67. *
  68. * @var integer
  69. */
  70. private $_positionIncrement;
  71. /**
  72. * Object constructor
  73. *
  74. * @param string $text
  75. * @param integer $start
  76. * @param integer $end
  77. * @param string $type
  78. */
  79. public function __construct($text, $start, $end)
  80. {
  81. $this->_termText = $text;
  82. $this->_startOffset = $start;
  83. $this->_endOffset = $end;
  84. $this->_positionIncrement = 1;
  85. }
  86. /**
  87. * positionIncrement setter
  88. *
  89. * @param integer $positionIncrement
  90. */
  91. public function setPositionIncrement($positionIncrement)
  92. {
  93. $this->_positionIncrement = $positionIncrement;
  94. }
  95. /**
  96. * Returns the position increment of this Token.
  97. *
  98. * @return integer
  99. */
  100. public function getPositionIncrement()
  101. {
  102. return $this->_positionIncrement;
  103. }
  104. /**
  105. * Returns the Token's term text.
  106. *
  107. * @return string
  108. */
  109. public function getTermText()
  110. {
  111. return $this->_termText;
  112. }
  113. /**
  114. * Returns this Token's starting offset, the position of the first character
  115. * corresponding to this token in the source text.
  116. *
  117. * Note:
  118. * The difference between getEndOffset() and getStartOffset() may not be equal
  119. * to strlen(Zend_Search_Lucene_Analysis_Token::getTermText()), as the term text may have been altered
  120. * by a stemmer or some other filter.
  121. *
  122. * @return integer
  123. */
  124. public function getStartOffset()
  125. {
  126. return $this->_startOffset;
  127. }
  128. /**
  129. * Returns this Token's ending offset, one greater than the position of the
  130. * last character corresponding to this token in the source text.
  131. *
  132. * @return integer
  133. */
  134. public function getEndOffset()
  135. {
  136. return $this->_endOffset;
  137. }
  138. }