QueryToken.php 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Search
  18. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. */
  21. /**
  22. * @category Zend
  23. * @package Zend_Search_Lucene
  24. * @subpackage Search
  25. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  26. * @license http://framework.zend.com/license/new-bsd New BSD License
  27. */
  class Zend_Search_Lucene_Search_QueryToken
  {
      /**
       * Token types.
       *
       * These are the values stored in {@link $type}. The constructor assigns
       * every type except TT_FIELD, which it never produces itself.
       */
      const TT_WORD             = 0;  // Word
      const TT_PHRASE           = 1;  // Phrase (one or several quoted words)
      const TT_FIELD            = 2;  // Field name in 'field:word', field:<phrase> or field:(<subquery>) pairs
      const TT_FIELD_INDICATOR  = 3;  // ':'
      const TT_REQUIRED         = 4;  // '+'
      const TT_PROHIBITED       = 5;  // '-'
      const TT_FUZZY_PROX_MARK  = 6;  // '~'
      const TT_BOOSTING_MARK    = 7;  // '^'
      const TT_RANGE_INCL_START = 8;  // '['
      const TT_RANGE_INCL_END   = 9;  // ']'
      const TT_RANGE_EXCL_START = 10; // '{'
      const TT_RANGE_EXCL_END   = 11; // '}'
      const TT_SUBQUERY_START   = 12; // '('
      const TT_SUBQUERY_END     = 13; // ')'
      const TT_AND_LEXEME       = 14; // 'AND' or 'and' (also '&&')
      const TT_OR_LEXEME        = 15; // 'OR' or 'or' (also '||')
      const TT_NOT_LEXEME       = 16; // 'NOT' or 'not' (also '!')
      const TT_TO_LEXEME        = 17; // 'TO' or 'to'
      const TT_NUMBER           = 18; // Number, like: 10, 0.8, .64, ....

      /**
       * Token categories.
       *
       * A category is the coarse classification passed to the constructor,
       * which refines it into one of the TT_* token types.
       */
      const TC_WORD           = 0; // Word
      const TC_PHRASE         = 1; // Phrase (one or several quoted words)
      const TC_NUMBER         = 2; // Numbers, which are used with syntax elements. Ex. roam~0.8
      const TC_SYNTAX_ELEMENT = 3; // : + - ( ) [ ] { } ! || && ~ ^

      /**
       * Reserved words (lower-cased) and the token types they map to.
       *
       * Any TC_WORD token whose lower-cased text is not listed here is a plain TT_WORD.
       *
       * @var array
       */
      private static $_wordLexemes = array(
          'and' => self::TT_AND_LEXEME,
          'or'  => self::TT_OR_LEXEME,
          'not' => self::TT_NOT_LEXEME,
          'to'  => self::TT_TO_LEXEME
      );

      /**
       * Syntax element lexemes and the token types they map to.
       *
       * @var array
       */
      private static $_syntaxElements = array(
          ':'  => self::TT_FIELD_INDICATOR,
          '+'  => self::TT_REQUIRED,
          '-'  => self::TT_PROHIBITED,
          '~'  => self::TT_FUZZY_PROX_MARK,
          '^'  => self::TT_BOOSTING_MARK,
          '['  => self::TT_RANGE_INCL_START,
          ']'  => self::TT_RANGE_INCL_END,
          '{'  => self::TT_RANGE_EXCL_START,
          '}'  => self::TT_RANGE_EXCL_END,
          '('  => self::TT_SUBQUERY_START,
          ')'  => self::TT_SUBQUERY_END,
          '!'  => self::TT_NOT_LEXEME,
          '&&' => self::TT_AND_LEXEME,
          '||' => self::TT_OR_LEXEME
      );

      /**
       * Token type (one of the TT_* constants).
       *
       * @var integer
       */
      public $type;

      /**
       * Token text, stored exactly as passed to the constructor.
       *
       * @var string
       */
      public $text;

      /**
       * Token position within query (the constructor argument plus one).
       *
       * @var integer
       */
      public $position;

      /**
       * Returns all possible lexeme types.
       * It's used for syntax analyzer state machine initialization
       *
       * @return array list of every TT_* constant, in ascending numeric order
       */
      public static function getTypes()
      {
          return array(
              self::TT_WORD,
              self::TT_PHRASE,
              self::TT_FIELD,
              self::TT_FIELD_INDICATOR,
              self::TT_REQUIRED,
              self::TT_PROHIBITED,
              self::TT_FUZZY_PROX_MARK,
              self::TT_BOOSTING_MARK,
              self::TT_RANGE_INCL_START,
              self::TT_RANGE_INCL_END,
              self::TT_RANGE_EXCL_START,
              self::TT_RANGE_EXCL_END,
              self::TT_SUBQUERY_START,
              self::TT_SUBQUERY_END,
              self::TT_AND_LEXEME,
              self::TT_OR_LEXEME,
              self::TT_NOT_LEXEME,
              self::TT_TO_LEXEME,
              self::TT_NUMBER
          );
      }

      /**
       * Builds a token from its category, text and position, deriving the
       * concrete token type from the category and (where relevant) the text.
       *
       * @param integer $tokenCategory one of the TC_* constants
       * @param string  $tokenText     token text as it appears in the query
       * @param integer $position      position of the token; stored incremented by one
       * @throws Zend_Search_Lucene_Exception if the category is unknown, or the
       *                                      text is not a known syntax element
       */
      public function __construct($tokenCategory, $tokenText, $position)
      {
          $this->text     = $tokenText;
          $this->position = $position + 1; // Start from 1

          switch ($tokenCategory) {
              case self::TC_WORD:
                  // Reserved words are matched case-insensitively; lower-case once and look up.
                  $lexeme = strtolower($tokenText);
                  $this->type = isset(self::$_wordLexemes[$lexeme])
                              ? self::$_wordLexemes[$lexeme]
                              : self::TT_WORD;
                  break;

              case self::TC_PHRASE:
                  $this->type = self::TT_PHRASE;
                  break;

              case self::TC_NUMBER:
                  $this->type = self::TT_NUMBER;
                  break;

              case self::TC_SYNTAX_ELEMENT:
                  $this->type = self::_syntaxElementType($tokenText);
                  break;

              default:
                  require_once 'Zend/Search/Lucene/Exception.php';
                  throw new Zend_Search_Lucene_Exception('Unrecognized lexeme type: \'' . $tokenCategory . '\'');
          }
      }

      /**
       * Maps a syntax element lexeme to its token type.
       *
       * @param string $tokenText
       * @return integer one of the TT_* constants
       * @throws Zend_Search_Lucene_Exception if the text is not a known syntax element
       */
      private static function _syntaxElementType($tokenText)
      {
          // Only string lexemes can match; anything else is rejected below.
          if (is_string($tokenText) && isset(self::$_syntaxElements[$tokenText])) {
              return self::$_syntaxElements[$tokenText];
          }

          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception('Unrecognized query syntax lexeme: \'' . $tokenText . '\'');
      }
  }