QueryLexer.php 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Search
  18. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. */
  21. /** Zend_Search_Lucene_FSM */
  22. require_once 'Zend/Search/Lucene/FSM.php';
  23. /** Zend_Search_Lucene_Search_QueryToken */
  24. require_once 'Zend/Search/Lucene/Search/QueryToken.php';
  25. /**
  26. * @category Zend
  27. * @package Zend_Search_Lucene
  28. * @subpackage Search
  29. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  30. * @license http://framework.zend.com/license/new-bsd New BSD License
  31. */
  32. class Zend_Search_Lucene_Search_QueryLexer extends Zend_Search_Lucene_FSM
  33. {
  /**
   * States of the lexer state machine.
   *
   * ST_ERROR is only ever used as the target of rules that carry an
   * exception-throwing action (see the constructor).
   */
  const ST_WHITE_SPACE     = 0;
  const ST_SYNT_LEXEME     = 1;
  const ST_LEXEME          = 2;
  const ST_QUOTED_LEXEME   = 3;
  const ST_ESCAPED_CHAR    = 4;
  const ST_ESCAPED_QCHAR   = 5;
  const ST_LEXEME_MODIFIER = 6;
  const ST_NUMBER          = 7;
  const ST_MANTISSA        = 8;
  const ST_ERROR           = 9;

  /**
   * Input symbols: the character classes produced by _translateInput().
   *
   * The values are plain identifiers; their numeric order carries no meaning.
   */
  const IN_WHITE_SPACE     = 0;
  const IN_SYNT_CHAR       = 1;
  const IN_LEXEME_MODIFIER = 2;
  const IN_ESCAPE_CHAR     = 3;
  const IN_QUOTE           = 4;
  const IN_DECIMAL_POINT   = 5;
  const IN_ASCII_DIGIT     = 6;
  const IN_CHAR            = 7;
  const IN_MUTABLE_CHAR    = 8;

  /**
   * Character sets used to classify query characters.
   *
   * "Mutable" characters ('+' and '-') start a syntax lexeme when met outside
   * a lexeme, but are kept as ordinary characters inside one. Characters
   * listed in QUERY_DOUBLECHARLEXEME_CHARS must appear doubled ('||', '&&').
   */
  const QUERY_WHITE_SPACE_CHARS      = " \n\r\t";
  const QUERY_SYNT_CHARS             = ':()[]{}!|&';
  const QUERY_MUTABLE_CHARS          = '+-';
  const QUERY_DOUBLECHARLEXEME_CHARS = '|&';
  const QUERY_LEXEMEMODIFIER_CHARS   = '~^';
  const QUERY_ASCIIDIGITS_CHARS      = '0123456789';

  /**
   * Tokens recognized so far by the current tokenize() run.
   *
   * @var array
   */
  private $_lexemes;

  /**
   * Query split into characters, one (possibly multi-byte) character per
   * element. Set back to null once tokenize() finishes successfully.
   *
   * @var array
   */
  private $_queryString;

  /**
   * Index of the character currently being processed within $_queryString.
   * Also used to build error messages.
   *
   * @var int
   */
  private $_queryStringPosition;

  /**
   * Characters collected so far for the lexeme being recognized.
   *
   * @var string
   */
  private $_currentLexeme;
  /**
   * Builds the lexer state machine.
   *
   * Registers the states and input symbols with the parent FSM, loads the
   * transition table, and attaches the actions that collect characters and
   * emit tokens.
   */
  public function __construct()
  {
    // Order matters only as the column order of the transition table below.
    $inputSymbols = array(
      self::IN_WHITE_SPACE,
      self::IN_SYNT_CHAR,
      self::IN_MUTABLE_CHAR,
      self::IN_LEXEME_MODIFIER,
      self::IN_ESCAPE_CHAR,
      self::IN_QUOTE,
      self::IN_DECIMAL_POINT,
      self::IN_ASCII_DIGIT,
      self::IN_CHAR,
    );

    parent::__construct(
      array(
        self::ST_WHITE_SPACE,
        self::ST_SYNT_LEXEME,
        self::ST_LEXEME,
        self::ST_QUOTED_LEXEME,
        self::ST_ESCAPED_CHAR,
        self::ST_ESCAPED_QCHAR,
        self::ST_LEXEME_MODIFIER,
        self::ST_NUMBER,
        self::ST_MANTISSA,
        self::ST_ERROR,
      ),
      $inputSymbols
    );

    // Short local aliases keep the transition table readable.
    $white    = self::ST_WHITE_SPACE;
    $synt     = self::ST_SYNT_LEXEME;
    $lexeme   = self::ST_LEXEME;
    $quoted   = self::ST_QUOTED_LEXEME;
    $escaped  = self::ST_ESCAPED_CHAR;
    $escapedQ = self::ST_ESCAPED_QCHAR;
    $modifier = self::ST_LEXEME_MODIFIER;
    $number   = self::ST_NUMBER;
    $mantissa = self::ST_MANTISSA;
    $error    = self::ST_ERROR;

    // Every rule of a given source state that leads to ST_ERROR uses the
    // same exception-throwing action.
    $lexemeModifierErrorAction    = new Zend_Search_Lucene_FSMAction($this, 'lexModifierErrException');
    $quoteWithinLexemeErrorAction = new Zend_Search_Lucene_FSMAction($this, 'quoteWithinLexemeErrException');
    $wrongNumberErrorAction       = new Zend_Search_Lucene_FSMAction($this, 'wrongNumberErrException');

    $errorActions = array(
      $lexeme   => $quoteWithinLexemeErrorAction,
      $modifier => $lexemeModifierErrorAction,
      $number   => $wrongNumberErrorAction,
      $mantissa => $wrongNumberErrorAction,
    );

    // Transition table: source state => target state for each input symbol.
    // Columns follow $inputSymbols:
    //   white space, syntax char, '+'/'-', '~'/'^', '\', '"', '.', digit, other char
    $transitionTable = array(
      $white    => array($white,  $synt,   $synt,   $modifier, $escaped,  $quoted, $lexeme,   $lexeme,   $lexeme),
      $synt     => array($white,  $synt,   $synt,   $modifier, $escaped,  $quoted, $lexeme,   $lexeme,   $lexeme),
      // A bare quote inside a lexeme is an error.
      $lexeme   => array($white,  $synt,   $lexeme, $modifier, $escaped,  $error,  $lexeme,   $lexeme,   $lexeme),
      // Inside quotes everything is literal except '\' and the closing quote.
      $quoted   => array($quoted, $quoted, $quoted, $quoted,   $escapedQ, $white,  $quoted,   $quoted,   $quoted),
      // Whatever follows an escape char is taken literally.
      $escaped  => array_fill(0, count($inputSymbols), $lexeme),
      $escapedQ => array_fill(0, count($inputSymbols), $quoted),
      // A modifier may be followed only by a number, white space or syntax.
      $modifier => array($white,  $synt,   $synt,   $modifier, $error,    $error,  $mantissa, $number,   $error),
      $number   => array($white,  $synt,   $synt,   $modifier, $error,    $error,  $mantissa, $number,   $error),
      // A second decimal point is not allowed.
      $mantissa => array($white,  $synt,   $synt,   $modifier, $error,    $error,  $error,    $mantissa, $error),
    );

    foreach ($transitionTable as $source => $targets) {
      $rules = array();
      foreach ($targets as $column => $target) {
        $rule = array($source, $inputSymbols[$column], $target);
        if ($target === $error) {
          $rule[] = $errorActions[$source];
        }
        $rules[] = $rule;
      }
      $this->addRules($rules);
    }

    /** Actions */
    $syntaxLexemeAction    = new Zend_Search_Lucene_FSMAction($this, 'addQuerySyntaxLexeme');
    $lexemeModifierAction  = new Zend_Search_Lucene_FSMAction($this, 'addLexemeModifier');
    $addLexemeAction       = new Zend_Search_Lucene_FSMAction($this, 'addLexeme');
    $addQuotedLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addQuotedLexeme');
    $addNumberLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addNumberLexeme');
    $addLexemeCharAction   = new Zend_Search_Lucene_FSMAction($this, 'addLexemeChar');

    /** Syntax lexeme */
    $this->addEntryAction($synt, $syntaxLexemeAction);
    // Two syntax lexemes in succession (presumably the entry action is not
    // fired on a self-transition - confirm against Zend_Search_Lucene_FSM).
    $this->addTransitionAction($synt, $synt, $syntaxLexemeAction);

    /** Lexeme */
    $this->addEntryAction($lexeme, $addLexemeCharAction);
    $this->addTransitionAction($lexeme, $lexeme, $addLexemeCharAction);
    // ST_ESCAPED_CHAR => ST_LEXEME is covered by the ST_LEXEME entry action.
    // Leaving ST_LEXEME for any of these states emits the collected word.
    foreach (array($white, $synt, $quoted, $modifier, $number, $mantissa) as $nextState) {
      $this->addTransitionAction($lexeme, $nextState, $addLexemeAction);
    }

    /** Quoted lexeme */
    // No entry action: the opening quote itself is skipped.
    $this->addTransitionAction($quoted, $quoted, $addLexemeCharAction);
    $this->addTransitionAction($escapedQ, $quoted, $addLexemeCharAction);
    // The closing quote moves to ST_WHITE_SPACE; no other exit exists.
    $this->addTransitionAction($quoted, $white, $addQuotedLexemeAction);

    /** Lexeme modifier */
    $this->addEntryAction($modifier, $lexemeModifierAction);

    /** Number */
    $numericStates = array($number, $mantissa);
    foreach ($numericStates as $numericState) {
      $this->addEntryAction($numericState, $addLexemeCharAction);
    }
    // ST_NUMBER => ST_MANTISSA is covered by the ST_MANTISSA entry action.
    $this->addTransitionAction($number, $number, $addLexemeCharAction);
    $this->addTransitionAction($mantissa, $mantissa, $addLexemeCharAction);
    // Leaving a numeric state emits the collected number.
    foreach ($numericStates as $numericState) {
      foreach (array($white, $synt, $modifier) as $nextState) {
        $this->addTransitionAction($numericState, $nextState, $addNumberLexemeAction);
      }
    }
  }
  /**
   * Classify a single query character as a state machine input symbol.
   *
   * Any character that belongs to none of the special classes (including
   * every multi-byte character) is reported as IN_CHAR.
   *
   * @param string $char one character of the query
   * @return int one of the IN_* constants
   */
  private function _translateInput($char)
  {
    if (strpos(self::QUERY_WHITE_SPACE_CHARS, $char) !== false) {
      return self::IN_WHITE_SPACE;
    }
    if (strpos(self::QUERY_SYNT_CHARS, $char) !== false) {
      return self::IN_SYNT_CHAR;
    }
    if (strpos(self::QUERY_MUTABLE_CHARS, $char) !== false) {
      return self::IN_MUTABLE_CHAR;
    }
    if (strpos(self::QUERY_LEXEMEMODIFIER_CHARS, $char) !== false) {
      return self::IN_LEXEME_MODIFIER;
    }
    if (strpos(self::QUERY_ASCIIDIGITS_CHARS, $char) !== false) {
      return self::IN_ASCII_DIGIT;
    }
    if ($char === '"') {
      return self::IN_QUOTE;
    }
    if ($char === '.') {
      return self::IN_DECIMAL_POINT;
    }
    if ($char === '\\') {
      return self::IN_ESCAPE_CHAR;
    }

    return self::IN_CHAR;
  }
  /**
   * Tokenize a query string into a list of lexemes.
   *
   * The string is split into characters with iconv, each character is fed
   * to the state machine, and the attached actions collect the tokens.
   *
   * @param string $inputString query text
   * @param string $encoding    encoding of $inputString, passed to iconv
   * @return array list of Zend_Search_Lucene_Search_QueryToken objects
   * @throws Zend_Search_Lucene_Search_QueryParserException on a syntax error
   *         or when the query ends inside an unfinished lexeme
   */
  public function tokenize($inputString, $encoding)
  {
    $this->reset();

    $this->_lexemes     = array();
    $this->_queryString = array();
    // A previous call aborted by an exception (unclosed quote, quote inside
    // a lexeme, ...) leaves its partial lexeme in the buffer. Clear it so a
    // reused lexer does not prepend it to the first lexeme of this query.
    $this->_currentLexeme = '';

    if (PHP_OS == 'AIX' && $encoding == '') {
      $encoding = 'ISO8859-1';
    }
    $strLength = iconv_strlen($inputString, $encoding);

    // Workaround for iconv_substr bug
    $inputString .= ' ';

    for ($count = 0; $count < $strLength; $count++) {
      $this->_queryString[$count] = iconv_substr($inputString, $count, 1, $encoding);
    }

    // The position is an instance property because actions read it and
    // addQuerySyntaxLexeme() advances it when consuming a doubled character.
    // The character array itself is not modified while processing, so its
    // size can be computed once.
    $charCount = count($this->_queryString);
    for ($this->_queryStringPosition = 0;
         $this->_queryStringPosition < $charCount;
         $this->_queryStringPosition++) {
      $this->process($this->_translateInput($this->_queryString[$this->_queryStringPosition]));
    }

    // Feed a final white space so that a lexeme still being collected is
    // emitted. Ending in any state other than ST_WHITE_SPACE means the query
    // stopped inside an unfinished construct (e.g. an unclosed quote).
    $this->process(self::IN_WHITE_SPACE);

    if ($this->getState() != self::ST_WHITE_SPACE) {
      require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
      throw new Zend_Search_Lucene_Search_QueryParserException('Unexpected end of query');
    }

    $this->_queryString = null;

    return $this->_lexemes;
  }
  307. /*********************************************************************
  308. * Actions implementation
  309. *
  310. * Actions modify the list of recognized lexemes
  311. *********************************************************************/
  /**
   * Emit a token for the syntax character at the current position.
   *
   * '|' and '&' are valid only doubled; the second character is consumed
   * here. A field indicator is not stored as a token of its own: instead the
   * previously recognized word token is re-typed as a field name.
   *
   * @throws Zend_Search_Lucene_Search_QueryParserException
   */
  public function addQuerySyntaxLexeme()
  {
    $syntaxChars = $this->_queryString[$this->_queryStringPosition];

    // Two-character lexemes
    if (strpos(self::QUERY_DOUBLECHARLEXEME_CHARS, $syntaxChars) !== false) {
      // Step onto the character that has to repeat the first one
      $this->_queryStringPosition++;

      $atEndOfQuery = ($this->_queryStringPosition == count($this->_queryString));
      if ($atEndOfQuery || $this->_queryString[$this->_queryStringPosition] != $syntaxChars) {
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('Two chars lexeme expected. ' . $this->_positionMsg());
      }

      // Double the character
      $syntaxChars .= $syntaxChars;
    }

    $syntaxToken = new Zend_Search_Lucene_Search_QueryToken(
      Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
      $syntaxChars,
      $this->_queryStringPosition
    );

    if ($syntaxToken->type != Zend_Search_Lucene_Search_QueryToken::TT_FIELD_INDICATOR) {
      $this->_lexemes[] = $syntaxToken;
      return;
    }

    // Field indicator: drop it and turn the preceding word into a field name
    $fieldToken = array_pop($this->_lexemes);
    if ($fieldToken === null || $fieldToken->type != Zend_Search_Lucene_Search_QueryToken::TT_WORD) {
      require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
      throw new Zend_Search_Lucene_Search_QueryParserException('Field mark \':\' must follow field name. ' . $this->_positionMsg());
    }

    $fieldToken->type = Zend_Search_Lucene_Search_QueryToken::TT_FIELD;
    $this->_lexemes[] = $fieldToken;
  }
  /**
   * Emit a syntax-element token for the lexeme modifier character at the
   * current position.
   */
  public function addLexemeModifier()
  {
    $position = $this->_queryStringPosition;

    $modifierToken = new Zend_Search_Lucene_Search_QueryToken(
      Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
      $this->_queryString[$position],
      $position
    );

    $this->_lexemes[] = $modifierToken;
  }
  /**
   * Emit the collected characters as a word token and clear the buffer.
   *
   * The token position is the character before the current one, i.e. the
   * last character of the word (the current character ended it).
   */
  public function addLexeme()
  {
    $wordToken = new Zend_Search_Lucene_Search_QueryToken(
      Zend_Search_Lucene_Search_QueryToken::TC_WORD,
      $this->_currentLexeme,
      $this->_queryStringPosition - 1
    );

    $this->_lexemes[]     = $wordToken;
    $this->_currentLexeme = '';
  }
  /**
   * Emit the collected characters as a phrase token and clear the buffer.
   *
   * The token position is the current one (the closing quote).
   */
  public function addQuotedLexeme()
  {
    $phraseToken = new Zend_Search_Lucene_Search_QueryToken(
      Zend_Search_Lucene_Search_QueryToken::TC_PHRASE,
      $this->_currentLexeme,
      $this->_queryStringPosition
    );

    $this->_lexemes[]     = $phraseToken;
    $this->_currentLexeme = '';
  }
  /**
   * Emit the collected characters as a number token and clear the buffer.
   *
   * The token position is the character before the current one, i.e. the
   * last character of the number.
   */
  public function addNumberLexeme()
  {
    $numberToken = new Zend_Search_Lucene_Search_QueryToken(
      Zend_Search_Lucene_Search_QueryToken::TC_NUMBER,
      $this->_currentLexeme,
      $this->_queryStringPosition - 1
    );

    $this->_lexemes[]     = $numberToken;
    $this->_currentLexeme = '';
  }
  /**
   * Append the character at the current position to the lexeme buffer.
   */
  public function addLexemeChar()
  {
    $currentChar = $this->_queryString[$this->_queryStringPosition];

    $this->_currentLexeme = $this->_currentLexeme . $currentChar;
  }
  /**
   * Build the "Position is N." suffix appended to error messages.
   *
   * @return string
   */
  private function _positionMsg()
  {
    $position = $this->_queryStringPosition;

    return 'Position is ' . $position . '.';
  }
  407. /*********************************************************************
  408. * Syntax errors actions
  409. *********************************************************************/
  /**
   * Error action: a lexeme modifier is followed by a character that is not
   * allowed after it.
   *
   * @throws Zend_Search_Lucene_Search_QueryParserException always
   */
  public function lexModifierErrException()
  {
    require_once 'Zend/Search/Lucene/Search/QueryParserException.php';

    $message = 'Lexeme modifier character can be followed only by number, white space or query syntax element. '
             . $this->_positionMsg();

    throw new Zend_Search_Lucene_Search_QueryParserException($message);
  }
  /**
   * Error action: an unescaped quote was found inside a lexeme.
   *
   * @throws Zend_Search_Lucene_Search_QueryParserException always
   */
  public function quoteWithinLexemeErrException()
  {
    require_once 'Zend/Search/Lucene/Search/QueryParserException.php';

    $message = 'Quote within lexeme must be escaped by \'\\\' char. ' . $this->_positionMsg();

    throw new Zend_Search_Lucene_Search_QueryParserException($message);
  }
  /**
   * Error action: a number is followed by a character that cannot be part
   * of it (escape char, quote, a second decimal point or a letter).
   *
   * @throws Zend_Search_Lucene_Search_QueryParserException always
   */
  public function wrongNumberErrException()
  {
    require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
    // Separate the sentence from the position text with a space, like the
    // other error messages of this class do.
    throw new Zend_Search_Lucene_Search_QueryParserException('Wrong number syntax. ' . $this->_positionMsg());
  }
  425. }