QueryLexer.php 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Search
  18. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /** Zend_Search_Lucene_FSM */
  23. require_once 'Zend/Search/Lucene/FSM.php';
  24. /** Zend_Search_Lucene_Search_QueryToken */
  25. require_once 'Zend/Search/Lucene/Search/QueryToken.php';
  26. /**
  27. * @category Zend
  28. * @package Zend_Search_Lucene
  29. * @subpackage Search
  30. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  31. * @license http://framework.zend.com/license/new-bsd New BSD License
  32. */
  33. class Zend_Search_Lucene_Search_QueryLexer extends Zend_Search_Lucene_FSM
  34. {
  35. /** State Machine states */
  36. const ST_WHITE_SPACE = 0;
  37. const ST_SYNT_LEXEME = 1;
  38. const ST_LEXEME = 2;
  39. const ST_QUOTED_LEXEME = 3;
  40. const ST_ESCAPED_CHAR = 4;
  41. const ST_ESCAPED_QCHAR = 5;
  42. const ST_LEXEME_MODIFIER = 6;
  43. const ST_NUMBER = 7;
  44. const ST_MANTISSA = 8;
  45. const ST_ERROR = 9;
  46. /** Input symbols */
  47. const IN_WHITE_SPACE = 0;
  48. const IN_SYNT_CHAR = 1;
  49. const IN_LEXEME_MODIFIER = 2;
  50. const IN_ESCAPE_CHAR = 3;
  51. const IN_QUOTE = 4;
  52. const IN_DECIMAL_POINT = 5;
  53. const IN_ASCII_DIGIT = 6;
  54. const IN_CHAR = 7;
  55. const IN_MUTABLE_CHAR = 8;
  56. const QUERY_WHITE_SPACE_CHARS = " \n\r\t";
  57. const QUERY_SYNT_CHARS = ':()[]{}!|&';
  58. const QUERY_MUTABLE_CHARS = '+-';
  59. const QUERY_DOUBLECHARLEXEME_CHARS = '|&';
  60. const QUERY_LEXEMEMODIFIER_CHARS = '~^';
  61. const QUERY_ASCIIDIGITS_CHARS = '0123456789';
  62. /**
  63. * List of recognized lexemes
  64. *
  65. * @var array
  66. */
  67. private $_lexemes;
  68. /**
  69. * Query string (array of single- or non single-byte characters)
  70. *
  71. * @var array
  72. */
  73. private $_queryString;
  74. /**
  75. * Current position within a query string
  76. * Used to create appropriate error messages
  77. *
  78. * @var integer
  79. */
  80. private $_queryStringPosition;
  81. /**
  82. * Recognized part of current lexeme
  83. *
  84. * @var string
  85. */
  86. private $_currentLexeme;
  87. public function __construct()
  88. {
  89. parent::__construct( array(self::ST_WHITE_SPACE,
  90. self::ST_SYNT_LEXEME,
  91. self::ST_LEXEME,
  92. self::ST_QUOTED_LEXEME,
  93. self::ST_ESCAPED_CHAR,
  94. self::ST_ESCAPED_QCHAR,
  95. self::ST_LEXEME_MODIFIER,
  96. self::ST_NUMBER,
  97. self::ST_MANTISSA,
  98. self::ST_ERROR),
  99. array(self::IN_WHITE_SPACE,
  100. self::IN_SYNT_CHAR,
  101. self::IN_MUTABLE_CHAR,
  102. self::IN_LEXEME_MODIFIER,
  103. self::IN_ESCAPE_CHAR,
  104. self::IN_QUOTE,
  105. self::IN_DECIMAL_POINT,
  106. self::IN_ASCII_DIGIT,
  107. self::IN_CHAR));
  108. $lexemeModifierErrorAction = new Zend_Search_Lucene_FSMAction($this, 'lexModifierErrException');
  109. $quoteWithinLexemeErrorAction = new Zend_Search_Lucene_FSMAction($this, 'quoteWithinLexemeErrException');
  110. $wrongNumberErrorAction = new Zend_Search_Lucene_FSMAction($this, 'wrongNumberErrException');
  111. $this->addRules(array( array(self::ST_WHITE_SPACE, self::IN_WHITE_SPACE, self::ST_WHITE_SPACE),
  112. array(self::ST_WHITE_SPACE, self::IN_SYNT_CHAR, self::ST_SYNT_LEXEME),
  113. array(self::ST_WHITE_SPACE, self::IN_MUTABLE_CHAR, self::ST_SYNT_LEXEME),
  114. array(self::ST_WHITE_SPACE, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
  115. array(self::ST_WHITE_SPACE, self::IN_ESCAPE_CHAR, self::ST_ESCAPED_CHAR),
  116. array(self::ST_WHITE_SPACE, self::IN_QUOTE, self::ST_QUOTED_LEXEME),
  117. array(self::ST_WHITE_SPACE, self::IN_DECIMAL_POINT, self::ST_LEXEME),
  118. array(self::ST_WHITE_SPACE, self::IN_ASCII_DIGIT, self::ST_LEXEME),
  119. array(self::ST_WHITE_SPACE, self::IN_CHAR, self::ST_LEXEME)
  120. ));
  121. $this->addRules(array( array(self::ST_SYNT_LEXEME, self::IN_WHITE_SPACE, self::ST_WHITE_SPACE),
  122. array(self::ST_SYNT_LEXEME, self::IN_SYNT_CHAR, self::ST_SYNT_LEXEME),
  123. array(self::ST_SYNT_LEXEME, self::IN_MUTABLE_CHAR, self::ST_SYNT_LEXEME),
  124. array(self::ST_SYNT_LEXEME, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
  125. array(self::ST_SYNT_LEXEME, self::IN_ESCAPE_CHAR, self::ST_ESCAPED_CHAR),
  126. array(self::ST_SYNT_LEXEME, self::IN_QUOTE, self::ST_QUOTED_LEXEME),
  127. array(self::ST_SYNT_LEXEME, self::IN_DECIMAL_POINT, self::ST_LEXEME),
  128. array(self::ST_SYNT_LEXEME, self::IN_ASCII_DIGIT, self::ST_LEXEME),
  129. array(self::ST_SYNT_LEXEME, self::IN_CHAR, self::ST_LEXEME)
  130. ));
  131. $this->addRules(array( array(self::ST_LEXEME, self::IN_WHITE_SPACE, self::ST_WHITE_SPACE),
  132. array(self::ST_LEXEME, self::IN_SYNT_CHAR, self::ST_SYNT_LEXEME),
  133. array(self::ST_LEXEME, self::IN_MUTABLE_CHAR, self::ST_LEXEME),
  134. array(self::ST_LEXEME, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
  135. array(self::ST_LEXEME, self::IN_ESCAPE_CHAR, self::ST_ESCAPED_CHAR),
  136. // IN_QUOTE not allowed
  137. array(self::ST_LEXEME, self::IN_QUOTE, self::ST_ERROR, $quoteWithinLexemeErrorAction),
  138. array(self::ST_LEXEME, self::IN_DECIMAL_POINT, self::ST_LEXEME),
  139. array(self::ST_LEXEME, self::IN_ASCII_DIGIT, self::ST_LEXEME),
  140. array(self::ST_LEXEME, self::IN_CHAR, self::ST_LEXEME)
  141. ));
  142. $this->addRules(array( array(self::ST_QUOTED_LEXEME, self::IN_WHITE_SPACE, self::ST_QUOTED_LEXEME),
  143. array(self::ST_QUOTED_LEXEME, self::IN_SYNT_CHAR, self::ST_QUOTED_LEXEME),
  144. array(self::ST_QUOTED_LEXEME, self::IN_MUTABLE_CHAR, self::ST_QUOTED_LEXEME),
  145. array(self::ST_QUOTED_LEXEME, self::IN_LEXEME_MODIFIER, self::ST_QUOTED_LEXEME),
  146. array(self::ST_QUOTED_LEXEME, self::IN_ESCAPE_CHAR, self::ST_ESCAPED_QCHAR),
  147. array(self::ST_QUOTED_LEXEME, self::IN_QUOTE, self::ST_WHITE_SPACE),
  148. array(self::ST_QUOTED_LEXEME, self::IN_DECIMAL_POINT, self::ST_QUOTED_LEXEME),
  149. array(self::ST_QUOTED_LEXEME, self::IN_ASCII_DIGIT, self::ST_QUOTED_LEXEME),
  150. array(self::ST_QUOTED_LEXEME, self::IN_CHAR, self::ST_QUOTED_LEXEME)
  151. ));
  152. $this->addRules(array( array(self::ST_ESCAPED_CHAR, self::IN_WHITE_SPACE, self::ST_LEXEME),
  153. array(self::ST_ESCAPED_CHAR, self::IN_SYNT_CHAR, self::ST_LEXEME),
  154. array(self::ST_ESCAPED_CHAR, self::IN_MUTABLE_CHAR, self::ST_LEXEME),
  155. array(self::ST_ESCAPED_CHAR, self::IN_LEXEME_MODIFIER, self::ST_LEXEME),
  156. array(self::ST_ESCAPED_CHAR, self::IN_ESCAPE_CHAR, self::ST_LEXEME),
  157. array(self::ST_ESCAPED_CHAR, self::IN_QUOTE, self::ST_LEXEME),
  158. array(self::ST_ESCAPED_CHAR, self::IN_DECIMAL_POINT, self::ST_LEXEME),
  159. array(self::ST_ESCAPED_CHAR, self::IN_ASCII_DIGIT, self::ST_LEXEME),
  160. array(self::ST_ESCAPED_CHAR, self::IN_CHAR, self::ST_LEXEME)
  161. ));
  162. $this->addRules(array( array(self::ST_ESCAPED_QCHAR, self::IN_WHITE_SPACE, self::ST_QUOTED_LEXEME),
  163. array(self::ST_ESCAPED_QCHAR, self::IN_SYNT_CHAR, self::ST_QUOTED_LEXEME),
  164. array(self::ST_ESCAPED_QCHAR, self::IN_MUTABLE_CHAR, self::ST_QUOTED_LEXEME),
  165. array(self::ST_ESCAPED_QCHAR, self::IN_LEXEME_MODIFIER, self::ST_QUOTED_LEXEME),
  166. array(self::ST_ESCAPED_QCHAR, self::IN_ESCAPE_CHAR, self::ST_QUOTED_LEXEME),
  167. array(self::ST_ESCAPED_QCHAR, self::IN_QUOTE, self::ST_QUOTED_LEXEME),
  168. array(self::ST_ESCAPED_QCHAR, self::IN_DECIMAL_POINT, self::ST_QUOTED_LEXEME),
  169. array(self::ST_ESCAPED_QCHAR, self::IN_ASCII_DIGIT, self::ST_QUOTED_LEXEME),
  170. array(self::ST_ESCAPED_QCHAR, self::IN_CHAR, self::ST_QUOTED_LEXEME)
  171. ));
  172. $this->addRules(array( array(self::ST_LEXEME_MODIFIER, self::IN_WHITE_SPACE, self::ST_WHITE_SPACE),
  173. array(self::ST_LEXEME_MODIFIER, self::IN_SYNT_CHAR, self::ST_SYNT_LEXEME),
  174. array(self::ST_LEXEME_MODIFIER, self::IN_MUTABLE_CHAR, self::ST_SYNT_LEXEME),
  175. array(self::ST_LEXEME_MODIFIER, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
  176. // IN_ESCAPE_CHAR not allowed
  177. array(self::ST_LEXEME_MODIFIER, self::IN_ESCAPE_CHAR, self::ST_ERROR, $lexemeModifierErrorAction),
  178. // IN_QUOTE not allowed
  179. array(self::ST_LEXEME_MODIFIER, self::IN_QUOTE, self::ST_ERROR, $lexemeModifierErrorAction),
  180. array(self::ST_LEXEME_MODIFIER, self::IN_DECIMAL_POINT, self::ST_MANTISSA),
  181. array(self::ST_LEXEME_MODIFIER, self::IN_ASCII_DIGIT, self::ST_NUMBER),
  182. // IN_CHAR not allowed
  183. array(self::ST_LEXEME_MODIFIER, self::IN_CHAR, self::ST_ERROR, $lexemeModifierErrorAction),
  184. ));
  185. $this->addRules(array( array(self::ST_NUMBER, self::IN_WHITE_SPACE, self::ST_WHITE_SPACE),
  186. array(self::ST_NUMBER, self::IN_SYNT_CHAR, self::ST_SYNT_LEXEME),
  187. array(self::ST_NUMBER, self::IN_MUTABLE_CHAR, self::ST_SYNT_LEXEME),
  188. array(self::ST_NUMBER, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
  189. // IN_ESCAPE_CHAR not allowed
  190. array(self::ST_NUMBER, self::IN_ESCAPE_CHAR, self::ST_ERROR, $wrongNumberErrorAction),
  191. // IN_QUOTE not allowed
  192. array(self::ST_NUMBER, self::IN_QUOTE, self::ST_ERROR, $wrongNumberErrorAction),
  193. array(self::ST_NUMBER, self::IN_DECIMAL_POINT, self::ST_MANTISSA),
  194. array(self::ST_NUMBER, self::IN_ASCII_DIGIT, self::ST_NUMBER),
  195. // IN_CHAR not allowed
  196. array(self::ST_NUMBER, self::IN_CHAR, self::ST_ERROR, $wrongNumberErrorAction),
  197. ));
  198. $this->addRules(array( array(self::ST_MANTISSA, self::IN_WHITE_SPACE, self::ST_WHITE_SPACE),
  199. array(self::ST_MANTISSA, self::IN_SYNT_CHAR, self::ST_SYNT_LEXEME),
  200. array(self::ST_MANTISSA, self::IN_MUTABLE_CHAR, self::ST_SYNT_LEXEME),
  201. array(self::ST_MANTISSA, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
  202. // IN_ESCAPE_CHAR not allowed
  203. array(self::ST_MANTISSA, self::IN_ESCAPE_CHAR, self::ST_ERROR, $wrongNumberErrorAction),
  204. // IN_QUOTE not allowed
  205. array(self::ST_MANTISSA, self::IN_QUOTE, self::ST_ERROR, $wrongNumberErrorAction),
  206. // IN_DECIMAL_POINT not allowed
  207. array(self::ST_MANTISSA, self::IN_DECIMAL_POINT, self::ST_ERROR, $wrongNumberErrorAction),
  208. array(self::ST_MANTISSA, self::IN_ASCII_DIGIT, self::ST_MANTISSA),
  209. // IN_CHAR not allowed
  210. array(self::ST_MANTISSA, self::IN_CHAR, self::ST_ERROR, $wrongNumberErrorAction),
  211. ));
  212. /** Actions */
  213. $syntaxLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addQuerySyntaxLexeme');
  214. $lexemeModifierAction = new Zend_Search_Lucene_FSMAction($this, 'addLexemeModifier');
  215. $addLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addLexeme');
  216. $addQuotedLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addQuotedLexeme');
  217. $addNumberLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addNumberLexeme');
  218. $addLexemeCharAction = new Zend_Search_Lucene_FSMAction($this, 'addLexemeChar');
  219. /** Syntax lexeme */
  220. $this->addEntryAction(self::ST_SYNT_LEXEME, $syntaxLexemeAction);
  221. // Two lexemes in succession
  222. $this->addTransitionAction(self::ST_SYNT_LEXEME, self::ST_SYNT_LEXEME, $syntaxLexemeAction);
  223. /** Lexeme */
  224. $this->addEntryAction(self::ST_LEXEME, $addLexemeCharAction);
  225. $this->addTransitionAction(self::ST_LEXEME, self::ST_LEXEME, $addLexemeCharAction);
  226. // ST_ESCAPED_CHAR => ST_LEXEME transition is covered by ST_LEXEME entry action
  227. $this->addTransitionAction(self::ST_LEXEME, self::ST_WHITE_SPACE, $addLexemeAction);
  228. $this->addTransitionAction(self::ST_LEXEME, self::ST_SYNT_LEXEME, $addLexemeAction);
  229. $this->addTransitionAction(self::ST_LEXEME, self::ST_QUOTED_LEXEME, $addLexemeAction);
  230. $this->addTransitionAction(self::ST_LEXEME, self::ST_LEXEME_MODIFIER, $addLexemeAction);
  231. $this->addTransitionAction(self::ST_LEXEME, self::ST_NUMBER, $addLexemeAction);
  232. $this->addTransitionAction(self::ST_LEXEME, self::ST_MANTISSA, $addLexemeAction);
  233. /** Quoted lexeme */
  234. // We don't need entry action (skeep quote)
  235. $this->addTransitionAction(self::ST_QUOTED_LEXEME, self::ST_QUOTED_LEXEME, $addLexemeCharAction);
  236. $this->addTransitionAction(self::ST_ESCAPED_QCHAR, self::ST_QUOTED_LEXEME, $addLexemeCharAction);
  237. // Closing quote changes state to the ST_WHITE_SPACE other states are not used
  238. $this->addTransitionAction(self::ST_QUOTED_LEXEME, self::ST_WHITE_SPACE, $addQuotedLexemeAction);
  239. /** Lexeme modifier */
  240. $this->addEntryAction(self::ST_LEXEME_MODIFIER, $lexemeModifierAction);
  241. /** Number */
  242. $this->addEntryAction(self::ST_NUMBER, $addLexemeCharAction);
  243. $this->addEntryAction(self::ST_MANTISSA, $addLexemeCharAction);
  244. $this->addTransitionAction(self::ST_NUMBER, self::ST_NUMBER, $addLexemeCharAction);
  245. // ST_NUMBER => ST_MANTISSA transition is covered by ST_MANTISSA entry action
  246. $this->addTransitionAction(self::ST_MANTISSA, self::ST_MANTISSA, $addLexemeCharAction);
  247. $this->addTransitionAction(self::ST_NUMBER, self::ST_WHITE_SPACE, $addNumberLexemeAction);
  248. $this->addTransitionAction(self::ST_NUMBER, self::ST_SYNT_LEXEME, $addNumberLexemeAction);
  249. $this->addTransitionAction(self::ST_NUMBER, self::ST_LEXEME_MODIFIER, $addNumberLexemeAction);
  250. $this->addTransitionAction(self::ST_MANTISSA, self::ST_WHITE_SPACE, $addNumberLexemeAction);
  251. $this->addTransitionAction(self::ST_MANTISSA, self::ST_SYNT_LEXEME, $addNumberLexemeAction);
  252. $this->addTransitionAction(self::ST_MANTISSA, self::ST_LEXEME_MODIFIER, $addNumberLexemeAction);
  253. }
  254. /**
  255. * Translate input char to an input symbol of state machine
  256. *
  257. * @param string $char
  258. * @return integer
  259. */
  260. private function _translateInput($char)
  261. {
  262. if (strpos(self::QUERY_WHITE_SPACE_CHARS, $char) !== false) { return self::IN_WHITE_SPACE;
  263. } else if (strpos(self::QUERY_SYNT_CHARS, $char) !== false) { return self::IN_SYNT_CHAR;
  264. } else if (strpos(self::QUERY_MUTABLE_CHARS, $char) !== false) { return self::IN_MUTABLE_CHAR;
  265. } else if (strpos(self::QUERY_LEXEMEMODIFIER_CHARS, $char) !== false) { return self::IN_LEXEME_MODIFIER;
  266. } else if (strpos(self::QUERY_ASCIIDIGITS_CHARS, $char) !== false) { return self::IN_ASCII_DIGIT;
  267. } else if ($char === '"' ) { return self::IN_QUOTE;
  268. } else if ($char === '.' ) { return self::IN_DECIMAL_POINT;
  269. } else if ($char === '\\') { return self::IN_ESCAPE_CHAR;
  270. } else { return self::IN_CHAR;
  271. }
  272. }
  273. /**
  274. * This method is used to tokenize query string into lexemes
  275. *
  276. * @param string $inputString
  277. * @param string $encoding
  278. * @return array
  279. * @throws Zend_Search_Lucene_Search_QueryParserException
  280. */
  281. public function tokenize($inputString, $encoding)
  282. {
  283. $this->reset();
  284. $this->_lexemes = array();
  285. $this->_queryString = array();
  286. if (PHP_OS == 'AIX' && $encoding == '') {
  287. $encoding = 'ISO8859-1';
  288. }
  289. $strLength = iconv_strlen($inputString, $encoding);
  290. // Workaround for iconv_substr bug
  291. $inputString .= ' ';
  292. for ($count = 0; $count < $strLength; $count++) {
  293. $this->_queryString[$count] = iconv_substr($inputString, $count, 1, $encoding);
  294. }
  295. for ($this->_queryStringPosition = 0;
  296. $this->_queryStringPosition < count($this->_queryString);
  297. $this->_queryStringPosition++) {
  298. $this->process($this->_translateInput($this->_queryString[$this->_queryStringPosition]));
  299. }
  300. $this->process(self::IN_WHITE_SPACE);
  301. if ($this->getState() != self::ST_WHITE_SPACE) {
  302. require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  303. throw new Zend_Search_Lucene_Search_QueryParserException('Unexpected end of query');
  304. }
  305. $this->_queryString = null;
  306. return $this->_lexemes;
  307. }
  308. /*********************************************************************
  309. * Actions implementation
  310. *
  311. * Actions affect on recognized lexemes list
  312. *********************************************************************/
  313. /**
  314. * Add query syntax lexeme
  315. *
  316. * @throws Zend_Search_Lucene_Search_QueryParserException
  317. */
  318. public function addQuerySyntaxLexeme()
  319. {
  320. $lexeme = $this->_queryString[$this->_queryStringPosition];
  321. // Process two char lexemes
  322. if (strpos(self::QUERY_DOUBLECHARLEXEME_CHARS, $lexeme) !== false) {
  323. // increase current position in a query string
  324. $this->_queryStringPosition++;
  325. // check,
  326. if ($this->_queryStringPosition == count($this->_queryString) ||
  327. $this->_queryString[$this->_queryStringPosition] != $lexeme) {
  328. require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  329. throw new Zend_Search_Lucene_Search_QueryParserException('Two chars lexeme expected. ' . $this->_positionMsg());
  330. }
  331. // duplicate character
  332. $lexeme .= $lexeme;
  333. }
  334. $token = new Zend_Search_Lucene_Search_QueryToken(
  335. Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
  336. $lexeme,
  337. $this->_queryStringPosition);
  338. // Skip this lexeme if it's a field indicator ':' and treat previous as 'field' instead of 'word'
  339. if ($token->type == Zend_Search_Lucene_Search_QueryToken::TT_FIELD_INDICATOR) {
  340. $token = array_pop($this->_lexemes);
  341. if ($token === null || $token->type != Zend_Search_Lucene_Search_QueryToken::TT_WORD) {
  342. require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  343. throw new Zend_Search_Lucene_Search_QueryParserException('Field mark \':\' must follow field name. ' . $this->_positionMsg());
  344. }
  345. $token->type = Zend_Search_Lucene_Search_QueryToken::TT_FIELD;
  346. }
  347. $this->_lexemes[] = $token;
  348. }
  349. /**
  350. * Add lexeme modifier
  351. */
  352. public function addLexemeModifier()
  353. {
  354. $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
  355. Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
  356. $this->_queryString[$this->_queryStringPosition],
  357. $this->_queryStringPosition);
  358. }
  359. /**
  360. * Add lexeme
  361. */
  362. public function addLexeme()
  363. {
  364. $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
  365. Zend_Search_Lucene_Search_QueryToken::TC_WORD,
  366. $this->_currentLexeme,
  367. $this->_queryStringPosition - 1);
  368. $this->_currentLexeme = '';
  369. }
  370. /**
  371. * Add quoted lexeme
  372. */
  373. public function addQuotedLexeme()
  374. {
  375. $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
  376. Zend_Search_Lucene_Search_QueryToken::TC_PHRASE,
  377. $this->_currentLexeme,
  378. $this->_queryStringPosition);
  379. $this->_currentLexeme = '';
  380. }
  381. /**
  382. * Add number lexeme
  383. */
  384. public function addNumberLexeme()
  385. {
  386. $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
  387. Zend_Search_Lucene_Search_QueryToken::TC_NUMBER,
  388. $this->_currentLexeme,
  389. $this->_queryStringPosition - 1);
  390. $this->_currentLexeme = '';
  391. }
  392. /**
  393. * Extend lexeme by one char
  394. */
  395. public function addLexemeChar()
  396. {
  397. $this->_currentLexeme .= $this->_queryString[$this->_queryStringPosition];
  398. }
  399. /**
  400. * Position message
  401. *
  402. * @return string
  403. */
  404. private function _positionMsg()
  405. {
  406. return 'Position is ' . $this->_queryStringPosition . '.';
  407. }
  408. /*********************************************************************
  409. * Syntax errors actions
  410. *********************************************************************/
  411. public function lexModifierErrException()
  412. {
  413. require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  414. throw new Zend_Search_Lucene_Search_QueryParserException('Lexeme modifier character can be followed only by number, white space or query syntax element. ' . $this->_positionMsg());
  415. }
  416. public function quoteWithinLexemeErrException()
  417. {
  418. require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  419. throw new Zend_Search_Lucene_Search_QueryParserException('Quote within lexeme must be escaped by \'\\\' char. ' . $this->_positionMsg());
  420. }
  421. public function wrongNumberErrException()
  422. {
  423. require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  424. throw new Zend_Search_Lucene_Search_QueryParserException('Wrong number syntax.' . $this->_positionMsg());
  425. }
  426. }