QueryParserContext.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Search
  18. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /** Zend_Search_Lucene_FSM */
  23. require_once 'Zend/Search/Lucene/FSM.php';
  24. /** Zend_Search_Lucene_Index_Term */
  25. require_once 'Zend/Search/Lucene/Index/Term.php';
  26. /** Zend_Search_Lucene_Search_QueryToken */
  27. require_once 'Zend/Search/Lucene/Search/QueryToken.php';
  28. /** Zend_Search_Lucene_Search_Query_Term */
  29. require_once 'Zend/Search/Lucene/Search/Query/Term.php';
  30. /** Zend_Search_Lucene_Search_Query_MultiTerm */
  31. require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
  32. /** Zend_Search_Lucene_Search_Query_Boolean */
  33. require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
  34. /** Zend_Search_Lucene_Search_Query_Phrase */
  35. require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
  36. /** Zend_Search_Lucene_Search_BooleanExpressionRecognizer */
  37. require_once 'Zend/Search/Lucene/Search/BooleanExpressionRecognizer.php';
  38. /** Zend_Search_Lucene_Search_QueryEntry */
  39. require_once 'Zend/Search/Lucene/Search/QueryEntry.php';
  40. /**
  41. * @category Zend
  42. * @package Zend_Search_Lucene
  43. * @subpackage Search
  44. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  45. * @license http://framework.zend.com/license/new-bsd New BSD License
  46. */
  47. class Zend_Search_Lucene_Search_QueryParserContext
  48. {
  49. /**
  50. * Default field for the context.
  51. *
  52. * null means, that term should be searched through all fields
  53. * Zend_Search_Lucene_Search_Query::rewriteQuery($index) transletes such queries to several
  54. *
  55. * @var string|null
  56. */
  57. private $_defaultField;
  58. /**
  59. * Field specified for next entry
  60. *
  61. * @var string
  62. */
  63. private $_nextEntryField = null;
  64. /**
  65. * True means, that term is required.
  66. * False means, that term is prohibited.
  67. * null means, that term is neither prohibited, nor required
  68. *
  69. * @var boolean
  70. */
  71. private $_nextEntrySign = null;
  72. /**
  73. * Entries grouping mode
  74. */
  75. const GM_SIGNS = 0; // Signs mode: '+term1 term2 -term3 +(subquery1) -(subquery2)'
  76. const GM_BOOLEAN = 1; // Boolean operators mode: 'term1 and term2 or (subquery1) and not (subquery2)'
  77. /**
  78. * Grouping mode
  79. *
  80. * @var integer
  81. */
  82. private $_mode = null;
  83. /**
  84. * Entries signs.
  85. * Used in GM_SIGNS grouping mode
  86. *
  87. * @var arrays
  88. */
  89. private $_signs = array();
  90. /**
  91. * Query entries
  92. * Each entry is a Zend_Search_Lucene_Search_QueryEntry object or
  93. * boolean operator (Zend_Search_Lucene_Search_QueryToken class constant)
  94. *
  95. * @var array
  96. */
  97. private $_entries = array();
  98. /**
  99. * Query string encoding
  100. *
  101. * @var string
  102. */
  103. private $_encoding;
  104. /**
  105. * Context object constructor
  106. *
  107. * @param string $encoding
  108. * @param string|null $defaultField
  109. */
  110. public function __construct($encoding, $defaultField = null)
  111. {
  112. $this->_encoding = $encoding;
  113. $this->_defaultField = $defaultField;
  114. }
  115. /**
  116. * Get context default field
  117. *
  118. * @return string|null
  119. */
  120. public function getField()
  121. {
  122. return ($this->_nextEntryField !== null) ? $this->_nextEntryField : $this->_defaultField;
  123. }
  124. /**
  125. * Set field for next entry
  126. *
  127. * @param string $field
  128. */
  129. public function setNextEntryField($field)
  130. {
  131. $this->_nextEntryField = $field;
  132. }
  133. /**
  134. * Set sign for next entry
  135. *
  136. * @param integer $sign
  137. * @throws Zend_Search_Lucene_Exception
  138. */
  139. public function setNextEntrySign($sign)
  140. {
  141. if ($this->_mode === self::GM_BOOLEAN) {
  142. require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  143. throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
  144. }
  145. $this->_mode = self::GM_SIGNS;
  146. if ($sign == Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED) {
  147. $this->_nextEntrySign = true;
  148. } else if ($sign == Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED) {
  149. $this->_nextEntrySign = false;
  150. } else {
  151. require_once 'Zend/Search/Lucene/Exception.php';
  152. throw new Zend_Search_Lucene_Exception('Unrecognized sign type.');
  153. }
  154. }
  155. /**
  156. * Add entry to a query
  157. *
  158. * @param Zend_Search_Lucene_Search_QueryEntry $entry
  159. */
  160. public function addEntry(Zend_Search_Lucene_Search_QueryEntry $entry)
  161. {
  162. if ($this->_mode !== self::GM_BOOLEAN) {
  163. $this->_signs[] = $this->_nextEntrySign;
  164. }
  165. $this->_entries[] = $entry;
  166. $this->_nextEntryField = null;
  167. $this->_nextEntrySign = null;
  168. }
  169. /**
  170. * Process fuzzy search or proximity search modifier
  171. *
  172. * @throws Zend_Search_Lucene_Search_QueryParserException
  173. */
  174. public function processFuzzyProximityModifier($parameter = null)
  175. {
  176. // Check, that modifier has came just after word or phrase
  177. if ($this->_nextEntryField !== null || $this->_nextEntrySign !== null) {
  178. require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  179. throw new Zend_Search_Lucene_Search_QueryParserException('\'~\' modifier must follow word or phrase.');
  180. }
  181. $lastEntry = array_pop($this->_entries);
  182. if (!$lastEntry instanceof Zend_Search_Lucene_Search_QueryEntry) {
  183. // there are no entries or last entry is boolean operator
  184. require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  185. throw new Zend_Search_Lucene_Search_QueryParserException('\'~\' modifier must follow word or phrase.');
  186. }
  187. $lastEntry->processFuzzyProximityModifier($parameter);
  188. $this->_entries[] = $lastEntry;
  189. }
  190. /**
  191. * Set boost factor to the entry
  192. *
  193. * @param float $boostFactor
  194. */
  195. public function boost($boostFactor)
  196. {
  197. // Check, that modifier has came just after word or phrase
  198. if ($this->_nextEntryField !== null || $this->_nextEntrySign !== null) {
  199. require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  200. throw new Zend_Search_Lucene_Search_QueryParserException('\'^\' modifier must follow word, phrase or subquery.');
  201. }
  202. $lastEntry = array_pop($this->_entries);
  203. if (!$lastEntry instanceof Zend_Search_Lucene_Search_QueryEntry) {
  204. // there are no entries or last entry is boolean operator
  205. require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  206. throw new Zend_Search_Lucene_Search_QueryParserException('\'^\' modifier must follow word, phrase or subquery.');
  207. }
  208. $lastEntry->boost($boostFactor);
  209. $this->_entries[] = $lastEntry;
  210. }
  211. /**
  212. * Process logical operator
  213. *
  214. * @param integer $operator
  215. */
  216. public function addLogicalOperator($operator)
  217. {
  218. if ($this->_mode === self::GM_SIGNS) {
  219. require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  220. throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
  221. }
  222. $this->_mode = self::GM_BOOLEAN;
  223. $this->_entries[] = $operator;
  224. }
  225. /**
  226. * Generate 'signs style' query from the context
  227. * '+term1 term2 -term3 +(<subquery1>) ...'
  228. *
  229. * @return Zend_Search_Lucene_Search_Query
  230. */
  231. public function _signStyleExpressionQuery()
  232. {
  233. $query = new Zend_Search_Lucene_Search_Query_Boolean();
  234. if (Zend_Search_Lucene_Search_QueryParser::getDefaultOperator() == Zend_Search_Lucene_Search_QueryParser::B_AND) {
  235. $defaultSign = true; // required
  236. } else {
  237. // Zend_Search_Lucene_Search_QueryParser::B_OR
  238. $defaultSign = null; // optional
  239. }
  240. foreach ($this->_entries as $entryId => $entry) {
  241. $sign = ($this->_signs[$entryId] !== null) ? $this->_signs[$entryId] : $defaultSign;
  242. $query->addSubquery($entry->getQuery($this->_encoding), $sign);
  243. }
  244. return $query;
  245. }
  246. /**
  247. * Generate 'boolean style' query from the context
  248. * 'term1 and term2 or term3 and (<subquery1>) and not (<subquery2>)'
  249. *
  250. * @return Zend_Search_Lucene_Search_Query
  251. * @throws Zend_Search_Lucene
  252. */
  253. private function _booleanExpressionQuery()
  254. {
  255. /**
  256. * We treat each level of an expression as a boolean expression in
  257. * a Disjunctive Normal Form
  258. *
  259. * AND operator has higher precedence than OR
  260. *
  261. * Thus logical query is a disjunction of one or more conjunctions of
  262. * one or more query entries
  263. */
  264. $expressionRecognizer = new Zend_Search_Lucene_Search_BooleanExpressionRecognizer();
  265. require_once 'Zend/Search/Lucene/Exception.php';
  266. try {
  267. foreach ($this->_entries as $entry) {
  268. if ($entry instanceof Zend_Search_Lucene_Search_QueryEntry) {
  269. $expressionRecognizer->processLiteral($entry);
  270. } else {
  271. switch ($entry) {
  272. case Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME:
  273. $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_AND_OPERATOR);
  274. break;
  275. case Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME:
  276. $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_OR_OPERATOR);
  277. break;
  278. case Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME:
  279. $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_NOT_OPERATOR);
  280. break;
  281. default:
  282. throw new Zend_Search_Lucene('Boolean expression error. Unknown operator type.');
  283. }
  284. }
  285. }
  286. $conjuctions = $expressionRecognizer->finishExpression();
  287. } catch (Zend_Search_Exception $e) {
  288. // throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error. Error message: \'' .
  289. // $e->getMessage() . '\'.' );
  290. // It's query syntax error message and it should be user friendly. So FSM message is omitted
  291. require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  292. throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error.');
  293. }
  294. // Remove 'only negative' conjunctions
  295. foreach ($conjuctions as $conjuctionId => $conjuction) {
  296. $nonNegativeEntryFound = false;
  297. foreach ($conjuction as $conjuctionEntry) {
  298. if ($conjuctionEntry[1]) {
  299. $nonNegativeEntryFound = true;
  300. break;
  301. }
  302. }
  303. if (!$nonNegativeEntryFound) {
  304. unset($conjuctions[$conjuctionId]);
  305. }
  306. }
  307. $subqueries = array();
  308. foreach ($conjuctions as $conjuction) {
  309. // Check, if it's a one term conjuction
  310. if (count($conjuction) == 1) {
  311. $subqueries[] = $conjuction[0][0]->getQuery($this->_encoding);
  312. } else {
  313. $subquery = new Zend_Search_Lucene_Search_Query_Boolean();
  314. foreach ($conjuction as $conjuctionEntry) {
  315. $subquery->addSubquery($conjuctionEntry[0]->getQuery($this->_encoding), $conjuctionEntry[1]);
  316. }
  317. $subqueries[] = $subquery;
  318. }
  319. }
  320. if (count($subqueries) == 0) {
  321. return new Zend_Search_Lucene_Search_Query_Insignificant();
  322. }
  323. if (count($subqueries) == 1) {
  324. return $subqueries[0];
  325. }
  326. $query = new Zend_Search_Lucene_Search_Query_Boolean();
  327. foreach ($subqueries as $subquery) {
  328. // Non-requirered entry/subquery
  329. $query->addSubquery($subquery);
  330. }
  331. return $query;
  332. }
  333. /**
  334. * Generate query from current context
  335. *
  336. * @return Zend_Search_Lucene_Search_Query
  337. */
  338. public function getQuery()
  339. {
  340. if ($this->_mode === self::GM_BOOLEAN) {
  341. return $this->_booleanExpressionQuery();
  342. } else {
  343. return $this->_signStyleExpressionQuery();
  344. }
  345. }
  346. }