BooleanExpressionRecognizer.php 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Search
  18. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /** Zend_Search_Lucene_FSM */
  23. require_once 'Zend/Search/Lucene/FSM.php';
  24. /** Zend_Search_Lucene_Search_QueryToken */
  25. require_once 'Zend/Search/Lucene/Search/QueryToken.php';
  26. /** Zend_Search_Lucene_Search_QueryParser */
  27. require_once 'Zend/Search/Lucene/Search/QueryParser.php';
  /**
   * Finite-state recognizer for flat boolean query expressions.
   *
   * Literals and the NOT / AND / OR operators are fed in one at a time through
   * processLiteral() and processOperator(). finishExpression() then returns the
   * expression as a list of conjunctions:
   *  - OR closes the current conjunction and starts a new one;
   *  - AND keeps adding literals to the current conjunction;
   *  - NOT flips the sign recorded for the next literal;
   *  - an omitted operator behaves like AND or OR depending on
   *    Zend_Search_Lucene_Search_QueryParser::getDefaultOperator().
   *
   * @category   Zend
   * @package    Zend_Search_Lucene
   * @subpackage Search
   * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
   * @license    http://framework.zend.com/license/new-bsd     New BSD License
   */
  class Zend_Search_Lucene_Search_BooleanExpressionRecognizer extends Zend_Search_Lucene_FSM
  {
      /** State Machine states */
      const ST_START        = 0;
      const ST_LITERAL      = 1;
      const ST_NOT_OPERATOR = 2;
      const ST_AND_OPERATOR = 3;
      const ST_OR_OPERATOR  = 4;

      /** Input symbols */
      const IN_LITERAL      = 0;
      const IN_NOT_OPERATOR = 1;
      const IN_AND_OPERATOR = 2;
      const IN_OR_OPERATOR  = 3;

      /**
       * NOT operator signal
       *
       * Set by notOperatorAction() and consumed (reset to false) by the next
       * literalAction().
       *
       * @var boolean
       */
      private $_negativeLiteral = false;

      /**
       * Current literal
       *
       * Stored by processLiteral() right before the IN_LITERAL input is processed.
       *
       * @var mixed
       */
      private $_literal;

      /**
       * Set of already closed boolean query conjunctions
       *
       * Each conjunction is an array of conjunction elements.
       * Each conjunction element is a two-element array:
       * array(<literal>, <sign>)
       *
       * <sign> is TRUE for a plain literal and FALSE for a literal that was
       * preceded by the NOT operator (see literalAction()).
       *
       * So, it has a structure:
       * array( array( array(<literal>, <sign>), // first literal of first conjunction
       *               array(<literal>, <sign>), // second literal of first conjunction
       *               ...
       *               array(<literal>, <sign>)
       *             ), // end of first conjunction
       *        array( array(<literal>, <sign>), // first literal of second conjunction
       *               array(<literal>, <sign>), // second literal of second conjunction
       *               ...
       *               array(<literal>, <sign>)
       *             ), // end of second conjunction
       *        ...
       *      ) // end of structure
       *
       * @var array
       */
      private $_conjunctions = array();

      /**
       * Current (still open) conjunction
       *
       * @var array
       */
      private $_currentConjunction = array();

      /**
       * Object constructor
       *
       * Registers the states, the input symbols, the transition rules and the
       * actions of the recognizer.
       */
      public function __construct()
      {
          parent::__construct(
              array(
                  self::ST_START,
                  self::ST_LITERAL,
                  self::ST_NOT_OPERATOR,
                  self::ST_AND_OPERATOR,
                  self::ST_OR_OPERATOR
              ),
              array(
                  self::IN_LITERAL,
                  self::IN_NOT_OPERATOR,
                  self::IN_AND_OPERATOR,
                  self::IN_OR_OPERATOR
              )
          );

          // Actions attached directly to the two "operator omitted" rules below
          $emptyOperatorAction    = new Zend_Search_Lucene_FSMAction($this, 'emptyOperatorAction');
          $emptyNotOperatorAction = new Zend_Search_Lucene_FSMAction($this, 'emptyNotOperatorAction');

          // Each rule is array(<source state>, <input symbol>, <target state>[, <action>])
          $this->addRules(array(
              array(self::ST_START,        self::IN_LITERAL,      self::ST_LITERAL),
              array(self::ST_START,        self::IN_NOT_OPERATOR, self::ST_NOT_OPERATOR),

              array(self::ST_LITERAL,      self::IN_AND_OPERATOR, self::ST_AND_OPERATOR),
              array(self::ST_LITERAL,      self::IN_OR_OPERATOR,  self::ST_OR_OPERATOR),
              // Literal or NOT directly after a literal: the operator was omitted
              array(self::ST_LITERAL,      self::IN_LITERAL,      self::ST_LITERAL,      $emptyOperatorAction),
              array(self::ST_LITERAL,      self::IN_NOT_OPERATOR, self::ST_NOT_OPERATOR, $emptyNotOperatorAction),

              array(self::ST_NOT_OPERATOR, self::IN_LITERAL,      self::ST_LITERAL),

              array(self::ST_AND_OPERATOR, self::IN_LITERAL,      self::ST_LITERAL),
              array(self::ST_AND_OPERATOR, self::IN_NOT_OPERATOR, self::ST_NOT_OPERATOR),

              array(self::ST_OR_OPERATOR,  self::IN_LITERAL,      self::ST_LITERAL),
              array(self::ST_OR_OPERATOR,  self::IN_NOT_OPERATOR, self::ST_NOT_OPERATOR),
          ));

          $notOperatorAction = new Zend_Search_Lucene_FSMAction($this, 'notOperatorAction');
          $orOperatorAction  = new Zend_Search_Lucene_FSMAction($this, 'orOperatorAction');
          $literalAction     = new Zend_Search_Lucene_FSMAction($this, 'literalAction');

          $this->addEntryAction(self::ST_NOT_OPERATOR, $notOperatorAction);
          $this->addEntryAction(self::ST_OR_OPERATOR,  $orOperatorAction);
          $this->addEntryAction(self::ST_LITERAL,      $literalAction);
      }

      /**
       * Process next operator.
       *
       * Operators are defined by class constants: IN_AND_OPERATOR, IN_OR_OPERATOR and IN_NOT_OPERATOR
       *
       * NOTE(review): inputs that have no rule for the current state are handled by
       * Zend_Search_Lucene_FSM::process(), which is not visible here - confirm how
       * it reports them.
       *
       * @param integer $operator
       */
      public function processOperator($operator)
      {
          $this->process($operator);
      }

      /**
       * Process expression literal.
       *
       * The literal is remembered first so that the actions triggered by the
       * IN_LITERAL input can pick it up.
       *
       * @param mixed $literal
       */
      public function processLiteral($literal)
      {
          $this->_literal = $literal;

          $this->process(self::IN_LITERAL);
      }

      /**
       * Finish an expression and return result
       *
       * Result is a set of boolean query conjunctions: all closed conjunctions
       * followed by the conjunction that is still open.
       *
       * Each conjunction is an array of conjunction elements.
       * Each conjunction element is a two-element array:
       * array(<literal>, <sign>)
       *
       * <sign> is TRUE for a plain literal and FALSE for a literal that was
       * preceded by the NOT operator.
       *
       * So, it has a structure:
       * array( array( array(<literal>, <sign>), // first literal of first conjunction
       *               array(<literal>, <sign>), // second literal of first conjunction
       *               ...
       *               array(<literal>, <sign>)
       *             ), // end of first conjunction
       *        array( array(<literal>, <sign>), // first literal of second conjunction
       *               array(<literal>, <sign>), // second literal of second conjunction
       *               ...
       *               array(<literal>, <sign>)
       *             ), // end of second conjunction
       *        ...
       *      ) // end of structure
       *
       * The recognizer state is not modified, so the method may be called more
       * than once and always reports the same expression.
       *
       * @return array
       * @throws Zend_Search_Lucene_Exception if the expression does not end with a literal
       */
      public function finishExpression()
      {
          if ($this->getState() != self::ST_LITERAL) {
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception('Literal expected.');
          }

          // Work on a copy: appending to $this->_conjunctions here would add the
          // open conjunction again on every further call (and again on a later OR).
          $conjunctions   = $this->_conjunctions;
          $conjunctions[] = $this->_currentConjunction;

          return $conjunctions;
      }

      /*********************************************************************
       * Actions implementation
       *********************************************************************/

      /**
       * default (omitted) operator processing
       *
       * Used for the ST_LITERAL -> ST_LITERAL rule. The literal is added
       * explicitly here.
       * NOTE(review): presumably the ST_LITERAL entry action is not fired for a
       * transition that stays in the same state - confirm in Zend_Search_Lucene_FSM.
       */
      public function emptyOperatorAction()
      {
          $this->_applyDefaultOperator();

          // Process literal
          $this->literalAction();
      }

      /**
       * default (omitted) + NOT operator processing
       *
       * Used for the ST_LITERAL -> ST_NOT_OPERATOR rule (a NOT that directly
       * follows a literal).
       */
      public function emptyNotOperatorAction()
      {
          $this->_applyDefaultOperator();

          // Process NOT operator
          $this->notOperatorAction();
      }

      /**
       * NOT operator processing
       *
       * Marks the next literal as negative.
       */
      public function notOperatorAction()
      {
          $this->_negativeLiteral = true;
      }

      /**
       * OR operator processing
       * Close current conjunction and start a new, empty one
       */
      public function orOperatorAction()
      {
          $this->_conjunctions[]     = $this->_currentConjunction;
          $this->_currentConjunction = array();
      }

      /**
       * Literal processing
       *
       * Appends array(<literal>, <sign>) to the current conjunction, where <sign>
       * is FALSE if a NOT operator is pending and TRUE otherwise.
       */
      public function literalAction()
      {
          // Add literal to the current conjunction
          $this->_currentConjunction[] = array($this->_literal, !$this->_negativeLiteral);

          // Switch off negative signal
          $this->_negativeLiteral = false;
      }

      /**
       * Apply the query parser default operator in place of an omitted one.
       *
       * Nothing has to be done for AND (the current conjunction simply goes on);
       * any other default operator is treated as OR and closes the conjunction.
       */
      private function _applyDefaultOperator()
      {
          if (Zend_Search_Lucene_Search_QueryParser::getDefaultOperator() != Zend_Search_Lucene_Search_QueryParser::B_AND) {
              $this->orOperatorAction();
          }
      }
  }