2
0

Query.php 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */

/**
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
  29. abstract class Zend_Search_Lucene_Search_Query
  30. {
  31. /**
  32. * query boost factor
  33. *
  34. * @var float
  35. */
  36. private $_boost = 1;
  37. /**
  38. * Query weight
  39. *
  40. * @var Zend_Search_Lucene_Search_Weight
  41. */
  42. protected $_weight = null;
  43. /**
  44. * Current highlight color
  45. *
  46. * @var integer
  47. */
  48. private $_currentColorIndex = 0;
  49. /**
  50. * Gets the boost for this clause. Documents matching
  51. * this clause will (in addition to the normal weightings) have their score
  52. * multiplied by boost. The boost is 1.0 by default.
  53. *
  54. * @return float
  55. */
  56. public function getBoost()
  57. {
  58. return $this->_boost;
  59. }
  60. /**
  61. * Sets the boost for this query clause to $boost.
  62. *
  63. * @param float $boost
  64. */
  65. public function setBoost($boost)
  66. {
  67. $this->_boost = $boost;
  68. }
  69. /**
  70. * Score specified document
  71. *
  72. * @param integer $docId
  73. * @param Zend_Search_Lucene_Interface $reader
  74. * @return float
  75. */
  76. abstract public function score($docId, Zend_Search_Lucene_Interface $reader);
  77. /**
  78. * Get document ids likely matching the query
  79. *
  80. * It's an array with document ids as keys (performance considerations)
  81. *
  82. * @return array
  83. */
  84. abstract public function matchedDocs();
  85. /**
  86. * Execute query in context of index reader
  87. * It also initializes necessary internal structures
  88. *
  89. * Query specific implementation
  90. *
  91. * @param Zend_Search_Lucene_Interface $reader
  92. * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
  93. */
  94. abstract public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null);
  95. /**
  96. * Constructs an appropriate Weight implementation for this query.
  97. *
  98. * @param Zend_Search_Lucene_Interface $reader
  99. * @return Zend_Search_Lucene_Search_Weight
  100. */
  101. abstract public function createWeight(Zend_Search_Lucene_Interface $reader);
  102. /**
  103. * Constructs an initializes a Weight for a _top-level_query_.
  104. *
  105. * @param Zend_Search_Lucene_Interface $reader
  106. */
  107. protected function _initWeight(Zend_Search_Lucene_Interface $reader)
  108. {
  109. // Check, that it's a top-level query and query weight is not initialized yet.
  110. if ($this->_weight !== null) {
  111. return $this->_weight;
  112. }
  113. $this->createWeight($reader);
  114. $sum = $this->_weight->sumOfSquaredWeights();
  115. $queryNorm = $reader->getSimilarity()->queryNorm($sum);
  116. $this->_weight->normalize($queryNorm);
  117. }
  118. /**
  119. * Re-write query into primitive queries in the context of specified index
  120. *
  121. * @param Zend_Search_Lucene_Interface $index
  122. * @return Zend_Search_Lucene_Search_Query
  123. */
  124. abstract public function rewrite(Zend_Search_Lucene_Interface $index);
  125. /**
  126. * Optimize query in the context of specified index
  127. *
  128. * @param Zend_Search_Lucene_Interface $index
  129. * @return Zend_Search_Lucene_Search_Query
  130. */
  131. abstract public function optimize(Zend_Search_Lucene_Interface $index);
  132. /**
  133. * Reset query, so it can be reused within other queries or
  134. * with other indeces
  135. */
  136. public function reset()
  137. {
  138. $this->_weight = null;
  139. }
  140. /**
  141. * Print a query
  142. *
  143. * @return string
  144. */
  145. abstract public function __toString();
  146. /**
  147. * Return query terms
  148. *
  149. * @return array
  150. */
  151. abstract public function getQueryTerms();
  152. /**
  153. * Query specific matches highlighting
  154. *
  155. * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
  156. */
  157. abstract protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter);
  158. /**
  159. * Highlight matches in $inputHTML
  160. *
  161. * @param string $inputHTML
  162. * @param string $defaultEncoding HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
  163. * @param Zend_Search_Lucene_Search_Highlighter_Interface|null $highlighter
  164. * @return string
  165. */
  166. public function highlightMatches($inputHTML, $defaultEncoding = '', $highlighter = null)
  167. {
  168. if ($highlighter === null) {
  169. require_once 'Zend/Search/Lucene/Search/Highlighter/Default.php';
  170. $highlighter = new Zend_Search_Lucene_Search_Highlighter_Default();
  171. }
  172. /** Zend_Search_Lucene_Document_Html */
  173. require_once 'Zend/Search/Lucene/Document/Html.php';
  174. $doc = Zend_Search_Lucene_Document_Html::loadHTML($inputHTML, false, $defaultEncoding);
  175. $highlighter->setDocument($doc);
  176. $this->_highlightMatches($highlighter);
  177. return $doc->getHTML();
  178. }
  179. /**
  180. * Highlight matches in $inputHtmlFragment and return it (without HTML header and body tag)
  181. *
  182. * @param string $inputHtmlFragment
  183. * @param string $encoding Input HTML string encoding
  184. * @param Zend_Search_Lucene_Search_Highlighter_Interface|null $highlighter
  185. * @return string
  186. */
  187. public function htmlFragmentHighlightMatches($inputHtmlFragment, $encoding = 'UTF-8', $highlighter = null)
  188. {
  189. if ($highlighter === null) {
  190. require_once 'Zend/Search/Lucene/Search/Highlighter/Default.php';
  191. $highlighter = new Zend_Search_Lucene_Search_Highlighter_Default();
  192. }
  193. $inputHTML = '<html><head><META HTTP-EQUIV="Content-type" CONTENT="text/html; charset=UTF-8"/></head><body>'
  194. . iconv($encoding, 'UTF-8//IGNORE', $inputHtmlFragment) . '</body></html>';
  195. /** Zend_Search_Lucene_Document_Html */
  196. require_once 'Zend/Search/Lucene/Document/Html.php';
  197. $doc = Zend_Search_Lucene_Document_Html::loadHTML($inputHTML);
  198. $highlighter->setDocument($doc);
  199. $this->_highlightMatches($highlighter);
  200. return $doc->getHtmlBody();
  201. }
  202. }