Query.php 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Search
  18. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. */
  21. /** Zend_Search_Lucene_Document_Html */
  22. require_once 'Zend/Search/Lucene/Document/Html.php';
  23. /** Zend_Search_Lucene_Index_DocsFilter */
  24. require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
  25. /** Zend_Search_Lucene_Search_Highlighter_Default */
  26. require_once 'Zend/Search/Lucene/Search/Highlighter/Default.php';
  27. /**
  28. * @category Zend
  29. * @package Zend_Search_Lucene
  30. * @subpackage Search
  31. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  32. * @license http://framework.zend.com/license/new-bsd New BSD License
  33. */
  34. abstract class Zend_Search_Lucene_Search_Query
  35. {
  36. /**
  37. * query boost factor
  38. *
  39. * @var float
  40. */
  41. private $_boost = 1;
  42. /**
  43. * Query weight
  44. *
  45. * @var Zend_Search_Lucene_Search_Weight
  46. */
  47. protected $_weight = null;
  48. /**
  49. * Current highlight color
  50. *
  51. * @var integer
  52. */
  53. private $_currentColorIndex = 0;
  54. /**
  55. * Gets the boost for this clause. Documents matching
  56. * this clause will (in addition to the normal weightings) have their score
  57. * multiplied by boost. The boost is 1.0 by default.
  58. *
  59. * @return float
  60. */
  61. public function getBoost()
  62. {
  63. return $this->_boost;
  64. }
  65. /**
  66. * Sets the boost for this query clause to $boost.
  67. *
  68. * @param float $boost
  69. */
  70. public function setBoost($boost)
  71. {
  72. $this->_boost = $boost;
  73. }
  74. /**
  75. * Score specified document
  76. *
  77. * @param integer $docId
  78. * @param Zend_Search_Lucene_Interface $reader
  79. * @return float
  80. */
  81. abstract public function score($docId, Zend_Search_Lucene_Interface $reader);
  82. /**
  83. * Get document ids likely matching the query
  84. *
  85. * It's an array with document ids as keys (performance considerations)
  86. *
  87. * @return array
  88. */
  89. abstract public function matchedDocs();
  90. /**
  91. * Execute query in context of index reader
  92. * It also initializes necessary internal structures
  93. *
  94. * Query specific implementation
  95. *
  96. * @param Zend_Search_Lucene_Interface $reader
  97. * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
  98. */
  99. abstract public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null);
  100. /**
  101. * Constructs an appropriate Weight implementation for this query.
  102. *
  103. * @param Zend_Search_Lucene_Interface $reader
  104. * @return Zend_Search_Lucene_Search_Weight
  105. */
  106. abstract public function createWeight(Zend_Search_Lucene_Interface $reader);
  107. /**
  108. * Constructs an initializes a Weight for a _top-level_query_.
  109. *
  110. * @param Zend_Search_Lucene_Interface $reader
  111. */
  112. protected function _initWeight(Zend_Search_Lucene_Interface $reader)
  113. {
  114. // Check, that it's a top-level query and query weight is not initialized yet.
  115. if ($this->_weight !== null) {
  116. return $this->_weight;
  117. }
  118. $this->createWeight($reader);
  119. $sum = $this->_weight->sumOfSquaredWeights();
  120. $queryNorm = $reader->getSimilarity()->queryNorm($sum);
  121. $this->_weight->normalize($queryNorm);
  122. }
  123. /**
  124. * Re-write query into primitive queries in the context of specified index
  125. *
  126. * @param Zend_Search_Lucene_Interface $index
  127. * @return Zend_Search_Lucene_Search_Query
  128. */
  129. abstract public function rewrite(Zend_Search_Lucene_Interface $index);
  130. /**
  131. * Optimize query in the context of specified index
  132. *
  133. * @param Zend_Search_Lucene_Interface $index
  134. * @return Zend_Search_Lucene_Search_Query
  135. */
  136. abstract public function optimize(Zend_Search_Lucene_Interface $index);
  137. /**
  138. * Reset query, so it can be reused within other queries or
  139. * with other indeces
  140. */
  141. public function reset()
  142. {
  143. $this->_weight = null;
  144. }
  145. /**
  146. * Print a query
  147. *
  148. * @return string
  149. */
  150. abstract public function __toString();
  151. /**
  152. * Return query terms
  153. *
  154. * @return array
  155. */
  156. abstract public function getQueryTerms();
  157. /**
  158. * Query specific matches highlighting
  159. *
  160. * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
  161. */
  162. abstract protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter);
  163. /**
  164. * Highlight matches in $inputHTML
  165. *
  166. * @param string $inputHTML
  167. * @param string $defaultEncoding HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
  168. * @param Zend_Search_Lucene_Search_Highlighter_Interface|null $highlighter
  169. * @return string
  170. */
  171. public function highlightMatches($inputHTML, $defaultEncoding = '', $highlighter = null)
  172. {
  173. if ($highlighter === null) {
  174. $highlighter = new Zend_Search_Lucene_Search_Highlighter_Default();
  175. }
  176. $doc = Zend_Search_Lucene_Document_Html::loadHTML($inputHTML, false, $defaultEncoding);
  177. $highlighter->setDocument($doc);
  178. $this->_highlightMatches($highlighter);
  179. return $doc->getHTML();
  180. }
  181. /**
  182. * Highlight matches in $inputHtmlFragment and return it (without HTML header and body tag)
  183. *
  184. * @param string $inputHtmlFragment
  185. * @param string $encoding Input HTML string encoding
  186. * @param Zend_Search_Lucene_Search_Highlighter_Interface|null $highlighter
  187. * @return string
  188. */
  189. public function htmlFragmentHighlightMatches($inputHtmlFragment, $encoding = 'UTF-8', $highlighter = null)
  190. {
  191. if ($highlighter === null) {
  192. $highlighter = new Zend_Search_Lucene_Search_Highlighter_Default();
  193. }
  194. $inputHTML = '<html><head><META HTTP-EQUIV="Content-type" CONTENT="text/html; charset=UTF-8"/></head><body>'
  195. . iconv($encoding, 'UTF-8//IGNORE', $inputHtmlFragment) . '</body></html>';
  196. $doc = Zend_Search_Lucene_Document_Html::loadHTML($inputHTML);
  197. $highlighter->setDocument($doc);
  198. $this->_highlightMatches($highlighter);
  199. return $doc->getHtmlBody();
  200. }
  201. }