SearchHighlightTest.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. <?php
  2. /**
  3. * @category Zend
  4. * @package Zend_Search_Lucene
  5. * @subpackage UnitTests
  6. */
  7. /**
  8. * Zend_Search_Lucene
  9. */
  10. require_once 'Zend/Search/Lucene.php';
  11. /**
  12. * PHPUnit test case
  13. */
  14. require_once 'PHPUnit/Framework/TestCase.php';
  15. /**
  16. * @category Zend
  17. * @package Zend_Search_Lucene
  18. * @subpackage UnitTests
  19. */
  20. class Zend_Search_Lucene_SearchHighlightTest extends PHPUnit_Framework_TestCase
  21. {
  22. /**
  23. * Wildcard pattern minimum preffix
  24. *
  25. * @var integer
  26. */
  27. protected $_wildcardMinPrefix;
  28. /**
  29. * Fuzzy search default preffix length
  30. *
  31. * @var integer
  32. */
  33. protected $_defaultPrefixLength;
  34. public function setUp()
  35. {
  36. $this->_wildcardMinPrefix = Zend_Search_Lucene_Search_Query_Wildcard::getMinPrefixLength();
  37. Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength(0);
  38. $this->_defaultPrefixLength = Zend_Search_Lucene_Search_Query_Fuzzy::getDefaultPrefixLength();
  39. Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength(0);
  40. }
  41. public function tearDown()
  42. {
  43. Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength($this->_wildcardMinPrefix);
  44. Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength($this->_defaultPrefixLength);
  45. }
  46. public function testHtmlFragmentHighlightMatches()
  47. {
  48. $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"The Right Way" AND text:go');
  49. $highlightedHtmlFragment = $query->htmlFragmentHighlightMatches('Text highlighting using Zend_Search_Lucene is the right way to go!');
  50. $this->assertEquals($highlightedHtmlFragment,
  51. 'Text highlighting using Zend_Search_Lucene is <b style="color:black;background-color:#66ffff">the</b> <b style="color:black;background-color:#66ffff">right</b> <b style="color:black;background-color:#66ffff">way</b> to <b style="color:black;background-color:#ff66ff">go</b>!');
  52. }
  53. // public function testHtmlFragmentHighlightMatchesCyrillic()
  54. // {
  55. // $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"некоторый текст" AND text:поехали', 'UTF-8');
  56. //
  57. // $highlightedHtmlFragment = $query->htmlFragmentHighlightMatches('Подсвечиваем некоторый текст с использованием Zend_Search_Lucene. Поехали!');
  58. //
  59. // $this->assertEquals($highlightedHtmlFragment,
  60. // 'Text highlighting using Zend_Search_Lucene is <b style="color:black;background-color:#66ffff">the</b> <b style="color:black;background-color:#66ffff">right</b> <b style="color:black;background-color:#66ffff">way</b> to <b style="color:black;background-color:#ff66ff">go</b>!');
  61. // }
  62. //
  63. // public function testHtmlFragmentHighlightMatchesCyrillicWin1251()
  64. // {
  65. // $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
  66. // Zend_Search_Lucene_Analysis_Analyzer::setDefault(
  67. // new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive()
  68. // );
  69. //
  70. //
  71. // $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"Некоторый текст" AND text:поехали');
  72. //
  73. // $highlightedHtmlFragment =
  74. // $query->htmlFragmentHighlightMatches(iconv('UTF-8',
  75. // 'Windows-1251',
  76. // 'Подсвечиваем некоторый текст с использованием Zend_Search_Lucene. Поехали!'),
  77. // 'Windows-1251');
  78. //
  79. // Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
  80. //
  81. // $this->assertEquals($highlightedHtmlFragment,
  82. // 'Text highlighting using Zend_Search_Lucene is <b style="color:black;background-color:#66ffff">the</b> <b style="color:black;background-color:#66ffff">right</b> <b style="color:black;background-color:#66ffff">way</b> to <b style="color:black;background-color:#ff66ff">go</b>!');
  83. // }
  84. public function testHighlightPhrasePlusTerm()
  85. {
  86. $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"The Right Way" AND text:go');
  87. $html = '<HTML>'
  88. . '<HEAD><TITLE>Page title</TITLE></HEAD>'
  89. . '<BODY>'
  90. . 'Text highlighting using Zend_Search_Lucene is the right way to go!'
  91. . '</BODY>'
  92. . '</HTML>';
  93. $highlightedHTML = $query->highlightMatches($html);
  94. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">the</b>') !== false);
  95. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">right</b>') !== false);
  96. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">way</b>') !== false);
  97. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#ff66ff">go</b>') !== false);
  98. }
  99. public function testHighlightMultitermWithProhibitedTerms()
  100. {
  101. $query = Zend_Search_Lucene_Search_QueryParser::parse('+text +highlighting -using -right +go');
  102. $html = '<HTML>'
  103. . '<HEAD><TITLE>Page title</TITLE></HEAD>'
  104. . '<BODY>'
  105. . 'Text highlighting using Zend_Search_Lucene is the right way to go!'
  106. . '</BODY>'
  107. . '</HTML>';
  108. $highlightedHTML = $query->highlightMatches($html);
  109. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">Text</b>') !== false);
  110. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#ff66ff">highlighting</b>') !== false);
  111. $this->assertTrue(strpos($highlightedHTML, 'using Zend_Search_Lucene is the right way to') !== false);
  112. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#ffff66">go</b>') !== false);
  113. }
  114. public function testHighlightWildcard1()
  115. {
  116. $query = Zend_Search_Lucene_Search_QueryParser::parse('te?t');
  117. $html = '<HTML>'
  118. . '<HEAD><TITLE>Page title</TITLE></HEAD>'
  119. . '<BODY>'
  120. . 'Test of text highlighting using wildcard query with question mark. Testing...'
  121. . '</BODY>'
  122. . '</HTML>';
  123. $highlightedHTML = $query->highlightMatches($html);
  124. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">Test</b>') !== false);
  125. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">text</b>') !== false);
  126. // Check that 'Testing' word is not highlighted
  127. $this->assertTrue(strpos($highlightedHTML, 'mark. Testing...') !== false);
  128. }
  129. public function testHighlightWildcard2()
  130. {
  131. $query = Zend_Search_Lucene_Search_QueryParser::parse('te?t*');
  132. $html = '<HTML>'
  133. . '<HEAD><TITLE>Page title</TITLE></HEAD>'
  134. . '<BODY>'
  135. . 'Test of text highlighting using wildcard query with question mark. Testing...'
  136. . '</BODY>'
  137. . '</HTML>';
  138. $highlightedHTML = $query->highlightMatches($html);
  139. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">Test</b>') !== false);
  140. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">text</b>') !== false);
  141. // Check that 'Testing' word is also highlighted
  142. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">Testing</b>') !== false);
  143. }
  144. public function testHighlightFuzzy1()
  145. {
  146. $query = Zend_Search_Lucene_Search_QueryParser::parse('test~');
  147. $html = '<HTML>'
  148. . '<HEAD><TITLE>Page title</TITLE></HEAD>'
  149. . '<BODY>'
  150. . 'Test of text fuzzy search terms highlighting. '
  151. . 'Words: test, text, latest, left, list, next, ...'
  152. . '</BODY>'
  153. . '</HTML>';
  154. $highlightedHTML = $query->highlightMatches($html);
  155. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">Test</b>') !== false);
  156. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">test</b>') !== false);
  157. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">text</b>') !== false);
  158. // Check that other words are not highlighted
  159. $this->assertTrue(strpos($highlightedHTML, 'latest, left, list, next, ...') !== false);
  160. }
  161. public function testHighlightFuzzy2()
  162. {
  163. $query = Zend_Search_Lucene_Search_QueryParser::parse('test~0.4');
  164. $html = '<HTML>'
  165. . '<HEAD><TITLE>Page title</TITLE></HEAD>'
  166. . '<BODY>'
  167. . 'Test of text fuzzy search terms highlighting. '
  168. . 'Words: test, text, latest, left, list, next, ...'
  169. . '</BODY>'
  170. . '</HTML>';
  171. $highlightedHTML = $query->highlightMatches($html);
  172. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">Test</b>') !== false);
  173. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">test</b>') !== false);
  174. // Check that other words are also highlighted
  175. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">text</b>') !== false);
  176. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">latest</b>') !== false);
  177. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">left</b>') !== false);
  178. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">list</b>') !== false);
  179. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">next</b>') !== false);
  180. }
  181. public function testHighlightRangeInclusive()
  182. {
  183. $query = Zend_Search_Lucene_Search_QueryParser::parse('[business TO by]');
  184. $html = '<HTML>'
  185. . '<HEAD><TITLE>Page title</TITLE></HEAD>'
  186. . '<BODY>'
  187. . 'Test of text using range query. '
  188. . 'It has to match "business", "by", "buss" and "but" words, but has to skip "bus"'
  189. . '</BODY>'
  190. . '</HTML>';
  191. $highlightedHTML = $query->highlightMatches($html);
  192. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">business</b>') !== false);
  193. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">by</b>') !== false);
  194. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">buss</b>') !== false);
  195. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">but</b>') !== false);
  196. // Check that "bus" word is skipped
  197. $this->assertTrue(strpos($highlightedHTML, 'has to skip "bus"') !== false);
  198. }
  199. public function testHighlightRangeNonInclusive()
  200. {
  201. $query = Zend_Search_Lucene_Search_QueryParser::parse('{business TO by}');
  202. $html = '<HTML>'
  203. . '<HEAD><TITLE>Page title</TITLE></HEAD>'
  204. . '<BODY>'
  205. . 'Test of text using range query. '
  206. . 'It has to match "buss" and "but" words, but has to skip "business", "by" and "bus"'
  207. . '</BODY>'
  208. . '</HTML>';
  209. $highlightedHTML = $query->highlightMatches($html);
  210. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">buss</b>') !== false);
  211. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">but</b>') !== false);
  212. // Check that "bus" word is skipped
  213. $this->assertTrue(strpos($highlightedHTML, 'has to skip "business", "by" and "bus"') !== false);
  214. }
  215. }