2
0

SearchHighlightTest.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. <?php
  2. /**
  3. * @category Zend
  4. * @package Zend_Search_Lucene
  5. * @subpackage UnitTests
  6. */
  7. /**
  8. * Zend_Search_Lucene
  9. */
  10. require_once 'Zend/Search/Lucene.php';
  11. /**
  12. * PHPUnit test case
  13. */
  14. require_once 'PHPUnit/Framework/TestCase.php';
  15. /**
  16. * @category Zend
  17. * @package Zend_Search_Lucene
  18. * @subpackage UnitTests
  19. */
  20. class Zend_Search_Lucene_SearchHighlightTest extends PHPUnit_Framework_TestCase
  21. {
  22. /**
  23. * Wildcard pattern minimum prefix
  24. *
  25. * @var integer
  26. */
  27. protected $_wildcardMinPrefix;
  28. /**
  29. * Fuzzy search default prefix length
  30. *
  31. * @var integer
  32. */
  33. protected $_defaultPrefixLength;
  /**
   * Remember the current wildcard minimum prefix length and fuzzy default
   * prefix length, then set both to 0 for the test that is about to run.
   *
   * The remembered values are written back by tearDown().
   */
  public function setUp()
  {
      // Snapshot both global settings first ...
      $this->_wildcardMinPrefix   = Zend_Search_Lucene_Search_Query_Wildcard::getMinPrefixLength();
      $this->_defaultPrefixLength = Zend_Search_Lucene_Search_Query_Fuzzy::getDefaultPrefixLength();

      // ... then drop them to zero.
      Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength(0);
      Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength(0);
  }
  /**
   * Write back the prefix-length settings that setUp() stored on this
   * test instance.
   */
  public function tearDown()
  {
      Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength($this->_defaultPrefixLength);
      Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength($this->_wildcardMinPrefix);
  }
  /**
   * htmlFragmentHighlightMatches() on a bare text fragment (no surrounding
   * HTML document) for a "phrase AND term" query.
   *
   * The expected output wraps each phrase word ("the", "right", "way") in a
   * #66ffff highlight and the single term ("go") in a #ff66ff highlight,
   * leaving the rest of the fragment untouched.
   */
  public function testHtmlFragmentHighlightMatches()
  {
      $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"The Right Way" AND text:go');

      $highlightedHtmlFragment = $query->htmlFragmentHighlightMatches('Text highlighting using Zend_Search_Lucene is the right way to go!');

      $expectedHtmlFragment = 'Text highlighting using Zend_Search_Lucene is <b style="color:black;background-color:#66ffff">the</b> <b style="color:black;background-color:#66ffff">right</b> <b style="color:black;background-color:#66ffff">way</b> to <b style="color:black;background-color:#ff66ff">go</b>!';

      // PHPUnit's signature is assertEquals($expected, $actual); passing the
      // arguments in that order keeps the "expected"/"actual" labels in a
      // failure report truthful.
      $this->assertEquals($expectedHtmlFragment, $highlightedHtmlFragment);
  }
  53. // public function testHtmlFragmentHighlightMatchesCyrillic()
  54. // {
  55. // $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"некоторый текст" AND text:поехали');
  56. //
  57. // $highlightedHtmlFragment = $query->htmlFragmentHighlightMatches('Подсвечиваем некоторый текст с использованием Zend_Search_Lucene. Поехали!');
  58. //
  59. // $this->assertEquals($highlightedHtmlFragment,
  60. // 'Text highlighting using Zend_Search_Lucene is <b style="color:black;background-color:#66ffff">the</b> <b style="color:black;background-color:#66ffff">right</b> <b style="color:black;background-color:#66ffff">way</b> to <b style="color:black;background-color:#ff66ff">go</b>!');
  61. // }
  62. //
  63. // public function testHtmlFragmentHighlightMatchesCyrillicWindows()
  64. // {
  65. // $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"Некоторый текст" AND text:поехали');
  66. //
  67. // $highlightedHtmlFragment =
  68. // $query->htmlFragmentHighlightMatches(iconv('UTF-8',
  69. // 'Windows-1251',
  70. // 'Подсвечиваем некоторый текст с использованием Zend_Search_Lucene. Поехали!'),
  71. // 'Windows-1251');
  72. //
  73. // $this->assertEquals($highlightedHtmlFragment,
  74. // 'Text highlighting using Zend_Search_Lucene is <b style="color:black;background-color:#66ffff">the</b> <b style="color:black;background-color:#66ffff">right</b> <b style="color:black;background-color:#66ffff">way</b> to <b style="color:black;background-color:#ff66ff">go</b>!');
  75. // }
  /**
   * highlightMatches() on a complete HTML document for a "phrase AND term"
   * query: the three phrase words must carry the #66ffff highlight and the
   * single term the #ff66ff highlight.
   */
  public function testHighlightPhrasePlusTerm()
  {
      $document = implode('', array(
          '<HTML>',
          '<HEAD><TITLE>Page title</TITLE></HEAD>',
          '<BODY>',
          'Text highlighting using Zend_Search_Lucene is the right way to go!',
          '</BODY>',
          '</HTML>',
      ));

      $parsedQuery = Zend_Search_Lucene_Search_QueryParser::parse('title:"The Right Way" AND text:go');
      $markedUp    = $parsedQuery->highlightMatches($document);

      // Every one of these fragments has to appear in the highlighted output.
      $expectedFragments = array(
          '<b style="color:black;background-color:#66ffff">the</b>',
          '<b style="color:black;background-color:#66ffff">right</b>',
          '<b style="color:black;background-color:#66ffff">way</b>',
          '<b style="color:black;background-color:#ff66ff">go</b>',
      );
      foreach ($expectedFragments as $fragment) {
          $this->assertTrue(strpos($markedUp, $fragment) !== false);
      }
  }
  /**
   * Multi-term query mixing required (+) and prohibited (-) terms.
   *
   * The required terms "text", "highlighting" and "go" must each be
   * highlighted (in #66ffff, #ff66ff and #ffff66 respectively), while the
   * stretch of text containing the prohibited terms "using" and "right"
   * must come through without any markup.
   */
  public function testHighlightMultitermWithProhibitedTerms()
  {
      $page  = '<HTML>';
      $page .= '<HEAD><TITLE>Page title</TITLE></HEAD>';
      $page .= '<BODY>';
      $page .= 'Text highlighting using Zend_Search_Lucene is the right way to go!';
      $page .= '</BODY>';
      $page .= '</HTML>';

      $parsedQuery = Zend_Search_Lucene_Search_QueryParser::parse('+text +highlighting -using -right +go');
      $result      = $parsedQuery->highlightMatches($page);

      $mustContain = array(
          '<b style="color:black;background-color:#66ffff">Text</b>',
          '<b style="color:black;background-color:#ff66ff">highlighting</b>',
          // Unbroken plain text: nothing in this span may be wrapped in markup.
          'using Zend_Search_Lucene is the right way to',
          '<b style="color:black;background-color:#ffff66">go</b>',
      );
      foreach ($mustContain as $needle) {
          $this->assertTrue(false !== strpos($result, $needle));
      }
  }
  /**
   * Single-character wildcard query ("te?t"): "Test" and "text" must be
   * highlighted, whereas "Testing" must stay plain.
   */
  public function testHighlightWildcard1()
  {
      $pageParts = array(
          '<HTML>',
          '<HEAD><TITLE>Page title</TITLE></HEAD>',
          '<BODY>',
          'Test of text highlighting using wildcard query with question mark. Testing...',
          '</BODY>',
          '</HTML>',
      );

      $wildcardQuery = Zend_Search_Lucene_Search_QueryParser::parse('te?t');
      $output        = $wildcardQuery->highlightMatches(implode('', $pageParts));

      $highlightedTest = '<b style="color:black;background-color:#66ffff">Test</b>';
      $highlightedText = '<b style="color:black;background-color:#66ffff">text</b>';

      $this->assertTrue(strpos($output, $highlightedTest) !== false);
      $this->assertTrue(strpos($output, $highlightedText) !== false);

      // "Testing" has to survive without markup: the surrounding plain text
      // must still be found as one contiguous piece.
      $untouchedTail = 'mark. Testing...';
      $this->assertTrue(strpos($output, $untouchedTail) !== false);
  }
  /**
   * Wildcard query with a trailing multi-character wildcard ("te?t*"):
   * "Test", "text" and also "Testing" must all be highlighted.
   */
  public function testHighlightWildcard2()
  {
      $wildcardQuery = Zend_Search_Lucene_Search_QueryParser::parse('te?t*');

      $page = implode('', array(
          '<HTML>',
          '<HEAD><TITLE>Page title</TITLE></HEAD>',
          '<BODY>',
          'Test of text highlighting using wildcard query with question mark. Testing...',
          '</BODY>',
          '</HTML>',
      ));

      $output = $wildcardQuery->highlightMatches($page);

      $expectedHighlights = array(
          '<b style="color:black;background-color:#66ffff">Test</b>',
          '<b style="color:black;background-color:#66ffff">text</b>',
          // Unlike the "te?t" case, the longer word matches here as well.
          '<b style="color:black;background-color:#66ffff">Testing</b>',
      );
      foreach ($expectedHighlights as $markup) {
          $this->assertTrue(strpos($output, $markup) !== false);
      }
  }
  /**
   * Fuzzy query without an explicit similarity ("test~"): "Test", "test"
   * and "text" must be highlighted; "latest", "left", "list" and "next"
   * must remain plain.
   */
  public function testHighlightFuzzy1()
  {
      $bodyText = 'Test of text fuzzy search terms highlighting. '
                . 'Words: test, text, latest, left, list, next, ...';
      $page     = '<HTML>' . '<HEAD><TITLE>Page title</TITLE></HEAD>' . '<BODY>' . $bodyText . '</BODY>' . '</HTML>';

      $fuzzyQuery = Zend_Search_Lucene_Search_QueryParser::parse('test~');
      $output     = $fuzzyQuery->highlightMatches($page);

      $expectedHighlights = array(
          '<b style="color:black;background-color:#66ffff">Test</b>',
          '<b style="color:black;background-color:#66ffff">test</b>',
          '<b style="color:black;background-color:#66ffff">text</b>',
      );
      foreach ($expectedHighlights as $markup) {
          $this->assertTrue(strpos($output, $markup) !== false);
      }

      // The remaining words must appear as one uninterrupted, unmarked run.
      $this->assertTrue(strpos($output, 'latest, left, list, next, ...') !== false);
  }
  /**
   * Fuzzy query with an explicit 0.4 similarity ("test~0.4"): besides
   * "Test" and "test", the words "text", "latest", "left", "list" and
   * "next" must be highlighted too.
   */
  public function testHighlightFuzzy2()
  {
      $page = implode('', array(
          '<HTML>',
          '<HEAD><TITLE>Page title</TITLE></HEAD>',
          '<BODY>',
          'Test of text fuzzy search terms highlighting. ',
          'Words: test, text, latest, left, list, next, ...',
          '</BODY>',
          '</HTML>',
      ));

      $fuzzyQuery = Zend_Search_Lucene_Search_QueryParser::parse('test~0.4');
      $output     = $fuzzyQuery->highlightMatches($page);

      $expectedHighlights = array(
          '<b style="color:black;background-color:#66ffff">Test</b>',
          '<b style="color:black;background-color:#66ffff">test</b>',
          // With the 0.4 similarity the other listed words are expected as well.
          '<b style="color:black;background-color:#66ffff">text</b>',
          '<b style="color:black;background-color:#66ffff">latest</b>',
          '<b style="color:black;background-color:#66ffff">left</b>',
          '<b style="color:black;background-color:#66ffff">list</b>',
          '<b style="color:black;background-color:#66ffff">next</b>',
      );
      foreach ($expectedHighlights as $markup) {
          $this->assertTrue(false !== strpos($output, $markup));
      }
  }
  /**
   * Inclusive range query ("[business TO by]"): both boundary words
   * ("business", "by") and the in-between words ("buss", "but") must be
   * highlighted, while "bus" must stay plain.
   */
  public function testHighlightRangeInclusive()
  {
      $page  = '<HTML>';
      $page .= '<HEAD><TITLE>Page title</TITLE></HEAD>';
      $page .= '<BODY>';
      $page .= 'Test of text using range query. ';
      $page .= 'It has to match "business", "by", "buss" and "but" words, but has to skip "bus"';
      $page .= '</BODY>';
      $page .= '</HTML>';

      $rangeQuery = Zend_Search_Lucene_Search_QueryParser::parse('[business TO by]');
      $output     = $rangeQuery->highlightMatches($page);

      $expectedHighlights = array(
          '<b style="color:black;background-color:#66ffff">business</b>',
          '<b style="color:black;background-color:#66ffff">by</b>',
          '<b style="color:black;background-color:#66ffff">buss</b>',
          '<b style="color:black;background-color:#66ffff">but</b>',
      );
      foreach ($expectedHighlights as $markup) {
          $this->assertTrue(strpos($output, $markup) !== false);
      }

      // "bus" must not be wrapped: the plain phrase around it has to be intact.
      $this->assertTrue(strpos($output, 'has to skip "bus"') !== false);
  }
  /**
   * Exclusive range query ("{business TO by}"): only the in-between words
   * ("buss", "but") must be highlighted; the boundary words ("business",
   * "by") and "bus" must stay plain.
   */
  public function testHighlightRangeNonInclusive()
  {
      $page = implode('', array(
          '<HTML>',
          '<HEAD><TITLE>Page title</TITLE></HEAD>',
          '<BODY>',
          'Test of text using range query. ',
          'It has to match "buss" and "but" words, but has to skip "business", "by" and "bus"',
          '</BODY>',
          '</HTML>',
      ));

      $rangeQuery = Zend_Search_Lucene_Search_QueryParser::parse('{business TO by}');
      $output     = $rangeQuery->highlightMatches($page);

      $highlightedBuss = '<b style="color:black;background-color:#66ffff">buss</b>';
      $highlightedBut  = '<b style="color:black;background-color:#66ffff">but</b>';

      $this->assertTrue(strpos($output, $highlightedBuss) !== false);
      $this->assertTrue(strpos($output, $highlightedBut) !== false);

      // "business", "by" and "bus" must all be skipped: the plain phrase
      // listing them has to be found without any markup inside it.
      $this->assertTrue(strpos($output, 'has to skip "business", "by" and "bus"') !== false);
  }
  207. }