2
0

SearchHighlightTest.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage UnitTests
  18. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /**
  23. * Zend_Search_Lucene
  24. */
  25. require_once 'Zend/Search/Lucene.php';
  26. /**
  27. * @category Zend
  28. * @package Zend_Search_Lucene
  29. * @subpackage UnitTests
  30. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  31. * @license http://framework.zend.com/license/new-bsd New BSD License
  32. * @group Zend_Search_Lucene
  33. */
  class Zend_Search_Lucene_SearchHighlightTest extends PHPUnit_Framework_TestCase
  {
      /**
       * Wildcard pattern minimum prefix length, as it was before the test ran.
       *
       * @var integer
       */
      protected $_wildcardMinPrefix;

      /**
       * Fuzzy search default prefix length, as it was before the test ran.
       *
       * @var integer
       */
      protected $_defaultPrefixLength;

      /**
       * Remembers the current wildcard/fuzzy prefix settings and sets both to 0
       * for the duration of the test.
       *
       * @return void
       */
      public function setUp()
      {
          require_once 'Zend/Search/Lucene/Search/Query/Wildcard.php';
          $this->_wildcardMinPrefix = Zend_Search_Lucene_Search_Query_Wildcard::getMinPrefixLength();
          Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength(0);

          require_once 'Zend/Search/Lucene/Search/Query/Fuzzy.php';
          $this->_defaultPrefixLength = Zend_Search_Lucene_Search_Query_Fuzzy::getDefaultPrefixLength();
          Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength(0);
      }

      /**
       * Restores the prefix settings saved by setUp().
       *
       * @return void
       */
      public function tearDown()
      {
          Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength($this->_wildcardMinPrefix);
          Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength($this->_defaultPrefixLength);
      }

      /**
       * Builds the HTML page shared by the highlightMatches() tests.
       *
       * @param  string $bodyText Text placed between <BODY> and </BODY>
       * @return string
       */
      private function _buildHtmlPage($bodyText)
      {
          return '<HTML>'
               . '<HEAD><TITLE>Page title</TITLE></HEAD>'
               . '<BODY>'
               . $bodyText
               . '</BODY>'
               . '</HTML>';
      }

      /**
       * Asserts that $word appears in $html wrapped in the highlight markup
       * with the given background colour.
       *
       * @param  string $word  Word expected to be highlighted (case-sensitive)
       * @param  string $color Background colour, e.g. '#66ffff'
       * @param  string $html  Highlighted HTML to inspect
       * @return void
       */
      private function _assertHighlighted($word, $color, $html)
      {
          $expectedMarkup = '<b style="color:black;background-color:' . $color . '">' . $word . '</b>';

          // assertContains() reports both strings on failure, unlike
          // assertTrue(strpos(...) !== false).
          $this->assertContains($expectedMarkup, $html);
      }

      /**
       * Phrase + term query applied to an HTML fragment: the whole output is compared.
       *
       * @return void
       */
      public function testHtmlFragmentHighlightMatches()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"The Right Way" AND text:go');

          $highlightedHtmlFragment = $query->htmlFragmentHighlightMatches('Text highlighting using Zend_Search_Lucene is the right way to go!');

          // Expected value goes first so that failure output labels the values correctly.
          $this->assertEquals(
              'Text highlighting using Zend_Search_Lucene is <b style="color:black;background-color:#66ffff">the</b> <b style="color:black;background-color:#66ffff">right</b> <b style="color:black;background-color:#66ffff">way</b> to <b style="color:black;background-color:#ff66ff">go</b>!',
              $highlightedHtmlFragment
          );
      }

      // NOTE(review): the two Cyrillic tests below are disabled. Their expected
      // strings are the English sentence used in testHtmlFragmentHighlightMatches()
      // and do not correspond to the Cyrillic input; fix them before re-enabling.
      //
      // public function testHtmlFragmentHighlightMatchesCyrillic()
      // {
      // $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"некоторый текст" AND text:поехали');
      //
      // $highlightedHtmlFragment = $query->htmlFragmentHighlightMatches('Подсвечиваем некоторый текст с использованием Zend_Search_Lucene. Поехали!');
      //
      // $this->assertEquals($highlightedHtmlFragment,
      // 'Text highlighting using Zend_Search_Lucene is <b style="color:black;background-color:#66ffff">the</b> <b style="color:black;background-color:#66ffff">right</b> <b style="color:black;background-color:#66ffff">way</b> to <b style="color:black;background-color:#ff66ff">go</b>!');
      // }
      //
      // public function testHtmlFragmentHighlightMatchesCyrillicWindows()
      // {
      // $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"Некоторый текст" AND text:поехали');
      //
      // $highlightedHtmlFragment =
      // $query->htmlFragmentHighlightMatches(iconv('UTF-8',
      // 'Windows-1251',
      // 'Подсвечиваем некоторый текст с использованием Zend_Search_Lucene. Поехали!'),
      // 'Windows-1251');
      //
      // $this->assertEquals($highlightedHtmlFragment,
      // 'Text highlighting using Zend_Search_Lucene is <b style="color:black;background-color:#66ffff">the</b> <b style="color:black;background-color:#66ffff">right</b> <b style="color:black;background-color:#66ffff">way</b> to <b style="color:black;background-color:#ff66ff">go</b>!');
      // }

      /**
       * Phrase words share one colour; the separate term gets another.
       *
       * @return void
       */
      public function testHighlightPhrasePlusTerm()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"The Right Way" AND text:go');

          $html = $this->_buildHtmlPage('Text highlighting using Zend_Search_Lucene is the right way to go!');

          $highlightedHTML = $query->highlightMatches($html);

          $this->_assertHighlighted('the', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('right', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('way', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('go', '#ff66ff', $highlightedHTML);
      }

      /**
       * Required terms are highlighted, each with its own colour; the text around
       * the prohibited terms must stay untouched.
       *
       * @return void
       */
      public function testHighlightMultitermWithProhibitedTerms()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('+text +highlighting -using -right +go');

          $html = $this->_buildHtmlPage('Text highlighting using Zend_Search_Lucene is the right way to go!');

          $highlightedHTML = $query->highlightMatches($html);

          $this->_assertHighlighted('Text', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('highlighting', '#ff66ff', $highlightedHTML);
          // Prohibited terms ("using", "right") must appear without markup
          $this->assertContains('using Zend_Search_Lucene is the right way to', $highlightedHTML);
          $this->_assertHighlighted('go', '#ffff66', $highlightedHTML);
      }

      /**
       * Single-character wildcard: 'te?t' matches "Test" and "text" only.
       *
       * @return void
       */
      public function testHighlightWildcard1()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('te?t');

          $html = $this->_buildHtmlPage('Test of text highlighting using wildcard query with question mark. Testing...');

          $highlightedHTML = $query->highlightMatches($html);

          $this->_assertHighlighted('Test', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('text', '#66ffff', $highlightedHTML);
          // Check that 'Testing' word is not highlighted
          $this->assertContains('mark. Testing...', $highlightedHTML);
      }

      /**
       * Wildcard with trailing '*': 'te?t*' additionally matches "Testing".
       *
       * @return void
       */
      public function testHighlightWildcard2()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('te?t*');

          $html = $this->_buildHtmlPage('Test of text highlighting using wildcard query with question mark. Testing...');

          $highlightedHTML = $query->highlightMatches($html);

          $this->_assertHighlighted('Test', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('text', '#66ffff', $highlightedHTML);
          // Check that 'Testing' word is also highlighted
          $this->_assertHighlighted('Testing', '#66ffff', $highlightedHTML);
      }

      /**
       * Fuzzy query with the default similarity: only the closest words are highlighted.
       *
       * @return void
       */
      public function testHighlightFuzzy1()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('test~');

          $html = $this->_buildHtmlPage(
              'Test of text fuzzy search terms highlighting. '
              . 'Words: test, text, latest, left, list, next, ...'
          );

          $highlightedHTML = $query->highlightMatches($html);

          $this->_assertHighlighted('Test', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('test', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('text', '#66ffff', $highlightedHTML);
          // Check that other words are not highlighted
          $this->assertContains('latest, left, list, next, ...', $highlightedHTML);
      }

      /**
       * Fuzzy query with an explicit similarity of 0.4: the remaining words match too.
       *
       * @return void
       */
      public function testHighlightFuzzy2()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('test~0.4');

          $html = $this->_buildHtmlPage(
              'Test of text fuzzy search terms highlighting. '
              . 'Words: test, text, latest, left, list, next, ...'
          );

          $highlightedHTML = $query->highlightMatches($html);

          $this->_assertHighlighted('Test', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('test', '#66ffff', $highlightedHTML);
          // Check that other words are also highlighted
          $this->_assertHighlighted('text', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('latest', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('left', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('list', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('next', '#66ffff', $highlightedHTML);
      }

      /**
       * Inclusive range query: both boundary terms are highlighted.
       *
       * @return void
       */
      public function testHighlightRangeInclusive()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('[business TO by]');

          $html = $this->_buildHtmlPage(
              'Test of text using range query. '
              . 'It has to match "business", "by", "buss" and "but" words, but has to skip "bus"'
          );

          $highlightedHTML = $query->highlightMatches($html);

          $this->_assertHighlighted('business', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('by', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('buss', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('but', '#66ffff', $highlightedHTML);
          // Check that "bus" word is skipped
          $this->assertContains('has to skip "bus"', $highlightedHTML);
      }

      /**
       * Non-inclusive range query: the boundary terms themselves are not highlighted.
       *
       * @return void
       */
      public function testHighlightRangeNonInclusive()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('{business TO by}');

          $html = $this->_buildHtmlPage(
              'Test of text using range query. '
              . 'It has to match "buss" and "but" words, but has to skip "business", "by" and "bus"'
          );

          $highlightedHTML = $query->highlightMatches($html);

          $this->_assertHighlighted('buss', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('but', '#66ffff', $highlightedHTML);
          // Check that "business", "by" and "bus" words are skipped
          $this->assertContains('has to skip "business", "by" and "bus"', $highlightedHTML);
      }
  }