SearchHighlightTest.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage UnitTests
  18. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /**
  23. * Zend_Search_Lucene
  24. */
  25. require_once 'Zend/Search/Lucene.php';
  26. /**
  27. * PHPUnit test case
  28. */
  29. require_once 'PHPUnit/Framework/TestCase.php';
  30. /**
  31. * @category Zend
  32. * @package Zend_Search_Lucene
  33. * @subpackage UnitTests
  34. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  35. * @license http://framework.zend.com/license/new-bsd New BSD License
  36. * @group Zend_Search_Lucene
  37. */
  class Zend_Search_Lucene_SearchHighlightTest extends PHPUnit_Framework_TestCase
  {
      /**
       * Wildcard pattern minimum prefix length that was in effect before the test.
       *
       * Captured in setUp() and written back in tearDown().
       *
       * @var integer
       */
      protected $_wildcardMinPrefix;

      /**
       * Fuzzy search default prefix length that was in effect before the test.
       *
       * Captured in setUp() and written back in tearDown().
       *
       * @var integer
       */
      protected $_defaultPrefixLength;

      /**
       * Remembers the current wildcard/fuzzy prefix settings and sets both to 0.
       *
       * NOTE(review): presumably the zero prefix is required so that the short
       * patterns used in the tests below (e.g. 'te?t', 'test~') are accepted by
       * the query classes - confirm against the Wildcard/Fuzzy implementations.
       *
       * @return void
       */
      public function setUp()
      {
          require_once 'Zend/Search/Lucene/Search/Query/Wildcard.php';
          require_once 'Zend/Search/Lucene/Search/Query/Fuzzy.php';

          $this->_wildcardMinPrefix   = Zend_Search_Lucene_Search_Query_Wildcard::getMinPrefixLength();
          $this->_defaultPrefixLength = Zend_Search_Lucene_Search_Query_Fuzzy::getDefaultPrefixLength();

          Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength(0);
          Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength(0);
      }

      /**
       * Restores the prefix settings that setUp() replaced, so the static
       * configuration changed by this test case does not leak into other tests.
       *
       * @return void
       */
      public function tearDown()
      {
          Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength($this->_wildcardMinPrefix);
          Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength($this->_defaultPrefixLength);
      }

      /**
       * Wraps body text into the minimal HTML page used by the highlightMatches() tests.
       *
       * @param  string $bodyText Text placed between <BODY> and </BODY>
       * @return string
       */
      private function _buildHtmlPage($bodyText)
      {
          return '<HTML>'
               . '<HEAD><TITLE>Page title</TITLE></HEAD>'
               . '<BODY>'
               . $bodyText
               . '</BODY>'
               . '</HTML>';
      }

      /**
       * Builds the markup the tests expect around a highlighted word.
       *
       * @param  string $word  Word exactly as it appears in the source text
       * @param  string $color CSS background color, e.g. '#66ffff'
       * @return string
       */
      private function _highlighted($word, $color)
      {
          return '<b style="color:black;background-color:' . $color . '">' . $word . '</b>';
      }

      /**
       * Asserts that $word occurs in $highlightedHtml wrapped in highlight markup
       * with the given background color.
       *
       * @param  string $word
       * @param  string $color
       * @param  string $highlightedHtml
       * @return void
       */
      private function _assertHighlighted($word, $color, $highlightedHtml)
      {
          // assertContains() reports needle and haystack on failure, unlike
          // assertTrue(strpos(...) !== false).
          $this->assertContains($this->_highlighted($word, $color), $highlightedHtml);
      }

      /**
       * Highlighting of a plain HTML fragment (no surrounding document) for a
       * phrase + term query: phrase words share one color, the term gets another.
       */
      public function testHtmlFragmentHighlightMatches()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"The Right Way" AND text:go');

          $highlightedHtmlFragment = $query->htmlFragmentHighlightMatches(
              'Text highlighting using Zend_Search_Lucene is the right way to go!'
          );

          $expected = 'Text highlighting using Zend_Search_Lucene is '
                    . $this->_highlighted('the', '#66ffff') . ' '
                    . $this->_highlighted('right', '#66ffff') . ' '
                    . $this->_highlighted('way', '#66ffff')
                    . ' to '
                    . $this->_highlighted('go', '#ff66ff')
                    . '!';

          // PHPUnit expects (expected, actual); the reversed order produced
          // misleading failure diffs.
          $this->assertEquals($expected, $highlightedHtmlFragment);
      }

      // Disabled Cyrillic variants, kept verbatim.
      // NOTE(review): the expected strings below are identical to the English
      // expectation of testHtmlFragmentHighlightMatches() and cannot match the
      // Cyrillic input; they have to be rewritten before these tests are re-enabled.
      //
      // public function testHtmlFragmentHighlightMatchesCyrillic()
      // {
      // $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"некоторый текст" AND text:поехали');
      //
      // $highlightedHtmlFragment = $query->htmlFragmentHighlightMatches('Подсвечиваем некоторый текст с использованием Zend_Search_Lucene. Поехали!');
      //
      // $this->assertEquals($highlightedHtmlFragment,
      // 'Text highlighting using Zend_Search_Lucene is <b style="color:black;background-color:#66ffff">the</b> <b style="color:black;background-color:#66ffff">right</b> <b style="color:black;background-color:#66ffff">way</b> to <b style="color:black;background-color:#ff66ff">go</b>!');
      // }
      //
      // public function testHtmlFragmentHighlightMatchesCyrillicWindows()
      // {
      // $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"Некоторый текст" AND text:поехали');
      //
      // $highlightedHtmlFragment =
      // $query->htmlFragmentHighlightMatches(iconv('UTF-8',
      // 'Windows-1251',
      // 'Подсвечиваем некоторый текст с использованием Zend_Search_Lucene. Поехали!'),
      // 'Windows-1251');
      //
      // $this->assertEquals($highlightedHtmlFragment,
      // 'Text highlighting using Zend_Search_Lucene is <b style="color:black;background-color:#66ffff">the</b> <b style="color:black;background-color:#66ffff">right</b> <b style="color:black;background-color:#66ffff">way</b> to <b style="color:black;background-color:#ff66ff">go</b>!');
      // }

      /**
       * Phrase + term query against a full HTML document.
       */
      public function testHighlightPhrasePlusTerm()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"The Right Way" AND text:go');

          $highlightedHTML = $query->highlightMatches(
              $this->_buildHtmlPage('Text highlighting using Zend_Search_Lucene is the right way to go!')
          );

          foreach (array('the', 'right', 'way') as $phraseWord) {
              $this->_assertHighlighted($phraseWord, '#66ffff', $highlightedHTML);
          }
          $this->_assertHighlighted('go', '#ff66ff', $highlightedHTML);
      }

      /**
       * Multi-term query: required terms are highlighted, each with its own
       * color, while prohibited terms ('using', 'right') stay untouched.
       */
      public function testHighlightMultitermWithProhibitedTerms()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('+text +highlighting -using -right +go');

          $highlightedHTML = $query->highlightMatches(
              $this->_buildHtmlPage('Text highlighting using Zend_Search_Lucene is the right way to go!')
          );

          $this->_assertHighlighted('Text', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('highlighting', '#ff66ff', $highlightedHTML);
          // The stretch containing the prohibited terms must come through without markup
          $this->assertContains('using Zend_Search_Lucene is the right way to', $highlightedHTML);
          $this->_assertHighlighted('go', '#ffff66', $highlightedHTML);
      }

      /**
       * Wildcard query with a single-character placeholder only.
       */
      public function testHighlightWildcard1()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('te?t');

          $highlightedHTML = $query->highlightMatches(
              $this->_buildHtmlPage('Test of text highlighting using wildcard query with question mark. Testing...')
          );

          $this->_assertHighlighted('Test', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('text', '#66ffff', $highlightedHTML);
          // Check that 'Testing' word is not highlighted
          $this->assertContains('mark. Testing...', $highlightedHTML);
      }

      /**
       * Wildcard query with a single-character placeholder followed by '*'.
       */
      public function testHighlightWildcard2()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('te?t*');

          $highlightedHTML = $query->highlightMatches(
              $this->_buildHtmlPage('Test of text highlighting using wildcard query with question mark. Testing...')
          );

          // Unlike in testHighlightWildcard1(), 'Testing' is expected to be highlighted too
          foreach (array('Test', 'text', 'Testing') as $matchedWord) {
              $this->_assertHighlighted($matchedWord, '#66ffff', $highlightedHTML);
          }
      }

      /**
       * Fuzzy query without an explicit similarity value.
       */
      public function testHighlightFuzzy1()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('test~');

          $highlightedHTML = $query->highlightMatches(
              $this->_buildHtmlPage(
                  'Test of text fuzzy search terms highlighting. '
                  . 'Words: test, text, latest, left, list, next, ...'
              )
          );

          foreach (array('Test', 'test', 'text') as $matchedWord) {
              $this->_assertHighlighted($matchedWord, '#66ffff', $highlightedHTML);
          }
          // Check that other words are not highlighted
          $this->assertContains('latest, left, list, next, ...', $highlightedHTML);
      }

      /**
       * Fuzzy query with an explicit similarity of 0.4, which is expected to
       * match every word of the list.
       */
      public function testHighlightFuzzy2()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('test~0.4');

          $highlightedHTML = $query->highlightMatches(
              $this->_buildHtmlPage(
                  'Test of text fuzzy search terms highlighting. '
                  . 'Words: test, text, latest, left, list, next, ...'
              )
          );

          // 'text', 'latest', 'left', 'list' and 'next' are the words left
          // unhighlighted by the plain 'test~' query in testHighlightFuzzy1()
          $expectedWords = array('Test', 'test', 'text', 'latest', 'left', 'list', 'next');
          foreach ($expectedWords as $matchedWord) {
              $this->_assertHighlighted($matchedWord, '#66ffff', $highlightedHTML);
          }
      }

      /**
       * Inclusive range query: both boundary terms are highlighted.
       */
      public function testHighlightRangeInclusive()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('[business TO by]');

          $highlightedHTML = $query->highlightMatches(
              $this->_buildHtmlPage(
                  'Test of text using range query. '
                  . 'It has to match "business", "by", "buss" and "but" words, but has to skip "bus"'
              )
          );

          foreach (array('business', 'by', 'buss', 'but') as $matchedWord) {
              $this->_assertHighlighted($matchedWord, '#66ffff', $highlightedHTML);
          }
          // Check that "bus" word is skipped
          $this->assertContains('has to skip "bus"', $highlightedHTML);
      }

      /**
       * Non-inclusive range query: the boundary terms themselves are not highlighted.
       */
      public function testHighlightRangeNonInclusive()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('{business TO by}');

          $highlightedHTML = $query->highlightMatches(
              $this->_buildHtmlPage(
                  'Test of text using range query. '
                  . 'It has to match "buss" and "but" words, but has to skip "business", "by" and "bus"'
              )
          );

          $this->_assertHighlighted('buss', '#66ffff', $highlightedHTML);
          $this->_assertHighlighted('but', '#66ffff', $highlightedHTML);
          // Check that the boundary words "business" and "by" as well as "bus" are skipped
          $this->assertContains('has to skip "business", "by" and "bus"', $highlightedHTML);
      }
  }