SearchHighlightTest.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage UnitTests
  18. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /**
  23. * Zend_Search_Lucene
  24. */
  25. require_once 'Zend/Search/Lucene.php';
  26. /**
  27. * PHPUnit test case
  28. */
  29. require_once 'PHPUnit/Framework/TestCase.php';
  30. /**
  31. * @category Zend
  32. * @package Zend_Search_Lucene
  33. * @subpackage UnitTests
  34. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  35. * @license http://framework.zend.com/license/new-bsd New BSD License
  36. * @group Zend_Search_Lucene
  37. */
  class Zend_Search_Lucene_SearchHighlightTest extends PHPUnit_Framework_TestCase
  {
      /**
       * Background colour the tests expect for the first highlighted query part.
       */
      const COLOR_FIRST = '#66ffff';

      /**
       * Background colour the tests expect for the second highlighted query part.
       */
      const COLOR_SECOND = '#ff66ff';

      /**
       * Background colour the tests expect for the third highlighted query part.
       */
      const COLOR_THIRD = '#ffff66';

      /**
       * Wildcard pattern minimum prefix length saved in setUp()
       *
       * @var integer
       */
      protected $_wildcardMinPrefix;

      /**
       * Fuzzy search default prefix length saved in setUp()
       *
       * @var integer
       */
      protected $_defaultPrefixLength;

      /**
       * Remembers the current wildcard minimum prefix length and fuzzy default
       * prefix length, then sets both to 0 for the duration of the test.
       *
       * @return void
       */
      public function setUp()
      {
          $this->_wildcardMinPrefix = Zend_Search_Lucene_Search_Query_Wildcard::getMinPrefixLength();
          Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength(0);

          $this->_defaultPrefixLength = Zend_Search_Lucene_Search_Query_Fuzzy::getDefaultPrefixLength();
          Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength(0);
      }

      /**
       * Restores the prefix length settings that setUp() replaced.
       *
       * @return void
       */
      public function tearDown()
      {
          Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength($this->_wildcardMinPrefix);
          Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength($this->_defaultPrefixLength);
      }

      /**
       * Wraps body text into the minimal HTML page used as highlighting input.
       *
       * @param string $bodyText Text placed between <BODY> and </BODY>
       * @return string
       */
      protected function _htmlPage($bodyText)
      {
          return '<HTML>'
               . '<HEAD><TITLE>Page title</TITLE></HEAD>'
               . '<BODY>'
               . $bodyText
               . '</BODY>'
               . '</HTML>';
      }

      /**
       * Builds the markup the tests expect around a highlighted word.
       *
       * @param string $word  Word as it appears in the source text
       * @param string $color Expected background colour (one of the COLOR_* constants)
       * @return string
       */
      protected function _highlighted($word, $color)
      {
          return '<b style="color:black;background-color:' . $color . '">' . $word . '</b>';
      }

      /**
       * A phrase plus a term must be highlighted in a plain HTML fragment,
       * the phrase words in the first colour and the term in the second.
       *
       * @return void
       */
      public function testHtmlFragmentHighlightMatches()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"The Right Way" AND text:go');

          $highlightedHtmlFragment = $query->htmlFragmentHighlightMatches('Text highlighting using Zend_Search_Lucene is the right way to go!');

          $expected = 'Text highlighting using Zend_Search_Lucene is '
                    . $this->_highlighted('the', self::COLOR_FIRST) . ' '
                    . $this->_highlighted('right', self::COLOR_FIRST) . ' '
                    . $this->_highlighted('way', self::COLOR_FIRST) . ' to '
                    . $this->_highlighted('go', self::COLOR_SECOND) . '!';

          // PHPUnit expects (expected, actual); the reverse order mislabels failure diffs.
          $this->assertEquals($expected, $highlightedHtmlFragment);
      }

      // NOTE(review): the two Cyrillic tests below are disabled. Their expected
      // strings are copied from the English test above and cannot match the
      // Cyrillic input; they have to be replaced before the tests are re-enabled.
      //
      // public function testHtmlFragmentHighlightMatchesCyrillic()
      // {
      //     $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"некоторый текст" AND text:поехали');
      //
      //     $highlightedHtmlFragment = $query->htmlFragmentHighlightMatches('Подсвечиваем некоторый текст с использованием Zend_Search_Lucene. Поехали!');
      //
      //     $this->assertEquals($highlightedHtmlFragment,
      //         'Text highlighting using Zend_Search_Lucene is <b style="color:black;background-color:#66ffff">the</b> <b style="color:black;background-color:#66ffff">right</b> <b style="color:black;background-color:#66ffff">way</b> to <b style="color:black;background-color:#ff66ff">go</b>!');
      // }
      //
      // public function testHtmlFragmentHighlightMatchesCyrillicWindows()
      // {
      //     $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"Некоторый текст" AND text:поехали');
      //
      //     $highlightedHtmlFragment =
      //         $query->htmlFragmentHighlightMatches(iconv('UTF-8',
      //                                                    'Windows-1251',
      //                                                    'Подсвечиваем некоторый текст с использованием Zend_Search_Lucene. Поехали!'),
      //                                              'Windows-1251');
      //
      //     $this->assertEquals($highlightedHtmlFragment,
      //         'Text highlighting using Zend_Search_Lucene is <b style="color:black;background-color:#66ffff">the</b> <b style="color:black;background-color:#66ffff">right</b> <b style="color:black;background-color:#66ffff">way</b> to <b style="color:black;background-color:#ff66ff">go</b>!');
      // }

      /**
       * A phrase plus a term must be highlighted inside a full HTML document.
       *
       * @return void
       */
      public function testHighlightPhrasePlusTerm()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('title:"The Right Way" AND text:go');

          $html = $this->_htmlPage('Text highlighting using Zend_Search_Lucene is the right way to go!');

          $highlightedHTML = $query->highlightMatches($html);

          $this->assertContains($this->_highlighted('the', self::COLOR_FIRST), $highlightedHTML);
          $this->assertContains($this->_highlighted('right', self::COLOR_FIRST), $highlightedHTML);
          $this->assertContains($this->_highlighted('way', self::COLOR_FIRST), $highlightedHTML);
          $this->assertContains($this->_highlighted('go', self::COLOR_SECOND), $highlightedHTML);
      }

      /**
       * Required terms are highlighted, each in its own colour, while
       * prohibited terms ("using", "right") stay untouched.
       *
       * @return void
       */
      public function testHighlightMultitermWithProhibitedTerms()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('+text +highlighting -using -right +go');

          $html = $this->_htmlPage('Text highlighting using Zend_Search_Lucene is the right way to go!');

          $highlightedHTML = $query->highlightMatches($html);

          $this->assertContains($this->_highlighted('Text', self::COLOR_FIRST), $highlightedHTML);
          $this->assertContains($this->_highlighted('highlighting', self::COLOR_SECOND), $highlightedHTML);
          // The stretch containing the prohibited terms must survive without markup
          $this->assertContains('using Zend_Search_Lucene is the right way to', $highlightedHTML);
          $this->assertContains($this->_highlighted('go', self::COLOR_THIRD), $highlightedHTML);
      }

      /**
       * The single-character wildcard "te?t" highlights "Test" and "text" only.
       *
       * @return void
       */
      public function testHighlightWildcard1()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('te?t');

          $html = $this->_htmlPage('Test of text highlighting using wildcard query with question mark. Testing...');

          $highlightedHTML = $query->highlightMatches($html);

          $this->assertContains($this->_highlighted('Test', self::COLOR_FIRST), $highlightedHTML);
          $this->assertContains($this->_highlighted('text', self::COLOR_FIRST), $highlightedHTML);
          // Check that 'Testing' word is not highlighted
          $this->assertContains('mark. Testing...', $highlightedHTML);
      }

      /**
       * The wildcard "te?t*" additionally highlights "Testing".
       *
       * @return void
       */
      public function testHighlightWildcard2()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('te?t*');

          $html = $this->_htmlPage('Test of text highlighting using wildcard query with question mark. Testing...');

          $highlightedHTML = $query->highlightMatches($html);

          $this->assertContains($this->_highlighted('Test', self::COLOR_FIRST), $highlightedHTML);
          $this->assertContains($this->_highlighted('text', self::COLOR_FIRST), $highlightedHTML);
          // Check that 'Testing' word is also highlighted
          $this->assertContains($this->_highlighted('Testing', self::COLOR_FIRST), $highlightedHTML);
      }

      /**
       * The fuzzy query "test~" with default similarity highlights "Test",
       * "test" and "text" but leaves the remaining listed words alone.
       *
       * @return void
       */
      public function testHighlightFuzzy1()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('test~');

          $html = $this->_htmlPage('Test of text fuzzy search terms highlighting. '
                                 . 'Words: test, text, latest, left, list, next, ...');

          $highlightedHTML = $query->highlightMatches($html);

          $this->assertContains($this->_highlighted('Test', self::COLOR_FIRST), $highlightedHTML);
          $this->assertContains($this->_highlighted('test', self::COLOR_FIRST), $highlightedHTML);
          $this->assertContains($this->_highlighted('text', self::COLOR_FIRST), $highlightedHTML);
          // Check that other words are not highlighted
          $this->assertContains('latest, left, list, next, ...', $highlightedHTML);
      }

      /**
       * The looser fuzzy query "test~0.4" highlights every listed word.
       *
       * @return void
       */
      public function testHighlightFuzzy2()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('test~0.4');

          $html = $this->_htmlPage('Test of text fuzzy search terms highlighting. '
                                 . 'Words: test, text, latest, left, list, next, ...');

          $highlightedHTML = $query->highlightMatches($html);

          $this->assertContains($this->_highlighted('Test', self::COLOR_FIRST), $highlightedHTML);
          $this->assertContains($this->_highlighted('test', self::COLOR_FIRST), $highlightedHTML);
          // Check that other words are also highlighted
          $this->assertContains($this->_highlighted('text', self::COLOR_FIRST), $highlightedHTML);
          $this->assertContains($this->_highlighted('latest', self::COLOR_FIRST), $highlightedHTML);
          $this->assertContains($this->_highlighted('left', self::COLOR_FIRST), $highlightedHTML);
          $this->assertContains($this->_highlighted('list', self::COLOR_FIRST), $highlightedHTML);
          $this->assertContains($this->_highlighted('next', self::COLOR_FIRST), $highlightedHTML);
      }

      /**
       * The inclusive range [business TO by] highlights both boundary words
       * and the words between them, and skips "bus".
       *
       * @return void
       */
      public function testHighlightRangeInclusive()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('[business TO by]');

          $html = $this->_htmlPage('Test of text using range query. '
                                 . 'It has to match "business", "by", "buss" and "but" words, but has to skip "bus"');

          $highlightedHTML = $query->highlightMatches($html);

          $this->assertContains($this->_highlighted('business', self::COLOR_FIRST), $highlightedHTML);
          $this->assertContains($this->_highlighted('by', self::COLOR_FIRST), $highlightedHTML);
          $this->assertContains($this->_highlighted('buss', self::COLOR_FIRST), $highlightedHTML);
          $this->assertContains($this->_highlighted('but', self::COLOR_FIRST), $highlightedHTML);
          // Check that "bus" word is skipped
          $this->assertContains('has to skip "bus"', $highlightedHTML);
      }

      /**
       * The exclusive range {business TO by} highlights only the words strictly
       * between the boundaries and skips the boundary words as well as "bus".
       *
       * @return void
       */
      public function testHighlightRangeNonInclusive()
      {
          $query = Zend_Search_Lucene_Search_QueryParser::parse('{business TO by}');

          $html = $this->_htmlPage('Test of text using range query. '
                                 . 'It has to match "buss" and "but" words, but has to skip "business", "by" and "bus"');

          $highlightedHTML = $query->highlightMatches($html);

          $this->assertContains($this->_highlighted('buss', self::COLOR_FIRST), $highlightedHTML);
          $this->assertContains($this->_highlighted('but', self::COLOR_FIRST), $highlightedHTML);
          // Check that "business", "by" and "bus" words are skipped
          $this->assertContains('has to skip "business", "by" and "bus"', $highlightedHTML);
      }
  }