DocumentTest.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage UnitTests
  18. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /**
  23. * Zend_Search_Lucene_Document
  24. */
  25. require_once 'Zend/Search/Lucene/Document.php';
  26. /**
  27. * Zend_Search_Lucene_Document_Docx
  28. */
  29. require_once 'Zend/Search/Lucene/Document/Docx.php';
  30. /**
  31. * Zend_Search_Lucene_Document_Pptx
  32. */
  33. require_once 'Zend/Search/Lucene/Document/Pptx.php';
  34. /**
  35. * Zend_Search_Lucene_Document_Xlsx
  36. */
  37. require_once 'Zend/Search/Lucene/Document/Xlsx.php';
  38. /**
  39. * PHPUnit test case
  40. */
  41. require_once 'PHPUnit/Framework/TestCase.php';
  42. /**
  43. * @category Zend
  44. * @package Zend_Search_Lucene
  45. * @subpackage UnitTests
  46. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  47. * @license http://framework.zend.com/license/new-bsd New BSD License
  48. * @group Zend_Search_Lucene
  49. */
  50. class Zend_Search_Lucene_DocumentTest extends PHPUnit_Framework_TestCase
  51. {
  52. public function testCreate()
  53. {
  54. $document = new Zend_Search_Lucene_Document();
  55. $this->assertEquals($document->boost, 1);
  56. }
  57. public function testFields()
  58. {
  59. $document = new Zend_Search_Lucene_Document();
  60. $document->addField(Zend_Search_Lucene_Field::Text('title', 'Title'));
  61. $document->addField(Zend_Search_Lucene_Field::Text('annotation', 'Annotation'));
  62. $document->addField(Zend_Search_Lucene_Field::Text('body', 'Document body, document body, document body...'));
  63. $fieldnamesDiffArray = array_diff($document->getFieldNames(), array('title', 'annotation', 'body'));
  64. $this->assertTrue(is_array($fieldnamesDiffArray));
  65. $this->assertEquals(count($fieldnamesDiffArray), 0);
  66. $this->assertEquals($document->title, 'Title');
  67. $this->assertEquals($document->annotation, 'Annotation');
  68. $this->assertEquals($document->body, 'Document body, document body, document body...');
  69. $this->assertEquals($document->getField('title')->value, 'Title');
  70. $this->assertEquals($document->getField('annotation')->value, 'Annotation');
  71. $this->assertEquals($document->getField('body')->value, 'Document body, document body, document body...');
  72. $this->assertEquals($document->getFieldValue('title'), 'Title');
  73. $this->assertEquals($document->getFieldValue('annotation'), 'Annotation');
  74. $this->assertEquals($document->getFieldValue('body'), 'Document body, document body, document body...');
  75. if (PHP_OS == 'AIX') {
  76. return; // tests below here not valid on AIX
  77. }
  78. $wordsWithUmlautsIso88591 = iconv('UTF-8', 'ISO-8859-1', 'Words with umlauts: åãü...');
  79. $document->addField(Zend_Search_Lucene_Field::Text('description', $wordsWithUmlautsIso88591, 'ISO-8859-1'));
  80. $this->assertEquals($document->description, $wordsWithUmlautsIso88591);
  81. $this->assertEquals($document->getFieldUtf8Value('description'), 'Words with umlauts: åãü...');
  82. }
  83. public function testAddFieldMethodChaining()
  84. {
  85. $document = new Zend_Search_Lucene_Document();
  86. $this->assertTrue($document->addField(Zend_Search_Lucene_Field::Text('title', 'Title')) instanceof Zend_Search_Lucene_Document);
  87. $document = new Zend_Search_Lucene_Document();
  88. $document->addField(Zend_Search_Lucene_Field::Text('title', 'Title'))
  89. ->addField(Zend_Search_Lucene_Field::Text('annotation', 'Annotation'))
  90. ->addField(Zend_Search_Lucene_Field::Text('body', 'Document body, document body, document body...'));
  91. }
  92. public function testHtmlHighlighting()
  93. {
  94. $doc = Zend_Search_Lucene_Document_Html::loadHTML('<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>');
  95. $this->assertTrue($doc instanceof Zend_Search_Lucene_Document_Html);
  96. $doc->highlight('document', '#66ffff');
  97. $this->assertTrue(strpos($doc->getHTML(), '<b style="color:black;background-color:#66ffff">Document</b> body.') !== false);
  98. }
  99. public function testHtmlExtendedHighlighting()
  100. {
  101. $doc = Zend_Search_Lucene_Document_Html::loadHTML('<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>');
  102. $this->assertTrue($doc instanceof Zend_Search_Lucene_Document_Html);
  103. $doc->highlightExtended('document',
  104. array('Zend_Search_Lucene_DocumentTest_DocHighlightingContainer',
  105. 'extendedHighlightingCallback'),
  106. array('style="color:black;background-color:#ff66ff"',
  107. '(!!!)'));
  108. $this->assertTrue(strpos($doc->getHTML(), '<b style="color:black;background-color:#ff66ff">Document</b>(!!!) body.') !== false);
  109. }
  110. public function testHtmlWordsHighlighting()
  111. {
  112. $doc = Zend_Search_Lucene_Document_Html::loadHTML('<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>');
  113. $this->assertTrue($doc instanceof Zend_Search_Lucene_Document_Html);
  114. $doc->highlight(array('document', 'body'), '#66ffff');
  115. $highlightedHTML = $doc->getHTML();
  116. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">Document</b>') !== false);
  117. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">body</b>') !== false);
  118. }
  119. public function testHtmlExtendedHighlightingCorrectWrongHtml()
  120. {
  121. $doc = Zend_Search_Lucene_Document_Html::loadHTML('<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>');
  122. $this->assertTrue($doc instanceof Zend_Search_Lucene_Document_Html);
  123. $doc->highlightExtended('document',
  124. array('Zend_Search_Lucene_DocumentTest_DocHighlightingContainer',
  125. 'extendedHighlightingCallback'),
  126. array('style="color:black;background-color:#ff66ff"',
  127. '<h3>(!!!)' /* Wrong HTML here, <h3> tag is not closed */));
  128. $this->assertTrue(strpos($doc->getHTML(), '<b style="color:black;background-color:#ff66ff">Document</b><h3>(!!!)</h3> body.') !== false);
  129. }
  130. public function testHtmlLinksProcessing()
  131. {
  132. $doc = Zend_Search_Lucene_Document_Html::loadHTMLFile(dirname(__FILE__) . '/_indexSource/_files/contributing.documentation.html', true);
  133. $this->assertTrue($doc instanceof Zend_Search_Lucene_Document_Html);
  134. $this->assertTrue(array_values($doc->getHeaderLinks()) ==
  135. array('index.html', 'contributing.html', 'contributing.bugs.html', 'contributing.wishlist.html'));
  136. $this->assertTrue(array_values($doc->getLinks()) ==
  137. array('contributing.bugs.html',
  138. 'contributing.wishlist.html',
  139. 'developers.documentation.html',
  140. 'faq.translators-revision-tracking.html',
  141. 'index.html',
  142. 'contributing.html'));
  143. }
  144. public function testHtmlNoFollowLinks()
  145. {
  146. $html = '<HTML>'
  147. . '<HEAD><TITLE>Page title</TITLE></HEAD>'
  148. . '<BODY>'
  149. . 'Document body.'
  150. . '<a href="link1.html">Link 1</a>.'
  151. . '<a href="link2.html" rel="nofollow">Link 1</a>.'
  152. . '</BODY>'
  153. . '</HTML>';
  154. $oldNoFollowValue = Zend_Search_Lucene_Document_Html::getExcludeNoFollowLinks();
  155. Zend_Search_Lucene_Document_Html::setExcludeNoFollowLinks(false);
  156. $doc1 = Zend_Search_Lucene_Document_Html::loadHTML($html);
  157. $this->assertTrue($doc1 instanceof Zend_Search_Lucene_Document_Html);
  158. $this->assertTrue(array_values($doc1->getLinks()) == array('link1.html', 'link2.html'));
  159. Zend_Search_Lucene_Document_Html::setExcludeNoFollowLinks(true);
  160. $doc2 = Zend_Search_Lucene_Document_Html::loadHTML($html);
  161. $this->assertTrue($doc2 instanceof Zend_Search_Lucene_Document_Html);
  162. $this->assertTrue(array_values($doc2->getLinks()) == array('link1.html'));
  163. }
  164. public function testDocx()
  165. {
  166. if (!class_exists('ZipArchive')) {
  167. $this->markTestSkipped('ZipArchive class (Zip extension) is not loaded');
  168. }
  169. $docxDocument = Zend_Search_Lucene_Document_Docx::loadDocxFile(dirname(__FILE__) . '/_openXmlDocuments/test.docx', true);
  170. $this->assertTrue($docxDocument instanceof Zend_Search_Lucene_Document_Docx);
  171. $this->assertEquals($docxDocument->getFieldValue('title'), 'Test document');
  172. $this->assertEquals($docxDocument->getFieldValue('description'), 'This is a test document which can be used to demonstrate something.');
  173. $this->assertTrue($docxDocument->getFieldValue('body') != '');
  174. try {
  175. $docxDocument1 = Zend_Search_Lucene_Document_Docx::loadDocxFile(dirname(__FILE__) . '/_openXmlDocuments/dummy.docx', true);
  176. $this->fail('File not readable exception is expected.');
  177. } catch (Zend_Search_Lucene_Document_Exception $e) {
  178. if (strpos($e->getMessage(), 'is not readable') === false) {
  179. // Passthrough exception
  180. throw $e;
  181. }
  182. }
  183. }
  184. public function testPptx()
  185. {
  186. if (!class_exists('ZipArchive')) {
  187. $this->markTestSkipped('ZipArchive class (Zip extension) is not loaded');
  188. }
  189. $pptxDocument = Zend_Search_Lucene_Document_Pptx::loadPptxFile(dirname(__FILE__) . '/_openXmlDocuments/test.pptx', true);
  190. $this->assertTrue($pptxDocument instanceof Zend_Search_Lucene_Document_Pptx);
  191. $this->assertEquals($pptxDocument->getFieldValue('title'), 'Test document');
  192. $this->assertEquals($pptxDocument->getFieldValue('description'), 'This is a test document which can be used to demonstrate something.');
  193. $this->assertTrue($pptxDocument->getFieldValue('body') != '');
  194. }
  195. public function testXlsx()
  196. {
  197. if (!class_exists('ZipArchive')) {
  198. $this->markTestSkipped('ZipArchive class (Zip extension) is not loaded');
  199. }
  200. $xlsxDocument = Zend_Search_Lucene_Document_Xlsx::loadXlsxFile(dirname(__FILE__) . '/_openXmlDocuments/test.xlsx', true);
  201. $this->assertTrue($xlsxDocument instanceof Zend_Search_Lucene_Document_Xlsx);
  202. $this->assertEquals($xlsxDocument->getFieldValue('title'), 'Test document');
  203. $this->assertEquals($xlsxDocument->getFieldValue('description'), 'This is a test document which can be used to demonstrate something.');
  204. $this->assertTrue($xlsxDocument->getFieldValue('body') != '');
  205. $this->assertTrue( strpos($xlsxDocument->getFieldValue('body'), 'ipsum') !== false );
  206. }
  207. }
  208. class Zend_Search_Lucene_DocumentTest_DocHighlightingContainer {
  209. public static function extendedHighlightingCallback($stringToHighlight, $param1, $param2)
  210. {
  211. return '<b ' . $param1 . '>' . $stringToHighlight . '</b>' . $param2;
  212. }
  213. }