2
0

DocumentTest.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. <?php
  2. /**
  3. * @category Zend
  4. * @package Zend_Search_Lucene
  5. * @subpackage UnitTests
  6. */
  7. /**
  8. * Zend_Search_Lucene_Document
  9. */
  10. require_once 'Zend/Search/Lucene/Document.php';
  11. /**
  12. * Zend_Search_Lucene_Document_Docx
  13. */
  14. require_once 'Zend/Search/Lucene/Document/Docx.php';
  15. /**
  16. * Zend_Search_Lucene_Document_Pptx
  17. */
  18. require_once 'Zend/Search/Lucene/Document/Pptx.php';
  19. /**
  20. * Zend_Search_Lucene_Document_Xlsx
  21. */
  22. require_once 'Zend/Search/Lucene/Document/Xlsx.php';
  23. /**
  24. * PHPUnit test case
  25. */
  26. require_once 'PHPUnit/Framework/TestCase.php';
  27. /**
  28. * @category Zend
  29. * @package Zend_Search_Lucene
  30. * @subpackage UnitTests
  31. */
  32. class Zend_Search_Lucene_DocumentTest extends PHPUnit_Framework_TestCase
  33. {
  34. public function testCreate()
  35. {
  36. $document = new Zend_Search_Lucene_Document();
  37. $this->assertEquals($document->boost, 1);
  38. }
  39. public function testFields()
  40. {
  41. $document = new Zend_Search_Lucene_Document();
  42. $document->addField(Zend_Search_Lucene_Field::Text('title', 'Title'));
  43. $document->addField(Zend_Search_Lucene_Field::Text('annotation', 'Annotation'));
  44. $document->addField(Zend_Search_Lucene_Field::Text('body', 'Document body, document body, document body...'));
  45. $fieldnamesDiffArray = array_diff($document->getFieldNames(), array('title', 'annotation', 'body'));
  46. $this->assertTrue(is_array($fieldnamesDiffArray));
  47. $this->assertEquals(count($fieldnamesDiffArray), 0);
  48. $this->assertEquals($document->title, 'Title');
  49. $this->assertEquals($document->annotation, 'Annotation');
  50. $this->assertEquals($document->body, 'Document body, document body, document body...');
  51. $this->assertEquals($document->getField('title')->value, 'Title');
  52. $this->assertEquals($document->getField('annotation')->value, 'Annotation');
  53. $this->assertEquals($document->getField('body')->value, 'Document body, document body, document body...');
  54. $this->assertEquals($document->getFieldValue('title'), 'Title');
  55. $this->assertEquals($document->getFieldValue('annotation'), 'Annotation');
  56. $this->assertEquals($document->getFieldValue('body'), 'Document body, document body, document body...');
  57. if (PHP_OS == 'AIX') {
  58. return; // tests below here not valid on AIX
  59. }
  60. $wordsWithUmlautsIso88591 = iconv('UTF-8', 'ISO-8859-1', 'Words with umlauts: åãü...');
  61. $document->addField(Zend_Search_Lucene_Field::Text('description', $wordsWithUmlautsIso88591, 'ISO-8859-1'));
  62. $this->assertEquals($document->description, $wordsWithUmlautsIso88591);
  63. $this->assertEquals($document->getFieldUtf8Value('description'), 'Words with umlauts: åãü...');
  64. }
  65. public function testAddFieldMethodChaining()
  66. {
  67. $document = new Zend_Search_Lucene_Document();
  68. $this->assertTrue($document->addField(Zend_Search_Lucene_Field::Text('title', 'Title')) instanceof Zend_Search_Lucene_Document);
  69. $document = new Zend_Search_Lucene_Document();
  70. $document->addField(Zend_Search_Lucene_Field::Text('title', 'Title'))
  71. ->addField(Zend_Search_Lucene_Field::Text('annotation', 'Annotation'))
  72. ->addField(Zend_Search_Lucene_Field::Text('body', 'Document body, document body, document body...'));
  73. }
  74. public function testHtmlHighlighting()
  75. {
  76. $doc = Zend_Search_Lucene_Document_Html::loadHTML('<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>');
  77. $this->assertTrue($doc instanceof Zend_Search_Lucene_Document_Html);
  78. $doc->highlight('document', '#66ffff');
  79. $this->assertTrue(strpos($doc->getHTML(), '<b style="color:black;background-color:#66ffff">Document</b> body.') !== false);
  80. }
  81. public function testHtmlExtendedHighlighting()
  82. {
  83. $doc = Zend_Search_Lucene_Document_Html::loadHTML('<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>');
  84. $this->assertTrue($doc instanceof Zend_Search_Lucene_Document_Html);
  85. $doc->highlightExtended('document',
  86. array('Zend_Search_Lucene_DocumentTest_DocHighlightingContainer',
  87. 'extendedHighlightingCallback'),
  88. array('style="color:black;background-color:#ff66ff"',
  89. '(!!!)'));
  90. $this->assertTrue(strpos($doc->getHTML(), '<b style="color:black;background-color:#ff66ff">Document</b>(!!!) body.') !== false);
  91. }
  92. public function testHtmlWordsHighlighting()
  93. {
  94. $doc = Zend_Search_Lucene_Document_Html::loadHTML('<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>');
  95. $this->assertTrue($doc instanceof Zend_Search_Lucene_Document_Html);
  96. $doc->highlight(array('document', 'body'), '#66ffff');
  97. $highlightedHTML = $doc->getHTML();
  98. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">Document</b>') !== false);
  99. $this->assertTrue(strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">body</b>') !== false);
  100. }
  101. public function testHtmlExtendedHighlightingCorrectWrongHtml()
  102. {
  103. $doc = Zend_Search_Lucene_Document_Html::loadHTML('<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>');
  104. $this->assertTrue($doc instanceof Zend_Search_Lucene_Document_Html);
  105. $doc->highlightExtended('document',
  106. array('Zend_Search_Lucene_DocumentTest_DocHighlightingContainer',
  107. 'extendedHighlightingCallback'),
  108. array('style="color:black;background-color:#ff66ff"',
  109. '<h3>(!!!)' /* Wrong HTML here, <h3> tag is not closed */));
  110. $this->assertTrue(strpos($doc->getHTML(), '<b style="color:black;background-color:#ff66ff">Document</b><h3>(!!!)</h3> body.') !== false);
  111. }
  112. public function testHtmlLinksProcessing()
  113. {
  114. $doc = Zend_Search_Lucene_Document_Html::loadHTMLFile(dirname(__FILE__) . '/_indexSource/_files/contributing.documentation.html', true);
  115. $this->assertTrue($doc instanceof Zend_Search_Lucene_Document_Html);
  116. $this->assertTrue(array_values($doc->getHeaderLinks()) ==
  117. array('index.html', 'contributing.html', 'contributing.bugs.html', 'contributing.wishlist.html'));
  118. $this->assertTrue(array_values($doc->getLinks()) ==
  119. array('contributing.bugs.html',
  120. 'contributing.wishlist.html',
  121. 'developers.documentation.html',
  122. 'faq.translators-revision-tracking.html',
  123. 'index.html',
  124. 'contributing.html'));
  125. }
  126. public function testHtmlNoFollowLinks()
  127. {
  128. $html = '<HTML>'
  129. . '<HEAD><TITLE>Page title</TITLE></HEAD>'
  130. . '<BODY>'
  131. . 'Document body.'
  132. . '<a href="link1.html">Link 1</a>.'
  133. . '<a href="link2.html" rel="nofollow">Link 1</a>.'
  134. . '</BODY>'
  135. . '</HTML>';
  136. $oldNoFollowValue = Zend_Search_Lucene_Document_Html::getExcludeNoFollowLinks();
  137. Zend_Search_Lucene_Document_Html::setExcludeNoFollowLinks(false);
  138. $doc1 = Zend_Search_Lucene_Document_Html::loadHTML($html);
  139. $this->assertTrue($doc1 instanceof Zend_Search_Lucene_Document_Html);
  140. $this->assertTrue(array_values($doc1->getLinks()) == array('link1.html', 'link2.html'));
  141. Zend_Search_Lucene_Document_Html::setExcludeNoFollowLinks(true);
  142. $doc2 = Zend_Search_Lucene_Document_Html::loadHTML($html);
  143. $this->assertTrue($doc2 instanceof Zend_Search_Lucene_Document_Html);
  144. $this->assertTrue(array_values($doc2->getLinks()) == array('link1.html'));
  145. }
  146. public function testDocx()
  147. {
  148. if (!class_exists('ZipArchive')) {
  149. $this->markTestSkipped('ZipArchive class (Zip extension) is not loaded');
  150. }
  151. $docxDocument = Zend_Search_Lucene_Document_Docx::loadDocxFile(dirname(__FILE__) . '/_openXmlDocuments/test.docx', true);
  152. $this->assertTrue($docxDocument instanceof Zend_Search_Lucene_Document_Docx);
  153. $this->assertEquals($docxDocument->getFieldValue('title'), 'Test document');
  154. $this->assertEquals($docxDocument->getFieldValue('description'), 'This is a test document which can be used to demonstrate something.');
  155. $this->assertTrue($docxDocument->getFieldValue('body') != '');
  156. try {
  157. $docxDocument1 = Zend_Search_Lucene_Document_Docx::loadDocxFile(dirname(__FILE__) . '/_openXmlDocuments/dummy.docx', true);
  158. $this->fail('File not readable exception is expected.');
  159. } catch (Zend_Search_Lucene_Document_Exception $e) {
  160. if (strpos($e->getMessage(), 'is not readable') === false) {
  161. // Passthrough exception
  162. throw $e;
  163. }
  164. }
  165. }
  166. public function testPptx()
  167. {
  168. if (!class_exists('ZipArchive')) {
  169. $this->markTestSkipped('ZipArchive class (Zip extension) is not loaded');
  170. }
  171. $pptxDocument = Zend_Search_Lucene_Document_Pptx::loadPptxFile(dirname(__FILE__) . '/_openXmlDocuments/test.pptx', true);
  172. $this->assertTrue($pptxDocument instanceof Zend_Search_Lucene_Document_Pptx);
  173. $this->assertEquals($pptxDocument->getFieldValue('title'), 'Test document');
  174. $this->assertEquals($pptxDocument->getFieldValue('description'), 'This is a test document which can be used to demonstrate something.');
  175. $this->assertTrue($pptxDocument->getFieldValue('body') != '');
  176. }
  177. public function testXlsx()
  178. {
  179. if (!class_exists('ZipArchive')) {
  180. $this->markTestSkipped('ZipArchive class (Zip extension) is not loaded');
  181. }
  182. $xlsxDocument = Zend_Search_Lucene_Document_Xlsx::loadXlsxFile(dirname(__FILE__) . '/_openXmlDocuments/test.xlsx', true);
  183. $this->assertTrue($xlsxDocument instanceof Zend_Search_Lucene_Document_Xlsx);
  184. $this->assertEquals($xlsxDocument->getFieldValue('title'), 'Test document');
  185. $this->assertEquals($xlsxDocument->getFieldValue('description'), 'This is a test document which can be used to demonstrate something.');
  186. $this->assertTrue($xlsxDocument->getFieldValue('body') != '');
  187. $this->assertTrue( strpos($xlsxDocument->getFieldValue('body'), 'ipsum') !== false );
  188. }
  189. }
  190. class Zend_Search_Lucene_DocumentTest_DocHighlightingContainer {
  191. public static function extendedHighlightingCallback($stringToHighlight, $param1, $param2)
  192. {
  193. return '<b ' . $param1 . '>' . $stringToHighlight . '</b>' . $param2;
  194. }
  195. }