LuceneTest.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506
  1. <?php
  2. /**
  3. * @category Zend
  4. * @package Zend_Search_Lucene
  5. * @subpackage UnitTests
  6. */
  // Claim the PHPUnit entry point for this test case, unless a surrounding
  // suite (or another script) has already claimed it for itself.
  defined('PHPUnit_MAIN_METHOD')
      || define('PHPUnit_MAIN_METHOD', 'Zend_Search_Lucene_LuceneTest::main');
  10. /**
  11. * Test helper
  12. */
  13. require_once dirname(__FILE__) . '/../../../TestHelper.php';
  14. /**
  15. * Zend_Search_Lucene
  16. */
  17. require_once 'Zend/Search/Lucene.php';
  /**
   * Tests for the Zend_Search_Lucene index entry points (create/open) and the
   * index interface they return.
   *
   * Read-only tests run against the pre-built sample index stored under
   * _indexSample/. Tests that write build or copy an index into a scratch
   * directory (_index/_files or _files) and wipe that directory afterwards.
   *
   * @category Zend
   * @package Zend_Search_Lucene
   * @subpackage UnitTests
   */
  class Zend_Search_Lucene_LuceneTest extends PHPUnit_Framework_TestCase
  {
      /**
       * Runs this test case as a stand-alone suite through the text UI runner.
       *
       * @return void
       */
      public static function main()
      {
          $suite = new PHPUnit_Framework_TestSuite(__CLASS__);
          PHPUnit_TextUI_TestRunner::run($suite);
      }

      /**
       * Path of the read-only sample index used by most tests.
       *
       * @return string
       */
      private function _sampleIndexPath()
      {
          return dirname(__FILE__) . '/_indexSample/_files';
      }

      /**
       * Path of the scratch directory in which tests create throw-away indexes.
       *
       * @return string
       */
      private function _scratchIndexPath()
      {
          return dirname(__FILE__) . '/_index/_files';
      }

      /**
       * Opens the read-only sample index.
       *
       * @return Zend_Search_Lucene_Interface
       */
      private function _openSampleIndex()
      {
          return Zend_Search_Lucene::open($this->_sampleIndexPath());
      }

      /**
       * Removes every regular file directly inside $dirName.
       *
       * Sub-directories are left alone and the directory itself is kept.
       * Nothing happens when $dirName does not exist or is not a directory.
       *
       * @param string $dirName directory to empty
       * @return void
       */
      private function _clearDirectory($dirName)
      {
          if (!file_exists($dirName) || !is_dir($dirName)) {
              return;
          }

          $dir = opendir($dirName);
          if ($dir === false) {
              // Unreadable directory: bail out instead of entering the loop
              // below, because readdir() on an invalid handle does not
              // reliably return the `false` sentinel the loop waits for.
              return;
          }

          // remove files from temporary directory
          while (($file = readdir($dir)) !== false) {
              if (!is_dir($dirName . '/' . $file)) {
                  // Best effort: a file that cannot be removed is skipped silently.
                  @unlink($dirName . '/' . $file);
              }
          }
          closedir($dir);
      }

      /**
       * Adds one document per *.html file found in _indexSource/_files to $index.
       *
       * Each document gets three fields: 'path' ("IndexSource/<file name>"),
       * 'modified' (the file's mtime, as a Keyword field) and 'contents' (the
       * raw file bytes, declared as ISO-8859-1 text).
       *
       * @param Zend_Search_Lucene_Interface $index         index to add the documents to
       * @param boolean                      $pathAsKeyword true to store 'path' with
       *                                                    Field::Keyword(), false to store
       *                                                    it with Field::Text()
       * @return void
       */
      private function _indexSourceFiles($index, $pathAsKeyword)
      {
          $indexSourceDir = dirname(__FILE__) . '/_indexSource/_files';

          $dir = opendir($indexSourceDir);
          if ($dir === false) {
              $this->fail('Unable to open index source directory: ' . $indexSourceDir);
          }

          while (($file = readdir($dir)) !== false) {
              $filePath = $indexSourceDir . '/' . $file;

              if (is_dir($filePath)) {
                  continue;
              }
              // Only *.html files (extension matched case-insensitively) are indexed.
              if (strcasecmp(substr($file, -5), '.html') != 0) {
                  continue;
              }

              // Create new Document from a file
              $doc = new Zend_Search_Lucene_Document();

              $pathValue = 'IndexSource/' . $file;
              if ($pathAsKeyword) {
                  $doc->addField(Zend_Search_Lucene_Field::Keyword('path', $pathValue));
              } else {
                  $doc->addField(Zend_Search_Lucene_Field::Text('path', $pathValue));
              }
              $doc->addField(Zend_Search_Lucene_Field::Keyword('modified', filemtime($filePath)));

              // Read the whole file in one call. A manual fread() loop that
              // stops on a loosely "false" block would also stop on a block
              // consisting of the single character "0".
              $data = file_get_contents($filePath);
              if ($data === false) {
                  closedir($dir);
                  $this->fail('Unable to read index source file: ' . $filePath);
              }
              $doc->addField(Zend_Search_Lucene_Field::Text('contents', $data, 'ISO-8859-1'));

              // Add document to the index
              $index->addDocument($doc);
          }
          closedir($dir);
      }

      /**
       * create() must hand back an object implementing the index interface.
       */
      public function testCreate()
      {
          $index = Zend_Search_Lucene::create($this->_scratchIndexPath());
          $this->assertTrue($index instanceof Zend_Search_Lucene_Interface);

          // Release the index before wiping its directory, as testDelete() and
          // testAddDocument() do. NOTE(review): presumably the index may still
          // write or unlock files when it is destroyed - confirm.
          unset($index);
          $this->_clearDirectory($this->_scratchIndexPath());
      }

      /**
       * open() on the sample index must return an index interface object.
       */
      public function testOpen()
      {
          $index = $this->_openSampleIndex();
          $this->assertTrue($index instanceof Zend_Search_Lucene_Interface);
      }

      /**
       * open() must also accept the sample stored under _nonCompoundIndexFiles.
       */
      public function testOpenNonCompound()
      {
          $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_nonCompoundIndexFiles');
          $this->assertTrue($index instanceof Zend_Search_Lucene_Interface);
      }

      /**
       * The static default search field starts out empty and can be replaced.
       */
      public function testDefaultSearchField()
      {
          $currentDefaultSearchField = Zend_Search_Lucene::getDefaultSearchField();
          $this->assertEquals(null, $currentDefaultSearchField);

          Zend_Search_Lucene::setDefaultSearchField('anotherField');
          $changedDefaultSearchField = Zend_Search_Lucene::getDefaultSearchField();

          // Restore the static setting BEFORE asserting, so a failing assertion
          // cannot leak 'anotherField' into the tests that run afterwards.
          Zend_Search_Lucene::setDefaultSearchField($currentDefaultSearchField);

          $this->assertEquals('anotherField', $changedDefaultSearchField);
      }

      /**
       * count() on the sample index is expected to report 10.
       */
      public function testCount()
      {
          $index = $this->_openSampleIndex();
          $this->assertEquals(10, $index->count());
      }

      /**
       * maxDoc() on the sample index is expected to report 10.
       */
      public function testMaxDoc()
      {
          $index = $this->_openSampleIndex();
          $this->assertEquals(10, $index->maxDoc());
      }

      /**
       * numDocs() on the sample index is expected to report 9, one less than
       * count()/maxDoc() (the sample contains a deleted document, see
       * testIsDeleted()).
       */
      public function testNumDocs()
      {
          $index = $this->_openSampleIndex();
          $this->assertEquals(9, $index->numDocs());
      }

      /**
       * In the sample index document 3 is live and document 6 is deleted.
       */
      public function testIsDeleted()
      {
          $index = $this->_openSampleIndex();
          $this->assertFalse($index->isDeleted(3));
          $this->assertTrue($index->isDeleted(6));
      }

      /**
       * setMaxBufferedDocs() must be reflected by getMaxBufferedDocs().
       */
      public function testMaxBufferedDocs()
      {
          $index = $this->_openSampleIndex();

          $currentMaxBufferedDocs = $index->getMaxBufferedDocs();

          $index->setMaxBufferedDocs(234);
          $this->assertEquals(234, $index->getMaxBufferedDocs());

          $index->setMaxBufferedDocs($currentMaxBufferedDocs);
      }

      /**
       * setMaxMergeDocs() must be reflected by getMaxMergeDocs().
       */
      public function testMaxMergeDocs()
      {
          $index = $this->_openSampleIndex();

          $currentMaxMergeDocs = $index->getMaxMergeDocs();

          $index->setMaxMergeDocs(34);
          $this->assertEquals(34, $index->getMaxMergeDocs());

          $index->setMaxMergeDocs($currentMaxMergeDocs);
      }

      /**
       * setMergeFactor() must be reflected by getMergeFactor().
       */
      public function testMergeFactor()
      {
          $index = $this->_openSampleIndex();

          $currentMergeFactor = $index->getMergeFactor();

          $index->setMergeFactor(113);
          $this->assertEquals(113, $index->getMergeFactor());

          $index->setMergeFactor($currentMergeFactor);
      }

      /**
       * Searching the sample index for 'submitting' must yield 3 hits.
       */
      public function testFind()
      {
          $index = $this->_openSampleIndex();

          $hits = $index->find('submitting');
          $this->assertEquals(3, count($hits));
      }

      /**
       * The sample index exposes exactly the fields path, modified, contents.
       */
      public function testGetFieldNames()
      {
          $index = $this->_openSampleIndex();

          // array_values(): only the names and their order matter, not the keys.
          $this->assertEquals(
              array('path', 'modified', 'contents'),
              array_values($index->getFieldNames())
          );
      }

      /**
       * getDocument() returns a document object carrying the stored path.
       */
      public function testGetDocument()
      {
          $index = $this->_openSampleIndex();

          $doc = $index->getDocument(3);

          $this->assertTrue($doc instanceof Zend_Search_Lucene_Document);
          $this->assertEquals('IndexSource/about-pear.html', $doc->path);
      }

      /**
       * hasTerm() distinguishes a term present in 'contents' from an absent one.
       */
      public function testHasTerm()
      {
          $index = $this->_openSampleIndex();

          $this->assertTrue($index->hasTerm(new Zend_Search_Lucene_Index_Term('packages', 'contents')));
          $this->assertFalse($index->hasTerm(new Zend_Search_Lucene_Index_Term('nonusedword', 'contents')));
      }

      /**
       * termDocs() lists the ids of the documents containing the term.
       */
      public function testTermDocs()
      {
          $index = $this->_openSampleIndex();

          $this->assertEquals(
              array(0, 2, 6, 7, 8),
              array_values($index->termDocs(new Zend_Search_Lucene_Index_Term('packages', 'contents')))
          );
      }

      /**
       * termPositions() maps each matching document id to the term positions.
       */
      public function testTermPositions()
      {
          $index = $this->_openSampleIndex();

          $this->assertEquals(
              array(0 => array(174),
                    2 => array(40, 742),
                    6 => array(6, 156, 163),
                    7 => array(194),
                    8 => array(55, 190, 405)),
              $index->termPositions(new Zend_Search_Lucene_Index_Term('packages', 'contents'))
          );
      }

      /**
       * docFreq() reports in how many documents the term occurs.
       */
      public function testDocFreq()
      {
          $index = $this->_openSampleIndex();

          $this->assertEquals(5, $index->docFreq(new Zend_Search_Lucene_Index_Term('packages', 'contents')));
      }

      /**
       * getSimilarity() returns a similarity object.
       */
      public function testGetSimilarity()
      {
          $index = $this->_openSampleIndex();

          $this->assertTrue($index->getSimilarity() instanceof Zend_Search_Lucene_Search_Similarity);
      }

      /**
       * norm() for document 3 / field 'contents' is 0.054688 within 1e-6.
       */
      public function testNorm()
      {
          $index = $this->_openSampleIndex();

          // Floating point result: compare with a tolerance, not for equality.
          $this->assertTrue(abs($index->norm(3, 'contents') - 0.054688) < 0.000001);
      }

      /**
       * The sample index reports that it contains deletions.
       */
      public function testHasDeletions()
      {
          $index = $this->_openSampleIndex();

          $this->assertTrue($index->hasDeletions());
      }

      /**
       * A deletion is visible immediately and is still there after commit()
       * and re-opening the index.
       */
      public function testDelete()
      {
          // Copy index sample into _files directory, so the shared sample
          // index itself is never modified.
          $sampleIndexDir = $this->_sampleIndexPath();
          $tempIndexDir   = dirname(__FILE__) . '/_files';

          if (!is_dir($tempIndexDir)) {
              mkdir($tempIndexDir);
          }
          $this->_clearDirectory($tempIndexDir);

          $indexDir = opendir($sampleIndexDir);
          if ($indexDir === false) {
              $this->fail('Unable to open sample index directory: ' . $sampleIndexDir);
          }
          while (($file = readdir($indexDir)) !== false) {
              if (!is_dir($sampleIndexDir . '/' . $file)) {
                  copy($sampleIndexDir . '/' . $file, $tempIndexDir . '/' . $file);
              }
          }
          closedir($indexDir);

          $index = Zend_Search_Lucene::open($tempIndexDir);

          $this->assertFalse($index->isDeleted(2));
          $index->delete(2);
          $this->assertTrue($index->isDeleted(2));

          $index->commit();
          unset($index);

          // Re-open from disk to prove the deletion was persisted.
          $index1 = Zend_Search_Lucene::open($tempIndexDir);
          $this->assertTrue($index1->isDeleted(2));
          unset($index1);

          $this->_clearDirectory($tempIndexDir);
      }

      /**
       * Documents added to a fresh index leave an index that can be re-opened.
       */
      public function testAddDocument()
      {
          $index = Zend_Search_Lucene::create($this->_scratchIndexPath());

          // 'path' is stored as a Text field in this test.
          $this->_indexSourceFiles($index, false);

          unset($index);

          $index1 = Zend_Search_Lucene::open($this->_scratchIndexPath());
          $this->assertTrue($index1 instanceof Zend_Search_Lucene_Interface);

          // Release the index before wiping its directory.
          unset($index1);
          $this->_clearDirectory($this->_scratchIndexPath());
      }

      /**
       * After deleting one document and calling optimize(), the re-opened
       * index still answers a search with the expected number of hits.
       */
      public function testOptimize()
      {
          $index = Zend_Search_Lucene::create($this->_scratchIndexPath());

          $index->setMaxBufferedDocs(2);

          // 'path' is stored as a Keyword field here; the test looks documents
          // up by their complete path term below.
          $this->_indexSourceFiles($index, true);

          unset($index);

          $index1 = Zend_Search_Lucene::open($this->_scratchIndexPath());
          $this->assertTrue($index1 instanceof Zend_Search_Lucene_Interface);

          $pathTerm = new Zend_Search_Lucene_Index_Term('IndexSource/contributing.html', 'path');
          $contributingDocs = $index1->termDocs($pathTerm);
          foreach ($contributingDocs as $id) {
              $index1->delete($id);
          }
          $index1->optimize();
          unset($index1);

          $index2 = Zend_Search_Lucene::open($this->_scratchIndexPath());
          $this->assertTrue($index2 instanceof Zend_Search_Lucene_Interface);

          $hits = $index2->find('submitting');
          $this->assertEquals(3, count($hits));

          // Release the index before wiping its directory.
          unset($hits, $index2);
          $this->_clearDirectory($this->_scratchIndexPath());
      }

      /**
       * terms() on the sample index returns 607 terms.
       */
      public function testTerms()
      {
          $index = $this->_openSampleIndex();

          $this->assertEquals(607, count($index->terms()));
      }

      /**
       * Walking the terms stream from its start visits the same 607 terms.
       */
      public function testTermsStreamInterface()
      {
          $index = $this->_openSampleIndex();

          $terms = array();

          $index->resetTermsStream();
          // currentTerm() === null marks the end of the stream.
          while ($index->currentTerm() !== null) {
              $terms[] = $index->currentTerm();
              $index->nextTerm();
          }

          $this->assertEquals(607, count($terms));
      }

      /**
       * After skipTo('one' in 'contents') 244 terms remain in the stream.
       */
      public function testTermsStreamInterfaceSkipTo()
      {
          $index = $this->_openSampleIndex();

          $terms = array();

          $index->resetTermsStream();
          $index->skipTo(new Zend_Search_Lucene_Index_Term('one', 'contents'));

          while ($index->currentTerm() !== null) {
              $terms[] = $index->currentTerm();
              $index->nextTerm();
          }

          $this->assertEquals(244, count($terms));
      }

      /**
       * skipTo() positions the stream on the requested term; nextTerm()
       * returns the terms that follow it.
       */
      public function testTermsStreamInterfaceSkipToTermsRetrieving()
      {
          $index = $this->_openSampleIndex();

          $terms = array();

          $index->resetTermsStream();
          $index->skipTo(new Zend_Search_Lucene_Index_Term('one', 'contents'));

          $terms[] = $index->currentTerm();
          $terms[] = $index->nextTerm();
          $terms[] = $index->nextTerm();

          $index->closeTermsStream();

          $this->assertEquals(
              array(new Zend_Search_Lucene_Index_Term('one', 'contents'),
                    new Zend_Search_Lucene_Index_Term('only', 'contents'),
                    new Zend_Search_Lucene_Index_Term('open', 'contents'),
                   ),
              $terms
          );
      }

      /**
       * skipTo() on an index whose only document has empty contents leaves
       * the stream without a current term.
       */
      public function testTermsStreamInterfaceSkipToTermsRetrievingZeroTermsCase()
      {
          $index = Zend_Search_Lucene::create($this->_scratchIndexPath());

          // Zero terms
          $doc = new Zend_Search_Lucene_Document();
          $doc->addField(Zend_Search_Lucene_Field::Text('contents', ''));
          $index->addDocument($doc);

          unset($index);

          $index = Zend_Search_Lucene::open($this->_scratchIndexPath());

          $index->resetTermsStream();
          $index->skipTo(new Zend_Search_Lucene_Index_Term('term', 'contents'));

          $this->assertNull($index->currentTerm());

          $index->closeTermsStream();

          // Release the index before wiping its directory.
          unset($index);
          $this->_clearDirectory($this->_scratchIndexPath());
      }

      /**
       * With the single term 'someterm' indexed, skipTo('term') finds
       * nothing at or after the target, so there is no current term.
       */
      public function testTermsStreamInterfaceSkipToTermsRetrievingOneTermsCase()
      {
          $index = Zend_Search_Lucene::create($this->_scratchIndexPath());

          // One term
          $doc = new Zend_Search_Lucene_Document();
          $doc->addField(Zend_Search_Lucene_Field::Text('contents', 'someterm'));
          $index->addDocument($doc);

          unset($index);

          $index = Zend_Search_Lucene::open($this->_scratchIndexPath());

          $index->resetTermsStream();
          $index->skipTo(new Zend_Search_Lucene_Index_Term('term', 'contents'));

          $this->assertNull($index->currentTerm());

          $index->closeTermsStream();

          // Release the index before wiping its directory.
          unset($index);
          $this->_clearDirectory($this->_scratchIndexPath());
      }

      /**
       * With 'someterm' and 'word' indexed, skipTo('term') lands on 'word',
       * the first term at or after the target.
       */
      public function testTermsStreamInterfaceSkipToTermsRetrievingTwoTermsCase()
      {
          $index = Zend_Search_Lucene::create($this->_scratchIndexPath());

          // Two terms
          $doc = new Zend_Search_Lucene_Document();
          $doc->addField(Zend_Search_Lucene_Field::Text('contents', 'someterm word'));
          $index->addDocument($doc);

          unset($index);

          $index = Zend_Search_Lucene::open($this->_scratchIndexPath());

          $index->resetTermsStream();
          $index->skipTo(new Zend_Search_Lucene_Index_Term('term', 'contents'));

          $this->assertEquals(
              new Zend_Search_Lucene_Index_Term('word', 'contents'),
              $index->currentTerm()
          );

          $index->closeTermsStream();

          // Release the index before wiping its directory.
          unset($index);
          $this->_clearDirectory($this->_scratchIndexPath());
      }
  }
  // Run the suite right away only when this file owns the PHPUnit entry point,
  // i.e. when it is executed directly rather than loaded by a larger suite.
  if ('Zend_Search_Lucene_LuceneTest::main' == PHPUnit_MAIN_METHOD) {
      Zend_Search_Lucene_LuceneTest::main();
  }