LuceneTest.php 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage UnitTests
  18. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  // Register this test case as the PHPUnit entry point unless another
  // script has already claimed that role.
  defined('PHPUnit_MAIN_METHOD')
      || define('PHPUnit_MAIN_METHOD', 'Zend_Search_Lucene_LuceneTest::main');
  25. /**
  26. * Test helper
  27. */
  28. require_once dirname(__FILE__) . '/../../../TestHelper.php';
  29. /**
  30. * Zend_Search_Lucene
  31. */
  32. require_once 'Zend/Search/Lucene.php';
  33. /**
  34. * @category Zend
  35. * @package Zend_Search_Lucene
  36. * @subpackage UnitTests
  37. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  38. * @license http://framework.zend.com/license/new-bsd New BSD License
  39. * @group Zend_Search_Lucene
  40. */
  41. class Zend_Search_Lucene_LuceneTest extends PHPUnit_Framework_TestCase
  42. {
  /**
   * Runs this test case on its own through the PHPUnit text UI runner.
   *
   * @return void
   */
  public static function main()
  {
      PHPUnit_TextUI_TestRunner::run(new PHPUnit_Framework_TestSuite(__CLASS__));
  }
  /**
   * Removes every non-directory entry found directly inside $dirName.
   *
   * Subdirectories (and the "." / ".." entries) are skipped and the
   * directory itself is kept. Nothing happens when $dirName is not an
   * existing directory or when it cannot be opened.
   *
   * @param string $dirName Path of the directory to empty
   * @return void
   */
  private function _clearDirectory($dirName)
  {
      // is_dir() is already false for paths that do not exist
      if (!is_dir($dirName)) {
          return;
      }

      $dir = opendir($dirName);
      if ($dir === false) {
          // The directory could not be opened; a false handle must never
          // be handed on to readdir()
          return;
      }

      // remove files from temporary directory
      while (($file = readdir($dir)) !== false) {
          $path = $dirName . '/' . $file;
          if (!is_dir($path)) {
              // Best-effort cleanup: a file that cannot be deleted is ignored
              @unlink($path);
          }
      }
      closedir($dir);
  }
  /**
   * Zend_Search_Lucene::create() must return an object implementing
   * Zend_Search_Lucene_Interface.
   *
   * @return void
   */
  public function testCreate()
  {
      $indexDir = dirname(__FILE__) . '/_index/_files';

      $index = Zend_Search_Lucene::create($indexDir);
      $isIndex = $index instanceof Zend_Search_Lucene_Interface;
      $this->assertTrue($isIndex);

      // Empty the scratch directory again
      $this->_clearDirectory($indexDir);
  }
  /**
   * Zend_Search_Lucene::open() on the sample index must return an object
   * implementing Zend_Search_Lucene_Interface.
   *
   * @return void
   */
  public function testOpen()
  {
      $sampleDir = dirname(__FILE__) . '/_indexSample/_files';
      $opened = Zend_Search_Lucene::open($sampleDir);

      $isIndex = $opened instanceof Zend_Search_Lucene_Interface;
      $this->assertTrue($isIndex);
  }
  /**
   * Zend_Search_Lucene::open() on the "_nonCompoundIndexFiles" sample must
   * return an object implementing Zend_Search_Lucene_Interface.
   *
   * @return void
   */
  public function testOpenNonCompound()
  {
      $sampleDir = dirname(__FILE__) . '/_indexSample/_nonCompoundIndexFiles';
      $opened = Zend_Search_Lucene::open($sampleDir);

      $isIndex = $opened instanceof Zend_Search_Lucene_Interface;
      $this->assertTrue($isIndex);
  }
  /**
   * The default search field is expected to start out as null and to
   * reflect whatever was last passed to setDefaultSearchField().
   *
   * The setting is static, so the initial value is restored even when the
   * assertion fails; otherwise the changed value would leak into the
   * tests that run afterwards.
   *
   * @return void
   */
  public function testDefaultSearchField()
  {
      $currentDefaultSearchField = Zend_Search_Lucene::getDefaultSearchField();
      $this->assertEquals(null, $currentDefaultSearchField);

      Zend_Search_Lucene::setDefaultSearchField('anotherField');
      try {
          $this->assertEquals('anotherField', Zend_Search_Lucene::getDefaultSearchField());
      } catch (Exception $e) {
          // No "finally" here: restore by hand, then re-raise the failure
          Zend_Search_Lucene::setDefaultSearchField($currentDefaultSearchField);
          throw $e;
      }

      Zend_Search_Lucene::setDefaultSearchField($currentDefaultSearchField);
  }
  /**
   * count() on the sample index is expected to return 10.
   *
   * @return void
   */
  public function testCount()
  {
      $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');

      // assertEquals() takes the expected value first
      $this->assertEquals(10, $index->count());
  }
  /**
   * maxDoc() on the sample index is expected to return 10.
   *
   * @return void
   */
  public function testMaxDoc()
  {
      $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');

      // assertEquals() takes the expected value first
      $this->assertEquals(10, $index->maxDoc());
  }
  /**
   * numDocs() on the sample index is expected to return 9.
   *
   * @return void
   */
  public function testNumDocs()
  {
      $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');

      // assertEquals() takes the expected value first
      $this->assertEquals(9, $index->numDocs());
  }
  /**
   * In the sample index, isDeleted() must be false for document 3 and
   * true for document 6.
   *
   * @return void
   */
  public function testIsDeleted()
  {
      $sampleDir = dirname(__FILE__) . '/_indexSample/_files';
      $index = Zend_Search_Lucene::open($sampleDir);

      $liveDocId = 3;
      $deletedDocId = 6;

      $this->assertFalse($index->isDeleted($liveDocId));
      $this->assertTrue($index->isDeleted($deletedDocId));
  }
  /**
   * A value passed to setMaxBufferedDocs() must be returned by
   * getMaxBufferedDocs(). The previous value is put back afterwards.
   *
   * @return void
   */
  public function testMaxBufferedDocs()
  {
      $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
      $currentMaxBufferedDocs = $index->getMaxBufferedDocs();

      $index->setMaxBufferedDocs(234);
      // assertEquals() takes the expected value first
      $this->assertEquals(234, $index->getMaxBufferedDocs());

      $index->setMaxBufferedDocs($currentMaxBufferedDocs);
  }
  /**
   * A value passed to setMaxMergeDocs() must be returned by
   * getMaxMergeDocs(). The previous value is put back afterwards.
   *
   * @return void
   */
  public function testMaxMergeDocs()
  {
      $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
      $currentMaxMergeDocs = $index->getMaxMergeDocs();

      $index->setMaxMergeDocs(34);
      // assertEquals() takes the expected value first
      $this->assertEquals(34, $index->getMaxMergeDocs());

      $index->setMaxMergeDocs($currentMaxMergeDocs);
  }
  /**
   * A value passed to setMergeFactor() must be returned by
   * getMergeFactor(). The previous value is put back afterwards.
   *
   * @return void
   */
  public function testMergeFactor()
  {
      $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
      $currentMergeFactor = $index->getMergeFactor();

      $index->setMergeFactor(113);
      // assertEquals() takes the expected value first
      $this->assertEquals(113, $index->getMergeFactor());

      $index->setMergeFactor($currentMergeFactor);
  }
  /**
   * Searching the sample index for "submitting" is expected to yield
   * three hits.
   *
   * @return void
   */
  public function testFind()
  {
      $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
      $hits = $index->find('submitting');

      // assertEquals() takes the expected value first
      $this->assertEquals(3, count($hits));
  }
  /**
   * getFieldNames() on the sample index is expected to list "path",
   * "modified" and "contents" (array keys are ignored).
   *
   * @return void
   */
  public function testGetFieldNames()
  {
      $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');

      // assertEquals() instead of assertTrue(a == b): a failure then
      // reports the differing arrays rather than just "false is not true"
      $this->assertEquals(
          array('path', 'modified', 'contents'),
          array_values($index->getFieldNames())
      );
  }
  /**
   * getDocument(3) on the sample index must return a
   * Zend_Search_Lucene_Document whose "path" field is expected to be
   * "IndexSource/about-pear.html".
   *
   * @return void
   */
  public function testGetDocument()
  {
      $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
      $doc = $index->getDocument(3);

      $this->assertTrue($doc instanceof Zend_Search_Lucene_Document);
      // assertEquals() takes the expected value first
      $this->assertEquals('IndexSource/about-pear.html', $doc->path);
  }
  /**
   * hasTerm() must be true for "packages" and false for "nonusedword",
   * both looked up in the "contents" field of the sample index.
   *
   * @return void
   */
  public function testHasTerm()
  {
      $sampleDir = dirname(__FILE__) . '/_indexSample/_files';
      $index = Zend_Search_Lucene::open($sampleDir);

      $presentTerm = new Zend_Search_Lucene_Index_Term('packages', 'contents');
      $this->assertTrue($index->hasTerm($presentTerm));

      $absentTerm = new Zend_Search_Lucene_Index_Term('nonusedword', 'contents');
      $this->assertFalse($index->hasTerm($absentTerm));
  }
  /**
   * termDocs() for "packages" in the "contents" field is expected to
   * return document ids 0, 2, 6, 7 and 8 (array keys are ignored).
   *
   * @return void
   */
  public function testTermDocs()
  {
      $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
      $term = new Zend_Search_Lucene_Index_Term('packages', 'contents');

      // assertEquals() instead of assertTrue(a == b): a failure then
      // reports the differing arrays rather than just "false is not true"
      $this->assertEquals(
          array(0, 2, 6, 7, 8),
          array_values($index->termDocs($term))
      );
  }
  /**
   * termPositions() for "packages" in the "contents" field is expected to
   * return the position lists below, keyed by document id.
   *
   * @return void
   */
  public function testTermPositions()
  {
      $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
      $term = new Zend_Search_Lucene_Index_Term('packages', 'contents');

      $expectedPositions = array(
          0 => array(174),
          2 => array(40, 742),
          6 => array(6, 156, 163),
          7 => array(194),
          8 => array(55, 190, 405),
      );

      // assertEquals() instead of assertTrue(a == b): a failure then
      // reports the differing arrays rather than just "false is not true"
      $this->assertEquals($expectedPositions, $index->termPositions($term));
  }
  /**
   * docFreq() for "packages" in the "contents" field of the sample index
   * is expected to be 5.
   *
   * @return void
   */
  public function testDocFreq()
  {
      $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
      $term = new Zend_Search_Lucene_Index_Term('packages', 'contents');

      // assertEquals() takes the expected value first
      $this->assertEquals(5, $index->docFreq($term));
  }
  /**
   * getSimilarity() must return a Zend_Search_Lucene_Search_Similarity
   * instance.
   *
   * @return void
   */
  public function testGetSimilarity()
  {
      $sampleDir = dirname(__FILE__) . '/_indexSample/_files';
      $index = Zend_Search_Lucene::open($sampleDir);

      $similarity = $index->getSimilarity();
      $this->assertTrue($similarity instanceof Zend_Search_Lucene_Search_Similarity);
  }
  /**
   * norm(3, 'contents') on the sample index must lie within 0.000001 of
   * 0.054688.
   *
   * @return void
   */
  public function testNorm()
  {
      $sampleDir = dirname(__FILE__) . '/_indexSample/_files';
      $index = Zend_Search_Lucene::open($sampleDir);

      $expectedNorm = 0.054688;
      $tolerance = 0.000001;

      // Floating point value: compare with a tolerance, not for equality
      $deviation = abs($index->norm(3, 'contents') - $expectedNorm);
      $this->assertTrue($deviation < $tolerance);
  }
  /**
   * hasDeletions() must be true for the sample index.
   *
   * @return void
   */
  public function testHasDeletions()
  {
      $sampleDir = dirname(__FILE__) . '/_indexSample/_files';
      $index = Zend_Search_Lucene::open($sampleDir);

      $hasDeletions = $index->hasDeletions();
      $this->assertTrue($hasDeletions);
  }
  /**
   * Deleting a document must be visible immediately through isDeleted()
   * and must still be visible after commit() when the index is reopened.
   *
   * The test works on a copy of the sample index placed in the "_files"
   * directory next to this file, so the sample itself is not modified.
   *
   * @return void
   */
  public function testDelete()
  {
      // Copy index sample into _files directory
      $sampleIndexDir = dirname(__FILE__) . '/_indexSample/_files';
      $tempIndexDir = dirname(__FILE__) . '/_files';
      if (!is_dir($tempIndexDir)) {
          mkdir($tempIndexDir);
      }
      $this->_clearDirectory($tempIndexDir);

      $indexDir = opendir($sampleIndexDir);
      if ($indexDir === false) {
          // Fail with a clear message instead of passing false to readdir()
          $this->fail('Unable to open sample index directory: ' . $sampleIndexDir);
      }
      while (($file = readdir($indexDir)) !== false) {
          if (is_dir($sampleIndexDir . '/' . $file)) {
              continue;
          }
          if (!copy($sampleIndexDir . '/' . $file, $tempIndexDir . '/' . $file)) {
              // An incomplete copy would only show up later as a confusing
              // index error, so stop right here
              closedir($indexDir);
              $this->fail('Unable to copy sample index file: ' . $file);
          }
      }
      closedir($indexDir);

      $index = Zend_Search_Lucene::open($tempIndexDir);
      $this->assertFalse($index->isDeleted(2));
      $index->delete(2);
      $this->assertTrue($index->isDeleted(2));
      $index->commit();
      unset($index);

      // Reopen the index to check the deletion was stored
      $index1 = Zend_Search_Lucene::open($tempIndexDir);
      $this->assertTrue($index1->isDeleted(2));
      unset($index1);

      $this->_clearDirectory($tempIndexDir);
  }
  /**
   * Builds a new index from the *.html files in "_indexSource/_files"
   * and checks that the result can be opened again.
   *
   * Each document gets a "path" text field, a "modified" keyword field
   * holding the file's mtime, and a "contents" text field holding the
   * file's bytes, declared as ISO-8859-1.
   *
   * @return void
   */
  public function testAddDocument()
  {
      $indexPath = dirname(__FILE__) . '/_index/_files';
      $index = Zend_Search_Lucene::create($indexPath);

      $indexSourceDir = dirname(__FILE__) . '/_indexSource/_files';
      $dir = opendir($indexSourceDir);
      if ($dir === false) {
          // Fail with a clear message instead of passing false to readdir()
          $this->fail('Unable to open index source directory: ' . $indexSourceDir);
      }
      while (($file = readdir($dir)) !== false) {
          $filePath = $indexSourceDir . '/' . $file;
          if (is_dir($filePath)) {
              continue;
          }
          // Only files with an ".html" suffix (any letter case) are indexed
          if (strcasecmp(substr($file, -5), '.html') != 0) {
              continue;
          }

          // Read the whole file in one call; the previous manual fread()
          // loop compared each block loosely against false and so would
          // have stopped on a block consisting of "0"
          $data = file_get_contents($filePath);
          if ($data === false) {
              closedir($dir);
              $this->fail('Unable to read index source file: ' . $filePath);
          }

          // Create new Document from a file
          $doc = new Zend_Search_Lucene_Document();
          $doc->addField(Zend_Search_Lucene_Field::Text('path', 'IndexSource/' . $file));
          $doc->addField(Zend_Search_Lucene_Field::Keyword('modified', filemtime($filePath)));
          $doc->addField(Zend_Search_Lucene_Field::Text('contents', $data, 'ISO-8859-1'));

          // Add document to the index
          $index->addDocument($doc);
      }
      closedir($dir);
      unset($index);

      $index1 = Zend_Search_Lucene::open($indexPath);
      $this->assertTrue($index1 instanceof Zend_Search_Lucene_Interface);

      $this->_clearDirectory($indexPath);
  }
  /**
   * Builds an index from the *.html files in "_indexSource/_files",
   * deletes the documents whose "path" term is
   * "IndexSource/contributing.html", optimizes the index, and checks that
   * a search for "submitting" on the reopened index is expected to yield
   * three hits.
   *
   * @return void
   */
  public function testOptimize()
  {
      $indexPath = dirname(__FILE__) . '/_index/_files';
      $index = Zend_Search_Lucene::create($indexPath);
      // NOTE(review): a small buffer presumably spreads the documents over
      // several segments so optimize() has something to merge - confirm
      $index->setMaxBufferedDocs(2);

      $indexSourceDir = dirname(__FILE__) . '/_indexSource/_files';
      $dir = opendir($indexSourceDir);
      if ($dir === false) {
          // Fail with a clear message instead of passing false to readdir()
          $this->fail('Unable to open index source directory: ' . $indexSourceDir);
      }
      while (($file = readdir($dir)) !== false) {
          $filePath = $indexSourceDir . '/' . $file;
          if (is_dir($filePath)) {
              continue;
          }
          // Only files with an ".html" suffix (any letter case) are indexed
          if (strcasecmp(substr($file, -5), '.html') != 0) {
              continue;
          }

          // Read the whole file in one call; the previous manual fread()
          // loop compared each block loosely against false and so would
          // have stopped on a block consisting of "0"
          $data = file_get_contents($filePath);
          if ($data === false) {
              closedir($dir);
              $this->fail('Unable to read index source file: ' . $filePath);
          }

          // Create new Document from a file
          $doc = new Zend_Search_Lucene_Document();
          $doc->addField(Zend_Search_Lucene_Field::Keyword('path', 'IndexSource/' . $file));
          $doc->addField(Zend_Search_Lucene_Field::Keyword('modified', filemtime($filePath)));
          $doc->addField(Zend_Search_Lucene_Field::Text('contents', $data, 'ISO-8859-1'));

          // Add document to the index
          $index->addDocument($doc);
      }
      closedir($dir);
      unset($index);

      $index1 = Zend_Search_Lucene::open($indexPath);
      $this->assertTrue($index1 instanceof Zend_Search_Lucene_Interface);

      // Delete every document stored under the "contributing.html" path
      $pathTerm = new Zend_Search_Lucene_Index_Term('IndexSource/contributing.html', 'path');
      $contributingDocs = $index1->termDocs($pathTerm);
      foreach ($contributingDocs as $id) {
          $index1->delete($id);
      }
      $index1->optimize();
      unset($index1);

      $index2 = Zend_Search_Lucene::open($indexPath);
      $this->assertTrue($index2 instanceof Zend_Search_Lucene_Interface);

      $hits = $index2->find('submitting');
      // assertEquals() takes the expected value first
      $this->assertEquals(3, count($hits));

      $this->_clearDirectory($indexPath);
  }
  /**
   * terms() on the sample index is expected to return 607 terms.
   *
   * @return void
   */
  public function testTerms()
  {
      $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');

      // assertEquals() takes the expected value first
      $this->assertEquals(607, count($index->terms()));
  }
  /**
   * Walking the terms stream of the sample index from its start until
   * currentTerm() returns null is expected to visit 607 terms.
   *
   * @return void
   */
  public function testTermsStreamInterface()
  {
      $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');

      $terms = array();
      $index->resetTermsStream();
      while ($index->currentTerm() !== null) {
          $terms[] = $index->currentTerm();
          $index->nextTerm();
      }

      // assertEquals() takes the expected value first
      $this->assertEquals(607, count($terms));
  }
  /**
   * After skipTo() the term "one" in the "contents" field, walking the
   * terms stream until currentTerm() returns null is expected to visit
   * 244 terms.
   *
   * @return void
   */
  public function testTermsStreamInterfaceSkipTo()
  {
      $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');

      $terms = array();
      $index->resetTermsStream();
      $index->skipTo(new Zend_Search_Lucene_Index_Term('one', 'contents'));
      while ($index->currentTerm() !== null) {
          $terms[] = $index->currentTerm();
          $index->nextTerm();
      }

      // assertEquals() takes the expected value first
      $this->assertEquals(244, count($terms));
  }
  /**
   * After skipTo() the term "one" in the "contents" field, currentTerm()
   * followed by two nextTerm() calls is expected to yield the terms
   * "one", "only" and "open".
   *
   * @return void
   */
  public function testTermsStreamInterfaceSkipToTermsRetrieving()
  {
      $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');

      $terms = array();
      $index->resetTermsStream();
      $index->skipTo(new Zend_Search_Lucene_Index_Term('one', 'contents'));
      $terms[] = $index->currentTerm();
      $terms[] = $index->nextTerm();
      $terms[] = $index->nextTerm();
      $index->closeTermsStream();

      $expectedTerms = array(
          new Zend_Search_Lucene_Index_Term('one', 'contents'),
          new Zend_Search_Lucene_Index_Term('only', 'contents'),
          new Zend_Search_Lucene_Index_Term('open', 'contents'),
      );

      // assertEquals() instead of assertTrue(a == b): a failure then
      // reports the differing terms rather than just "false is not true"
      $this->assertEquals($expectedTerms, $terms);
  }
  /**
   * skipTo() on an index whose only document has an empty "contents"
   * field must leave currentTerm() at null.
   *
   * @return void
   */
  public function testTermsStreamInterfaceSkipToTermsRetrievingZeroTermsCase()
  {
      $indexPath = dirname(__FILE__) . '/_index/_files';
      $index = Zend_Search_Lucene::create($indexPath);

      // Zero terms
      $doc = new Zend_Search_Lucene_Document();
      $doc->addField(Zend_Search_Lucene_Field::Text('contents', ''));
      $index->addDocument($doc);
      unset($index);

      $index = Zend_Search_Lucene::open($indexPath);
      $index->resetTermsStream();
      $index->skipTo(new Zend_Search_Lucene_Index_Term('term', 'contents'));

      // assertNull() reports the unexpected value when the check fails
      $this->assertNull($index->currentTerm());

      $index->closeTermsStream();
      $this->_clearDirectory($indexPath);
  }
  /**
   * skipTo() the term "term" on an index whose only document contains
   * the single word "someterm" must leave currentTerm() at null.
   *
   * @return void
   */
  public function testTermsStreamInterfaceSkipToTermsRetrievingOneTermsCase()
  {
      $indexPath = dirname(__FILE__) . '/_index/_files';
      $index = Zend_Search_Lucene::create($indexPath);

      // One term
      $doc = new Zend_Search_Lucene_Document();
      $doc->addField(Zend_Search_Lucene_Field::Text('contents', 'someterm'));
      $index->addDocument($doc);
      unset($index);

      $index = Zend_Search_Lucene::open($indexPath);
      $index->resetTermsStream();
      $index->skipTo(new Zend_Search_Lucene_Index_Term('term', 'contents'));

      // assertNull() reports the unexpected value when the check fails
      $this->assertNull($index->currentTerm());

      $index->closeTermsStream();
      $this->_clearDirectory($indexPath);
  }
  /**
   * skipTo() the term "term" on an index whose only document contains
   * the words "someterm" and "word" must leave the stream positioned on
   * the term "word".
   *
   * @return void
   */
  public function testTermsStreamInterfaceSkipToTermsRetrievingTwoTermsCase()
  {
      $indexPath = dirname(__FILE__) . '/_index/_files';
      $index = Zend_Search_Lucene::create($indexPath);

      // Two terms
      $doc = new Zend_Search_Lucene_Document();
      $doc->addField(Zend_Search_Lucene_Field::Text('contents', 'someterm word'));
      $index->addDocument($doc);
      unset($index);

      $index = Zend_Search_Lucene::open($indexPath);
      $index->resetTermsStream();
      $index->skipTo(new Zend_Search_Lucene_Index_Term('term', 'contents'));

      // assertEquals() instead of assertTrue(a == b): a failure then
      // reports the differing terms rather than just "false is not true"
      $this->assertEquals(
          new Zend_Search_Lucene_Index_Term('word', 'contents'),
          $index->currentTerm()
      );

      $index->closeTermsStream();
      $this->_clearDirectory($indexPath);
  }
  387. }
  // Run the suite directly when this file has been registered as the
  // PHPUnit main method.
  if ('Zend_Search_Lucene_LuceneTest::main' == PHPUnit_MAIN_METHOD) {
      Zend_Search_Lucene_LuceneTest::main();
  }