MultiSearcher.php 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. * @version $Id$
  20. */
  21. /** @see Zend_Search_Lucene_TermStreamsPriorityQueue */
  22. require_once 'Zend/Search/Lucene/TermStreamsPriorityQueue.php';
  23. /** @see Zend_Search_Lucene_Interface */
  24. require_once 'Zend/Search/Lucene/Interface.php';
  25. /**
  26. * Multi-searcher allows searching through several independent indexes.
  27. *
  28. * @category Zend
  29. * @package Zend_Search_Lucene
  30. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  31. * @license http://framework.zend.com/license/new-bsd New BSD License
  32. */
  33. class Zend_Search_Lucene_Interface_MultiSearcher implements Zend_Search_Lucene_Interface
  34. {
  /**
   * Sub-indices this searcher works across.
   * Every element is expected to be a Zend_Search_Lucene_Interface object.
   *
   * @var array
   */
  protected $_indices;

  /**
   * Creates a multi-searcher over the given sub-indices.
   *
   * The list is stored first and then validated element by element.
   *
   * @param array $indices List of sub-index objects to search through
   * @throws Zend_Search_Lucene_Exception If an element does not implement
   *                                      Zend_Search_Lucene_Interface
   */
  public function __construct($indices = array())
  {
      $this->_indices = $indices;

      foreach ($this->_indices as $candidate) {
          if ($candidate instanceof Zend_Search_Lucene_Interface) {
              continue;
          }

          // Exception class is loaded lazily, only when it is actually needed.
          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception('sub-index objects have to implement Zend_Search_Lucene_Interface.');
      }
  }
  /**
   * Appends one more sub-index to the list of searched indices.
   *
   * @param Zend_Search_Lucene_Interface $index Sub-index to append
   */
  public function addIndex(Zend_Search_Lucene_Interface $index)
  {
      $subIndices   = $this->_indices;
      $subIndices[] = $index;

      $this->_indices = $subIndices;
  }
  /**
   * Get current generation number.
   *
   * Not supported by the multi-searcher: this implementation always throws.
   *
   * @param Zend_Search_Lucene_Storage_Directory $directory Unused
   * @return integer Never returns
   * @throws Zend_Search_Lucene_Exception Always
   */
  public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory)
  {
      $message = "Generation number can't be retrieved for multi-searcher";

      require_once 'Zend/Search/Lucene/Exception.php';
      throw new Zend_Search_Lucene_Exception($message);
  }
  /**
   * Get segments file name for the given generation.
   *
   * Simply forwards to Zend_Search_Lucene::getSegmentFileName().
   *
   * @param integer $generation
   * @return string
   */
  public static function getSegmentFileName($generation)
  {
      $segmentFileName = Zend_Search_Lucene::getSegmentFileName($generation);

      return $segmentFileName;
  }
  /**
   * Get index format version.
   *
   * Not supported by the multi-searcher: this implementation always throws.
   *
   * @return integer Never returns
   * @throws Zend_Search_Lucene_Exception Always
   */
  public function getFormatVersion()
  {
      $message = "Format version can't be retrieved for multi-searcher";

      require_once 'Zend/Search/Lucene/Exception.php';
      throw new Zend_Search_Lucene_Exception($message);
  }
  /**
   * Set index format version.
   *
   * The requested version is forwarded to every sub-index.
   *
   * @param int $formatVersion
   */
  public function setFormatVersion($formatVersion)
  {
      $subIndices = $this->_indices;

      foreach ($subIndices as $subIndex) {
          $subIndex->setFormatVersion($formatVersion);
      }
  }
  /**
   * Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
   *
   * Not supported by the multi-searcher: this implementation always throws.
   *
   * @return Zend_Search_Lucene_Storage_Directory Never returns
   * @throws Zend_Search_Lucene_Exception Always
   */
  public function getDirectory()
  {
      $message = "Index directory can't be retrieved for multi-searcher";

      require_once 'Zend/Search/Lucene/Exception.php';
      throw new Zend_Search_Lucene_Exception($message);
  }
  /**
   * Returns the total number of documents in this index (including deleted documents).
   *
   * The result is the sum of count() over all sub-indices; it is 0 when the
   * list of sub-indices is empty.
   *
   * @return integer
   */
  public function count()
  {
      $count = 0;

      foreach ($this->_indices as $index) {
          // Ask each sub-index; $this->_indices itself is a plain array and
          // has no count() method.
          $count += $index->count();
      }

      return $count;
  }
  /**
   * Returns one greater than the largest possible document number.
   *
   * For the multi-searcher this is the same value as count().
   *
   * @return integer
   */
  public function maxDoc()
  {
      $upperBound = $this->count();

      return $upperBound;
  }
  /**
   * Returns the total number of non-deleted documents in this index.
   *
   * The result is the sum of numDocs() over all sub-indices; it is 0 when the
   * list of sub-indices is empty.
   *
   * @return integer
   */
  public function numDocs()
  {
      $docs = 0;

      foreach ($this->_indices as $index) {
          // Ask each sub-index; $this->_indices itself is a plain array and
          // has no numDocs() method.
          $docs += $index->numDocs();
      }

      return $docs;
  }
  /**
   * Checks whether the document with the given id is deleted.
   *
   * The multi-searcher id is translated into a sub-index local id by walking
   * the sub-indices in order and subtracting the size of each skipped one.
   *
   * @param integer $id
   * @return boolean
   * @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range
   */
  public function isDeleted($id)
  {
      $localId = $id;

      foreach ($this->_indices as $subIndex) {
          $size = $subIndex->count();

          if ($localId < $size) {
              return $subIndex->isDeleted($localId);
          }

          $localId -= $size;
      }

      require_once 'Zend/Search/Lucene/Exception.php';
      throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
  }
  /**
   * Set default search field.
   *
   * Null means, that search is performed through all fields by default
   *
   * Default value is null
   *
   * This method is static, so $this (and therefore the list of sub-indices)
   * is not available here. The call is forwarded to Zend_Search_Lucene, the
   * same class getSegmentFileName() delegates to.
   * NOTE(review): assumes Zend_Search_Lucene keeps the default search field as
   * class-level state shared by its instances - confirm.
   *
   * @param string $fieldName
   */
  public static function setDefaultSearchField($fieldName)
  {
      require_once 'Zend/Search/Lucene.php';

      Zend_Search_Lucene::setDefaultSearchField($fieldName);
  }
  /**
   * Get default search field.
   *
   * Null means, that search is performed through all fields by default
   *
   * This method is static, so $this (and therefore the list of sub-indices)
   * is not available here. The value is read from Zend_Search_Lucene, the
   * same class getSegmentFileName() delegates to.
   * NOTE(review): assumes Zend_Search_Lucene keeps the default search field as
   * class-level state shared by its instances - confirm.
   *
   * @return string
   */
  public static function getDefaultSearchField()
  {
      require_once 'Zend/Search/Lucene.php';

      return Zend_Search_Lucene::getDefaultSearchField();
  }
  /**
   * Set result set limit.
   *
   * 0 (default) means no limit
   *
   * This method is static, so $this (and therefore the list of sub-indices)
   * is not available here. The call is forwarded to Zend_Search_Lucene, the
   * same class getSegmentFileName() delegates to.
   * NOTE(review): assumes Zend_Search_Lucene keeps the result set limit as
   * class-level state shared by its instances - confirm.
   *
   * @param integer $limit
   */
  public static function setResultSetLimit($limit)
  {
      require_once 'Zend/Search/Lucene.php';

      Zend_Search_Lucene::setResultSetLimit($limit);
  }
  /**
   * Get result set limit.
   *
   * 0 means no limit
   *
   * This method is static, so $this (and therefore the list of sub-indices)
   * is not available here. The value is read from Zend_Search_Lucene, the
   * same class getSegmentFileName() delegates to.
   * NOTE(review): assumes Zend_Search_Lucene keeps the result set limit as
   * class-level state shared by its instances - confirm.
   *
   * @return integer
   */
  public static function getResultSetLimit()
  {
      require_once 'Zend/Search/Lucene.php';

      return Zend_Search_Lucene::getResultSetLimit();
  }
  /**
   * Retrieve index maxBufferedDocs option
   *
   * maxBufferedDocs is a minimal number of documents required before
   * the buffered in-memory documents are written into a new Segment
   *
   * The value is only returned when every sub-index reports the same
   * (strictly identical) value.
   *
   * @return integer
   * @throws Zend_Search_Lucene_Exception If there are no sub-indices or the
   *                                      sub-indices disagree on the option
   */
  public function getMaxBufferedDocs()
  {
      if (count($this->_indices) == 0) {
          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception('Indices list is empty');
      }

      // The first sub-index provides the reference value.
      $maxBufferedDocs = reset($this->_indices)->getMaxBufferedDocs();

      foreach ($this->_indices as $index) {
          if ($index->getMaxBufferedDocs() !== $maxBufferedDocs) {
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception('Indices have different maxBufferedDocs option.');
          }
      }

      return $maxBufferedDocs;
  }
  /**
   * Set index maxBufferedDocs option
   *
   * maxBufferedDocs is a minimal number of documents required before
   * the buffered in-memory documents are written into a new Segment
   *
   * The value is forwarded to every sub-index.
   *
   * @param integer $maxBufferedDocs
   */
  public function setMaxBufferedDocs($maxBufferedDocs)
  {
      $subIndices = $this->_indices;

      foreach ($subIndices as $subIndex) {
          $subIndex->setMaxBufferedDocs($maxBufferedDocs);
      }
  }
  /**
   * Retrieve index maxMergeDocs option
   *
   * maxMergeDocs is a largest number of documents ever merged by addDocument().
   * Small values (e.g., less than 10,000) are best for interactive indexing,
   * as this limits the length of pauses while indexing to a few seconds.
   * Larger values are best for batched indexing and speedier searches.
   *
   * The value is only returned when every sub-index reports the same
   * (strictly identical) value.
   *
   * @return integer
   * @throws Zend_Search_Lucene_Exception If there are no sub-indices or the
   *                                      sub-indices disagree on the option
   */
  public function getMaxMergeDocs()
  {
      if (count($this->_indices) == 0) {
          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception('Indices list is empty');
      }

      // The first sub-index provides the reference value.
      $maxMergeDocs = reset($this->_indices)->getMaxMergeDocs();

      foreach ($this->_indices as $index) {
          if ($index->getMaxMergeDocs() !== $maxMergeDocs) {
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception('Indices have different maxMergeDocs option.');
          }
      }

      return $maxMergeDocs;
  }
  /**
   * Set index maxMergeDocs option
   *
   * maxMergeDocs is a largest number of documents ever merged by addDocument().
   * Small values (e.g., less than 10,000) are best for interactive indexing,
   * as this limits the length of pauses while indexing to a few seconds.
   * Larger values are best for batched indexing and speedier searches.
   *
   * The value is forwarded to every sub-index.
   *
   * @param integer $maxMergeDocs
   */
  public function setMaxMergeDocs($maxMergeDocs)
  {
      $subIndices = $this->_indices;

      foreach ($subIndices as $subIndex) {
          $subIndex->setMaxMergeDocs($maxMergeDocs);
      }
  }
  /**
   * Retrieve index mergeFactor option
   *
   * mergeFactor determines how often segment indices are merged by addDocument().
   * With smaller values, less RAM is used while indexing,
   * and searches on unoptimized indices are faster,
   * but indexing speed is slower.
   * With larger values, more RAM is used during indexing,
   * and while searches on unoptimized indices are slower,
   * indexing is faster.
   * Thus larger values (> 10) are best for batch index creation,
   * and smaller values (< 10) for indices that are interactively maintained.
   *
   * The value is only returned when every sub-index reports the same
   * (strictly identical) value.
   *
   * @return integer
   * @throws Zend_Search_Lucene_Exception If there are no sub-indices or the
   *                                      sub-indices disagree on the option
   */
  public function getMergeFactor()
  {
      if (count($this->_indices) == 0) {
          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception('Indices list is empty');
      }

      // The first sub-index provides the reference value.
      $mergeFactor = reset($this->_indices)->getMergeFactor();

      foreach ($this->_indices as $index) {
          if ($index->getMergeFactor() !== $mergeFactor) {
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception('Indices have different mergeFactor option.');
          }
      }

      return $mergeFactor;
  }
  /**
   * Set index mergeFactor option
   *
   * mergeFactor determines how often segment indices are merged by addDocument().
   * With smaller values, less RAM is used while indexing,
   * and searches on unoptimized indices are faster,
   * but indexing speed is slower.
   * With larger values, more RAM is used during indexing,
   * and while searches on unoptimized indices are slower,
   * indexing is faster.
   * Thus larger values (> 10) are best for batch index creation,
   * and smaller values (< 10) for indices that are interactively maintained.
   *
   * The value is forwarded to every sub-index.
   *
   * @param integer $mergeFactor
   */
  public function setMergeFactor($mergeFactor)
  {
      foreach ($this->_indices as $index) {
          // Forward the merge factor itself (not the unrelated maxMergeDocs option).
          $index->setMergeFactor($mergeFactor);
      }
  }
  /**
   * Performs a query against the index and returns an array
   * of Zend_Search_Lucene_Search_QueryHit objects.
   * Input is a string or Zend_Search_Lucene_Search_Query.
   *
   * The query is run against every sub-index in order. Hit ids coming from
   * the second and later sub-indices are shifted by the combined count() of
   * the preceding sub-indices, so they are valid multi-searcher ids.
   * An empty array is returned when there are no sub-indices.
   *
   * @param mixed $query
   * @return array Zend_Search_Lucene_Search_QueryHit
   * @throws Zend_Search_Lucene_Exception
   */
  public function find($query)
  {
      $mergedHits = array();
      $indexShift = 0;

      foreach ($this->_indices as $index) {
          foreach ($index->find($query) as $hit) {
              // Translate the sub-index local id into a multi-searcher id.
              $hit->id += $indexShift;
              $mergedHits[] = $hit;
          }

          $indexShift += $index->count();
      }

      /** @todo Implement advanced sorting */

      return $mergedHits;
  }
  /**
   * Returns a list of all unique field names that exist in this index.
   *
   * Field names of all sub-indices are merged and de-duplicated.
   * An empty array is returned when there are no sub-indices.
   *
   * @param boolean $indexed Passed through to each sub-index
   * @return array
   */
  public function getFieldNames($indexed = false)
  {
      $fieldNamesList = array();

      foreach ($this->_indices as $index) {
          $fieldNamesList[] = $index->getFieldNames($indexed);
      }

      // array_merge() must not be invoked without arguments.
      if (count($fieldNamesList) == 0) {
          return array();
      }

      return array_unique(call_user_func_array('array_merge', $fieldNamesList));
  }
  /**
   * Returns a Zend_Search_Lucene_Document object for the document
   * number $id in this index.
   *
   * A query hit may be passed instead of a number; its id property is used.
   * The multi-searcher id is translated into a sub-index local id by walking
   * the sub-indices in order and subtracting the size of each skipped one.
   *
   * @param integer|Zend_Search_Lucene_Search_QueryHit $id
   * @return Zend_Search_Lucene_Document
   * @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range
   */
  public function getDocument($id)
  {
      $localId = ($id instanceof Zend_Search_Lucene_Search_QueryHit) ? $id->id : $id;

      foreach ($this->_indices as $subIndex) {
          $size = $subIndex->count();

          if ($localId < $size) {
              return $subIndex->getDocument($localId);
          }

          $localId -= $size;
      }

      require_once 'Zend/Search/Lucene/Exception.php';
      throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
  }
  /**
   * Returns true if at least one sub-index contains documents with the specified term.
   *
   * Is used for query optimization.
   *
   * @param Zend_Search_Lucene_Index_Term $term
   * @return boolean
   */
  public function hasTerm(Zend_Search_Lucene_Index_Term $term)
  {
      $found = false;

      foreach ($this->_indices as $subIndex) {
          if ($subIndex->hasTerm($term)) {
              $found = true;
              break;
          }
      }

      return $found;
  }
  /**
   * Returns IDs of all the documents containing term.
   *
   * Ids reported by the second and later sub-indices are shifted by the
   * combined count() of the preceding sub-indices.
   * An empty array is returned when there are no sub-indices.
   *
   * @param Zend_Search_Lucene_Index_Term $term
   * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter Must be null; filters are not supported
   * @return array
   * @throws Zend_Search_Lucene_Exception If a documents filter is supplied
   */
  public function termDocs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
  {
      if ($docsFilter != null) {
          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
      }

      $mergedDocs = array();
      $indexShift = 0;

      foreach ($this->_indices as $index) {
          foreach ($index->termDocs($term) as $docId) {
              // Translate the sub-index local id into a multi-searcher id.
              $mergedDocs[] = $docId + $indexShift;
          }

          $indexShift += $index->count();
      }

      return $mergedDocs;
  }
  /**
   * Returns documents filter for all documents containing term.
   *
   * Document filters are not supported by the multi-searcher:
   * this implementation always throws.
   *
   * @param Zend_Search_Lucene_Index_Term $term
   * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
   * @return Zend_Search_Lucene_Index_DocsFilter Never returns
   * @throws Zend_Search_Lucene_Exception Always
   */
  public function termDocsFilter(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
  {
      $message = 'Document filters could not used with multi-searcher';

      require_once 'Zend/Search/Lucene/Exception.php';
      throw new Zend_Search_Lucene_Exception($message);
  }
  /**
   * Returns an array of all term freqs.
   * Return array structure: array( docId => freq, ...)
   *
   * Document ids reported by the second and later sub-indices are shifted by
   * the combined count() of the preceding sub-indices. The result is built
   * key by key, because array_merge() would renumber the integer docId keys.
   * An empty array is returned when there are no sub-indices.
   *
   * @param Zend_Search_Lucene_Index_Term $term
   * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter Must be null; filters are not supported
   * @return array
   * @throws Zend_Search_Lucene_Exception If a documents filter is supplied
   */
  public function termFreqs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
  {
      if ($docsFilter != null) {
          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
      }

      $mergedFreqs = array();
      $indexShift  = 0;

      foreach ($this->_indices as $index) {
          foreach ($index->termFreqs($term) as $docId => $freq) {
              // Keep the docId => freq association under the shifted id.
              $mergedFreqs[$docId + $indexShift] = $freq;
          }

          $indexShift += $index->count();
      }

      return $mergedFreqs;
  }
  /**
   * Returns an array of all term positions in the documents.
   * Return array structure: array( docId => array( pos1, pos2, ...), ...)
   *
   * Document ids reported by the second and later sub-indices are shifted by
   * the combined count() of the preceding sub-indices. The result is built
   * key by key, because array_merge() would renumber the integer docId keys.
   * An empty array is returned when there are no sub-indices.
   *
   * @param Zend_Search_Lucene_Index_Term $term
   * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter Must be null; filters are not supported
   * @return array
   * @throws Zend_Search_Lucene_Exception If a documents filter is supplied
   */
  public function termPositions(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
  {
      if ($docsFilter != null) {
          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
      }

      $mergedPositions = array();
      $indexShift      = 0;

      foreach ($this->_indices as $index) {
          foreach ($index->termPositions($term) as $docId => $positions) {
              // Keep the docId => positions association under the shifted id.
              $mergedPositions[$docId + $indexShift] = $positions;
          }

          $indexShift += $index->count();
      }

      return $mergedPositions;
  }
  /**
   * Returns the number of documents in this index containing the $term.
   *
   * The result is the sum of docFreq() over all sub-indices.
   *
   * @param Zend_Search_Lucene_Index_Term $term
   * @return integer
   */
  public function docFreq(Zend_Search_Lucene_Index_Term $term)
  {
      $total = 0;

      foreach ($this->_indices as $subIndex) {
          $total += $subIndex->docFreq($term);
      }

      return $total;
  }
  /**
   * Retrieve similarity used by index reader
   *
   * The similarity of the first sub-index is returned, provided that every
   * sub-index reports the identical (===) similarity.
   *
   * @return Zend_Search_Lucene_Search_Similarity
   * @throws Zend_Search_Lucene_Exception If there are no sub-indices or the
   *                                      sub-indices use different similarities
   */
  public function getSimilarity()
  {
      if (count($this->_indices) == 0) {
          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception('Indices list is empty');
      }

      $firstIndex = reset($this->_indices);
      $expected   = $firstIndex->getSimilarity();

      foreach ($this->_indices as $subIndex) {
          if ($subIndex->getSimilarity() === $expected) {
              continue;
          }

          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception('Indices have different similarity.');
      }

      return $expected;
  }
  /**
   * Returns a normalization factor for "field, document" pair.
   *
   * The multi-searcher id is translated into a sub-index local id by walking
   * the sub-indices in order. Unlike the other id-based methods, an id that
   * is out of range yields null instead of an exception.
   *
   * @param integer $id
   * @param string $fieldName
   * @return float|null
   */
  public function norm($id, $fieldName)
  {
      $localId = $id;

      foreach ($this->_indices as $subIndex) {
          $size = $subIndex->count();

          if ($localId < $size) {
              return $subIndex->norm($localId, $fieldName);
          }

          $localId -= $size;
      }

      return null;
  }
  /**
   * Returns true if any documents have been deleted from any sub-index.
   *
   * @return boolean
   */
  public function hasDeletions()
  {
      $anyDeleted = false;

      foreach ($this->_indices as $subIndex) {
          if ($subIndex->hasDeletions()) {
              $anyDeleted = true;
              break;
          }
      }

      return $anyDeleted;
  }
  /**
   * Deletes a document from the index.
   * $id is an internal document id
   *
   * A query hit may be passed instead of a number; its id property is used
   * (same handling as in getDocument()). The multi-searcher id is translated
   * into a sub-index local id by walking the sub-indices in order and
   * subtracting the size of each skipped one.
   *
   * @param integer|Zend_Search_Lucene_Search_QueryHit $id
   * @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range
   */
  public function delete($id)
  {
      if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
          /* @var $id Zend_Search_Lucene_Search_QueryHit */
          $id = $id->id;
      }

      foreach ($this->_indices as $index) {
          $indexCount = $index->count();

          if ($indexCount > $id) {
              $index->delete($id);
              return;
          }

          $id -= $indexCount;
      }

      require_once 'Zend/Search/Lucene/Exception.php';
      throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
  }
  /**
   * Callback used to choose target index for new documents
   *
   * Function/method signature:
   * Zend_Search_Lucene_Interface callbackFunction(Zend_Search_Lucene_Document $document, array $indices);
   *
   * null means "default documents distributing algorithm"
   *
   * @var callback
   */
  protected $_documentDistributorCallBack = null;

  /**
   * Set callback for choosing target index.
   *
   * Passing null restores the default distributing algorithm.
   *
   * @param callback $callback Null or a valid callback
   * @throws Zend_Search_Lucene_Exception If $callback is neither null nor callable
   */
  public function setDocumentDistributorCallback($callback)
  {
      // Reject anything that addDocument() could not invoke later.
      if ($callback !== null && !is_callable($callback)) {
          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception('$callback parameter must be null or valid callback.');
      }

      $this->_documentDistributorCallBack = $callback;
  }
  /**
   * Get callback for choosing target index.
   *
   * @return callback|null The stored callback; null stands for the default
   *                       documents distributing algorithm
   */
  public function getDocumentDistributorCallback()
  {
      $callback = $this->_documentDistributorCallBack;

      return $callback;
  }
  /**
   * Adds a document to this index.
   *
   * The target sub-index is chosen by the document distributor callback when
   * one is set; otherwise a random sub-index is picked.
   *
   * @param Zend_Search_Lucene_Document $document
   * @throws Zend_Search_Lucene_Exception If no callback is set and there are
   *                                      no sub-indices to pick from
   */
  public function addDocument(Zend_Search_Lucene_Document $document)
  {
      if ($this->_documentDistributorCallBack !== null) {
          $index = call_user_func($this->_documentDistributorCallBack, $document, $this->_indices);
      } else {
          // array_rand() cannot pick from an empty array.
          if (count($this->_indices) == 0) {
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception('Indices list is empty');
          }

          $index = $this->_indices[ array_rand($this->_indices) ];
      }

      $index->addDocument($document);
  }
  /**
   * Commit changes resulting from delete() or undeleteAll() operations.
   *
   * commit() is invoked on every sub-index in turn.
   */
  public function commit()
  {
      $subIndices = $this->_indices;

      foreach ($subIndices as $subIndex) {
          $subIndex->commit();
      }
  }
  /**
   * Optimize index.
   *
   * Merges all segments into one. optimize() is invoked on every sub-index
   * in turn.
   */
  public function optimize()
  {
      foreach ($this->_indices as $index) {
          // Call the public optimize() method of the sub-index.
          $index->optimize();
      }
  }
  /**
   * Returns an array of all terms in this index.
   *
   * Terms of all sub-indices are merged and de-duplicated.
   * An empty array is returned when there are no sub-indices.
   *
   * @return array
   */
  public function terms()
  {
      $termsList = array();

      foreach ($this->_indices as $index) {
          $termsList[] = $index->terms();
      }

      // array_merge() must not be invoked without arguments.
      if (count($termsList) == 0) {
          return array();
      }

      // NOTE(review): array_unique() compares elements as strings by default;
      // confirm that the term values returned by the sub-indices support that.
      return array_unique(call_user_func_array('array_merge', $termsList));
  }
  /**
   * Terms stream priority queue object
   *
   * Null until resetTermsStream() is called and again after closeTermsStream().
   *
   * @var Zend_Search_Lucene_TermStreamsPriorityQueue
   */
  private $_termsStream = null;

  /**
   * Reset terms stream.
   *
   * An existing stream is reset in place; otherwise a new priority queue is
   * created over the current sub-indices.
   */
  public function resetTermsStream()
  {
      if ($this->_termsStream !== null) {
          $this->_termsStream->resetTermsStream();
          return;
      }

      $this->_termsStream = new Zend_Search_Lucene_TermStreamsPriorityQueue($this->_indices);
  }
  /**
   * Skip terms stream up to specified term prefix.
   *
   * Prefix contains fully specified field info and portion of searched term
   *
   * The call is forwarded to the terms stream object, so resetTermsStream()
   * has to be called beforehand.
   *
   * @param Zend_Search_Lucene_Index_Term $prefix
   */
  public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
  {
      $stream = $this->_termsStream;

      $stream->skipTo($prefix);
  }
  /**
   * Scans terms dictionary and returns next term
   *
   * The call is forwarded to the terms stream object, so resetTermsStream()
   * has to be called beforehand.
   *
   * @return Zend_Search_Lucene_Index_Term|null
   */
  public function nextTerm()
  {
      $stream = $this->_termsStream;

      return $stream->nextTerm();
  }
  /**
   * Returns term in current position
   *
   * The call is forwarded to the terms stream object, so resetTermsStream()
   * has to be called beforehand.
   *
   * @return Zend_Search_Lucene_Index_Term|null
   */
  public function currentTerm()
  {
      $stream = $this->_termsStream;

      return $stream->currentTerm();
  }
  /**
   * Close terms stream
   *
   * Should be used for resources clean up if stream is not read up to the end.
   * The stream object is closed first and then dropped, so a later
   * resetTermsStream() creates a new one.
   */
  public function closeTermsStream()
  {
      $stream = $this->_termsStream;
      $stream->closeTermsStream();

      $this->_termsStream = null;
  }
  /**
   * Undeletes all documents currently marked as deleted in this index.
   *
   * undeleteAll() is invoked on every sub-index in turn.
   */
  public function undeleteAll()
  {
      $subIndices = $this->_indices;

      foreach ($subIndices as $subIndex) {
          $subIndex->undeleteAll();
      }
  }
  /**
   * Add reference to the index object
   *
   * Intentionally a no-op for the multi-searcher.
   *
   * @internal
   */
  public function addReference()
  {
      // Nothing to track: the multi-searcher is never referenced by indices.
      return;
  }
  /**
   * Remove reference from the index object
   *
   * Intentionally a no-op for the multi-searcher.
   *
   * @internal
   */
  public function removeReference()
  {
      // Nothing to release: the multi-searcher is never referenced by indices.
      return;
  }
  837. }