Writer.php 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Index
  18. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /** Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter */
  23. require_once 'Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php';
  24. /** Zend_Search_Lucene_Index_SegmentInfo */
  25. require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
  26. /** Zend_Search_Lucene_Index_SegmentMerger */
  27. require_once 'Zend/Search/Lucene/Index/SegmentMerger.php';
  28. /** Zend_Search_Lucene_LockManager */
  29. require_once 'Zend/Search/Lucene/LockManager.php';
  30. /**
  31. * @category Zend
  32. * @package Zend_Search_Lucene
  33. * @subpackage Index
  34. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  35. * @license http://framework.zend.com/license/new-bsd New BSD License
  36. */
  37. class Zend_Search_Lucene_Index_Writer
  38. {
  39. /**
  40. * @todo Implement Analyzer substitution
  41. * @todo Implement Zend_Search_Lucene_Storage_DirectoryRAM and Zend_Search_Lucene_Storage_FileRAM to use it for
  42. * temporary index files
  43. * @todo Directory lock processing
  44. */
  45. /**
  46. * Number of documents required before the buffered in-memory
  47. * documents are written into a new Segment
  48. *
  49. * Default value is 10
  50. *
  51. * @var integer
  52. */
  53. public $maxBufferedDocs = 10;
  54. /**
  55. * Largest number of documents ever merged by addDocument().
  56. * Small values (e.g., less than 10,000) are best for interactive indexing,
  57. * as this limits the length of pauses while indexing to a few seconds.
  58. * Larger values are best for batched indexing and speedier searches.
  59. *
  60. * Default value is PHP_INT_MAX
  61. *
  62. * @var integer
  63. */
  64. public $maxMergeDocs = PHP_INT_MAX;
  65. /**
  66. * Determines how often segment indices are merged by addDocument().
  67. *
  68. * With smaller values, less RAM is used while indexing,
  69. * and searches on unoptimized indices are faster,
  70. * but indexing speed is slower.
  71. *
  72. * With larger values, more RAM is used during indexing,
  73. * and while searches on unoptimized indices are slower,
  74. * indexing is faster.
  75. *
  76. * Thus larger values (> 10) are best for batch index creation,
  77. * and smaller values (< 10) for indices that are interactively maintained.
  78. *
  79. * Default value is 10
  80. *
  81. * @var integer
  82. */
  83. public $mergeFactor = 10;
  84. /**
  85. * File system adapter.
  86. *
  87. * @var Zend_Search_Lucene_Storage_Directory
  88. */
  89. private $_directory = null;
  90. /**
  91. * Changes counter.
  92. *
  93. * @var integer
  94. */
  95. private $_versionUpdate = 0;
  96. /**
  97. * List of the segments, created by index writer
  98. * Array of Zend_Search_Lucene_Index_SegmentInfo objects
  99. *
  100. * @var array
  101. */
  102. private $_newSegments = array();
  103. /**
  104. * List of segments to be deleted on commit
  105. *
  106. * @var array
  107. */
  108. private $_segmentsToDelete = array();
  109. /**
  110. * Current segment to add documents
  111. *
  112. * @var Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter
  113. */
  114. private $_currentSegment = null;
  115. /**
  116. * Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
  117. *
  118. * It's a reference to the corresponding Zend_Search_Lucene::$_segmentInfos array
  119. *
  120. * @var array Zend_Search_Lucene_Index_SegmentInfo
  121. */
  122. private $_segmentInfos;
  123. /**
  124. * Index target format version
  125. *
  126. * @var integer
  127. */
  128. private $_targetFormatVersion;
  129. /**
  130. * List of index file extensions
  131. *
  132. * @var array
  133. */
  134. private static $_indexExtensions = array('.cfs' => '.cfs',
  135. '.cfx' => '.cfx',
  136. '.fnm' => '.fnm',
  137. '.fdx' => '.fdx',
  138. '.fdt' => '.fdt',
  139. '.tis' => '.tis',
  140. '.tii' => '.tii',
  141. '.frq' => '.frq',
  142. '.prx' => '.prx',
  143. '.tvx' => '.tvx',
  144. '.tvd' => '.tvd',
  145. '.tvf' => '.tvf',
  146. '.del' => '.del',
  147. '.sti' => '.sti' );
  /**
   * Initialise an empty index in the given directory.
   *
   * Generation 0 selects the pre-2.1 layout: existing index files are removed
   * and fresh 'segments' and 'deletable' files are written. Any other
   * generation writes a 'segments.gen' file followed by the segments file
   * named after that generation.
   *
   * @param Zend_Search_Lucene_Storage_Directory $directory
   * @param integer $generation
   * @param integer $nameCount
   */
  public static function createIndex(Zend_Search_Lucene_Storage_Directory $directory, $generation, $nameCount)
  {
      $legacyLayout = ($generation == 0);

      if ($legacyLayout) {
          // Pre-2.1 mode: drop everything that looks like an index file
          foreach ($directory->fileList() as $fileName) {
              $isIndexFile = $fileName == 'deletable'
                          || $fileName == 'segments'
                          || isset(self::$_indexExtensions[ substr($fileName, strlen($fileName)-4)])
                          || preg_match('/\.f\d+$/i', $fileName); // <segment_name>.f<decimal_number> file names

              if ($isIndexFile) {
                  $directory->deleteFile($fileName);
              }
          }

          $segmentsFileName = 'segments';
          $formatMarker     = (int)0xFFFFFFFF;
      } else {
          $generationFile = $directory->createFile('segments.gen');
          $generationFile->writeInt((int)0xFFFFFFFE);
          // The generation number is stored twice
          $generationFile->writeLong($generation);
          $generationFile->writeLong($generation);

          $segmentsFileName = Zend_Search_Lucene::getSegmentFileName($generation);
          $formatMarker     = (int)0xFFFFFFFD;
      }

      // Segments file header: format marker, version, name counter, segment counter
      $segmentsFile = $directory->createFile($segmentsFileName);
      $segmentsFile->writeInt($formatMarker);
      // version is initialised from the current time
      $segmentsFile->writeLong(round(microtime(true)));
      $segmentsFile->writeInt($nameCount);
      // a new index contains no segments
      $segmentsFile->writeInt(0);

      if ($legacyLayout) {
          // The pre-2.1 layout additionally carries a 'deletable' file with a zero counter
          $deletableFile = $directory->createFile('deletable');
          $deletableFile->writeInt(0);
      }
  }
  /**
   * Open the index for writing.
   *
   * The segment list is taken by reference, so changes made by the writer
   * are visible through the caller's array as well.
   *
   * @param Zend_Search_Lucene_Storage_Directory $directory
   * @param array $segmentInfos
   * @param integer $targetFormatVersion
   */
  public function __construct(Zend_Search_Lucene_Storage_Directory $directory, &$segmentInfos, $targetFormatVersion)
  {
      $this->_targetFormatVersion = $targetFormatVersion;
      // Keep an alias of (not a copy of) the caller's segment list
      $this->_segmentInfos        = &$segmentInfos;
      $this->_directory           = $directory;
  }
  /**
   * Adds a document to this index.
   *
   * The document goes into the current in-memory segment, which is created
   * on demand. Once that segment holds at least $maxBufferedDocs documents
   * it is committed. A segment merge is attempted after every addition.
   *
   * @param Zend_Search_Lucene_Document $document
   */
  public function addDocument(Zend_Search_Lucene_Document $document)
  {
      if (null === $this->_currentSegment) {
          $segmentName = $this->_newSegmentName();
          $this->_currentSegment = new Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter(
              $this->_directory,
              $segmentName
          );
      }

      $activeSegment = $this->_currentSegment;
      $activeSegment->addDocument($document);

      $bufferIsFull = $activeSegment->count() >= $this->maxBufferedDocs;
      if ($bufferIsFull) {
          $this->commit();
      }

      $this->_maybeMergeSegments();

      // Counted after the commit above, so it is applied by the next segments update
      $this->_versionUpdate += 1;
  }
  /**
   * Check if we have anything to merge
   *
   * Segments are visited from smallest to largest and accumulated into a
   * pool. The threshold starts at $maxBufferedDocs and is multiplied by
   * $mergeFactor each time a segment is too large for the current block.
   * A merge is due as soon as a pool reaches the threshold of its block.
   *
   * @return boolean true if at least one block of segments qualifies for merging
   */
  private function _hasAnythingToMerge()
  {
      $segmentSizes = array();
      foreach ($this->_segmentInfos as $segName => $segmentInfo) {
          $segmentSizes[$segName] = $segmentInfo->count();
      }
      asort($segmentSizes, SORT_NUMERIC);

      $poolSize    = 0;
      $sizeToMerge = $this->maxBufferedDocs;

      foreach ($segmentSizes as $size) {
          // Check, if segment comes into a new merging block
          while ($size >= $sizeToMerge) {
              // Previous block is large enough to be merged
              if ($poolSize >= $sizeToMerge) {
                  return true;
              }

              $poolSize            = 0;
              $previousSizeToMerge = $sizeToMerge;
              $sizeToMerge        *= $this->mergeFactor;

              if ($sizeToMerge > $this->maxMergeDocs) {
                  return false;
              }
              if ($sizeToMerge <= $previousSizeToMerge) {
                  // The threshold did not grow (e.g. $mergeFactor <= 1), so this
                  // loop could never terminate; report that nothing can be merged.
                  return false;
              }
          }

          $poolSize += $size;
      }

      return $poolSize >= $sizeToMerge;
  }
  /**
   * Merge segments if necessary
   *
   * Does nothing when the optimization lock cannot be obtained. Otherwise
   * the segment list is refreshed and segments are merged block by block,
   * smallest first. The optimization lock is released on every exit path,
   * including when an exception is thrown by the update or merge.
   *
   * @throws Exception whatever _updateSegments() or _mergeSegments() throws
   */
  private function _maybeMergeSegments()
  {
      if (Zend_Search_Lucene_LockManager::obtainOptimizationLock($this->_directory) === false) {
          return;
      }

      try {
          if ($this->_hasAnythingToMerge()) {
              // Update segments list to be sure all segments are not merged yet by another process
              //
              // Segment merging functionality is concentrated in this class and surrounded
              // by optimization lock obtaining/releasing.
              // _updateSegments() refreshes segments list from the latest index generation.
              // So only new segments can be added to the index while we are merging some already existing
              // segments.
              // Newly added segments will be also included into the index by the _updateSegments() call
              // either by another process or by the current process with the commit() call at the end
              // of _mergeSegments() method.
              // That's guaranteed by the serialisation of _updateSegments() execution using exclusive locks.
              $this->_updateSegments();

              // Perform standard auto-optimization procedure
              $segmentSizes = array();
              foreach ($this->_segmentInfos as $segName => $segmentInfo) {
                  $segmentSizes[$segName] = $segmentInfo->count();
              }
              asort($segmentSizes, SORT_NUMERIC);

              $mergePool          = array();
              $poolSize           = 0;
              $sizeToMerge        = $this->maxBufferedDocs;
              $thresholdExhausted = false;

              foreach ($segmentSizes as $segName => $size) {
                  // Check, if segment comes into a new merging block
                  while ($size >= $sizeToMerge) {
                      // Merge previous block if it's large enough
                      if ($poolSize >= $sizeToMerge) {
                          $this->_mergeSegments($mergePool);
                      }
                      $mergePool = array();
                      $poolSize  = 0;

                      $previousSizeToMerge = $sizeToMerge;
                      $sizeToMerge        *= $this->mergeFactor;

                      // Stop when the block size exceeds the configured limit, or when it
                      // did not grow (e.g. $mergeFactor <= 1), which would loop forever.
                      if ($sizeToMerge > $this->maxMergeDocs || $sizeToMerge <= $previousSizeToMerge) {
                          $thresholdExhausted = true;
                          break 2;
                      }
                  }

                  $mergePool[] = $this->_segmentInfos[$segName];
                  $poolSize   += $size;
              }

              if (!$thresholdExhausted && $poolSize >= $sizeToMerge) {
                  $this->_mergeSegments($mergePool);
              }
          }
      } catch (Exception $e) {
          // Do not leave the optimization lock held when the update or merge fails
          Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
          throw $e;
      }

      Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
  }
  /**
   * Merge specified segments
   *
   * $segments is an array of SegmentInfo objects
   *
   * The source segments are queued for deletion only after the merge has
   * completed. If merge() throws, no segment is left queued for removal by
   * a later commit.
   *
   * @param array $segments
   */
  private function _mergeSegments($segments)
  {
      $newName = $this->_newSegmentName();

      $merger = new Zend_Search_Lucene_Index_SegmentMerger($this->_directory,
                                                           $newName);

      $mergedNames = array();
      foreach ($segments as $segmentInfo) {
          $merger->addSource($segmentInfo);
          $sourceName               = $segmentInfo->getName();
          $mergedNames[$sourceName] = $sourceName;
      }

      $newSegment = $merger->merge();

      // The merge succeeded: the sources may now be dropped on commit
      foreach ($mergedNames as $sourceName) {
          $this->_segmentsToDelete[$sourceName] = $sourceName;
      }

      if ($newSegment !== null) {
          $this->_newSegments[$newSegment->getName()] = $newSegment;
      }

      $this->commit();
  }
  /**
   * Write the next generation of the segments file and clean up the directory.
   *
   * Under the index write lock this method:
   *  - flushes pending changes of all known segments;
   *  - copies the current segments file into a new generation, skipping
   *    segments queued in $_segmentsToDelete and appending $_newSegments;
   *  - records the new generation in 'segments.gen' (written twice);
   *  - deletes files that are no longer referenced, if the read lock can be
   *    escalated; otherwise it only purges the handlers of deleted segments;
   *  - removes segments that are no longer listed from $_segmentInfos.
   *
   * If writing the new segments file fails, the previous generation is
   * restored in 'segments.gen', the write lock is released and the
   * exception is re-thrown.
   *
   * @throws Zend_Search_Lucene_Exception
   */
  private function _updateSegments()
  {
      // Get an exclusive index lock
      Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

      // Write down changes for the segments
      foreach ($this->_segmentInfos as $segInfo) {
          $segInfo->writeChanges();
      }

      $generation = Zend_Search_Lucene::getActualGeneration($this->_directory);
      $segmentsFile   = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);
      $newSegmentFile = $this->_directory->createFile(Zend_Search_Lucene::getSegmentFileName(++$generation), false);

      try {
          $genFile = $this->_directory->getFileObject('segments.gen', false);
      } catch (Zend_Search_Lucene_Exception $e) {
          if (strpos($e->getMessage(), 'is not readable') !== false) {
              // No generation file yet: create one
              $genFile = $this->_directory->createFile('segments.gen');
          } else {
              throw $e;
          }
      }

      $genFile->writeInt((int)0xFFFFFFFE);
      // Write generation (first copy)
      $genFile->writeLong($generation);

      try {
          // Write format marker
          if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_1) {
              $newSegmentFile->writeInt((int)0xFFFFFFFD);
          } else if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
              $newSegmentFile->writeInt((int)0xFFFFFFFC);
          }

          // Read src file format identifier
          $format = $segmentsFile->readInt();
          if ($format == (int)0xFFFFFFFF) {
              $srcFormat = Zend_Search_Lucene::FORMAT_PRE_2_1;
          } else if ($format == (int)0xFFFFFFFD) {
              $srcFormat = Zend_Search_Lucene::FORMAT_2_1;
          } else if ($format == (int)0xFFFFFFFC) {
              $srcFormat = Zend_Search_Lucene::FORMAT_2_3;
          } else {
              throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
          }

          $version = $segmentsFile->readLong() + $this->_versionUpdate;
          $this->_versionUpdate = 0;
          $newSegmentFile->writeLong($version);

          // Write segment name counter
          $newSegmentFile->writeInt($segmentsFile->readInt());

          // Get number of segments offset
          $numOfSegmentsOffset = $newSegmentFile->tell();
          // Write dummy data (segment counter)
          $newSegmentFile->writeInt(0);

          // Read number of segments
          $segmentsCount = $segmentsFile->readInt();

          $segments = array();
          for ($count = 0; $count < $segmentsCount; $count++) {
              $segName = $segmentsFile->readString();
              $segSize = $segmentsFile->readInt();

              if ($srcFormat == Zend_Search_Lucene::FORMAT_PRE_2_1) {
                  // pre-2.1 index format
                  $delGen            = 0;
                  $hasSingleNormFile = false;
                  $numField          = (int)0xFFFFFFFF;
                  $normGens          = array();
                  $isCompoundByte    = 0;
                  $docStoreOptions   = null;
              } else {
                  $delGen = $segmentsFile->readLong();

                  if ($srcFormat == Zend_Search_Lucene::FORMAT_2_3) {
                      $docStoreOffset = $segmentsFile->readInt();

                      if ($docStoreOffset != (int)0xFFFFFFFF) {
                          $docStoreSegment        = $segmentsFile->readString();
                          $docStoreIsCompoundFile = $segmentsFile->readByte();

                          $docStoreOptions = array('offset'     => $docStoreOffset,
                                                   'segment'    => $docStoreSegment,
                                                   'isCompound' => ($docStoreIsCompoundFile == 1));
                      } else {
                          $docStoreOptions = null;
                      }
                  } else {
                      $docStoreOptions = null;
                  }

                  $hasSingleNormFile = $segmentsFile->readByte();
                  $numField          = $segmentsFile->readInt();

                  $normGens = array();
                  if ($numField != (int)0xFFFFFFFF) {
                      for ($count1 = 0; $count1 < $numField; $count1++) {
                          $normGens[] = $segmentsFile->readLong();
                      }
                  }
                  $isCompoundByte = $segmentsFile->readByte();
              }

              if (!in_array($segName, $this->_segmentsToDelete)) {
                  // Load segment if necessary
                  if (!isset($this->_segmentInfos[$segName])) {
                      // Default to "status unknown" so an unexpected flag byte can never
                      // reuse the value computed for a previously loaded segment
                      $isCompound = null;
                      if ($isCompoundByte == 0xFF) {
                          // The segment is not a compound file
                          $isCompound = false;
                      } else if ($isCompoundByte == 0x00) {
                          // The status is unknown
                          $isCompound = null;
                      } else if ($isCompoundByte == 0x01) {
                          // The segment is a compound file
                          $isCompound = true;
                      }

                      $this->_segmentInfos[$segName] =
                                  new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                                           $segName,
                                                                           $segSize,
                                                                           $delGen,
                                                                           $docStoreOptions,
                                                                           $hasSingleNormFile,
                                                                           $isCompound);
                  } else {
                      // Retrieve actual deletions file generation number
                      $delGen = $this->_segmentInfos[$segName]->getDelGen();
                  }

                  $newSegmentFile->writeString($segName);
                  $newSegmentFile->writeInt($segSize);
                  $newSegmentFile->writeLong($delGen);
                  if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                      if ($docStoreOptions !== null) {
                          $newSegmentFile->writeInt($docStoreOffset);
                          $newSegmentFile->writeString($docStoreSegment);
                          $newSegmentFile->writeByte($docStoreIsCompoundFile);
                      } else {
                          // Set DocStoreOffset to -1
                          $newSegmentFile->writeInt((int)0xFFFFFFFF);
                      }
                  } else if ($docStoreOptions !== null) {
                      // The write lock is intentionally NOT released here: the catch block
                      // below first restores 'segments.gen' and then releases the lock, so
                      // the rollback still happens under the lock and the lock is released
                      // exactly once.
                      throw new Zend_Search_Lucene_Exception('Index conversion to lower format version is not supported.');
                  }

                  $newSegmentFile->writeByte($hasSingleNormFile);
                  $newSegmentFile->writeInt($numField);
                  if ($numField != (int)0xFFFFFFFF) {
                      foreach ($normGens as $normGen) {
                          $newSegmentFile->writeLong($normGen);
                      }
                  }
                  $newSegmentFile->writeByte($isCompoundByte);

                  $segments[$segName] = $segSize;
              }
          }
          $segmentsFile->close();

          $segmentsCount = count($segments) + count($this->_newSegments);

          foreach ($this->_newSegments as $segName => $segmentInfo) {
              $newSegmentFile->writeString($segName);
              $newSegmentFile->writeInt($segmentInfo->count());

              // delete file generation: -1 (there is no delete file yet)
              // written as two 32-bit halves of a long
              $newSegmentFile->writeInt((int)0xFFFFFFFF);$newSegmentFile->writeInt((int)0xFFFFFFFF);
              if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                  // docStoreOffset: -1 (segment doesn't use shared doc store)
                  $newSegmentFile->writeInt((int)0xFFFFFFFF);
              }
              // HasSingleNormFile
              $newSegmentFile->writeByte($segmentInfo->hasSingleNormFile());
              // NumField
              $newSegmentFile->writeInt((int)0xFFFFFFFF);
              // IsCompoundFile
              $newSegmentFile->writeByte($segmentInfo->isCompound() ? 1 : -1);

              $segments[$segmentInfo->getName()] = $segmentInfo->count();
              $this->_segmentInfos[$segName] = $segmentInfo;
          }
          $this->_newSegments = array();

          $newSegmentFile->seek($numOfSegmentsOffset);
          $newSegmentFile->writeInt($segmentsCount);  // Update segments count
          $newSegmentFile->close();
      } catch (Exception $e) {
          /** Restore previous index generation */
          $generation--;
          $genFile->seek(4, SEEK_SET);
          // Write generation number twice
          $genFile->writeLong($generation); $genFile->writeLong($generation);

          // Release index write lock
          Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

          // Throw the exception
          throw $e;
      }

      // Write generation (second copy)
      $genFile->writeLong($generation);

      // Check if another update or read process is not running now
      // If yes, skip clean-up procedure
      if (Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory)) {
          /**
           * Clean-up directory
           */
          $filesToDelete = array();
          $filesTypes    = array();
          $filesNumbers  = array();

          // list of .del files of currently used segments
          // each segment can have several generations of .del files
          // only last should not be deleted
          $delFiles = array();

          foreach ($this->_directory->fileList() as $file) {
              if ($file == 'deletable') {
                  // 'deletable' file
                  $filesToDelete[] = $file;
                  $filesTypes[]    = 0; // delete this file first, since it's not used starting from Lucene v2.1
                  $filesNumbers[]  = 0;
              } else if ($file == 'segments') {
                  // 'segments' file
                  $filesToDelete[] = $file;
                  $filesTypes[]    = 1; // second file to be deleted "zero" version of segments file (Lucene pre-2.1)
                  $filesNumbers[]  = 0;
              } else if (preg_match('/^segments_[a-zA-Z0-9]+$/i', $file)) {
                  // 'segments_xxx' file
                  // Check if it's not a just created generation file
                  if ($file != Zend_Search_Lucene::getSegmentFileName($generation)) {
                      $filesToDelete[] = $file;
                      $filesTypes[]    = 2; // first group of files for deletions
                      $filesNumbers[]  = (int)base_convert(substr($file, 9), 36, 10); // ordered by segment generation numbers
                  }
              } else if (preg_match('/(^_([a-zA-Z0-9]+))\.f\d+$/i', $file, $matches)) {
                  // one of per segment files ('<segment_name>.f<decimal_number>')
                  // Check if it's not one of the segments in the current segments set
                  if (!isset($segments[$matches[1]])) {
                      $filesToDelete[] = $file;
                      $filesTypes[]    = 3; // second group of files for deletions
                      $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number
                  }
              } else if (preg_match('/(^_([a-zA-Z0-9]+))(_([a-zA-Z0-9]+))\.del$/i', $file, $matches)) {
                  // one of per segment files ('<segment_name>_<del_generation>.del' where <segment_name> is '_<segment_number>')
                  // Check if it's not one of the segments in the current segments set
                  if (!isset($segments[$matches[1]])) {
                      $filesToDelete[] = $file;
                      $filesTypes[]    = 3; // second group of files for deletions
                      $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number
                  } else {
                      $segmentNumber = (int)base_convert($matches[2], 36, 10);
                      $delGeneration = (int)base_convert($matches[4], 36, 10);
                      if (!isset($delFiles[$segmentNumber])) {
                          $delFiles[$segmentNumber] = array();
                      }
                      $delFiles[$segmentNumber][$delGeneration] = $file;
                  }
              } else if (isset(self::$_indexExtensions[substr($file, strlen($file)-4)])) {
                  // one of per segment files ('<segment_name>.<ext>')
                  $segmentName = substr($file, 0, strlen($file) - 4);
                  // Check if it's not one of the segments in the current segments set
                  // (the segment currently being written must be kept as well)
                  if (!isset($segments[$segmentName])  &&
                      ($this->_currentSegment === null  ||  $this->_currentSegment->getName() != $segmentName)) {
                      $filesToDelete[] = $file;
                      $filesTypes[]    = 3; // second group of files for deletions
                      $filesNumbers[]  = (int)base_convert(substr($file, 1 /* skip '_' */, strlen($file)-5), 36, 10); // order by segment number
                  }
              }
          }

          $maxGenNumber = 0;
          // process .del files of currently used segments
          foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
              ksort($delFiles[$segmentNumber], SORT_NUMERIC);
              array_pop($delFiles[$segmentNumber]); // remove last delete file generation from candidates for deleting

              end($delFiles[$segmentNumber]);
              $lastGenNumber = key($delFiles[$segmentNumber]);
              if ($lastGenNumber > $maxGenNumber) {
                  $maxGenNumber = $lastGenNumber;
              }
          }
          foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
              foreach ($segmentDelFiles as $delGeneration => $file) {
                  $filesToDelete[] = $file;
                  $filesTypes[]    = 4; // third group of files for deletions
                  $filesNumbers[]  = $segmentNumber*$maxGenNumber + $delGeneration; // order by <segment_number>,<del_generation> pair
              }
          }

          // Reorder files for deleting
          array_multisort($filesTypes,    SORT_ASC, SORT_NUMERIC,
                          $filesNumbers,  SORT_ASC, SORT_NUMERIC,
                          $filesToDelete, SORT_ASC, SORT_STRING);

          foreach ($filesToDelete as $file) {
              try {
                  /** Skip shared docstore segments deleting */
                  /** @todo Process '.cfx' files to check if them are already unused */
                  if (substr($file, strlen($file)-4) != '.cfx') {
                      $this->_directory->deleteFile($file);
                  }
              } catch (Zend_Search_Lucene_Exception $e) {
                  if (strpos($e->getMessage(), 'Can\'t delete file') === false) {
                      // That's not "file is under processing or already deleted" exception
                      // Pass it through
                      throw $e;
                  }
              }
          }

          // Return read lock into the previous state
          Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
      } else {
          // Only release resources if another index reader is running now
          foreach ($this->_segmentsToDelete as $segName) {
              foreach (self::$_indexExtensions as $ext) {
                  $this->_directory->purgeFile($segName . $ext);
              }
          }
      }

      // Clean-up _segmentsToDelete container
      $this->_segmentsToDelete = array();

      // Release index write lock
      Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

      // Remove unused segments from segments list
      foreach ($this->_segmentInfos as $segName => $segmentInfo) {
          if (!isset($segments[$segName])) {
              unset($this->_segmentInfos[$segName]);
          }
      }
  }
  /**
   * Commit current changes
   *
   * Closes the segment currently being written (if any), queues the
   * resulting segment for inclusion and then rewrites the segments file.
   */
  public function commit()
  {
      $pendingWriter = $this->_currentSegment;

      if (null !== $pendingWriter) {
          $closedSegment = $pendingWriter->close();

          // close() may return null, in which case there is nothing to register
          if (null !== $closedSegment) {
              $this->_newSegments[$closedSegment->getName()] = $closedSegment;
          }

          $this->_currentSegment = null;
      }

      $this->_updateSegments();
  }
  /**
   * Merges the provided indexes into this index.
   *
   * Not implemented yet: the method body is empty, so calling it currently
   * has no effect.
   *
   * @param array $readers
   * @return void
   */
  public function addIndexes($readers)
  {
      /**
       * @todo implementation
       */
  }
  /**
   * Merges all segments together into new one
   *
   * Returns true on success and false if another optimization or auto-optimization process
   * is running now
   *
   * The optimization lock is released before returning and also when the
   * segments update or the merge throws.
   *
   * @return boolean
   * @throws Exception whatever _updateSegments() or _mergeSegments() throws
   */
  public function optimize()
  {
      if (Zend_Search_Lucene_LockManager::obtainOptimizationLock($this->_directory) === false) {
          return false;
      }

      try {
          // Update segments list to be sure all segments are not merged yet by another process
          //
          // Segment merging functionality is concentrated in this class and surrounded
          // by optimization lock obtaining/releasing.
          // _updateSegments() refreshes segments list from the latest index generation.
          // So only new segments can be added to the index while we are merging some already existing
          // segments.
          // Newly added segments will be also included into the index by the _updateSegments() call
          // either by another process or by the current process with the commit() call at the end
          // of _mergeSegments() method.
          // That's guaranteed by the serialisation of _updateSegments() execution using exclusive locks.
          $this->_updateSegments();

          $this->_mergeSegments($this->_segmentInfos);
      } catch (Exception $e) {
          // Do not keep the optimization lock when the update or merge fails
          Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
          throw $e;
      }

      Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);

      return true;
  }
  /**
   * Get name for new segment
   *
   * Under the index write lock, reads the segment name counter from the
   * current segments file, stores the incremented counter back and returns
   * '_' followed by the previous counter value in base 36.
   *
   * @return string
   */
  private function _newSegmentName()
  {
      // The name counter follows the 4-byte format marker and the 8-byte index version
      $nameCounterOffset = 12;

      Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

      $currentGeneration = Zend_Search_Lucene::getActualGeneration($this->_directory);
      $segmentsFileName  = Zend_Search_Lucene::getSegmentFileName($currentGeneration);
      $segmentsFile      = $this->_directory->getFileObject($segmentsFileName, false);

      $segmentsFile->seek($nameCounterOffset);
      $nameCounter = $segmentsFile->readInt();

      $segmentsFile->seek($nameCounterOffset);
      $segmentsFile->writeInt($nameCounter + 1);

      // Flush output to guarantee that wrong value will not be loaded between unlock and
      // return (which calls $segmentsFile destructor)
      $segmentsFile->flush();

      Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

      $suffix = base_convert($nameCounter, 10, 36);

      return '_' . $suffix;
  }
  723. }