2
0

Writer.php 35 KB


  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Index
  18. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. */
  21. /** Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter */
  22. require_once 'Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php';
  23. /** Zend_Search_Lucene_Index_SegmentInfo */
  24. require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
  25. /** Zend_Search_Lucene_Index_SegmentMerger */
  26. require_once 'Zend/Search/Lucene/Index/SegmentMerger.php';
  27. /** Zend_Search_Lucene_LockManager */
  28. require_once 'Zend/Search/Lucene/LockManager.php';
  29. /**
  30. * @category Zend
  31. * @package Zend_Search_Lucene
  32. * @subpackage Index
  33. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  34. * @license http://framework.zend.com/license/new-bsd New BSD License
  35. */
  36. class Zend_Search_Lucene_Index_Writer
  37. {
  38. /**
  39. * @todo Implement Analyzer substitution
  40. * @todo Implement Zend_Search_Lucene_Storage_DirectoryRAM and Zend_Search_Lucene_Storage_FileRAM to use it for
  41. * temporary index files
  42. * @todo Directory lock processing
  43. */
  /**
   * Number of documents required before the buffered in-memory
   * documents are written into a new Segment
   *
   * addDocument() commits the current segment once its document count
   * reaches this value. The auto-optimization code also uses it as the
   * size of the first (smallest) merge level.
   *
   * Default value is 10
   *
   * @var integer
   */
  public $maxBufferedDocs = 10;

  /**
   * Largest number of documents ever merged by addDocument().
   * Small values (e.g., less than 10,000) are best for interactive indexing,
   * as this limits the length of pauses while indexing to a few seconds.
   * Larger values are best for batched indexing and speedier searches.
   *
   * Auto-optimization stops as soon as the size of the next merge level
   * would exceed this value.
   *
   * Default value is PHP_INT_MAX
   *
   * @var integer
   */
  public $maxMergeDocs = PHP_INT_MAX;

  /**
   * Determines how often segment indices are merged by addDocument().
   *
   * With smaller values, less RAM is used while indexing,
   * and searches on unoptimized indices are faster,
   * but indexing speed is slower.
   *
   * With larger values, more RAM is used during indexing,
   * and while searches on unoptimized indices are slower,
   * indexing is faster.
   *
   * Thus larger values (> 10) are best for batch index creation,
   * and smaller values (< 10) for indices that are interactively maintained.
   *
   * The size of each merge level is the size of the previous level
   * multiplied by this factor.
   *
   * Default value is 10
   *
   * @var integer
   */
  public $mergeFactor = 10;

  /**
   * File system adapter.
   *
   * @var Zend_Search_Lucene_Storage_Directory
   */
  private $_directory = null;

  /**
   * Changes counter.
   *
   * Incremented by addDocument(); added to the stored index version and
   * reset to zero by _updateSegments().
   *
   * @var integer
   */
  private $_versionUpdate = 0;

  /**
   * List of the segments, created by index writer
   * Array of Zend_Search_Lucene_Index_SegmentInfo objects, keyed by segment name
   *
   * @var array
   */
  private $_newSegments = array();

  /**
   * List of segments to be deleted on commit
   *
   * Segment names, stored as both key and value.
   *
   * @var array
   */
  private $_segmentsToDelete = array();

  /**
   * Current segment to add documents
   *
   * Null while no documents are buffered.
   *
   * @var Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter
   */
  private $_currentSegment = null;

  /**
   * Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
   *
   * It's a reference to the corresponding Zend_Search_Lucene::$_segmentInfos array
   * (bound by reference in the constructor), keyed by segment name.
   *
   * @var array Zend_Search_Lucene_Index_SegmentInfo
   */
  private $_segmentInfos;

  /**
   * Index target format version
   *
   * Compared against the Zend_Search_Lucene::FORMAT_* constants when the
   * segments file is written.
   *
   * @var integer
   */
  private $_targetFormatVersion;

  /**
   * List of index file extensions
   *
   * Keyed by the extension itself so that membership can be tested with isset().
   *
   * @var array
   */
  private static $_indexExtensions = array('.cfs' => '.cfs',
                                           '.cfx' => '.cfx',
                                           '.fnm' => '.fnm',
                                           '.fdx' => '.fdx',
                                           '.fdt' => '.fdt',
                                           '.tis' => '.tis',
                                           '.tii' => '.tii',
                                           '.frq' => '.frq',
                                           '.prx' => '.prx',
                                           '.tvx' => '.tvx',
                                           '.tvd' => '.tvd',
                                           '.tvf' => '.tvf',
                                           '.del' => '.del',
                                           '.sti' => '.sti' );
  /**
   * Create empty index
   *
   * For generation 0 the index is created in pre-2.1 mode: existing index
   * files in the directory are deleted, then a 'segments' file and an empty
   * 'deletable' file are written.
   *
   * For any other generation a 'segments.gen' file and the generation's
   * segments file are written; existing files are not removed here.
   *
   * @param Zend_Search_Lucene_Storage_Directory $directory
   * @param integer $generation
   * @param integer $nameCount  initial value of the segment name counter
   */
  public static function createIndex(Zend_Search_Lucene_Storage_Directory $directory, $generation, $nameCount)
  {
      if ($generation == 0) {
          // Create index in pre-2.1 mode
          foreach ($directory->fileList() as $file) {
              if ($file == 'deletable' ||
                  $file == 'segments'  ||
                  isset(self::$_indexExtensions[ substr($file, strlen($file)-4)]) ||
                  preg_match('/\.f\d+$/i', $file) /* matches <segment_name>.f<decimal_number> file names */) {
                  $directory->deleteFile($file);
              }
          }

          $segmentsFile = $directory->createFile('segments');
          $segmentsFile->writeInt((int)0xFFFFFFFF);

          // write version (is initialized by current time)
          // $segmentsFile->writeLong((int)microtime(true));
          //
          // Truncate to whole seconds explicitly: applying '&' to a float with
          // a fractional part is an implicit lossy float-to-int conversion.
          $version = (int)microtime(true);
          $segmentsFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
          $segmentsFile->writeInt((int)($version & 0xFFFFFFFF));

          // write name counter
          $segmentsFile->writeInt($nameCount);
          // write segment counter
          $segmentsFile->writeInt(0);

          $deletableFile = $directory->createFile('deletable');
          // write counter
          $deletableFile->writeInt(0);
      } else {
          $genFile = $directory->createFile('segments.gen');

          $genFile->writeInt((int)0xFFFFFFFE);
          // Write generation two times
          $genFile->writeLong($generation);
          $genFile->writeLong($generation);

          $segmentsFile = $directory->createFile(Zend_Search_Lucene::getSegmentFileName($generation));
          $segmentsFile->writeInt((int)0xFFFFFFFD);

          // write version (is initialized by current time)
          // $segmentsFile->writeLong((int)microtime(true));
          //
          // Explicit truncation, see the pre-2.1 branch above.
          $version = (int)microtime(true);
          $segmentsFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
          $segmentsFile->writeInt((int)($version & 0xFFFFFFFF));

          // write name counter
          $segmentsFile->writeInt($nameCount);
          // write segment counter
          $segmentsFile->writeInt(0);
      }
  }
  /**
   * Open the index for writing
   *
   * The segment info list is bound by reference, so changes made by the
   * writer are visible through the array the caller passed in.
   *
   * @param Zend_Search_Lucene_Storage_Directory $directory
   * @param array $segmentInfos
   * @param integer $targetFormatVersion
   */
  public function __construct(Zend_Search_Lucene_Storage_Directory $directory, &$segmentInfos, $targetFormatVersion)
  {
      $this->_targetFormatVersion = $targetFormatVersion;
      $this->_segmentInfos        = &$segmentInfos;
      $this->_directory           = $directory;
  }
  /**
   * Adds a document to this index.
   *
   * A new in-memory segment is started on demand. Once it holds
   * $maxBufferedDocs documents it is committed, and after every added
   * document the writer checks whether segments should be merged.
   *
   * @param Zend_Search_Lucene_Document $document
   */
  public function addDocument(Zend_Search_Lucene_Document $document)
  {
      // Lazily start a segment for buffered documents
      if (null === $this->_currentSegment) {
          $segmentName = $this->_newSegmentName();
          $this->_currentSegment =
              new Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter($this->_directory, $segmentName);
      }

      $this->_currentSegment->addDocument($document);

      $bufferIsFull = $this->_currentSegment->count() >= $this->maxBufferedDocs;
      if ($bufferIsFull) {
          $this->commit();
      }

      $this->_maybeMergeSegments();

      // Counted towards the index version on the next segments file update
      $this->_versionUpdate += 1;
  }
  /**
   * Check if we have anything to merge
   *
   * Segments are visited in ascending size order and grouped into merge
   * levels. The first level holds segments smaller than $maxBufferedDocs;
   * each following level is $mergeFactor times larger. A merge is due as
   * soon as the accumulated size of one level reaches that level's
   * threshold. Levels above $maxMergeDocs are never merged.
   *
   * @return boolean
   */
  private function _hasAnythingToMerge()
  {
      $segmentSizes = array();
      foreach ($this->_segmentInfos as $segName => $segmentInfo) {
          $segmentSizes[$segName] = $segmentInfo->count();
      }

      // Both settings are public and unvalidated. A factor below 2 never grows
      // the threshold, so the loop below would not terminate; a threshold
      // below 1 would report a merge even for an empty index.
      $mergeFactor = max(2, $this->mergeFactor);
      $sizeToMerge = max(1, $this->maxBufferedDocs);

      $poolSize = 0;

      asort($segmentSizes, SORT_NUMERIC);
      foreach ($segmentSizes as $size) {
          // Check, if segment comes into a new merging block
          while ($size >= $sizeToMerge) {
              // Previous block is large enough to be merged
              if ($poolSize >= $sizeToMerge) {
                  return true;
              }

              $poolSize     = 0;
              $sizeToMerge *= $mergeFactor;

              if ($sizeToMerge > $this->maxMergeDocs) {
                  return false;
              }
          }

          $poolSize += $size;
      }

      return $poolSize >= $sizeToMerge;
  }
  /**
   * Merge segments if necessary
   *
   * Does nothing if the optimization lock cannot be obtained. The lock is
   * released on every exit path, including when a merge throws.
   *
   * @throws Exception  any exception raised while updating or merging segments
   */
  private function _maybeMergeSegments()
  {
      if (Zend_Search_Lucene_LockManager::obtainOptimizationLock($this->_directory) === false) {
          return;
      }

      try {
          if ($this->_hasAnythingToMerge()) {
              // Update segments list to be sure all segments are not merged yet by another process
              //
              // Segment merging functionality is concentrated in this class and surrounded
              // by optimization lock obtaining/releasing.
              // _updateSegments() refreshes segments list from the latest index generation.
              // So only new segments can be added to the index while we are merging some already existing
              // segments.
              // Newly added segments will be also included into the index by the _updateSegments() call
              // either by another process or by the current process with the commit() call at the end of _mergeSegments() method.
              // That's guaranteed by the serialisation of _updateSegments() execution using exclusive locks.
              $this->_updateSegments();

              $this->_mergeSegmentsBySizeLevels();
          }
      } catch (Exception $e) {
          // Do not keep the optimization lock when updating or merging fails
          Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
          throw $e;
      }

      Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
  }

  /**
   * Perform standard auto-optimization procedure: group segments into size
   * levels and merge every level whose accumulated size reaches its threshold.
   *
   * The caller is expected to hold the optimization lock.
   */
  private function _mergeSegmentsBySizeLevels()
  {
      $segmentSizes = array();
      foreach ($this->_segmentInfos as $segName => $segmentInfo) {
          $segmentSizes[$segName] = $segmentInfo->count();
      }

      // A factor below 2 never grows the threshold and a threshold below 1 is
      // always reached, so either would keep the loop below from terminating.
      $mergeFactor = max(2, $this->mergeFactor);
      $sizeToMerge = max(1, $this->maxBufferedDocs);

      $mergePool = array();
      $poolSize  = 0;

      asort($segmentSizes, SORT_NUMERIC);
      foreach ($segmentSizes as $segName => $size) {
          // Check, if segment comes into a new merging block
          while ($size >= $sizeToMerge) {
              // Merge previous block if it's large enough
              if ($poolSize >= $sizeToMerge) {
                  $this->_mergeSegments($mergePool);
              }
              $mergePool = array();
              $poolSize  = 0;

              $sizeToMerge *= $mergeFactor;

              if ($sizeToMerge > $this->maxMergeDocs) {
                  // Remaining segments are too large to be merged automatically
                  return;
              }
          }

          $mergePool[] = $this->_segmentInfos[$segName];
          $poolSize += $size;
      }

      if ($poolSize >= $sizeToMerge) {
          $this->_mergeSegments($mergePool);
      }
  }
  /**
   * Merge specified segments
   *
   * Every source segment is handed to a segment merger and scheduled for
   * deletion; the merged segment (if any) is registered as a new segment
   * and the result is committed.
   *
   * $segments is an array of SegmentInfo objects
   *
   * @param array $segments
   */
  private function _mergeSegments($segments)
  {
      $merger = new Zend_Search_Lucene_Index_SegmentMerger($this->_directory,
                                                           $this->_newSegmentName());

      foreach ($segments as $source) {
          $merger->addSource($source);

          $sourceName = $source->getName();
          $this->_segmentsToDelete[$sourceName] = $sourceName;
      }

      $merged = $merger->merge();
      if (null !== $merged) {
          $this->_newSegments[$merged->getName()] = $merged;
      }

      $this->commit();
  }
  /**
   * Update segments file by adding current segment to a list
   *
   * Steps, all performed under the index write lock:
   *  1. write pending changes of every known segment;
   *  2. read the current generation's segments file and write the next
   *     generation's file: entries of segments not scheduled for deletion
   *     are copied, then entries for $_newSegments are appended;
   *  3. record the new generation in 'segments.gen' (restored to the
   *     previous generation if step 2 throws);
   *  4. if the read lock can be escalated, delete index files that are no
   *     longer referenced; otherwise only purge the files of merged segments;
   *  5. drop segments that are no longer listed from $_segmentInfos.
   *
   * @throws Zend_Search_Lucene_Exception
   */
  private function _updateSegments()
  {
      // Get an exclusive index lock
      Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

      // Write down changes for the segments
      foreach ($this->_segmentInfos as $segInfo) {
          $segInfo->writeChanges();
      }

      $generation = Zend_Search_Lucene::getActualGeneration($this->_directory);
      $segmentsFile   = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);
      // From here on $generation is the generation being written
      $newSegmentFile = $this->_directory->createFile(Zend_Search_Lucene::getSegmentFileName(++$generation), false);

      try {
          $genFile = $this->_directory->getFileObject('segments.gen', false);
      } catch (Zend_Search_Lucene_Exception $e) {
          // A missing 'segments.gen' is recognised by the exception message text
          if (strpos($e->getMessage(), 'is not readable') !== false) {
              $genFile = $this->_directory->createFile('segments.gen');
          } else {
              throw $e;
          }
      }

      $genFile->writeInt((int)0xFFFFFFFE);
      // Write generation (first copy)
      $genFile->writeLong($generation);

      try {
          // Write format marker
          if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_1) {
              $newSegmentFile->writeInt((int)0xFFFFFFFD);
          } else if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
              $newSegmentFile->writeInt((int)0xFFFFFFFC);
          }

          // Read src file format identifier
          $format = $segmentsFile->readInt();
          if ($format == (int)0xFFFFFFFF) {
              $srcFormat = Zend_Search_Lucene::FORMAT_PRE_2_1;
          } else if ($format == (int)0xFFFFFFFD) {
              $srcFormat = Zend_Search_Lucene::FORMAT_2_1;
          } else if ($format == (int)0xFFFFFFFC) {
              $srcFormat = Zend_Search_Lucene::FORMAT_2_3;
          } else {
              throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
          }

          // $version = $segmentsFile->readLong() + $this->_versionUpdate;
          // Process version on 32-bit platforms
          // (the 64-bit value is read as two 32-bit halves and combined as a float)
          $versionHigh = $segmentsFile->readInt();
          $versionLow  = $segmentsFile->readInt();
          $version = $versionHigh * ((double)0xFFFFFFFF + 1) +
                     (($versionLow < 0)? (double)0xFFFFFFFF - (-1 - $versionLow) : $versionLow);
          // Advance the version by the number of changes made since the last update
          $version += $this->_versionUpdate;
          $this->_versionUpdate = 0;
          $newSegmentFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
          $newSegmentFile->writeInt((int)($version & 0xFFFFFFFF));

          // Write segment name counter
          $newSegmentFile->writeInt($segmentsFile->readInt());

          // Get number of segments offset
          $numOfSegmentsOffset = $newSegmentFile->tell();
          // Write dummy data (segment counter); the real count is patched in below
          $newSegmentFile->writeInt(0);

          // Read number of segments
          $segmentsCount = $segmentsFile->readInt();

          // Segment name => segment size for every segment listed in the new file
          $segments = array();
          for ($count = 0; $count < $segmentsCount; $count++) {
              $segName = $segmentsFile->readString();
              $segSize = $segmentsFile->readInt();

              if ($srcFormat == Zend_Search_Lucene::FORMAT_PRE_2_1) {
                  // pre-2.1 index format: the entry has no further fields, use defaults
                  $delGenHigh        = 0;
                  $delGenLow         = 0;
                  $hasSingleNormFile = false;
                  $numField          = (int)0xFFFFFFFF;
                  $isCompoundByte    = 0;
                  $docStoreOptions   = null;
              } else {
                  //$delGen = $segmentsFile->readLong();
                  $delGenHigh = $segmentsFile->readInt();
                  $delGenLow  = $segmentsFile->readInt();

                  if ($srcFormat == Zend_Search_Lucene::FORMAT_2_3) {
                      $docStoreOffset = $segmentsFile->readInt();

                      if ($docStoreOffset != -1) {
                          $docStoreSegment        = $segmentsFile->readString();
                          $docStoreIsCompoundFile = $segmentsFile->readByte();

                          $docStoreOptions = array('offset'     => $docStoreOffset,
                                                   'segment'    => $docStoreSegment,
                                                   'isCompound' => ($docStoreIsCompoundFile == 1));
                      } else {
                          $docStoreOptions = null;
                      }
                  } else {
                      $docStoreOptions = null;
                  }

                  $hasSingleNormFile = $segmentsFile->readByte();
                  $numField          = $segmentsFile->readInt();

                  $normGens = array();
                  if ($numField != (int)0xFFFFFFFF) {
                      for ($count1 = 0; $count1 < $numField; $count1++) {
                          $normGens[] = $segmentsFile->readLong();
                      }
                  }
                  $isCompoundByte    = $segmentsFile->readByte();
              }

              // Entries of segments scheduled for deletion are not copied
              if (!in_array($segName, $this->_segmentsToDelete)) {
                  // Load segment if necessary
                  if (!isset($this->_segmentInfos[$segName])) {
                      if (PHP_INT_SIZE > 4) {
                          // 64-bit system
                          $delGen = $delGenHigh << 32  |
                                    $delGenLow;
                      } else {
                          $delGen = $delGenHigh * ((double)0xFFFFFFFF + 1) +
                                    (($delGenLow < 0)? (double)0xFFFFFFFF - (-1 - $delGenLow) : $delGenLow);
                      }
                      // NOTE(review): $isCompound stays undefined for any other byte value
                      if ($isCompoundByte == 0xFF) {
                          // The segment is not a compound file
                          $isCompound = false;
                      } else if ($isCompoundByte == 0x00) {
                          // The status is unknown
                          $isCompound = null;
                      } else if ($isCompoundByte == 0x01) {
                          // The segment is a compound file
                          $isCompound = true;
                      }

                      $this->_segmentInfos[$segName] =
                          new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                                   $segName,
                                                                   $segSize,
                                                                   $delGen,
                                                                   $docStoreOptions,
                                                                   $hasSingleNormFile,
                                                                   $isCompound);
                  } else {
                      // Retrieve actual deletions file generation number
                      $delGen = $this->_segmentInfos[$segName]->getDelGen();

                      if ($delGen >= 0) {
                          if (PHP_INT_SIZE > 4) {
                              // 64-bit system
                              $delGenHigh = $delGen >> 32  & 0xFFFFFFFF;
                              $delGenLow  = $delGen        & 0xFFFFFFFF;
                          } else {
                              $delGenHigh = (int)($delGen/((double)0xFFFFFFFF + 1));
                              $delGenLow  =(int)($delGen & 0xFFFFFFFF);
                          }
                      } else {
                          // Negative generation is written as -1 in both halves
                          $delGenHigh = $delGenLow = (int)0xFFFFFFFF;
                      }
                  }

                  $newSegmentFile->writeString($segName);
                  $newSegmentFile->writeInt($segSize);
                  $newSegmentFile->writeInt($delGenHigh);
                  $newSegmentFile->writeInt($delGenLow);
                  if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                      if ($docStoreOptions !== null) {
                          $newSegmentFile->writeInt($docStoreOffset);
                          $newSegmentFile->writeString($docStoreSegment);
                          $newSegmentFile->writeByte($docStoreIsCompoundFile);
                      } else {
                          // Set DocStoreOffset to -1
                          $newSegmentFile->writeInt((int)0xFFFFFFFF);
                      }
                  } else if ($docStoreOptions !== null) {
                      // NOTE(review): this throw happens inside the enclosing try, so the
                      // catch (Exception) handler below releases the write lock a second
                      // time (assuming Zend_Search_Lucene_Exception extends Exception) - confirm
                      // that a double release is harmless.
                      // Release index write lock
                      Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

                      throw new Zend_Search_Lucene_Exception('Index conversion to lower format version is not supported.');
                  }

                  $newSegmentFile->writeByte($hasSingleNormFile);
                  $newSegmentFile->writeInt($numField);
                  if ($numField != (int)0xFFFFFFFF) {
                      foreach ($normGens as $normGen) {
                          $newSegmentFile->writeLong($normGen);
                      }
                  }
                  $newSegmentFile->writeByte($isCompoundByte);

                  $segments[$segName] = $segSize;
              }
          }
          $segmentsFile->close();

          $segmentsCount = count($segments) + count($this->_newSegments);

          // Append entries for the segments created by this writer
          foreach ($this->_newSegments as $segName => $segmentInfo) {
              $newSegmentFile->writeString($segName);
              $newSegmentFile->writeInt($segmentInfo->count());

              // delete file generation: -1 (there is no delete file yet)
              $newSegmentFile->writeInt((int)0xFFFFFFFF);$newSegmentFile->writeInt((int)0xFFFFFFFF);
              if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                  // docStoreOffset: -1 (segment doesn't use shared doc store)
                  $newSegmentFile->writeInt((int)0xFFFFFFFF);
              }

              // HasSingleNormFile
              $newSegmentFile->writeByte($segmentInfo->hasSingleNormFile());

              // NumField
              $newSegmentFile->writeInt((int)0xFFFFFFFF);

              // IsCompoundFile
              $newSegmentFile->writeByte($segmentInfo->isCompound() ? 1 : -1);

              $segments[$segmentInfo->getName()] = $segmentInfo->count();
              $this->_segmentInfos[$segName] = $segmentInfo;
          }
          $this->_newSegments = array();

          // Go back and replace the dummy segment counter with the real value
          $newSegmentFile->seek($numOfSegmentsOffset);
          $newSegmentFile->writeInt($segmentsCount);  // Update segments count
          $newSegmentFile->close();
      } catch (Exception $e) {
          /** Restore previous index generation */
          $generation--;
          // Skip the 4-byte format marker at the start of 'segments.gen'
          $genFile->seek(4, SEEK_SET);
          // Write generation number twice
          $genFile->writeLong($generation); $genFile->writeLong($generation);

          // Release index write lock
          Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

          // Throw the exception
          throw $e;
      }

      // Write generation (second copy)
      $genFile->writeLong($generation);


      // Check if another update or read process is not running now
      // If yes, skip clean-up procedure
      if (Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory)) {
          /**
           * Clean-up directory
           *
           * Three parallel arrays: file name, deletion group and order key
           * within the group. They are sorted together before deleting.
           */
          $filesToDelete = array();
          $filesTypes    = array();
          $filesNumbers  = array();

          // list of .del files of currently used segments
          // each segment can have several generations of .del files
          // only last should not be deleted
          $delFiles = array();

          foreach ($this->_directory->fileList() as $file) {
              if ($file == 'deletable') {
                  // 'deletable' file
                  $filesToDelete[] = $file;
                  $filesTypes[]    = 0; // delete this file first, since it's not used starting from Lucene v2.1
                  $filesNumbers[]  = 0;
              } else if ($file == 'segments') {
                  // 'segments' file
                  $filesToDelete[] = $file;
                  $filesTypes[]    = 1; // second file to be deleted "zero" version of segments file (Lucene pre-2.1)
                  $filesNumbers[]  = 0;
              } else if (preg_match('/^segments_[a-zA-Z0-9]+$/i', $file)) {
                  // 'segments_xxx' file
                  // Check if it's not a just created generation file
                  if ($file != Zend_Search_Lucene::getSegmentFileName($generation)) {
                      $filesToDelete[] = $file;
                      $filesTypes[]    = 2; // first group of files for deletions
                      $filesNumbers[]  = (int)base_convert(substr($file, 9), 36, 10); // ordered by segment generation numbers
                  }
              } else if (preg_match('/(^_([a-zA-Z0-9]+))\.f\d+$/i', $file, $matches)) {
                  // one of per segment files ('<segment_name>.f<decimal_number>')
                  // Check if it's not one of the segments in the current segments set
                  if (!isset($segments[$matches[1]])) {
                      $filesToDelete[] = $file;
                      $filesTypes[]    = 3; // second group of files for deletions
                      $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number
                  }
              } else if (preg_match('/(^_([a-zA-Z0-9]+))(_([a-zA-Z0-9]+))\.del$/i', $file, $matches)) {
                  // one of per segment files ('<segment_name>_<del_generation>.del' where <segment_name> is '_<segment_number>')
                  // Check if it's not one of the segments in the current segments set
                  if (!isset($segments[$matches[1]])) {
                      $filesToDelete[] = $file;
                      $filesTypes[]    = 3; // second group of files for deletions
                      $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number
                  } else {
                      // Segment is still in use: collect its .del generations for later pruning
                      $segmentNumber = (int)base_convert($matches[2], 36, 10);
                      $delGeneration = (int)base_convert($matches[4], 36, 10);
                      if (!isset($delFiles[$segmentNumber])) {
                          $delFiles[$segmentNumber] = array();
                      }
                      $delFiles[$segmentNumber][$delGeneration] = $file;
                  }
              } else if (isset(self::$_indexExtensions[substr($file, strlen($file)-4)])) {
                  // one of per segment files ('<segment_name>.<ext>')
                  $segmentName = substr($file, 0, strlen($file) - 4);
                  // Check if it's not one of the segments in the current segments set
                  // (the segment currently being written is kept as well)
                  if (!isset($segments[$segmentName])  &&
                      ($this->_currentSegment === null  ||  $this->_currentSegment->getName() != $segmentName)) {
                      $filesToDelete[] = $file;
                      $filesTypes[]    = 3; // second group of files for deletions
                      $filesNumbers[]  = (int)base_convert(substr($file, 1 /* skip '_' */, strlen($file)-5), 36, 10); // order by segment number
                  }
              }
          }

          $maxGenNumber = 0;
          // process .del files of currently used segments
          foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
              ksort($delFiles[$segmentNumber], SORT_NUMERIC);
              array_pop($delFiles[$segmentNumber]); // remove last delete file generation from candidates for deleting

              // Track the highest remaining generation number across all segments
              end($delFiles[$segmentNumber]);
              $lastGenNumber = key($delFiles[$segmentNumber]);
              if ($lastGenNumber > $maxGenNumber) {
                  $maxGenNumber = $lastGenNumber;
              }
          }
          foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
              foreach ($segmentDelFiles as $delGeneration => $file) {
                  $filesToDelete[] = $file;
                  $filesTypes[]    = 4; // third group of files for deletions
                  $filesNumbers[]  = $segmentNumber*$maxGenNumber + $delGeneration; // order by <segment_number>,<del_generation> pair
              }
          }

          // Reorder files for deleting
          array_multisort($filesTypes,    SORT_ASC, SORT_NUMERIC,
                          $filesNumbers,  SORT_ASC, SORT_NUMERIC,
                          $filesToDelete, SORT_ASC, SORT_STRING);

          foreach ($filesToDelete as $file) {
              try {
                  /** Skip shared docstore segments deleting */
                  /** @todo Process '.cfx' files to check if they are already unused */
                  if (substr($file, strlen($file)-4) != '.cfx') {
                      $this->_directory->deleteFile($file);
                  }
              } catch (Zend_Search_Lucene_Exception $e) {
                  if (strpos($e->getMessage(), 'Can\'t delete file') === false) {
                      // That's not "file is under processing or already deleted" exception
                      // Pass it through
                      throw $e;
                  }
              }
          }

          // Return read lock into the previous state
          Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
      } else {
          // Only release resources if another index reader is running now
          foreach ($this->_segmentsToDelete as $segName) {
              foreach (self::$_indexExtensions as $ext) {
                  $this->_directory->purgeFile($segName . $ext);
              }
          }
      }

      // Clean-up _segmentsToDelete container
      $this->_segmentsToDelete = array();


      // Release index write lock
      Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

      // Remove unused segments from segments list
      foreach ($this->_segmentInfos as $segName => $segmentInfo) {
          if (!isset($segments[$segName])) {
              unset($this->_segmentInfos[$segName]);
          }
      }
  }
  /**
   * Commit current changes
   *
   * Closes the segment that buffers added documents (if there is one),
   * registers the resulting segment as new, and rewrites the segments file.
   */
  public function commit()
  {
      if (null !== $this->_currentSegment) {
          $closedSegment = $this->_currentSegment->close();

          if (null !== $closedSegment) {
              $closedName = $closedSegment->getName();
              $this->_newSegments[$closedName] = $closedSegment;
          }

          $this->_currentSegment = null;
      }

      $this->_updateSegments();
  }
  /**
   * Merges the provided indexes into this index.
   *
   * Not implemented yet: the method body is empty, so calling it
   * currently has no effect.
   *
   * @param array $readers
   * @return void
   */
  public function addIndexes($readers)
  {
      /**
       * @todo implementation
       */
  }
  /**
   * Merges all segments together into new one
   *
   * Returns true on success and false if another optimization or auto-optimization process
   * is running now
   *
   * The optimization lock is released on every exit path, including when
   * updating or merging the segments throws.
   *
   * @return boolean
   * @throws Exception  any exception raised while updating or merging segments
   */
  public function optimize()
  {
      if (Zend_Search_Lucene_LockManager::obtainOptimizationLock($this->_directory) === false) {
          return false;
      }

      try {
          // Update segments list to be sure all segments are not merged yet by another process
          //
          // Segment merging functionality is concentrated in this class and surrounded
          // by optimization lock obtaining/releasing.
          // _updateSegments() refreshes segments list from the latest index generation.
          // So only new segments can be added to the index while we are merging some already existing
          // segments.
          // Newly added segments will be also included into the index by the _updateSegments() call
          // either by another process or by the current process with the commit() call at the end of _mergeSegments() method.
          // That's guaranteed by the serialisation of _updateSegments() execution using exclusive locks.
          $this->_updateSegments();

          $this->_mergeSegments($this->_segmentInfos);
      } catch (Exception $e) {
          // Do not keep the optimization lock when the merge fails
          Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
          throw $e;
      }

      Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);

      return true;
  }
  /**
   * Get name for new segment
   *
   * Reads the segment name counter from the current generation's segments
   * file, stores the incremented value back, and returns the name built
   * from the value that was read ('_' followed by the counter in base 36).
   * The read-modify-write is done under the index write lock, which is
   * released even if accessing the segments file fails.
   *
   * @return string
   * @throws Exception  any exception raised while accessing the segments file
   */
  private function _newSegmentName()
  {
      Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

      try {
          $generation = Zend_Search_Lucene::getActualGeneration($this->_directory);
          $segmentsFile = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);

          $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
          $segmentNameCounter = $segmentsFile->readInt();

          $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
          $segmentsFile->writeInt($segmentNameCounter + 1);

          // Flush output to guarantee that wrong value will not be loaded between unlock and
          // return (which calls $segmentsFile destructor)
          $segmentsFile->flush();
      } catch (Exception $e) {
          // Do not keep the write lock when the counter could not be updated
          Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
          throw $e;
      }

      Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

      return '_' . base_convert($segmentNameCounter, 10, 36);
  }
  759. }