Writer.php 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Index
  18. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. */
  21. /** Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter */
  22. require_once 'Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php';
  23. /** Zend_Search_Lucene_Index_SegmentInfo */
  24. require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
  25. /** Zend_Search_Lucene_Index_SegmentMerger */
  26. require_once 'Zend/Search/Lucene/Index/SegmentMerger.php';
  27. /** Zend_Search_Lucene_LockManager */
  28. require_once 'Zend/Search/Lucene/LockManager.php';
  29. /**
  30. * @category Zend
  31. * @package Zend_Search_Lucene
  32. * @subpackage Index
  33. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  34. * @license http://framework.zend.com/license/new-bsd New BSD License
  35. */
  36. class Zend_Search_Lucene_Index_Writer
  37. {
  /**
   * @todo Implement Analyzer substitution
   * @todo Implement Zend_Search_Lucene_Storage_DirectoryRAM and Zend_Search_Lucene_Storage_FileRAM to use it for
   *       temporary index files
   * @todo Directory lock processing
   */

  /**
   * Buffered document limit.
   *
   * Once the in-memory segment holds this many documents addDocument()
   * commits it as a new segment. The value is also the size of the first
   * auto-merge level.
   *
   * Default value is 10
   *
   * @var integer
   */
  public $maxBufferedDocs = 10;

  /**
   * Upper bound for the size of an auto-merge level; merging triggered by
   * addDocument() stops once the level size would exceed it.
   *
   * Small values (e.g., less than 10,000) are best for interactive indexing,
   * as this limits the length of pauses while indexing to a few seconds.
   * Larger values are best for batched indexing and speedier searches.
   *
   * Default value is PHP_INT_MAX
   *
   * @var integer
   */
  public $maxMergeDocs = PHP_INT_MAX;

  /**
   * Growth factor between consecutive auto-merge levels, i.e. how often
   * segment indices are merged by addDocument().
   *
   * Smaller values: less RAM while indexing and faster searches on
   * unoptimized indices, but slower indexing.
   * Larger values: more RAM while indexing and slower searches on
   * unoptimized indices, but faster indexing.
   *
   * Values above 10 suit batch index creation, values below 10 suit
   * interactively maintained indices.
   *
   * Default value is 10
   *
   * @var integer
   */
  public $mergeFactor = 10;

  /**
   * Storage adapter the index lives in.
   *
   * @var Zend_Search_Lucene_Storage_Directory
   */
  private $_directory = null;

  /**
   * Number of changes made since the index version was last written.
   *
   * @var integer
   */
  private $_versionUpdate = 0;

  /**
   * Segments created by this writer and not yet listed in the segments file.
   * Zend_Search_Lucene_Index_SegmentInfo objects keyed by segment name.
   *
   * @var array
   */
  private $_newSegments = array();

  /**
   * Names of the segments to be dropped on the next commit
   * (segment name => segment name).
   *
   * @var array
   */
  private $_segmentsToDelete = array();

  /**
   * Segment that currently receives added documents
   * (null when no segment is open).
   *
   * @var Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter
   */
  private $_currentSegment = null;

  /**
   * Zend_Search_Lucene_Index_SegmentInfo objects of this index.
   *
   * Held by reference: it is the corresponding
   * Zend_Search_Lucene::$_segmentInfos array.
   *
   * @var array Zend_Search_Lucene_Index_SegmentInfo
   */
  private $_segmentInfos;

  /**
   * Format version the index is written in.
   *
   * @var integer
   */
  private $_targetFormatVersion;

  /**
   * Known index file extensions (extension => extension, so that membership
   * can be tested with isset()).
   *
   * @var array
   */
  private static $_indexExtensions = array(
      '.cfs' => '.cfs',
      '.cfx' => '.cfx',
      '.fnm' => '.fnm',
      '.fdx' => '.fdx',
      '.fdt' => '.fdt',
      '.tis' => '.tis',
      '.tii' => '.tii',
      '.frq' => '.frq',
      '.prx' => '.prx',
      '.tvx' => '.tvx',
      '.tvd' => '.tvd',
      '.tvf' => '.tvf',
      '.del' => '.del',
      '.sti' => '.sti'
  );
  /**
   * Initialise an empty index inside the given directory.
   *
   * A $generation of 0 selects the pre-2.1 layout: files that look like index
   * files are removed, then fresh 'segments' and 'deletable' files are written.
   * Any other generation writes 'segments.gen' and the segments file that
   * belongs to that generation.
   *
   * @param Zend_Search_Lucene_Storage_Directory $directory
   * @param integer $generation
   * @param integer $nameCount  initial value of the segment name counter
   */
  public static function createIndex(Zend_Search_Lucene_Storage_Directory $directory, $generation, $nameCount)
  {
      if ($generation != 0) {
          $generationFile = $directory->createFile('segments.gen');
          $generationFile->writeInt((int)0xFFFFFFFE);
          // The generation number is stored twice
          for ($copy = 0; $copy < 2; $copy++) {
              $generationFile->writeLong($generation);
          }

          $segmentsFile = $directory->createFile(Zend_Search_Lucene::getSegmentFileName($generation));
          $segmentsFile->writeInt((int)0xFFFFFFFD);
          // index version, seeded with the current time
          $segmentsFile->writeLong(round(microtime(true)));
          // segment name counter
          $segmentsFile->writeInt($nameCount);
          // number of segments
          $segmentsFile->writeInt(0);

          return;
      }

      // Pre-2.1 mode: wipe everything that looks like an index file first
      foreach ($directory->fileList() as $fileName) {
          $extension   = substr($fileName, strlen($fileName) - 4);
          $isIndexFile = $fileName == 'deletable'
                      || $fileName == 'segments'
                      || isset(self::$_indexExtensions[$extension])
                      // <segment_name>.f<decimal_number> file names
                      || preg_match('/\.f\d+$/i', $fileName);

          if ($isIndexFile) {
              $directory->deleteFile($fileName);
          }
      }

      $segmentsFile = $directory->createFile('segments');
      $segmentsFile->writeInt((int)0xFFFFFFFF);
      // index version, seeded with the current time
      $segmentsFile->writeLong(round(microtime(true)));
      // segment name counter
      $segmentsFile->writeInt($nameCount);
      // number of segments
      $segmentsFile->writeInt(0);

      $deletableFile = $directory->createFile('deletable');
      // number of deletable entries
      $deletableFile->writeInt(0);
  }
  /**
   * Set up a writer on top of an existing index.
   *
   * @param Zend_Search_Lucene_Storage_Directory $directory  storage holding the index files
   * @param array   $segmentInfos  segment list of the index; taken by reference, so
   *                               changes made by the writer are visible to the caller
   * @param integer $targetFormatVersion  format version used for written segments files
   */
  public function __construct(Zend_Search_Lucene_Storage_Directory $directory, &$segmentInfos, $targetFormatVersion)
  {
      $this->_targetFormatVersion = $targetFormatVersion;
      $this->_directory           = $directory;

      // Alias (not copy) the caller's array
      $this->_segmentInfos =& $segmentInfos;
  }
  /**
   * Buffer a document for indexing.
   *
   * The document goes into the current in-memory segment (opened on demand).
   * When that segment reaches $maxBufferedDocs documents it is committed, and
   * after every call the writer checks whether segments should be merged.
   *
   * @param Zend_Search_Lucene_Document $document
   */
  public function addDocument(Zend_Search_Lucene_Document $document)
  {
      if (null === $this->_currentSegment) {
          $segmentName           = $this->_newSegmentName();
          $this->_currentSegment = new Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter(
              $this->_directory,
              $segmentName
          );
      }

      $this->_currentSegment->addDocument($document);

      $bufferedDocs = $this->_currentSegment->count();
      if ($bufferedDocs >= $this->maxBufferedDocs) {
          $this->commit();
      }

      $this->_maybeMergeSegments();

      ++$this->_versionUpdate;
  }
  /**
   * Check if we have anything to merge
   *
   * Walks the segment sizes in ascending order and groups them into merge
   * levels: the first level holds segments smaller than $maxBufferedDocs and
   * every following level is $mergeFactor times larger. A merge is due as soon
   * as the accumulated size of one level reaches that level's threshold.
   * This is the same walk _maybeMergeSegments() performs, without merging.
   *
   * @return boolean true if at least one level is ready to be merged
   */
  private function _hasAnythingToMerge()
  {
      $segmentSizes = array();
      foreach ($this->_segmentInfos as $segmentInfo) {
          $segmentSizes[] = $segmentInfo->count();
      }
      sort($segmentSizes, SORT_NUMERIC);

      $poolSize    = 0;
      $sizeToMerge = $this->maxBufferedDocs;

      foreach ($segmentSizes as $size) {
          // Check, if segment comes into a new merging block
          while ($size >= $sizeToMerge) {
              // Previous block is large enough to be merged
              if ($poolSize >= $sizeToMerge) {
                  return true;
              }

              $poolSize        = 0;
              $nextSizeToMerge = $sizeToMerge * $this->mergeFactor;

              if ($nextSizeToMerge > $this->maxMergeDocs) {
                  return false;
              }
              if ($nextSizeToMerge <= $sizeToMerge) {
                  // The threshold does not grow (mergeFactor <= 1 or a non-positive
                  // threshold): this loop could never terminate, so report that
                  // there is nothing sensible to merge
                  return false;
              }

              $sizeToMerge = $nextSizeToMerge;
          }

          $poolSize += $size;
      }

      return $poolSize >= $sizeToMerge;
  }
  /**
   * Merge segments if necessary
   *
   * Does nothing when the optimization lock cannot be obtained (another
   * optimization is running). The lock is released on every exit path,
   * including when updating or merging throws.
   *
   * @throws Exception whatever _updateSegments() or _mergeSegments() throws
   */
  private function _maybeMergeSegments()
  {
      if (Zend_Search_Lucene_LockManager::obtainOptimizationLock($this->_directory) === false) {
          return;
      }

      try {
          if ($this->_hasAnythingToMerge()) {
              // Update segments list to be sure all segments are not merged yet by another process
              //
              // Segment merging functionality is concentrated in this class and surrounded
              // by optimization lock obtaining/releasing.
              // _updateSegments() refreshes segments list from the latest index generation.
              // So only new segments can be added to the index while we are merging some already existing
              // segments.
              // Newly added segments will be also included into the index by the _updateSegments() call
              // either by another process or by the current process with the commit() call at the end of
              // _mergeSegments() method.
              // That's guaranteed by the serialisation of _updateSegments() execution using exclusive locks.
              $this->_updateSegments();

              // Perform standard auto-optimization procedure
              $this->_mergeSegmentPools();
          }
      } catch (Exception $e) {
          // Do not keep the optimization lock when the merge fails
          Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
          throw $e;
      }

      Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
  }

  /**
   * Group segments into merge levels by size and merge every level whose
   * accumulated size reached the level threshold.
   *
   * The first level holds segments smaller than $maxBufferedDocs; each next
   * level is $mergeFactor times larger. Processing stops once the threshold
   * would exceed $maxMergeDocs.
   */
  private function _mergeSegmentPools()
  {
      $segmentSizes = array();
      foreach ($this->_segmentInfos as $segName => $segmentInfo) {
          $segmentSizes[$segName] = $segmentInfo->count();
      }
      asort($segmentSizes, SORT_NUMERIC);

      $mergePool   = array();
      $poolSize    = 0;
      $sizeToMerge = $this->maxBufferedDocs;

      foreach ($segmentSizes as $segName => $size) {
          // Check, if segment comes into a new merging block
          while ($size >= $sizeToMerge) {
              // Merge previous block if it's large enough
              if ($poolSize >= $sizeToMerge) {
                  $this->_mergeSegments($mergePool);
              }
              $mergePool = array();
              $poolSize  = 0;

              $nextSizeToMerge = $sizeToMerge * $this->mergeFactor;
              if ($nextSizeToMerge > $this->maxMergeDocs) {
                  return;
              }
              if ($nextSizeToMerge <= $sizeToMerge) {
                  // The threshold does not grow (mergeFactor <= 1 or a non-positive
                  // threshold); continuing would loop forever
                  return;
              }
              $sizeToMerge = $nextSizeToMerge;
          }

          $mergePool[] = $this->_segmentInfos[$segName];
          $poolSize   += $size;
      }

      if ($poolSize >= $sizeToMerge) {
          $this->_mergeSegments($mergePool);
      }
  }
  /**
   * Merge the given segments into one new segment and commit the result.
   *
   * Every source segment is scheduled for deletion; the merged segment (if
   * the merger produced one) is registered as a new segment.
   *
   * @param array $segments  Zend_Search_Lucene_Index_SegmentInfo objects to merge
   */
  private function _mergeSegments($segments)
  {
      $targetName = $this->_newSegmentName();
      $merger     = new Zend_Search_Lucene_Index_SegmentMerger($this->_directory, $targetName);

      foreach ($segments as $sourceSegment) {
          $merger->addSource($sourceSegment);

          // The source becomes obsolete once the merged segment is committed
          $this->_segmentsToDelete[$sourceSegment->getName()] = $sourceSegment->getName();
      }

      $mergedSegment = $merger->merge();
      if (null !== $mergedSegment) {
          $this->_newSegments[$mergedSegment->getName()] = $mergedSegment;
      }

      $this->commit();
  }
  /**
   * Update segments file by adding current segment to a list
   *
   * Under the index write lock this method:
   *  - flushes pending changes of all known segments,
   *  - writes the next generation of the segments file: entries of the
   *    current generation minus the segments scheduled for deletion, plus the
   *    segments created by this writer,
   *  - updates 'segments.gen' (restoring the previous generation on failure),
   *  - removes files that are no longer referenced when no other process uses
   *    the index, otherwise only purges handles of the deleted segments,
   *  - drops segments missing from the new generation from the in-memory list.
   *
   * The write lock is released on every exit path.
   *
   * @throws Zend_Search_Lucene_Exception
   */
  private function _updateSegments()
  {
      // Get an exclusive index lock
      Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

      try {
          // Write down changes for the segments
          foreach ($this->_segmentInfos as $segInfo) {
              $segInfo->writeChanges();
          }

          $generation     = Zend_Search_Lucene::getActualGeneration($this->_directory);
          $segmentsFile   = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);
          $newSegmentFile = $this->_directory->createFile(Zend_Search_Lucene::getSegmentFileName(++$generation), false);

          try {
              $genFile = $this->_directory->getFileObject('segments.gen', false);
          } catch (Zend_Search_Lucene_Exception $e) {
              if (strpos($e->getMessage(), 'is not readable') !== false) {
                  $genFile = $this->_directory->createFile('segments.gen');
              } else {
                  throw $e;
              }
          }

          $genFile->writeInt((int)0xFFFFFFFE);
          // Write generation (first copy)
          $genFile->writeLong($generation);
      } catch (Exception $e) {
          // Do not leave the index write-locked when the preparation fails
          Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
          throw $e;
      }

      try {
          // Write format marker
          if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_1) {
              $newSegmentFile->writeInt((int)0xFFFFFFFD);
          } else if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
              $newSegmentFile->writeInt((int)0xFFFFFFFC);
          }

          // Read src file format identifier
          $format = $segmentsFile->readInt();
          if ($format == (int)0xFFFFFFFF) {
              $srcFormat = Zend_Search_Lucene::FORMAT_PRE_2_1;
          } else if ($format == (int)0xFFFFFFFD) {
              $srcFormat = Zend_Search_Lucene::FORMAT_2_1;
          } else if ($format == (int)0xFFFFFFFC) {
              $srcFormat = Zend_Search_Lucene::FORMAT_2_3;
          } else {
              throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
          }

          $version = $segmentsFile->readLong() + $this->_versionUpdate;
          $this->_versionUpdate = 0;
          $newSegmentFile->writeLong($version);

          // Write segment name counter
          $newSegmentFile->writeInt($segmentsFile->readInt());

          // Get number of segments offset
          $numOfSegmentsOffset = $newSegmentFile->tell();
          // Write dummy data (segment counter)
          $newSegmentFile->writeInt(0);

          // Read number of segments
          $segmentsCount = $segmentsFile->readInt();

          $segments = array();
          for ($count = 0; $count < $segmentsCount; $count++) {
              $segName = $segmentsFile->readString();
              $segSize = $segmentsFile->readInt();

              if ($srcFormat == Zend_Search_Lucene::FORMAT_PRE_2_1) {
                  // pre-2.1 index format
                  $delGen            = 0;
                  $hasSingleNormFile = false;
                  $numField          = (int)0xFFFFFFFF;
                  $isCompoundByte    = 0;
                  $docStoreOptions   = null;
              } else {
                  $delGen = $segmentsFile->readLong();

                  if ($srcFormat == Zend_Search_Lucene::FORMAT_2_3) {
                      $docStoreOffset = $segmentsFile->readInt();

                      if ($docStoreOffset != (int)0xFFFFFFFF) {
                          $docStoreSegment        = $segmentsFile->readString();
                          $docStoreIsCompoundFile = $segmentsFile->readByte();

                          $docStoreOptions = array('offset'     => $docStoreOffset,
                                                   'segment'    => $docStoreSegment,
                                                   'isCompound' => ($docStoreIsCompoundFile == 1));
                      } else {
                          $docStoreOptions = null;
                      }
                  } else {
                      $docStoreOptions = null;
                  }

                  $hasSingleNormFile = $segmentsFile->readByte();
                  $numField          = $segmentsFile->readInt();

                  $normGens = array();
                  if ($numField != (int)0xFFFFFFFF) {
                      for ($count1 = 0; $count1 < $numField; $count1++) {
                          $normGens[] = $segmentsFile->readLong();
                      }
                  }

                  $isCompoundByte = $segmentsFile->readByte();
              }

              // _segmentsToDelete is keyed by segment name, so a key lookup is enough
              if (!isset($this->_segmentsToDelete[$segName])) {
                  // Load segment if necessary
                  if (!isset($this->_segmentInfos[$segName])) {
                      // Treat any unexpected flag value as "status unknown" instead of
                      // reusing the value computed for a previous segment
                      $isCompound = null;
                      if ($isCompoundByte == 0xFF) {
                          // The segment is not a compound file
                          $isCompound = false;
                      } else if ($isCompoundByte == 0x00) {
                          // The status is unknown
                          $isCompound = null;
                      } else if ($isCompoundByte == 0x01) {
                          // The segment is a compound file
                          $isCompound = true;
                      }

                      $this->_segmentInfos[$segName] =
                          new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                                   $segName,
                                                                   $segSize,
                                                                   $delGen,
                                                                   $docStoreOptions,
                                                                   $hasSingleNormFile,
                                                                   $isCompound);
                  } else {
                      // Retrieve actual deletions file generation number
                      $delGen = $this->_segmentInfos[$segName]->getDelGen();
                  }

                  $newSegmentFile->writeString($segName);
                  $newSegmentFile->writeInt($segSize);
                  $newSegmentFile->writeLong($delGen);
                  if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                      if ($docStoreOptions !== null) {
                          $newSegmentFile->writeInt($docStoreOffset);
                          $newSegmentFile->writeString($docStoreSegment);
                          $newSegmentFile->writeByte($docStoreIsCompoundFile);
                      } else {
                          // Set DocStoreOffset to -1
                          $newSegmentFile->writeInt((int)0xFFFFFFFF);
                      }
                  } else if ($docStoreOptions !== null) {
                      // The catch block below restores the generation and releases the
                      // write lock exactly once
                      throw new Zend_Search_Lucene_Exception('Index conversion to lower format version is not supported.');
                  }

                  $newSegmentFile->writeByte($hasSingleNormFile);
                  $newSegmentFile->writeInt($numField);
                  if ($numField != (int)0xFFFFFFFF) {
                      foreach ($normGens as $normGen) {
                          $newSegmentFile->writeLong($normGen);
                      }
                  }
                  $newSegmentFile->writeByte($isCompoundByte);

                  $segments[$segName] = $segSize;
              }
          }
          $segmentsFile->close();

          $segmentsCount = count($segments) + count($this->_newSegments);

          foreach ($this->_newSegments as $segName => $segmentInfo) {
              $newSegmentFile->writeString($segName);
              $newSegmentFile->writeInt($segmentInfo->count());

              // delete file generation: -1 (there is no delete file yet), written as two ints
              $newSegmentFile->writeInt((int)0xFFFFFFFF);
              $newSegmentFile->writeInt((int)0xFFFFFFFF);
              if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                  // docStoreOffset: -1 (segment doesn't use shared doc store)
                  $newSegmentFile->writeInt((int)0xFFFFFFFF);
              }
              // HasSingleNormFile
              $newSegmentFile->writeByte($segmentInfo->hasSingleNormFile());
              // NumField
              $newSegmentFile->writeInt((int)0xFFFFFFFF);
              // IsCompoundFile
              $newSegmentFile->writeByte($segmentInfo->isCompound() ? 1 : -1);

              $segments[$segmentInfo->getName()] = $segmentInfo->count();
              $this->_segmentInfos[$segName] = $segmentInfo;
          }
          $this->_newSegments = array();

          $newSegmentFile->seek($numOfSegmentsOffset);
          $newSegmentFile->writeInt($segmentsCount);  // Update segments count
          $newSegmentFile->close();
      } catch (Exception $e) {
          /** Restore previous index generation */
          $generation--;
          $genFile->seek(4, SEEK_SET);
          // Write generation number twice
          $genFile->writeLong($generation);
          $genFile->writeLong($generation);

          // Release index write lock
          Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

          // Throw the exception
          throw $e;
      }

      // Write generation (second copy)
      $genFile->writeLong($generation);

      // Check if another update or read process is not running now
      // If yes, skip clean-up procedure
      if (Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory)) {
          /**
           * Clean-up directory
           */
          $filesToDelete = array();
          $filesTypes    = array();
          $filesNumbers  = array();

          // list of .del files of currently used segments
          // each segment can have several generations of .del files
          // only last should not be deleted
          $delFiles = array();

          foreach ($this->_directory->fileList() as $file) {
              if ($file == 'deletable') {
                  // 'deletable' file
                  $filesToDelete[] = $file;
                  $filesTypes[]    = 0; // delete this file first, since it's not used starting from Lucene v2.1
                  $filesNumbers[]  = 0;
              } else if ($file == 'segments') {
                  // 'segments' file
                  $filesToDelete[] = $file;
                  $filesTypes[]    = 1; // second file to be deleted "zero" version of segments file (Lucene pre-2.1)
                  $filesNumbers[]  = 0;
              } else if (preg_match('/^segments_[a-zA-Z0-9]+$/i', $file)) {
                  // 'segments_xxx' file
                  // Check if it's not a just created generation file
                  if ($file != Zend_Search_Lucene::getSegmentFileName($generation)) {
                      $filesToDelete[] = $file;
                      $filesTypes[]    = 2; // first group of files for deletions
                      $filesNumbers[]  = (int)base_convert(substr($file, 9), 36, 10); // ordered by segment generation numbers
                  }
              } else if (preg_match('/(^_([a-zA-Z0-9]+))\.f\d+$/i', $file, $matches)) {
                  // one of per segment files ('<segment_name>.f<decimal_number>')
                  // Check if it's not one of the segments in the current segments set
                  if (!isset($segments[$matches[1]])) {
                      $filesToDelete[] = $file;
                      $filesTypes[]    = 3; // second group of files for deletions
                      $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number
                  }
              } else if (preg_match('/(^_([a-zA-Z0-9]+))(_([a-zA-Z0-9]+))\.del$/i', $file, $matches)) {
                  // one of per segment files ('<segment_name>_<del_generation>.del' where <segment_name> is '_<segment_number>')
                  // Check if it's not one of the segments in the current segments set
                  if (!isset($segments[$matches[1]])) {
                      $filesToDelete[] = $file;
                      $filesTypes[]    = 3; // second group of files for deletions
                      $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number
                  } else {
                      $segmentNumber = (int)base_convert($matches[2], 36, 10);
                      $delGeneration = (int)base_convert($matches[4], 36, 10);
                      if (!isset($delFiles[$segmentNumber])) {
                          $delFiles[$segmentNumber] = array();
                      }
                      $delFiles[$segmentNumber][$delGeneration] = $file;
                  }
              } else if (isset(self::$_indexExtensions[substr($file, strlen($file)-4)])) {
                  // one of per segment files ('<segment_name>.<ext>')
                  $segmentName = substr($file, 0, strlen($file) - 4);
                  // Check if it's not one of the segments in the current segments set
                  if (!isset($segments[$segmentName])  &&
                      ($this->_currentSegment === null  ||  $this->_currentSegment->getName() != $segmentName)) {
                      $filesToDelete[] = $file;
                      $filesTypes[]    = 3; // second group of files for deletions
                      $filesNumbers[]  = (int)base_convert(substr($file, 1 /* skip '_' */, strlen($file)-5), 36, 10); // order by segment number
                  }
              }
          }

          $maxGenNumber = 0;
          // process .del files of currently used segments
          foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
              ksort($delFiles[$segmentNumber], SORT_NUMERIC);
              array_pop($delFiles[$segmentNumber]); // remove last delete file generation from candidates for deleting

              end($delFiles[$segmentNumber]);
              $lastGenNumber = key($delFiles[$segmentNumber]);
              if ($lastGenNumber > $maxGenNumber) {
                  $maxGenNumber = $lastGenNumber;
              }
          }
          foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
              foreach ($segmentDelFiles as $delGeneration => $file) {
                  $filesToDelete[] = $file;
                  $filesTypes[]    = 4; // third group of files for deletions
                  // order by <segment_number>,<del_generation> pair; generations range over
                  // 0..$maxGenNumber, so the multiplier has to be $maxGenNumber + 1 to keep
                  // keys of different segments from colliding
                  $filesNumbers[]  = $segmentNumber*($maxGenNumber + 1) + $delGeneration;
              }
          }

          // Reorder files for deleting
          array_multisort($filesTypes,    SORT_ASC, SORT_NUMERIC,
                          $filesNumbers,  SORT_ASC, SORT_NUMERIC,
                          $filesToDelete, SORT_ASC, SORT_STRING);

          foreach ($filesToDelete as $file) {
              try {
                  /** Skip shared docstore segments deleting */
                  /** @todo Process '.cfx' files to check if them are already unused */
                  if (substr($file, strlen($file)-4) != '.cfx') {
                      $this->_directory->deleteFile($file);
                  }
              } catch (Zend_Search_Lucene_Exception $e) {
                  if (strpos($e->getMessage(), 'Can\'t delete file') === false) {
                      // That's not "file is under processing or already deleted" exception
                      // Pass it through
                      throw $e;
                  }
              }
          }

          // Return read lock into the previous state
          Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
      } else {
          // Only release resources if another index reader is running now
          foreach ($this->_segmentsToDelete as $segName) {
              foreach (self::$_indexExtensions as $ext) {
                  $this->_directory->purgeFile($segName . $ext);
              }
          }
      }

      // Clean-up _segmentsToDelete container
      $this->_segmentsToDelete = array();

      // Release index write lock
      Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

      // Remove unused segments from segments list
      foreach ($this->_segmentInfos as $segName => $segmentInfo) {
          if (!isset($segments[$segName])) {
              unset($this->_segmentInfos[$segName]);
          }
      }
  }
  /**
   * Flush pending work to the index.
   *
   * Closes the segment that is currently being filled (if any), registers
   * the resulting segment as new, and writes an updated segments file.
   */
  public function commit()
  {
      $openSegment = $this->_currentSegment;

      if (null !== $openSegment) {
          $closedSegment = $openSegment->close();
          if (null !== $closedSegment) {
              $this->_newSegments[$closedSegment->getName()] = $closedSegment;
          }

          $this->_currentSegment = null;
      }

      $this->_updateSegments();
  }
  /**
   * Merges the provided indexes into this index.
   *
   * Not implemented yet: the method body is empty, so a call is a no-op and
   * $readers is ignored.
   *
   * @param array $readers
   * @return void
   */
  public function addIndexes($readers)
  {
      // @todo implementation
  }
  /**
   * Merges all segments together into new one
   *
   * Returns true on success and false if another optimization or auto-optimization process
   * is running now. The optimization lock is released on every exit path,
   * including when updating or merging throws.
   *
   * @return boolean
   * @throws Exception whatever _updateSegments() or _mergeSegments() throws
   */
  public function optimize()
  {
      if (Zend_Search_Lucene_LockManager::obtainOptimizationLock($this->_directory) === false) {
          return false;
      }

      try {
          // Update segments list to be sure all segments are not merged yet by another process
          //
          // Segment merging functionality is concentrated in this class and surrounded
          // by optimization lock obtaining/releasing.
          // _updateSegments() refreshes segments list from the latest index generation.
          // So only new segments can be added to the index while we are merging some already existing
          // segments.
          // Newly added segments will be also included into the index by the _updateSegments() call
          // either by another process or by the current process with the commit() call at the end of
          // _mergeSegments() method.
          // That's guaranteed by the serialisation of _updateSegments() execution using exclusive locks.
          $this->_updateSegments();

          $this->_mergeSegments($this->_segmentInfos);
      } catch (Exception $e) {
          // Do not keep the optimization lock when the merge fails
          Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
          throw $e;
      }

      Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);

      return true;
  }
  /**
   * Get name for new segment
   *
   * Under the index write lock, reads the segment name counter from the
   * current segments file, stores the incremented counter back and builds the
   * name from the value that was read. The write lock is released on every
   * exit path.
   *
   * @return string '_' followed by the counter in base 36
   * @throws Exception whatever reading or updating the segments file throws
   */
  private function _newSegmentName()
  {
      Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

      try {
          $generation   = Zend_Search_Lucene::getActualGeneration($this->_directory);
          $segmentsFile = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);

          $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
          $segmentNameCounter = $segmentsFile->readInt();

          $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
          $segmentsFile->writeInt($segmentNameCounter + 1);

          // Flush output to guarantee that wrong value will not be loaded between unlock and
          // return (which calls $segmentsFile destructor)
          $segmentsFile->flush();
      } catch (Exception $e) {
          // Do not leave the index write-locked when the counter cannot be updated
          Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
          throw $e;
      }

      Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

      return '_' . base_convert($segmentNameCounter, 10, 36);
  }
  722. }