Pārlūkot izejas kodu

Zend_Search_Lucene: improved 32-bit systems support. Closes [ZF-4113], [ZF-4071] and [ZF-5141].

git-svn-id: http://framework.zend.com/svn/framework/standard/trunk@16167 44c647ce-9c0f-0410-b52a-842ac1e357ba
alexander 16 gadi atpakaļ
vecāks
revīzija
43d512dba4

+ 4 - 13
library/Zend/Search/Lucene.php

@@ -351,8 +351,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
         }
         }
 
 
         // read version
         // read version
-        // $segmentsFile->readLong();
-        $segmentsFile->readInt(); $segmentsFile->readInt();
+        $segmentsFile->readLong();
 
 
         // read segment name counter
         // read segment name counter
         $segmentsFile->readInt();
         $segmentsFile->readInt();
@@ -398,8 +397,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
         }
         }
 
 
         // read version
         // read version
-        // $segmentsFile->readLong();
-        $segmentsFile->readInt(); $segmentsFile->readInt();
+        $segmentsFile->readLong();
 
 
         // read segment name counter
         // read segment name counter
         $segmentsFile->readInt();
         $segmentsFile->readInt();
@@ -414,19 +412,12 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
             $segSize = $segmentsFile->readInt();
             $segSize = $segmentsFile->readInt();
 
 
             // 2.1+ specific properties
             // 2.1+ specific properties
-            //$delGen          = $segmentsFile->readLong();
-            $delGenHigh        = $segmentsFile->readInt();
-            $delGenLow         = $segmentsFile->readInt();
-            if ($delGenHigh == (int)0xFFFFFFFF  && $delGenLow == (int)0xFFFFFFFF) {
-                $delGen = -1; // There are no deletes
-            } else {
-                $delGen = ($delGenHigh << 32) | $delGenLow;
-            }
+            $delGen = $segmentsFile->readLong();
 
 
             if ($this->_formatVersion == self::FORMAT_2_3) {
             if ($this->_formatVersion == self::FORMAT_2_3) {
                 $docStoreOffset = $segmentsFile->readInt();
                 $docStoreOffset = $segmentsFile->readInt();
 
 
-                if ($docStoreOffset != -1) {
+                if ($docStoreOffset != (int)0xFFFFFFFF) {
                     $docStoreSegment        = $segmentsFile->readString();
                     $docStoreSegment        = $segmentsFile->readString();
                     $docStoreIsCompoundFile = $segmentsFile->readByte();
                     $docStoreIsCompoundFile = $segmentsFile->readByte();
 
 

+ 4 - 2
library/Zend/Search/Lucene/Index/DictionaryLoader.php

@@ -254,12 +254,14 @@ class Zend_Search_Lucene_Index_DictionaryLoader
         if ($termDictionary[0][0] != (int)0xFFFFFFFF) {
         if ($termDictionary[0][0] != (int)0xFFFFFFFF) {
             require_once 'Zend/Search/Lucene/Exception.php';
             require_once 'Zend/Search/Lucene/Exception.php';
             throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
             throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
-        } else if (PHP_INT_SIZE > 4){
+        }
+
+        if (PHP_INT_SIZE > 4) {
             // Treat 64-bit 0xFFFFFFFF as -1
             // Treat 64-bit 0xFFFFFFFF as -1
             $termDictionary[0][0] = -1;
             $termDictionary[0][0] = -1;
         }
         }
 
 
-        return array(&$termDictionary, &$termInfos);
+        return array($termDictionary, $termInfos);
     }
     }
 }
 }
 
 

+ 10 - 48
library/Zend/Search/Lucene/Index/Writer.php

@@ -189,11 +189,8 @@ class Zend_Search_Lucene_Index_Writer
             $segmentsFile = $directory->createFile('segments');
             $segmentsFile = $directory->createFile('segments');
             $segmentsFile->writeInt((int)0xFFFFFFFF);
             $segmentsFile->writeInt((int)0xFFFFFFFF);
 
 
-            // write version (is initialized by current time
-            // $segmentsFile->writeLong((int)microtime(true));
-            $version = microtime(true);
-            $segmentsFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
-            $segmentsFile->writeInt((int)($version & 0xFFFFFFFF));
+            // write version (initialized by current time)
+            $segmentsFile->writeLong(round(microtime(true)));
 
 
             // write name counter
             // write name counter
             $segmentsFile->writeInt($nameCount);
             $segmentsFile->writeInt($nameCount);
@@ -214,11 +211,8 @@ class Zend_Search_Lucene_Index_Writer
             $segmentsFile = $directory->createFile(Zend_Search_Lucene::getSegmentFileName($generation));
             $segmentsFile = $directory->createFile(Zend_Search_Lucene::getSegmentFileName($generation));
             $segmentsFile->writeInt((int)0xFFFFFFFD);
             $segmentsFile->writeInt((int)0xFFFFFFFD);
 
 
-            // write version (is initialized by current time
-            // $segmentsFile->writeLong((int)microtime(true));
-            $version = microtime(true);
-            $segmentsFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
-            $segmentsFile->writeInt((int)($version & 0xFFFFFFFF));
+            // write version (initialized by current time)
+            $segmentsFile->writeLong(round(microtime(true)));
 
 
             // write name counter
             // write name counter
             $segmentsFile->writeInt($nameCount);
             $segmentsFile->writeInt($nameCount);
@@ -453,16 +447,9 @@ class Zend_Search_Lucene_Index_Writer
                 throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
                 throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
             }
             }
 
 
-            // $version = $segmentsFile->readLong() + $this->_versionUpdate;
-            // Process version on 32-bit platforms
-            $versionHigh = $segmentsFile->readInt();
-            $versionLow  = $segmentsFile->readInt();
-            $version = $versionHigh * ((double)0xFFFFFFFF + 1) +
-                       (($versionLow < 0)? (double)0xFFFFFFFF - (-1 - $versionLow) : $versionLow);
-            $version += $this->_versionUpdate;
+            $version = $segmentsFile->readLong() + $this->_versionUpdate;
             $this->_versionUpdate = 0;
             $this->_versionUpdate = 0;
-            $newSegmentFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
-            $newSegmentFile->writeInt((int)($version & 0xFFFFFFFF));
+            $newSegmentFile->writeLong($version);
 
 
             // Write segment name counter
             // Write segment name counter
             $newSegmentFile->writeInt($segmentsFile->readInt());
             $newSegmentFile->writeInt($segmentsFile->readInt());
@@ -482,21 +469,18 @@ class Zend_Search_Lucene_Index_Writer
 
 
                 if ($srcFormat == Zend_Search_Lucene::FORMAT_PRE_2_1) {
                 if ($srcFormat == Zend_Search_Lucene::FORMAT_PRE_2_1) {
                     // pre-2.1 index format
                     // pre-2.1 index format
-                    $delGenHigh        = 0;
-                    $delGenLow         = 0;
+                    $delGen            = 0;
                     $hasSingleNormFile = false;
                     $hasSingleNormFile = false;
                     $numField          = (int)0xFFFFFFFF;
                     $numField          = (int)0xFFFFFFFF;
                     $isCompoundByte    = 0;
                     $isCompoundByte    = 0;
                     $docStoreOptions   = null;
                     $docStoreOptions   = null;
                 } else {
                 } else {
-                    //$delGen          = $segmentsFile->readLong();
-                    $delGenHigh        = $segmentsFile->readInt();
-                    $delGenLow         = $segmentsFile->readInt();
+                    $delGen = $segmentsFile->readLong();
 
 
                     if ($srcFormat == Zend_Search_Lucene::FORMAT_2_3) {
                     if ($srcFormat == Zend_Search_Lucene::FORMAT_2_3) {
                         $docStoreOffset = $segmentsFile->readInt();
                         $docStoreOffset = $segmentsFile->readInt();
 
 
-                        if ($docStoreOffset != -1) {
+                        if ($docStoreOffset != (int)0xFFFFFFFF) {
                             $docStoreSegment        = $segmentsFile->readString();
                             $docStoreSegment        = $segmentsFile->readString();
                             $docStoreIsCompoundFile = $segmentsFile->readByte();
                             $docStoreIsCompoundFile = $segmentsFile->readByte();
 
 
@@ -525,14 +509,6 @@ class Zend_Search_Lucene_Index_Writer
                 if (!in_array($segName, $this->_segmentsToDelete)) {
                 if (!in_array($segName, $this->_segmentsToDelete)) {
                     // Load segment if necessary
                     // Load segment if necessary
                     if (!isset($this->_segmentInfos[$segName])) {
                     if (!isset($this->_segmentInfos[$segName])) {
-                        if (PHP_INT_SIZE > 4) {
-                        	// 64-bit system
-                        	$delGen = $delGenHigh << 32  |
-                        	          $delGenLow;
-                        } else {
-                        	$delGen = $delGenHigh * ((double)0xFFFFFFFF + 1) +
-                                         (($delGenLow < 0)? (double)0xFFFFFFFF - (-1 - $delGenLow) : $delGenLow);
-                        }
                         if ($isCompoundByte == 0xFF) {
                         if ($isCompoundByte == 0xFF) {
                             // The segment is not a compound file
                             // The segment is not a compound file
                             $isCompound = false;
                             $isCompound = false;
@@ -555,25 +531,11 @@ class Zend_Search_Lucene_Index_Writer
                     } else {
                     } else {
                         // Retrieve actual deletions file generation number
                         // Retrieve actual deletions file generation number
                         $delGen = $this->_segmentInfos[$segName]->getDelGen();
                         $delGen = $this->_segmentInfos[$segName]->getDelGen();
-
-                        if ($delGen >= 0) {
-                            if (PHP_INT_SIZE > 4) {
-                                // 64-bit system
-                                $delGenHigh = $delGen >> 32  & 0xFFFFFFFF;
-                                $delGenLow  = $delGen        & 0xFFFFFFFF;
-                            } else {
-                                $delGenHigh = (int)($delGen/((double)0xFFFFFFFF + 1));
-                                $delGenLow  =(int)($delGen & 0xFFFFFFFF);
-                            }
-                        } else {
-                            $delGenHigh = $delGenLow = (int)0xFFFFFFFF;
-                        }
                     }
                     }
 
 
                     $newSegmentFile->writeString($segName);
                     $newSegmentFile->writeString($segName);
                     $newSegmentFile->writeInt($segSize);
                     $newSegmentFile->writeInt($segSize);
-                    $newSegmentFile->writeInt($delGenHigh);
-                    $newSegmentFile->writeInt($delGenLow);
+                    $newSegmentFile->writeLong($delGen);
                     if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                     if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                         if ($docStoreOptions !== null) {
                         if ($docStoreOptions !== null) {
                             $newSegmentFile->writeInt($docStoreOffset);
                             $newSegmentFile->writeInt($docStoreOffset);

+ 71 - 23
library/Zend/Search/Lucene/Storage/File.php

@@ -178,18 +178,18 @@ abstract class Zend_Search_Lucene_Storage_File
      * Returns a long integer from the current position in the file
      * Returns a long integer from the current position in the file
      * and advances the file pointer.
      * and advances the file pointer.
      *
      *
-     * @return integer
+     * @return integer|float
      * @throws Zend_Search_Lucene_Exception
      * @throws Zend_Search_Lucene_Exception
      */
      */
     public function readLong()
     public function readLong()
     {
     {
-        $str = $this->_fread(8);
-
         /**
         /**
          * Check, that we work in 64-bit mode.
          * Check, that we work in 64-bit mode.
          * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
          * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
          */
          */
         if (PHP_INT_SIZE > 4) {
         if (PHP_INT_SIZE > 4) {
+            $str = $this->_fread(8);
+
             return  ord($str[0]) << 56  |
             return  ord($str[0]) << 56  |
                     ord($str[1]) << 48  |
                     ord($str[1]) << 48  |
                     ord($str[2]) << 40  |
                     ord($str[2]) << 40  |
@@ -199,19 +199,7 @@ abstract class Zend_Search_Lucene_Storage_File
                     ord($str[6]) << 8   |
                     ord($str[6]) << 8   |
                     ord($str[7]);
                     ord($str[7]);
         } else {
         } else {
-            if ((ord($str[0])          != 0) ||
-                (ord($str[1])          != 0) ||
-                (ord($str[2])          != 0) ||
-                (ord($str[3])          != 0) ||
-                ((ord($str[0]) & 0x80) != 0)) {
-                require_once 'Zend/Search/Lucene/Exception.php';
-                throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
-            }
-
-            return  ord($str[4]) << 24  |
-                    ord($str[5]) << 16  |
-                    ord($str[6]) << 8   |
-                    ord($str[7]);
+            return $this->readLong32Bit();
         }
         }
     }
     }
 
 
@@ -238,20 +226,80 @@ abstract class Zend_Search_Lucene_Storage_File
                             chr($value>>8  & 0xFF) .
                             chr($value>>8  & 0xFF) .
                             chr($value     & 0xFF),   8  );
                             chr($value     & 0xFF),   8  );
         } else {
         } else {
-            if ($value > 0x7FFFFFFF) {
+            $this->writeLong32Bit($value);
+        }
+    }
+
+
+    /**
+     * Returns a long integer from the current position in the file,
+     * advances the file pointer and returns it as an integer when it fits,
+     * otherwise as a float (for 32-bit platforms).
+     *
+     * @return integer|float
+     * @throws Zend_Search_Lucene_Exception
+     */
+    public function readLong32Bit()
+    {
+        $wordHigh = $this->readInt();
+        $wordLow  = $this->readInt();
+
+        if ($wordHigh & (int)0x80000000) {
+            // It's a negative value since the highest bit is set
+            if ($wordHigh == (int)0xFFFFFFFF  &&  ($wordLow & (int)0x80000000)) {
+                return $wordLow;
+            } else {
                 require_once 'Zend/Search/Lucene/Exception.php';
                 require_once 'Zend/Search/Lucene/Exception.php';
-                throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
+                throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
             }
             }
 
 
-            $this->_fwrite( "\x00\x00\x00\x00"     .
-                            chr($value>>24 & 0xFF) .
-                            chr($value>>16 & 0xFF) .
-                            chr($value>>8  & 0xFF) .
-                            chr($value     & 0xFF),   8  );
         }
         }
+
+        if ($wordLow < 0) {
+            // Value is larger than 0x7FFFFFFF. Represent low word as float.
+            $wordLow &= 0x7FFFFFFF;
+            $wordLow += (float)0x80000000;
+        }
+
+        if ($wordHigh == 0) {
+            // Return value as integer if possible
+            return $wordLow;
+        }
+
+        return $wordHigh*(float)0x100000000/* 0x00000001 00000000 */ + $wordLow;
     }
     }
 
 
 
 
+    /**
+     * Writes long integer to the end of file (32-bit platforms implementation)
+     *
+     * @param integer|float $value
+     * @throws Zend_Search_Lucene_Exception
+     */
+    public function writeLong32Bit($value)
+    {
+        if ($value < (int)0x80000000) {
+            require_once 'Zend/Search/Lucene/Exception.php';
+            throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
+        }
+
+        if ($value < 0) {
+            $wordHigh = (int)0xFFFFFFFF;
+            $wordLow  = (int)$value;
+        } else {
+            $wordHigh = (int)($value/(float)0x100000000/* 0x00000001 00000000 */);
+            $wordLow  = $value - $wordHigh*(float)0x100000000/* 0x00000001 00000000 */;
+
+            if ($wordLow > 0x7FFFFFFF) {
+                // Highest bit of low word is set. Translate it to the corresponding negative integer value
+                $wordLow -= 0x80000000;
+                $wordLow |= 0x80000000;
+            }
+        }
+
+        $this->writeInt($wordHigh);
+        $this->writeInt($wordLow);
+    }
+
 
 
     /**
     /**
      * Returns a variable-length integer from the current
      * Returns a variable-length integer from the current

+ 71 - 24
library/Zend/Search/Lucene/Storage/File/Memory.php

@@ -289,14 +289,14 @@ class Zend_Search_Lucene_Storage_File_Memory extends Zend_Search_Lucene_Storage_
      */
      */
     public function readLong()
     public function readLong()
     {
     {
-        $str = substr($this->_data, $this->_position, 8);
-        $this->_position += 8;
-
         /**
         /**
          * Check, that we work in 64-bit mode.
          * Check, that we work in 64-bit mode.
          * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
          * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
          */
          */
         if (PHP_INT_SIZE > 4) {
         if (PHP_INT_SIZE > 4) {
+            $str = substr($this->_data, $this->_position, 8);
+            $this->_position += 8;
+
             return  ord($str[0]) << 56  |
             return  ord($str[0]) << 56  |
                     ord($str[1]) << 48  |
                     ord($str[1]) << 48  |
                     ord($str[2]) << 40  |
                     ord($str[2]) << 40  |
@@ -306,19 +306,7 @@ class Zend_Search_Lucene_Storage_File_Memory extends Zend_Search_Lucene_Storage_
                     ord($str[6]) << 8   |
                     ord($str[6]) << 8   |
                     ord($str[7]);
                     ord($str[7]);
         } else {
         } else {
-            if ((ord($str[0])          != 0) ||
-                (ord($str[1])          != 0) ||
-                (ord($str[2])          != 0) ||
-                (ord($str[3])          != 0) ||
-                ((ord($str[0]) & 0x80) != 0)) {
-                    require_once 'Zend/Search/Lucene/Exception.php';
-                    throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
-            }
-
-            return  ord($str[4]) << 24  |
-                    ord($str[5]) << 16  |
-                    ord($str[6]) << 8   |
-                    ord($str[7]);
+            return $this->readLong32Bit();
         }
         }
     }
     }
 
 
@@ -348,22 +336,81 @@ class Zend_Search_Lucene_Storage_File_Memory extends Zend_Search_Lucene_Storage_
                             chr($value>>8  & 0xFF) .
                             chr($value>>8  & 0xFF) .
                             chr($value     & 0xFF);
                             chr($value     & 0xFF);
         } else {
         } else {
-            if ($value > 0x7FFFFFFF) {
+            $this->writeLong32Bit($value);
+        }
+
+        $this->_position = strlen($this->_data);
+    }
+
+
+    /**
+     * Returns a long integer from the current position in the file,
+     * advances the file pointer and returns it as an integer when it fits,
+     * otherwise as a float (for 32-bit platforms).
+     *
+     * @return integer|float
+     * @throws Zend_Search_Lucene_Exception
+     */
+    public function readLong32Bit()
+    {
+        $wordHigh = $this->readInt();
+        $wordLow  = $this->readInt();
+
+        if ($wordHigh & (int)0x80000000) {
+            // It's a negative value since the highest bit is set
+            if ($wordHigh == (int)0xFFFFFFFF  &&  ($wordLow & (int)0x80000000)) {
+                return $wordLow;
+            } else {
                 require_once 'Zend/Search/Lucene/Exception.php';
                 require_once 'Zend/Search/Lucene/Exception.php';
-                throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
+                throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
             }
             }
 
 
-            $this->_data .= chr(0) . chr(0) . chr(0) . chr(0) .
-                            chr($value>>24 & 0xFF) .
-                            chr($value>>16 & 0xFF) .
-                            chr($value>>8  & 0xFF) .
-                            chr($value     & 0xFF);
         }
         }
 
 
-        $this->_position = strlen($this->_data);
+        if ($wordLow < 0) {
+            // Value is larger than 0x7FFFFFFF. Represent low word as float.
+            $wordLow &= 0x7FFFFFFF;
+            $wordLow += (float)0x80000000;
+        }
+
+        if ($wordHigh == 0) {
+            // Return value as integer if possible
+            return $wordLow;
+        }
+
+        return $wordHigh*(float)0x100000000/* 0x00000001 00000000 */ + $wordLow;
     }
     }
 
 
 
 
+    /**
+     * Writes long integer to the end of file (32-bit platforms implementation)
+     *
+     * @param integer|float $value
+     * @throws Zend_Search_Lucene_Exception
+     */
+    public function writeLong32Bit($value)
+    {
+        if ($value < (int)0x80000000) {
+            require_once 'Zend/Search/Lucene/Exception.php';
+            throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
+        }
+
+        if ($value < 0) {
+            $wordHigh = (int)0xFFFFFFFF;
+            $wordLow  = (int)$value;
+        } else {
+            $wordHigh = (int)($value/(float)0x100000000/* 0x00000001 00000000 */);
+            $wordLow  = $value - $wordHigh*(float)0x100000000/* 0x00000001 00000000 */;
+
+            if ($wordLow > 0x7FFFFFFF) {
+                // Highest bit of low word is set. Translate it to the corresponding negative integer value
+                $wordLow -= 0x80000000;
+                $wordLow |= 0x80000000;
+            }
+        }
+
+        $this->writeInt($wordHigh);
+        $this->writeInt($wordLow);
+    }
 
 
     /**
     /**
      * Returns a variable-length integer from the current
      * Returns a variable-length integer from the current