Pārlūkot izejas kodu

Zend_Pdf performance improvement. ZF-8304.

git-svn-id: http://framework.zend.com/svn/framework/standard/trunk@18984 44c647ce-9c0f-0410-b52a-842ac1e357ba
alexander 16 gadi atpakaļ
vecāks
revīzija
e9a259a5bc

+ 111 - 94
library/Zend/Pdf/Element/String.php

@@ -78,170 +78,187 @@ class Zend_Pdf_Element_String extends Zend_Pdf_Element
     /**
      * Escape string according to the PDF rules
      *
-     * @param string $inStr
+     * @param string $str
      * @return string
      */
-    public static function escape($inStr)
+    public static function escape($str)
     {
-        $outStr = '';
-        $lastNL = 0;
+        $outEntries = array();
+
+        foreach (str_split($str, 128) as $chunk) {
+            // Collect sequence of unescaped characters
+            $offset = strcspn($chunk, "\n\r\t\x08\x0C()\\");
+            $chunkOut = substr($chunk, 0, $offset);
+
+            while ($offset < strlen($chunk)) {
+                $nextCode = ord($chunk[$offset++]);
+                switch ($nextCode) {
+                    // "\n" - line feed (LF)
+                    case 10:
+                        $chunkOut .= '\\n';
+                        break;
 
-        for ($count = 0; $count < strlen($inStr); $count++) {
-            if (strlen($outStr) - $lastNL > 128)  {
-                $outStr .= "\\\n";
-                $lastNL = strlen($outStr);
-            }
+                    // "\r" - carriage return (CR)
+                    case 13:
+                        $chunkOut .= '\\r';
+                        break;
+
+                    // "\t" - horizontal tab (HT)
+                    case 9:
+                        $chunkOut .= '\\t';
+                        break;
+
+                    // "\b" - backspace (BS)
+                    case 8:
+                        $chunkOut .= '\\b';
+                        break;
+
+                    // "\f" - form feed (FF)
+                    case 12:
+                        $chunkOut .= '\\f';
+                        break;
 
-            $nextCode = ord($inStr[$count]);
-            switch ($nextCode) {
-                // "\n" - line feed (LF)
-                case 10:
-                    $outStr .= '\\n';
-                    break;
-
-                // "\r" - carriage return (CR)
-                case 13:
-                    $outStr .= '\\r';
-                    break;
-
-                // "\t" - horizontal tab (HT)
-                case 9:
-                    $outStr .= '\\t';
-                    break;
-
-                // "\b" - backspace (BS)
-                case 8:
-                    $outStr .= '\\b';
-                    break;
-
-                // "\f" - form feed (FF)
-                case 12:
-                    $outStr .= '\\f';
-                    break;
-
-                // '(' - left paranthesis
-                case 40:
-                    $outStr .= '\\(';
-                    break;
-
-                // ')' - right paranthesis
-                case 41:
-                    $outStr .= '\\)';
-                    break;
-
-                // '\' - backslash
-                case 92:
-                    $outStr .= '\\\\';
-                    break;
-
-                default:
-                    // Don't use non-ASCII characters escaping
-                    // if ($nextCode >= 32 && $nextCode <= 126 ) {
-                    //     // Visible ASCII symbol
-                    //     $outStr .= $inStr[$count];
-                    // } else {
-                    //     $outStr .= sprintf('\\%03o', $nextCode);
-                    // }
-                    $outStr .= $inStr[$count];
-
-                    break;
+                    // '(' - left paranthesis
+                    case 40:
+                        $chunkOut .= '\\(';
+                        break;
+
+                    // ')' - right paranthesis
+                    case 41:
+                        $chunkOut .= '\\)';
+                        break;
+
+                    // '\' - backslash
+                    case 92:
+                        $chunkOut .= '\\\\';
+                        break;
+
+                    default:
+                        // This code is never actually executed
+                        //
+                        // Don't use non-ASCII characters escaping
+                        // if ($nextCode >= 32 && $nextCode <= 126 ) {
+                        //     // Visible ASCII symbol
+                        //     $chunkEntries[] = chr($nextCode);
+                        // } else {
+                        //     $chunkEntries[] = sprintf('\\%03o', $nextCode);
+                        // }
+
+                        break;
+                }
+
+                // Collect sequence of unescaped characters
+                $start = $offset;
+                $offset += strcspn($chunk, "\n\r\t\x08\x0C()\\", $offset);
+                $chunkOut .= substr($chunk, $start, $offset - $start);
             }
+
+            $outEntries[] = $chunkOut;
         }
 
-        return $outStr;
+        return implode("\\\n", $outEntries);
     }
 
 
     /**
      * Unescape string according to the PDF rules
      *
-     * @param string $inStr
+     * @param string $str
      * @return string
      */
-    public static function unescape($inStr)
+    public static function unescape($str)
     {
-        $outStr = '';
-
-        for ($count = 0; $count < strlen($inStr); $count++) {
-            if ($inStr[$count] != '\\' || $count == strlen($inStr)-1)  {
-                $outStr .= $inStr[$count];
-            } else { // Escape sequence
-                switch ($inStr{++$count}) {
+        $outEntries = array();
+
+        $offset = 0;
+        while ($offset < strlen($str)) {
+            // Search for the next escaped character/sequence
+            $escapeCharOffset = strpos($str, '\\', $offset);
+            if ($escapeCharOffset === false  ||  $escapeCharOffset == strlen($str) - 1) {
+                // There are no escaped characters, or the '\' char is the last character of the string
+                $outEntries[] = substr($str, $offset);
+                break;
+            } else {
+                // Collect unescaped characters sequence
+                $outEntries[] = substr($str, $offset, $escapeCharOffset - $offset);
+                // Go to the escaped character
+                $offset = $escapeCharOffset + 1;
+
+                switch ($str[$offset]) {
                     // '\\n' - line feed (LF)
                     case 'n':
-                        $outStr .= "\n";
+                        $outEntries[] = "\n";
                         break;
 
                     // '\\r' - carriage return (CR)
                     case 'r':
-                        $outStr .= "\r";
+                        $outEntries[] = "\r";
                         break;
 
                     // '\\t' - horizontal tab (HT)
                     case 't':
-                        $outStr .= "\t";
+                        $outEntries[] = "\t";
                         break;
 
                     // '\\b' - backspace (BS)
                     case 'b':
-                        $outStr .= "\x08";
+                        $outEntries[] = "\x08";
                         break;
 
                     // '\\f' - form feed (FF)
                     case 'f':
-                        $outStr .= "\x0C";
+                        $outEntries[] = "\x0C";
                         break;
 
                     // '\\(' - left paranthesis
                     case '(':
-                        $outStr .= '(';
+                        $outEntries[] = '(';
                         break;
 
                     // '\\)' - right paranthesis
                     case ')':
-                        $outStr .= ')';
+                        $outEntries[] = ')';
                         break;
 
                     // '\\\\' - backslash
                     case '\\':
-                        $outStr .= '\\';
+                        $outEntries[] = '\\';
                         break;
 
                     // "\\\n" or "\\\n\r"
                     case "\n":
                         // skip new line symbol
-                        if ($inStr[$count+1] == "\r") {
-                            $count++;
+                        if ($str[$offset + 1] == "\r") {
+                            $offset++;
                         }
                         break;
 
                     default:
-                        if (ord($inStr[$count]) >= ord('0') &&
-                            ord($inStr[$count]) <= ord('9')) {
+                        if (strpos('0123456789', $str[$offset]) !== false) {
                             // Character in octal representation
                             // '\\xxx'
-                            $nextCode = '0' . $inStr[$count];
+                            $nextCode = '0' . $str[$offset];
 
-                            if (ord($inStr[$count+1]) >= ord('0') &&
-                                ord($inStr[$count+1]) <= ord('9')) {
-                                $nextCode .= $inStr{++$count};
+                            if (strpos('0123456789', $str[$offset + 1]) !== false) {
+                                $nextCode .= $str[++$offset];
 
-                                if (ord($inStr[$count+1]) >= ord('0') &&
-                                    ord($inStr[$count+1]) <= ord('9')) {
-                                    $nextCode .= $inStr{++$count};
+                                if (strpos('0123456789', $str[$offset + 1]) !== false) {
+                                    $nextCode .= $str[++$offset];
                                 }
                             }
 
-                            $outStr .= chr($nextCode);
+                            $outEntries[] = chr($nextCode);
                         } else {
-                            $outStr .= $inStr[$count];
+                            $outEntries[] = $str[$offset];
                         }
                         break;
                 }
+
+                $offset++;
             }
         }
 
-        return $outStr;
+        return implode($outEntries);
     }
 
 }

+ 16 - 29
library/Zend/Pdf/Element/String/Binary.php

@@ -50,12 +50,7 @@ class Zend_Pdf_Element_String_Binary extends Zend_Pdf_Element_String
      */
     public static function escape($inStr)
     {
-        $outStr = '';
-
-        for ($count = 0; $count < strlen($inStr); $count++) {
-            $outStr .= sprintf('%02X', ord($inStr[$count]));
-        }
-        return $outStr;
+        return strtoupper(bin2hex($inStr));
     }
 
 
@@ -67,34 +62,26 @@ class Zend_Pdf_Element_String_Binary extends Zend_Pdf_Element_String
      */
     public static function unescape($inStr)
     {
-        $outStr = '';
-        $nextHexCode = '';
-
-        for ($count = 0; $count < strlen($inStr); $count++) {
-            $nextCharCode = ord($inStr[$count]);
-
-            if( ($nextCharCode >= 48  /*'0'*/ &&
-                 $nextCharCode <= 57  /*'9'*/   ) ||
-                ($nextCharCode >= 97  /*'a'*/ &&
-                 $nextCharCode <= 102 /*'f'*/   ) ||
-                ($nextCharCode >= 65  /*'A'*/ &&
-                 $nextCharCode <= 70  /*'F'*/   ) ) {
-                $nextHexCode .= $inStr[$count];
-            }
-
-            if (strlen($nextHexCode) == 2) {
-                $outStr .= chr(intval($nextHexCode, 16));
-                $nextHexCode = '';
-            }
+        $chunks = array();
+        $offset = 0;
+        $length = 0;
+        while ($offset < strlen($inStr)) {
+            // Collect hexadecimal characters
+            $start = $offset;
+            $offset += strspn($inStr, "0123456789abcdefABCDEF", $offset);
+            $chunks[] = substr($inStr, $start, $offset - $start);
+            $length += strlen(end($chunks));
+
+            // Skip non-hexadecimal characters
+            $offset += strcspn($inStr, "0123456789abcdefABCDEF", $offset);
         }
-
-        if ($nextHexCode != '') {
+        if ($length % 2 != 0) {
             // We have odd number of digits.
             // Final digit is assumed to be '0'
-            $outStr .= chr(base_convert($nextHexCode . '0', 16, 10));
+            $chunks[] = '0';
         }
 
-        return $outStr;
+        return pack('H*' , implode($chunks));
     }
 
 

+ 86 - 84
library/Zend/Pdf/StringParser.php

@@ -20,51 +20,19 @@
  */
 
 
-/** Zend_Pdf_Element */
-require_once 'Zend/Pdf/Element.php';
-
-/** Zend_Pdf_Element_Array */
+/** Internally used classes */
 require_once 'Zend/Pdf/Element/Array.php';
-
-/** Zend_Pdf_Element_String_Binary */
 require_once 'Zend/Pdf/Element/String/Binary.php';
-
-/** Zend_Pdf_Element_Boolean */
 require_once 'Zend/Pdf/Element/Boolean.php';
-
-/** Zend_Pdf_Element_Dictionary */
 require_once 'Zend/Pdf/Element/Dictionary.php';
-
-/** Zend_Pdf_Element_Name */
 require_once 'Zend/Pdf/Element/Name.php';
-
-/** Zend_Pdf_Element_Numeric */
+require_once 'Zend/Pdf/Element/Null.php';
 require_once 'Zend/Pdf/Element/Numeric.php';
-
-/** Zend_Pdf_Element_Object */
 require_once 'Zend/Pdf/Element/Object.php';
-
-/** Zend_Pdf_Element_Reference */
-require_once 'Zend/Pdf/Element/Reference.php';
-
-/** Zend_Pdf_Element_Object_Stream */
 require_once 'Zend/Pdf/Element/Object/Stream.php';
-
-/** Zend_Pdf_Element_String */
+require_once 'Zend/Pdf/Element/Reference.php';
 require_once 'Zend/Pdf/Element/String.php';
 
-/** Zend_Pdf_Element_Null */
-require_once 'Zend/Pdf/Element/Null.php';
-
-/** Zend_Pdf_Element_Reference_Context */
-require_once 'Zend/Pdf/Element/Reference/Context.php';
-
-/** Zend_Pdf_Element_Reference_Table */
-require_once 'Zend/Pdf/Element/Reference/Table.php';
-
-/** Zend_Pdf_ElementFactory_Interface */
-require_once 'Zend/Pdf/ElementFactory/Interface.php';
-
 
 /**
  * PDF string parser
@@ -178,15 +146,33 @@ class Zend_Pdf_StringParser
      */
     public function skipWhiteSpace($skipComment = true)
     {
-        while ($this->offset < strlen($this->data)) {
-            if (self::isWhiteSpace( ord($this->data[$this->offset]) )) {
-                $this->offset++;
-            } else if (ord($this->data[$this->offset]) == 0x25 && $skipComment) { // '%'
-                $this->skipComment();
-            } else {
-                return;
+        if ($skipComment) {
+            while (true) {
+                $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
+
+                if ($this->offset < strlen($this->data)  &&  $this->data[$this->offset] == '%') {
+                    // Skip comment
+                    $this->offset += strcspn($this->data, "\r\n", $this->offset);
+                } else {
+                    // A non-whitespace character other than '%' was found
+                    return;
+                }
             }
+        } else {
+            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
         }
+
+//        /** Original (non-optimized) implementation. */
+//
+//        while ($this->offset < strlen($this->data)) {
+//            if (strpos("\x00\t\n\f\r ", $this->data[$this->offset]) !== false) {
+//                $this->offset++;
+//            } else if (ord($this->data[$this->offset]) == 0x25 && $skipComment) { // '%'
+//                $this->skipComment();
+//            } else {
+//                return;
+//            }
+//        }
     }
 
 
@@ -247,13 +233,8 @@ class Zend_Pdf_StringParser
         while (true) {
             $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
 
-            if ($this->data[$this->offset] == '%') {
-                preg_match('/[\r\n]/', $this->data, $matches, PREG_OFFSET_CAPTURE, $this->offset);
-                if (count($matches) > 0) {
-                    $this->offset += strlen($matches[0][0]) + $matches[0][1];
-                } else {
-                    $this->offset = strlen($this->data);
-                }
+            if ($this->offset < strlen($this->data)  &&  $this->data[$this->offset] == '%') {
+                $this->offset += strcspn($this->data, "\r\n", $this->offset);
             } else {
                 break;
             }
@@ -263,25 +244,27 @@ class Zend_Pdf_StringParser
             return '';
         }
 
-        $start = $this->offset;
+        if ( /* self::isDelimiter( ord($this->data[$start]) ) */
+             strpos('()<>[]{}/%', $this->data[$this->offset]) !== false ) {
 
-        if (self::isDelimiter( ord($this->data[$start]) )) {
-            if ($this->data[$start] == '<' && $this->offset + 1 < strlen($this->data) && $this->data[$start+1] == '<') {
-                $this->offset += 2;
-                return '<<';
-            } else if ($this->data[$start] == '>' && $this->offset + 1 < strlen($this->data) && $this->data[$start+1] == '>') {
-                $this->offset += 2;
-                return '>>';
-            } else {
-                $this->offset++;
-                return $this->data[$start];
+            switch (substr($this->data, $this->offset, 2)) {
+                case '<<':
+                    $this->offset += 2;
+                    return '<<';
+                    break;
+
+                case '>>':
+                    $this->offset += 2;
+                    return '>>';
+                    break;
+
+                default:
+                    return $this->data[$this->offset++];
+                    break;
             }
         } else {
-            while ( ($this->offset < strlen($this->data)) &&
-                    (!self::isDelimiter(  ord($this->data[$this->offset]) )) &&
-                    (!self::isWhiteSpace( ord($this->data[$this->offset]) ))   ) {
-                $this->offset++;
-            }
+            $start = $this->offset;
+            $this->offset += strcspn($this->data, "()<>[]{}/%\x00\t\n\f\r ", $this->offset);
 
             return substr($this->data, $start, $this->offset - $start);
         }
@@ -314,7 +297,7 @@ class Zend_Pdf_StringParser
 
             case '/':
                 return ($this->_elements[] = new Zend_Pdf_Element_Name(
-                                                Zend_Pdf_Element_Name::unescape( $this->readLexeme() )
+                                                    Zend_Pdf_Element_Name::unescape( $this->readLexeme() )
                                                                       ));
 
             case '[':
@@ -334,6 +317,7 @@ class Zend_Pdf_StringParser
             case '{':
                 // fall through to next case
             case '}':
+                require_once 'Zend/Pdf/Exception.php';
                 throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.',
                                                 $this->offset));
 
@@ -368,32 +352,38 @@ class Zend_Pdf_StringParser
         $start = $this->offset;
         $openedBrackets = 1;
 
+        $this->offset += strcspn($this->data, '()\\', $this->offset);
+
         while ($this->offset < strlen($this->data)) {
             switch (ord( $this->data[$this->offset] )) {
                 case 0x28: // '(' - opened bracket in the string, needs balanced pair.
+                    $this->offset++;
                     $openedBrackets++;
                     break;
 
                 case 0x29: // ')' - pair to the opened bracket
+                    $this->offset++;
                     $openedBrackets--;
                     break;
 
                 case 0x5C: // '\\' - escape sequence, skip next char from a check
-                    $this->offset++;
+                    $this->offset += 2;
             }
 
-            $this->offset++;
             if ($openedBrackets == 0) {
                 break; // end of string
             }
+
+            $this->offset += strcspn($this->data, '()\\', $this->offset);
         }
         if ($openedBrackets != 0) {
+            require_once 'Zend/Pdf/Exception.php';
             throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start));
         }
 
         return new Zend_Pdf_Element_String(Zend_Pdf_Element_String::unescape( substr($this->data,
-                                                                 $start,
-                                                                 $this->offset - $start - 1) ));
+                                                                                     $start,
+                                                                                     $this->offset - $start - 1) ));
     }
 
 
@@ -408,21 +398,22 @@ class Zend_Pdf_StringParser
     {
         $start = $this->offset;
 
-        while ($this->offset < strlen($this->data)) {
-            if (self::isWhiteSpace( ord($this->data[$this->offset]) ) ||
-                ctype_xdigit( $this->data[$this->offset] ) ) {
-                $this->offset++;
-            } else if ($this->data[$this->offset] == '>') {
-                $this->offset++;
-                return new Zend_Pdf_Element_String_Binary(
-                               Zend_Pdf_Element_String_Binary::unescape( substr($this->data,
-                                                                    $start,
-                                                                    $this->offset - $start - 1) ));
-            } else {
-                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
-            }
+        $this->offset += strspn($this->data, "\x00\t\n\f\r 0123456789abcdefABCDEF", $this->offset);
+
+        if ($this->offset >= strlen($this->data) - 1) {
+            require_once 'Zend/Pdf/Exception.php';
+            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while reading binary string. Offset - 0x%X. \'>\' expected.', $start));
         }
-        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while binary string reading. Offset - 0x%X. \'>\' expected.', $start));
+
+        if ($this->data[$this->offset++] != '>') {
+            require_once 'Zend/Pdf/Exception.php';
+            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
+        }
+
+        return new Zend_Pdf_Element_String_Binary(
+                       Zend_Pdf_Element_String_Binary::unescape( substr($this->data,
+                                                                        $start,
+                                                                        $this->offset - $start - 1) ));
     }
 
 
@@ -445,6 +436,7 @@ class Zend_Pdf_StringParser
             }
         }
 
+        require_once 'Zend/Pdf/Exception.php';
         throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset));
     }
 
@@ -468,6 +460,7 @@ class Zend_Pdf_StringParser
                 $value = $this->readElement();
 
                 if (!$name instanceof Zend_Pdf_Element_Name) {
+                    require_once 'Zend/Pdf/Exception.php';
                     throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $nameStart));
                 }
 
@@ -477,6 +470,7 @@ class Zend_Pdf_StringParser
             }
         }
 
+        require_once 'Zend/Pdf/Exception.php';
         throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset));
     }
 
@@ -557,16 +551,19 @@ class Zend_Pdf_StringParser
 
         $objNum = $this->readLexeme();
         if (!ctype_digit($objNum)) {
+            require_once 'Zend/Pdf/Exception.php';
             throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum)));
         }
 
         $genNum = $this->readLexeme();
         if (!ctype_digit($genNum)) {
+            require_once 'Zend/Pdf/Exception.php';
             throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum)));
         }
 
         $objKeyword = $this->readLexeme();
         if ($objKeyword != 'obj') {
+            require_once 'Zend/Pdf/Exception.php';
             throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword)));
         }
 
@@ -595,10 +592,12 @@ class Zend_Pdf_StringParser
          * It's a stream object
          */
         if ($nextLexeme != 'stream') {
+            require_once 'Zend/Pdf/Exception.php';
             throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($nextLexeme)));
         }
 
         if (!$objValue instanceof Zend_Pdf_Element_Dictionary) {
+            require_once 'Zend/Pdf/Exception.php';
             throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($nextLexeme)));
         }
 
@@ -617,6 +616,7 @@ class Zend_Pdf_StringParser
         } else if ($this->data[$this->offset] == "\n"    ) {
             $this->offset++;
         } else {
+            require_once 'Zend/Pdf/Exception.php';
             throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($nextLexeme)));
         }
 
@@ -626,11 +626,13 @@ class Zend_Pdf_StringParser
 
         $nextLexeme = $this->readLexeme();
         if ($nextLexeme != 'endstream') {
+            require_once 'Zend/Pdf/Exception.php';
             throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($nextLexeme)));
         }
 
         $nextLexeme = $this->readLexeme();
         if ($nextLexeme != 'endobj') {
+            require_once 'Zend/Pdf/Exception.php';
             throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($nextLexeme)));
         }