Parser.php 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Pdf
  17. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. * @version $Id$
  20. */
  21. /** Zend_Pdf_Element */
  22. require_once 'Zend/Pdf/Element.php';
  23. /** Zend_Pdf_Element_Array */
  24. require_once 'Zend/Pdf/Element/Array.php';
  25. /** Zend_Pdf_Element_String_Binary */
  26. require_once 'Zend/Pdf/Element/String/Binary.php';
  27. /** Zend_Pdf_Element_Boolean */
  28. require_once 'Zend/Pdf/Element/Boolean.php';
  29. /** Zend_Pdf_Element_Dictionary */
  30. require_once 'Zend/Pdf/Element/Dictionary.php';
  31. /** Zend_Pdf_Element_Name */
  32. require_once 'Zend/Pdf/Element/Name.php';
  33. /** Zend_Pdf_Element_Numeric */
  34. require_once 'Zend/Pdf/Element/Numeric.php';
  35. /** Zend_Pdf_Element_Object */
  36. require_once 'Zend/Pdf/Element/Object.php';
  37. /** Zend_Pdf_Element_Reference */
  38. require_once 'Zend/Pdf/Element/Reference.php';
  39. /** Zend_Pdf_Element_Object_Stream */
  40. require_once 'Zend/Pdf/Element/Object/Stream.php';
  41. /** Zend_Pdf_Element_String */
  42. require_once 'Zend/Pdf/Element/String.php';
  43. /** Zend_Pdf_Element_Null */
  44. require_once 'Zend/Pdf/Element/Null.php';
  45. /** Zend_Pdf_Element_Reference_Context */
  46. require_once 'Zend/Pdf/Element/Reference/Context.php';
  47. /** Zend_Pdf_Element_Reference_Table */
  48. require_once 'Zend/Pdf/Element/Reference/Table.php';
  49. /** Zend_Pdf_Trailer_Keeper */
  50. require_once 'Zend/Pdf/Trailer/Keeper.php';
  51. /** Zend_Pdf_ElementFactory_Interface */
  52. require_once 'Zend/Pdf/ElementFactory/Interface.php';
  53. /** Zend_Pdf_PhpArray */
  54. require_once 'Zend/Pdf/PhpArray.php';
  55. /** Zend_Pdf_StringParser */
  56. require_once 'Zend/Pdf/StringParser.php';
  57. /** Zend_Pdf_Parser_Stream */
  58. require_once 'Zend/Pdf/Parser/Stream.php';
  59. /**
  60. * PDF file parser
  61. *
  62. * @package Zend_Pdf
  63. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  64. * @license http://framework.zend.com/license/new-bsd New BSD License
  65. */
  class Zend_Pdf_Parser
  {
      /**
       * String parser holding the raw PDF data and the current read offset
       *
       * @var Zend_Pdf_StringParser
       */
      private $_stringParser;

      /**
       * Last PDF file trailer
       *
       * @var Zend_Pdf_Trailer_Keeper
       */
      private $_trailer;

      /**
       * PDF version specified in the file header
       *
       * @var string
       */
      private $_pdfVersion;


      /**
       * Get length (in bytes) of source PDF
       *
       * @return integer
       */
      public function getPDFLength()
      {
          return strlen($this->_stringParser->data);
      }

      /**
       * Get PDF String
       *
       * @return string
       */
      public function getPDFString()
      {
          return $this->_stringParser->data;
      }

      /**
       * PDF version specified in the file header
       *
       * @return string
       */
      public function getPDFVersion()
      {
          return $this->_pdfVersion;
      }

      /**
       * Get Trailer object
       *
       * @return Zend_Pdf_Trailer_Keeper
       */
      public function getTrailer()
      {
          return $this->_trailer;
      }

      /**
       * Load XReference table and referenced objects
       *
       * Positions the string parser at $offset, reads the cross-reference
       * section found there and wraps its trailer dictionary into a trailer
       * keeper. If the trailer dictionary has a '/Prev' entry, the previous
       * section is loaded recursively and chained as the parent reference table.
       *
       * @param integer $offset
       * @throws Zend_Pdf_Exception
       * @return Zend_Pdf_Trailer_Keeper
       */
      private function _loadXRefTable($offset)
      {
          $this->_stringParser->offset = $offset;

          $refTable = new Zend_Pdf_Element_Reference_Table();
          $context  = new Zend_Pdf_Element_Reference_Context($this->_stringParser, $refTable);
          $this->_stringParser->setContext($context);

          if ($this->_stringParser->readLexeme() === 'xref') {
              // Common cross-reference table
              $trailerDict = $this->_readXRefTableSection($refTable);
          } else {
              // Cross-reference stream. Not supported yet: this call always throws.
              $trailerDict = $this->_loadXRefStream($offset, $context, $refTable);
          }

          $trailerObj = new Zend_Pdf_Trailer_Keeper($trailerDict, $context);
          if ($trailerDict->Prev instanceof Zend_Pdf_Element_Numeric ||
              $trailerDict->Prev instanceof Zend_Pdf_Element_Reference) {
              $trailerObj->setPrev($this->_loadXRefTable($trailerDict->Prev->value));
              $context->getRefTable()->setParent($trailerObj->getPrev()->getRefTable());
          }

          /**
           * We set '/Prev' dictionary property to the current cross-reference section offset.
           * It doesn't correspond to the actual data, but is true when trailer will be used
           * as a trailer for next generated PDF section.
           */
          $trailerObj->Prev = new Zend_Pdf_Element_Numeric($offset);

          return $trailerObj;
      }

      /**
       * Read a common (non-stream) cross-reference table and its trailer dictionary
       *
       * Expects the parser to be positioned right after the 'xref' keyword.
       * Every entry of every subsection is registered in $refTable.
       *
       * @param Zend_Pdf_Element_Reference_Table $refTable
       * @throws Zend_Pdf_Exception
       * @return Zend_Pdf_Element_Dictionary  Trailer dictionary following the table
       */
      private function _readXRefTableSection($refTable)
      {
          $parser = $this->_stringParser;
          $parser->skipWhiteSpace();

          while (($nextLexeme = $parser->readLexeme()) !== 'trailer') {
              // Subsection header: "<first object number> <number of entries>"
              if (!ctype_digit($nextLexeme)) {
                  throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $parser->offset - strlen($nextLexeme)));
              }
              $objNum = (int)$nextLexeme;

              $refCount = $parser->readLexeme();
              if (!ctype_digit($refCount)) {
                  throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $parser->offset - strlen($refCount)));
              }

              $parser->skipWhiteSpace();
              for ($remaining = (int)$refCount; $remaining > 0; $remaining--) {
                  // Entry layout: 10-digit offset, separator, 5-digit generation,
                  // separator, in-use key ('n' or 'f'), two separator characters.
                  $objectOffset = $this->_readXRefEntryNumber(10, 'Offset');
                  $this->_skipXRefSeparator();

                  $genNumber = $this->_readXRefEntryNumber(5, 'Generation number');
                  $this->_skipXRefSeparator();

                  $inUseKey = $parser->data[$parser->offset];
                  $parser->offset++;

                  // 'n' marks an in-use entry, 'f' a free one; any other key is ignored
                  if ($inUseKey === 'n' || $inUseKey === 'f') {
                      $refTable->addReference($objNum . ' ' . $genNumber . ' R',
                                              $objectOffset,
                                              $inUseKey === 'n');
                  }

                  $this->_skipXRefSeparator();
                  $this->_skipXRefSeparator();

                  $objNum++;
              }
          }

          $trailerDictOffset = $parser->offset;
          $trailerDict = $parser->readElement();
          if (!$trailerDict instanceof Zend_Pdf_Element_Dictionary) {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Dictionary expected after \'trailer\' keyword.', $trailerDictOffset));
          }

          return $trailerDict;
      }

      /**
       * Read a fixed-width decimal field of a cross-reference table entry
       *
       * The field is validated, leading zeros are stripped (so the value is never
       * mistaken for an octal number) and the parser is moved past the field.
       *
       * @param integer $width      Field width in bytes
       * @param string  $fieldName  Field name used in the error message
       * @throws Zend_Pdf_Exception
       * @return string  Decimal digits without leading zeros ('0' for an all-zero field)
       */
      private function _readXRefEntryNumber($width, $fieldName)
      {
          $parser = $this->_stringParser;

          $digits = substr($parser->data, $parser->offset, $width);
          if (!ctype_digit($digits)) {
              throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. %s must contain only digits.', $parser->offset, $fieldName));
          }
          $parser->offset += $width;

          $digits = ltrim($digits, '0');

          return ($digits === '') ? '0' : $digits;
      }

      /**
       * Require a white space character at the current offset and step over it
       *
       * @throws Zend_Pdf_Exception
       */
      private function _skipXRefSeparator()
      {
          $parser = $this->_stringParser;

          if (!Zend_Pdf_StringParser::isWhiteSpace(ord($parser->data[$parser->offset]))) {
              throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $parser->offset));
          }
          $parser->offset++;
      }

      /**
       * Read a cross-reference stream
       *
       * The stream is validated and walked through, but cross-reference streams
       * are not supported yet, so this method always ends with an exception.
       *
       * @param integer $offset
       * @param Zend_Pdf_Element_Reference_Context $context
       * @param Zend_Pdf_Element_Reference_Table $refTable
       * @throws Zend_Pdf_Exception
       * @return Zend_Pdf_Element_Dictionary  Never returned at the moment
       */
      private function _loadXRefStream($offset, $context, $refTable)
      {
          $xrefStream = $this->_stringParser->getObject($offset, $context);

          if (!$xrefStream instanceof Zend_Pdf_Element_Object_Stream) {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference stream expected.', $offset));
          }

          $trailerDict = $xrefStream->dictionary;
          if ($trailerDict->Type->value != 'XRef') {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference stream object must have /Type property assigned to /XRef.', $offset));
          }
          if ($trailerDict->W === null || $trailerDict->W->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary doesn\'t have W entry or it\'s not an array.', $offset));
          }

          $entryField1Size = $trailerDict->W->items[0]->value;
          $entryField2Size = $trailerDict->W->items[1]->value;
          $entryField3Size = $trailerDict->W->items[2]->value;

          if ($entryField2Size == 0 || $entryField3Size == 0) {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Wrong W dictionary entry. Only type field of stream entries has default value and could be zero length.', $offset));
          }

          $xrefStreamData = &$xrefStream->value;

          $hasIndex = ($trailerDict->Index !== null);
          if ($hasIndex) {
              if ($trailerDict->Index->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
                  throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary Index entry must be an array.', $offset));
              }
              $sections = count($trailerDict->Index->items)/2;
          } else {
              $sections = 1;
          }

          $streamOffset = 0;

          for ($section = 0; $section < $sections; $section++) {
              if ($hasIndex) {
                  $objNum  = $trailerDict->Index->items[$section*2    ]->value;
                  $entries = $trailerDict->Index->items[$section*2 + 1]->value;
              } else {
                  $objNum  = 0;
                  $entries = $trailerDict->Size->value;
              }

              for ($entry = 0; $entry < $entries; $entry++) {
                  // A zero-width type field means the default entry type 1 (in use)
                  $type = ($entryField1Size == 0)
                        ? 1
                        : $this->_readXRefStreamField($xrefStreamData, $streamOffset, $entryField1Size);
                  $field2 = $this->_readXRefStreamField($xrefStreamData, $streamOffset, $entryField2Size);
                  $field3 = $this->_readXRefStreamField($xrefStreamData, $streamOffset, $entryField3Size);

                  switch ($type) {
                      case 0:
                          // Free object: $field2 is the next free object, $field3 the generation
                          $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, false);
                          break;

                      case 1:
                          // In use object: $field2 is the offset, $field3 the generation
                          $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, true);
                          break;

                      case 2:
                          // Object in an object stream ($field2 - object stream, $field3 - index).
                          // Nothing is registered for it.
                          break;
                  }

                  $objNum++;
              }
          }

          throw new Zend_Pdf_Exception('Cross-reference streams are not supported yet.');
      }

      /**
       * Read one field of a cross-reference stream entry and advance the stream offset
       *
       * @param string  $data          Cross-reference stream data
       * @param integer $streamOffset  Current offset within $data (updated)
       * @param integer $fieldSize     Field width in bytes
       * @return integer
       */
      private function _readXRefStreamField(&$data, &$streamOffset, $fieldSize)
      {
          if ($fieldSize == 1) {
              // Optimize one-byte field case
              return ord($data[$streamOffset++]);
          }

          $value = Zend_Pdf_StringParser::parseIntFromStream($data, $streamOffset, $fieldSize);
          $streamOffset += $fieldSize;

          return $value;
      }

      /**
       * Read the whole PDF file into a string
       *
       * @param string $source  File path
       * @throws Zend_Pdf_Exception
       * @return string
       */
      private function _readFile($source)
      {
          // Warning is suppressed because the failure is reported as an exception
          if (($pdfFile = @fopen($source, 'rb')) === false) {
              throw new Zend_Pdf_Exception( "Can not open '$source' file for reading." );
          }

          $data      = '';
          $byteCount = filesize($source);
          while ($byteCount > 0) {
              $nextBlock = fread($pdfFile, $byteCount);
              // Strict checks: a block consisting of "0" is valid data, not a failure
              if ($nextBlock === false || $nextBlock === '') {
                  break;
              }
              $data      .= $nextBlock;
              $byteCount -= strlen($nextBlock);
          }
          fclose($pdfFile);

          return $data;
      }

      /**
       * Move the parser offset backwards over a run of characters
       *
       * @param boolean $overWhiteSpace  true to step over white space characters,
       *                                 false to step over non-white-space ones
       */
      private function _stepBack($overWhiteSpace)
      {
          $parser = $this->_stringParser;

          while ($parser->offset > 0 &&
                 (bool)Zend_Pdf_StringParser::isWhiteSpace(ord($parser->data[$parser->offset])) === $overWhiteSpace) {
              $parser->offset--;
          }
      }

      /**
       * Object constructor
       *
       * Checks the '%PDF-' header and the version, locates the 'startxref'
       * keyword in front of the '%%EOF' marker and loads the cross-reference
       * sections starting from the offset specified there.
       *
       * Note: PHP duplicates string, which is sent by value, only if it's updated.
       * Thus we don't need to care about overhead
       *
       * @param mixed $source  File path if $load is true, PDF data otherwise
       * @param Zend_Pdf_ElementFactory_Interface $factory
       * @param boolean $load
       * @throws Zend_Exception
       */
      public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory, $load)
      {
          if ($load) {
              $this->_stringParser = new Zend_Pdf_StringParser($this->_readFile($source), $factory);
          } else {
              $this->_stringParser = new Zend_Pdf_StringParser($source, $factory);
          }
          $parser = $this->_stringParser;

          $pdfVersionComment = $parser->readComment();
          if (substr($pdfVersionComment, 0, 5) != '%PDF-') {
              throw new Zend_Pdf_Exception('File is not a PDF.');
          }

          $pdfVersion = substr($pdfVersionComment, 5);
          if (version_compare($pdfVersion, '0.9',  '<')  ||
              version_compare($pdfVersion, '1.61', '>=')
             ) {
              /**
               * @todo
               * To support PDF versions 1.5 (Acrobat 6) and PDF version 1.7 (Acrobat 7)
               * Stream compression filter must be implemented (for compressed object streams).
               * Cross reference streams must be implemented
               */
              throw new Zend_Pdf_Exception(sprintf('Unsupported PDF version. Zend_Pdf supports PDF 1.0-1.4. Current version - \'%f\'', $pdfVersion));
          }
          $this->_pdfVersion = $pdfVersion;

          // The end-of-file marker may only be followed by an end-of-line sequence
          $eofOffset = strrpos($parser->data, '%%EOF');
          if ($eofOffset === false ||
              strlen($parser->data) - $eofOffset > 7) {
              throw new Zend_Pdf_Exception('Pdf file syntax error. End-of-file marker expected at the end of file.');
          }
          $parser->offset = $eofOffset - 1;

          // Go to end of cross-reference table offset
          $this->_stepBack(true);
          // Go to the start of cross-reference table offset
          $this->_stepBack(false);
          // Go to the end of 'startxref' keyword
          $this->_stepBack(true);

          // Go to the white space (eol marker) before 'startxref' keyword
          $parser->offset -= 9;
          if ($parser->offset < 0) {
              // File is too short to contain the keyword; never read at a negative offset
              throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. \'startxref\' keyword expected. Offset - 0x%X.', 0));
          }

          $nextLexeme = $parser->readLexeme();
          if ($nextLexeme != 'startxref') {
              throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. \'startxref\' keyword expected. Offset - 0x%X.', $parser->offset - strlen($nextLexeme)));
          }

          $startXref = $parser->readLexeme();
          if (!ctype_digit($startXref)) {
              throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. Cross-reference table offset must contain only digits. Offset - 0x%X.', $parser->offset - strlen($startXref)));
          }

          $this->_trailer = $this->_loadXRefTable($startXref);
          $factory->setObjectCount($this->_trailer->Size->value);
      }

      /**
       * Object destructor
       *
       * Releases string parser resources, if the parser has been created.
       */
      public function __destruct()
      {
          if ($this->_stringParser !== null) {
              $this->_stringParser->cleanUp();
          }
      }
  }