OLERead.php 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. <?php
  2. defined('IDENTIFIER_OLE') ||
  3. define('IDENTIFIER_OLE', pack('CCCCCCCC', 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1));
  4. class PHPExcel_Shared_OLERead {
  5. private $data = '';
  6. const IDENTIFIER_OLE = IDENTIFIER_OLE; // OLE identifier
  7. const BIG_BLOCK_SIZE = 0x200;
  8. const SMALL_BLOCK_SIZE = 0x40; // Size of a short sector = 64 bytes
  9. const PROPERTY_STORAGE_BLOCK_SIZE = 0x80; // Size of a directory entry always = 128 bytes
  10. const SMALL_BLOCK_THRESHOLD = 0x1000; // Minimum size of a standard stream = 4096 bytes, streams smaller than this are stored as short streams
  11. // header offsets
  12. const NUM_BIG_BLOCK_DEPOT_BLOCKS_POS = 0x2c;
  13. const ROOT_START_BLOCK_POS = 0x30;
  14. const SMALL_BLOCK_DEPOT_BLOCK_POS = 0x3c;
  15. const EXTENSION_BLOCK_POS = 0x44;
  16. const NUM_EXTENSION_BLOCK_POS = 0x48;
  17. const BIG_BLOCK_DEPOT_BLOCKS_POS = 0x4c;
  18. // property storage offsets (directory offsets)
  19. const SIZE_OF_NAME_POS = 0x40;
  20. const TYPE_POS = 0x42;
  21. const START_BLOCK_POS = 0x74;
  22. const SIZE_POS = 0x78;
  23. public $error = false;
  24. public $workbook = null;
  25. public $summaryInformation = null;
  26. public $documentSummaryInformation = null;
  27. public function read($file){
  28. if( ! is_readable($file)) {
  29. throw new Exception('SpreadsheetReader_XLS: File not readable (' . $file . ')');
  30. }
  31. $this->data = file_get_contents($file);
  32. if( ! $this->data || substr($this->data, 0, 8) != self::IDENTIFIER_OLE){
  33. $this->error = true;
  34. return false;
  35. }
  36. $this->numBigBlockDepotBlocks = self::_GetInt4d($this->data, self::NUM_BIG_BLOCK_DEPOT_BLOCKS_POS);
  37. $this->rootStartBlock = self::_GetInt4d($this->data, self::ROOT_START_BLOCK_POS);
  38. $this->sbdStartBlock = self::_GetInt4d($this->data, self::SMALL_BLOCK_DEPOT_BLOCK_POS);
  39. $this->extensionBlock = self::_GetInt4d($this->data, self::EXTENSION_BLOCK_POS);
  40. $this->numExtensionBlocks = self::_GetInt4d($this->data, self::NUM_EXTENSION_BLOCK_POS);
  41. $bigBlockDepotBlocks = array();
  42. $pos = self::BIG_BLOCK_DEPOT_BLOCKS_POS;
  43. $bbdBlocks = $this->numExtensionBlocks == 0 ? $this->numBigBlockDepotBlocks : (self::BIG_BLOCK_SIZE - self::BIG_BLOCK_DEPOT_BLOCKS_POS) / 4;
  44. for ($i = 0; $i < $bbdBlocks; ++$i) {
  45. $bigBlockDepotBlocks[$i] = self::_GetInt4d($this->data, $pos);
  46. $pos += 4;
  47. }
  48. for ($j = 0; $j < $this->numExtensionBlocks; ++$j) {
  49. $pos = ($this->extensionBlock + 1) * self::BIG_BLOCK_SIZE;
  50. $blocksToRead = min($this->numBigBlockDepotBlocks - $bbdBlocks, self::BIG_BLOCK_SIZE / 4 - 1);
  51. for ($i = $bbdBlocks; $i < $bbdBlocks + $blocksToRead; ++$i) {
  52. $bigBlockDepotBlocks[$i] = self::_GetInt4d($this->data, $pos);
  53. $pos += 4;
  54. }
  55. $bbdBlocks += $blocksToRead;
  56. if ($bbdBlocks < $this->numBigBlockDepotBlocks) {
  57. $this->extensionBlock = self::_GetInt4d($this->data, $pos);
  58. }
  59. }
  60. $pos = 0;
  61. $this->bigBlockChain = '';
  62. $bbs = self::BIG_BLOCK_SIZE / 4;
  63. for ($i = 0; $i < $this->numBigBlockDepotBlocks; ++$i) {
  64. $pos = ($bigBlockDepotBlocks[$i] + 1) * self::BIG_BLOCK_SIZE;
  65. $this->bigBlockChain .= substr($this->data, $pos, 4*$bbs);
  66. $pos += 4*$bbs;
  67. }
  68. $pos = 0;
  69. $sbdBlock = $this->sbdStartBlock;
  70. $this->smallBlockChain = '';
  71. while ($sbdBlock != -2) {
  72. $pos = ($sbdBlock + 1) * self::BIG_BLOCK_SIZE;
  73. $this->smallBlockChain .= substr($this->data, $pos, 4*$bbs);
  74. $pos += 4*$bbs;
  75. $sbdBlock = self::_GetInt4d($this->bigBlockChain, 4*$sbdBlock);
  76. }
  77. $block = $this->rootStartBlock; // read the directory stream
  78. $this->entry = $this->_readData($block);
  79. $this->_readPropertySets();
  80. }
  81. /**
  82. * Extract binary stream data
  83. *
  84. * @return string
  85. */
  86. public function getStream($stream) {
  87. if ($stream === NULL) {
  88. return null;
  89. }
  90. $streamData = '';
  91. if ($this->props[$stream]['size'] < self::SMALL_BLOCK_THRESHOLD) {
  92. $rootdata = $this->_readData($this->props[$this->rootentry]['startBlock']);
  93. $block = $this->props[$stream]['startBlock'];
  94. while ($block != -2) {
  95. $pos = $block * self::SMALL_BLOCK_SIZE;
  96. $streamData .= substr($rootdata, $pos, self::SMALL_BLOCK_SIZE);
  97. $block = self::_GetInt4d($this->smallBlockChain, $block*4);
  98. }
  99. }
  100. else {
  101. $numBlocks = $this->props[$stream]['size'] / self::BIG_BLOCK_SIZE;
  102. if ($this->props[$stream]['size'] % self::BIG_BLOCK_SIZE != 0) {
  103. ++$numBlocks;
  104. }
  105. if($numBlocks){
  106. $block = $this->props[$stream]['startBlock'];
  107. while ($block != -2) {
  108. $pos = ($block + 1) * self::BIG_BLOCK_SIZE;
  109. $streamData .= substr($this->data, $pos, self::BIG_BLOCK_SIZE);
  110. $block = self::_GetInt4d($this->bigBlockChain, $block*4);
  111. }
  112. }
  113. }
  114. return $streamData;
  115. }
  116. /**
  117. * Read a standard stream (by joining sectors using information from SAT)
  118. *
  119. * @param int $bl Sector ID where the stream starts
  120. * @return string Data for standard stream
  121. */
  122. private function _readData($block) {
  123. $data = '';
  124. while ($block != -2) {
  125. $pos = ($block + 1) * self::BIG_BLOCK_SIZE;
  126. $data .= substr($this->data, $pos, self::BIG_BLOCK_SIZE);
  127. $block = self::_GetInt4d($this->bigBlockChain, 4*$block);
  128. }
  129. return $data;
  130. }
  131. /**
  132. * Read entries in the directory stream.
  133. */
  134. private function _readPropertySets() {
  135. $offset = 0;
  136. $entryLen = strlen($this->entry); // loop through entires, each entry is 128 bytes
  137. while ($offset < $entryLen) {
  138. $data = substr($this->entry, $offset, self::PROPERTY_STORAGE_BLOCK_SIZE); // entry data (128 bytes)
  139. $nameSize = ord($data[self::SIZE_OF_NAME_POS]) | (ord($data[self::SIZE_OF_NAME_POS + 1]) << 8); // size in bytes of name
  140. $name = str_replace("\x00", "", substr($data, 0, $nameSize));
  141. $this->props[] = array (
  142. 'name' => $name,
  143. 'type' => ord($data[self::TYPE_POS]), // type of entry
  144. 'size' => self::_GetInt4d($data, self::SIZE_POS),
  145. 'startBlock' => self::_GetInt4d($data, self::START_BLOCK_POS)
  146. );
  147. $upName = strtoupper($name); // tmp helper to simplify checks
  148. if (($upName === 'WORKBOOK') || ($upName === 'BOOK')) { // Workbook directory entry (BIFF5 uses Book, BIFF8 uses Workbook)
  149. $this->workbook = count($this->props) - 1;
  150. }
  151. else if ( $upName === 'ROOT ENTRY' || $upName === 'R') {
  152. $this->rootentry = count($this->props) - 1; // Root entry
  153. }
  154. if ($name == chr(5) . 'SummaryInformation') {
  155. $this->summaryInformation = count($this->props) - 1; // Summary information
  156. }
  157. if ($name == chr(5) . 'DocumentSummaryInformation') {
  158. $this->documentSummaryInformation = count($this->props) - 1; // Additional Document Summary information
  159. }
  160. $offset += self::PROPERTY_STORAGE_BLOCK_SIZE;
  161. }
  162. }
  163. /**
  164. * Read 4 bytes of data at specified position
  165. * FIX: represent numbers correctly on 64-bit system. Hacked by Andreas Rehm 2006 to ensure correct result of the <<24 block on 32 and 64bit systems
  166. * http://sourceforge.net/tracker/index.php?func=detail&aid=1487372&group_id=99160&atid=623334
  167. *
  168. * @param string $data
  169. * @param int $pos
  170. * @return int
  171. */
  172. private static function _GetInt4d($data, $pos){
  173. $_or_24 = ord($data[$pos + 3]);
  174. if ($_or_24 >= 128) {
  175. $_ord_24 = -abs((256 - $_or_24) << 24); // negative number
  176. } else {
  177. $_ord_24 = ($_or_24 & 127) << 24;
  178. }
  179. return ord($data[$pos]) | (ord($data[$pos + 1]) << 8) | (ord($data[$pos + 2]) << 16) | $_ord_24;
  180. }
  181. }