SpreadsheetReader.php 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361
  1. <?php
  2. class Spreadsheet_Excel_Reader {
  3. // ParseXL definitions
  4. const XLS_BIFF8 = 0x0600;
  5. const XLS_BIFF7 = 0x0500;
  6. const XLS_WorkbookGlobals = 0x0005;
  7. const XLS_Worksheet = 0x0010;
  8. // record identifiers
  9. const XLS_Type_FORMULA = 0x0006;
  10. const XLS_Type_FORMULA2 = 0x0406;
  11. const XLS_Type_EOF = 0x000a;
  12. const XLS_Type_PROTECT = 0x0012;
  13. const XLS_Type_OBJECTPROTECT = 0x0063;
  14. const XLS_Type_SCENPROTECT = 0x00dd;
  15. const XLS_Type_PASSWORD = 0x0013;
  16. const XLS_Type_HEADER = 0x0014;
  17. const XLS_Type_FOOTER = 0x0015;
  18. const XLS_Type_EXTERNSHEET = 0x0017;
  19. const XLS_Type_DEFINEDNAME = 0x0018;
  20. const XLS_Type_VERTICALPAGEBREAKS = 0x001a;
  21. const XLS_Type_HORIZONTALPAGEBREAKS = 0x001b;
  22. const XLS_Type_NOTE = 0x001c;
  23. const XLS_Type_SELECTION = 0x001d;
  24. const XLS_Type_DATEMODE = 0x0022;
  25. const XLS_Type_EXTERNNAME = 0x0023;
  26. const XLS_Type_LEFTMARGIN = 0x0026;
  27. const XLS_Type_RIGHTMARGIN = 0x0027;
  28. const XLS_Type_TOPMARGIN = 0x0028;
  29. const XLS_Type_BOTTOMMARGIN = 0x0029;
  30. const XLS_Type_PRINTGRIDLINES = 0x002b;
  31. const XLS_Type_FILEPASS = 0x002f;
  32. const XLS_Type_FONT = 0x0031;
  33. const XLS_Type_CONTINUE = 0x003c;
  34. const XLS_Type_PANE = 0x0041;
  35. const XLS_Type_CODEPAGE = 0x0042;
  36. const XLS_Type_DEFCOLWIDTH = 0x0055;
  37. const XLS_Type_OBJ = 0x005d;
  38. const XLS_Type_COLINFO = 0x007d;
  39. const XLS_Type_IMDATA = 0x007f;
  40. const XLS_Type_SHEETPR = 0x0081;
  41. const XLS_Type_HCENTER = 0x0083;
  42. const XLS_Type_VCENTER = 0x0084;
  43. const XLS_Type_SHEET = 0x0085;
  44. const XLS_Type_PALETTE = 0x0092;
  45. const XLS_Type_SCL = 0x00a0;
  46. const XLS_Type_PAGESETUP = 0x00a1;
  47. const XLS_Type_MULRK = 0x00bd;
  48. const XLS_Type_MULBLANK = 0x00be;
  49. const XLS_Type_DBCELL = 0x00d7;
  50. const XLS_Type_XF = 0x00e0;
  51. const XLS_Type_MERGEDCELLS = 0x00e5;
  52. const XLS_Type_MSODRAWINGGROUP = 0x00eb;
  53. const XLS_Type_MSODRAWING = 0x00ec;
  54. const XLS_Type_SST = 0x00fc;
  55. const XLS_Type_LABELSST = 0x00fd;
  56. const XLS_Type_EXTSST = 0x00ff;
  57. const XLS_Type_EXTERNALBOOK = 0x01ae;
  58. const XLS_Type_DATAVALIDATIONS = 0x01b2;
  59. const XLS_Type_TXO = 0x01b6;
  60. const XLS_Type_HYPERLINK = 0x01b8;
  61. const XLS_Type_DATAVALIDATION = 0x01be;
  62. const XLS_Type_DIMENSION = 0x0200;
  63. const XLS_Type_BLANK = 0x0201;
  64. const XLS_Type_NUMBER = 0x0203;
  65. const XLS_Type_LABEL = 0x0204;
  66. const XLS_Type_BOOLERR = 0x0205;
  67. const XLS_Type_STRING = 0x0207;
  68. const XLS_Type_ROW = 0x0208;
  69. const XLS_Type_INDEX = 0x020b;
  70. const XLS_Type_ARRAY = 0x0221;
  71. const XLS_Type_DEFAULTROWHEIGHT = 0x0225;
  72. const XLS_Type_WINDOW2 = 0x023e;
  73. const XLS_Type_RK = 0x007e;
  74. const XLS_Type_RK2 = 0x027e;
  75. const XLS_Type_STYLE = 0x0293;
  76. const XLS_Type_FORMAT = 0x041e;
  77. const XLS_Type_SHAREDFMLA = 0x04bc;
  78. const XLS_Type_BOF = 0x0809;
  79. const XLS_Type_SHEETPROTECTION = 0x0867;
  80. const XLS_Type_RANGEPROTECTION = 0x0868;
  81. const XLS_Type_SHEETLAYOUT = 0x0862;
  82. const XLS_Type_XFEXT = 0x087d;
  83. const XLS_Type_PAGELAYOUTVIEW = 0x088b;
  84. const XLS_Type_UNKNOWN = 0xffff;
  85. // Encryption type
  86. const MS_BIFF_CRYPTO_NONE = 0;
  87. const MS_BIFF_CRYPTO_XOR = 1;
  88. const MS_BIFF_CRYPTO_RC4 = 2;
  89. // Size of stream blocks when using RC4 encryption
  90. const REKEY_BLOCK = 0x400;
  91. private $_pos;
  92. private $_data;
  93. private $_cell;
  94. private $_sst;
  95. private $_sheets;
  96. private $_dataSize;
  97. private $_codepage = 'CP1252';
  98. private $index = 0;
  99. private $curretSheet = 0;
  100. private $dateFormats = array (
  101. 0xe => "m/d/Y",
  102. 0xf => "M-d-Y",
  103. 0x10 => "d-M",
  104. 0x11 => "M-Y",
  105. 0x12 => "h:i a",
  106. 0x13 => "h:i:s a",
  107. 0x14 => "H:i",
  108. 0x15 => "H:i:s",
  109. 0x16 => "d/m/Y H:i",
  110. 0x2d => "i:s",
  111. 0x2e => "H:i:s",
  112. 0x2f => "i:s.S"
  113. );
  114. private $numberFormats = array(
  115. 0x1 => "0",
  116. 0x2 => "0.00",
  117. 0x3 => "#,##0",
  118. 0x4 => "#,##0.00",
  119. 0x5 => "\$#,##0;(\$#,##0)",
  120. 0x6 => "\$#,##0;[Red](\$#,##0)",
  121. 0x7 => "\$#,##0.00;(\$#,##0.00)",
  122. 0x8 => "\$#,##0.00;[Red](\$#,##0.00)",
  123. 0x9 => "0%",
  124. 0xa => "0.00%",
  125. 0xb => "0.00E+00",
  126. 0x25 => "#,##0;(#,##0)",
  127. 0x26 => "#,##0;[Red](#,##0)",
  128. 0x27 => "#,##0.00;(#,##0.00)",
  129. 0x28 => "#,##0.00;[Red](#,##0.00)",
  130. 0x29 => "#,##0;(#,##0)", // Not exactly
  131. 0x2a => "\$#,##0;(\$#,##0)", // Not exactly
  132. 0x2b => "#,##0.00;(#,##0.00)", // Not exactly
  133. 0x2c => "\$#,##0.00;(\$#,##0.00)", // Not exactly
  134. 0x30 => "##0.0E+0"
  135. );
  136. public $error = false;
  137. /**
  138. * Create a new Spreadsheet_Excel_Reader instance
  139. */
  140. public function __construct($file) {
  141. $this->_loadOLE($file); // Read the OLE file
  142. }
  143. /**
  144. * Changes sheet to another.
  145. * @param bool
  146. */
  147. public function ChangeSheet($index){
  148. $this->curretSheet = $index;
  149. return true;
  150. }
  151. /**
  152. * 获取Cell数据
  153. */
  154. public function getCell(){
  155. $this->_cell = array();
  156. $this->_endRow = false;
  157. $this->_key = null;
  158. if( ! $this->_parse){
  159. $this->_parse = true;
  160. $this->_pos = 0;
  161. // Parse Workbook Global Substream
  162. while ($this->_pos < $this->_dataSize) {
  163. $code = self::_GetInt2d($this->_data, $this->_pos);
  164. switch ($code) {
  165. case self::XLS_Type_SST: $this->_readSst(); break;
  166. case self::XLS_Type_CODEPAGE: $this->_readCodepage(); break;
  167. case self::XLS_Type_DATEMODE: $this->_readDateMode(); break;
  168. case self::XLS_Type_FORMAT: $this->_readFormat(); break;
  169. case self::XLS_Type_XF: $this->_readXf(); break;
  170. case self::XLS_Type_EOF: $this->_readDefault(); break 2;
  171. default: $this->_readDefault(); break;
  172. }
  173. }
  174. }
  175. // Parse the individual sheet
  176. $this->_pos = $this->_lastPos ? $this->_lastPos : $this->_sheets[$this->curretSheet]['offset'];
  177. while ($this->_pos <= $this->_dataSize - 4) {
  178. if($this->_endRow) break;
  179. $code = self::_GetInt2d($this->_data, $this->_pos);
  180. switch ($code) {
  181. case self::XLS_Type_RK:
  182. case self::XLS_Type_RK2: $this->_readRk(); break;
  183. case self::XLS_Type_LABELSST: $this->_readLabelSst(); break;
  184. case self::XLS_Type_MULRK: $this->_readMulRk(); break;
  185. case self::XLS_Type_NUMBER: $this->_readNumber(); break;
  186. case self::XLS_Type_FORMULA:
  187. case self::XLS_Type_FORMULA2: $this->_readFormula(); break;
  188. case self::XLS_Type_BOOLERR: $this->_readBoolErr(); break;
  189. case self::XLS_Type_STRING: $this->_readString(); break;
  190. case self::XLS_Type_MULBLANK: $this->_readBlank(); break;
  191. case self::XLS_Type_LABEL: $this->_readLabel(); break;
  192. case self::XLS_Type_EOF: $this->_readDefault(); break 2;
  193. default: $this->_readDefault(); break;
  194. }
  195. }
  196. return $this->_cell;
  197. }
  198. /**
  199. * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns)
  200. */
  201. public function getWorksheetInfo() {
  202. if( ! $this->_sheets){
  203. $this->_dataSize = strlen($this->_data); // total byte size of Excel data (workbook global substream + sheet substreams)
  204. $this->_pos = 0;
  205. $this->_sheets = array();
  206. // Parse Workbook Global Substream
  207. while ($this->_pos < $this->_dataSize) {
  208. $code = self::_GetInt2d($this->_data, $this->_pos);
  209. switch ($code) {
  210. case self::XLS_Type_BOF: $this->_readBof(); break;
  211. case self::XLS_Type_SHEET: $this->_readSheet(); break;
  212. case self::XLS_Type_EOF: $this->_readDefault(); break 2;
  213. default: $this->_readDefault(); break;
  214. }
  215. }
  216. }
  217. if( ! isset($this->_sheets[$this->curretSheet])){
  218. return array();
  219. }
  220. $sheetInfo = array(
  221. 'worksheetName' => $this->_sheets[$this->curretSheet]['name'],
  222. 'lastColumnLetter' => 'A',
  223. 'lastColumnIndex' => 0,
  224. 'totalRows' => 0,
  225. 'totalColumns' => 0
  226. );
  227. // Parse the individual sheet
  228. $this->_pos = $this->_sheets[$this->curretSheet]['offset'];
  229. while ($this->_pos <= $this->_dataSize - 4) {
  230. $code = self::_GetInt2d($this->_data, $this->_pos);
  231. switch ($code) {
  232. case self::XLS_Type_DIMENSION:
  233. $length = self::_GetInt2d($this->_data, $this->_pos + 2);
  234. $this->_pos += 4;
  235. if ($length == 10 && $this->_version == self::XLS_BIFF7) {
  236. $sheetInfo['totalRows'] = self::_GetInt2d($this->_data, $this->_pos + 2);
  237. $sheetInfo['totalColumns'] = self::_GetInt2d($this->_data, $this->_pos + 6);
  238. }
  239. else{
  240. $sheetInfo['totalRows'] = self::_GetInt2d($this->_data, $this->_pos + 4);
  241. $sheetInfo['totalColumns'] = self::_GetInt2d($this->_data, $this->_pos + 10);
  242. }
  243. break 2;
  244. default:
  245. $this->_readDefault();
  246. break;
  247. }
  248. }
  249. if ($sheetInfo['totalColumns']) {
  250. $sheetInfo['lastColumnIndex'] = $sheetInfo['totalColumns'] - 1;
  251. }
  252. $sheetInfo['lastColumnLetter'] = self::_stringFromColumnIndex($sheetInfo['lastColumnIndex']);
  253. return $sheetInfo;
  254. }
  255. private function _addCell($row, $column, $value, $format){
  256. if(is_null($this->_key)){
  257. $this->_key = $row;
  258. }
  259. if($row > $this->_key){
  260. $this->_endRow = true;
  261. return false;
  262. }
  263. switch ($format) {
  264. case 'NULL':
  265. $_value = $value;
  266. break;
  267. case 'STRING2':
  268. case 'STRING':
  269. case 'INLINE':
  270. $value = substr($value, 0, 32767);
  271. $_value = str_replace(array("\r\n", "\r"), "\n", $value);
  272. break;
  273. case 'NUMERIC':
  274. $_value = (float)$value;
  275. break;
  276. case 'FORMULA':
  277. $_value = (string)$value;
  278. break;
  279. case 'BOOL':
  280. $_value = (bool)$value;
  281. break;
  282. case 'ERROR':
  283. $_errorCodes = array(
  284. '#NULL!' => 0,
  285. '#DIV/0!' => 1,
  286. '#VALUE!' => 2,
  287. '#REF!' => 3,
  288. '#NAME?' => 4,
  289. '#NUM!' => 5,
  290. '#N/A' => 6
  291. );
  292. $_value = (string)$value;
  293. if ( ! array_key_exists($_value, $_errorCodes)) {
  294. $_value = '#NULL!';
  295. }
  296. break;
  297. default:
  298. $_value = '#NULL!';
  299. break;
  300. }
  301. $this->_lastPos = $this->_pos;
  302. $this->_cell[$column] = $_value;
  303. }
  304. /**
  305. * Use OLE reader to extract the relevant data streams from the OLE file
  306. */
  307. private function _loadOLE($file) {
  308. self::_loadClass();
  309. $ole = new PHPExcel_Shared_OLERead(); // OLE reader
  310. $res = $ole->read($file); // get excel data
  311. if ($ole->error) {
  312. $this->error = true;
  313. return false;
  314. }
  315. $this->_data = $ole->getStream($ole->workbook); // Get workbook data: workbook stream + sheet streams
  316. }
  317. private function _getFormatDetail($data, $pos, $value, $column){
  318. $xfIndex = self::_GetInt2d($data, $pos + 4);
  319. $xfRecord = $this->xfRecords[$xfIndex];
  320. $type = $xfRecord['type'];
  321. $format = $xfRecord['format'];
  322. $formatIndex = $xfRecord['formatIndex'];
  323. if ($type == 'date') {
  324. $_type = 'STRING';
  325. // Convert numeric value into a date
  326. $utcDays = floor($value - ($this->_excelBaseDate == 1904 ? 24107 : 25569));
  327. $utcValue = $utcDays * 86400;
  328. $keys = array('seconds','minutes','hours','mday','wday','mon','year','yday','weekday','month',0);
  329. $datas = explode(":", gmdate('s:i:G:j:w:n:Y:z:l:F:U', $utcValue));
  330. foreach ($keys as $key => $value) {
  331. $dateInfo[$value] = $datas[$key];
  332. }
  333. $fractionalDay = $value - floor($value) + .0000001; // The .0000001 is to fix for php/excel fractional diffs
  334. $totalSeconds = floor(86400 * $fractionalDay);
  335. $secs = $totalSeconds % 60;
  336. $totalSeconds -= $secs;
  337. $hours = floor($totalSeconds / (60 * 60));
  338. $mins = floor($totalSeconds / 60) % 60;
  339. $_value = date ($format, mktime($hours, $mins, $secs, $dateInfo["mon"], $dateInfo["mday"], $dateInfo["year"]));
  340. }
  341. else if ($type == 'number') {
  342. $_type = 'NUMERIC';
  343. $_value = $this->_format_value($format, $value, $formatIndex);
  344. }
  345. else {
  346. $_type = 'STRING';
  347. $_value = $this->_format_value("%s", $value, $formatIndex);
  348. }
  349. return array(
  350. 'value' => $_value,
  351. 'type' => $_type
  352. );
  353. }
  354. private function _format_value($format, $value, $formatIndex){
  355. // 49==TEXT format
  356. if ( ( ! $formatIndex && $format == "%s") || ($formatIndex == 49) || ($format == "GENERAL") ) {
  357. return $value;
  358. }
  359. // Custom pattern can be POSITIVE;NEGATIVE;ZERO
  360. // The "text" option as 4th parameter is not handled
  361. $parts = explode(";", $format);
  362. $pattern = $parts[0];
  363. if (count($parts) > 2 && $value == 0) { // Negative pattern
  364. $pattern = $parts[2];
  365. }
  366. else if (count($parts) > 1 && $value < 0) { // Zero pattern
  367. $pattern = $parts[1];
  368. $value = abs($value);
  369. }
  370. // In Excel formats, "_" is used to add spacing, which we can't do in HTML
  371. $pattern = preg_replace("/_./", "", $pattern);
  372. // Some non-number characters are escaped with \, which we don't need
  373. $pattern = preg_replace("/\\\/", "", $pattern);
  374. // Some non-number strings are quoted, so we'll get rid of the quotes
  375. $pattern = preg_replace("/\"/", "", $pattern);
  376. // TEMPORARY - Convert # to 0
  377. $pattern = preg_replace("/\#/", "0", $pattern);
  378. // Find out if we need comma formatting
  379. $has_commas = preg_match("/,/", $pattern);
  380. if ($has_commas) {
  381. $pattern = preg_replace("/,/", "", $pattern);
  382. }
  383. // Handle Percentages
  384. if (preg_match("/\d(\%)([^\%]|$)/", $pattern, $matches)) {
  385. $value = $value * 100;
  386. $pattern = preg_replace("/(\d)(\%)([^\%]|$)/", "$1%$3", $pattern);
  387. }
  388. // Handle the number itself
  389. $number_regex = "/(\d+)(\.?)(\d*)/";
  390. if (preg_match($number_regex,$pattern,$matches)) {
  391. $left = $matches[1];
  392. $dec = $matches[2];
  393. $right = $matches[3];
  394. if ($has_commas) {
  395. $formatted = number_format($value,strlen($right));
  396. }
  397. else {
  398. $sprintf_pattern = "%1.".strlen($right)."f";
  399. $formatted = sprintf($sprintf_pattern, $value);
  400. }
  401. $pattern = preg_replace($number_regex, $formatted, $pattern);
  402. }
  403. return $pattern;
  404. }
  405. /**
  406. * Read BOF
  407. */
  408. private function _readBof() {
  409. $length = self::_GetInt2d($this->_data, $this->_pos + 2);
  410. $recordData = substr($this->_data, $this->_pos + 4, $length);
  411. $this->_pos += 4 + $length; // move stream pointer to next record
  412. $substreamType = self::_GetInt2d($recordData, 2); // offset: 2; size: 2; type of the following data
  413. switch ($substreamType) {
  414. case self::XLS_WorkbookGlobals:
  415. $version = self::_GetInt2d($recordData, 0);
  416. if (($version != self::XLS_BIFF8) && ($version != self::XLS_BIFF7)) {
  417. die('Cannot read this Excel file. Version is too old.');
  418. }
  419. $this->_version = $version;
  420. break;
  421. case self::XLS_Worksheet:
  422. // do not use this version information for anything
  423. // it is unreliable (OpenOffice doc, 5.8), use only version information from the global stream
  424. break;
  425. default:
  426. // substream, e.g. chart. just skip the entire substream
  427. do {
  428. $code = self::_GetInt2d($this->_data, $this->_pos);
  429. $this->_readDefault();
  430. } while ($code != self::XLS_Type_EOF && $this->_pos < $this->_dataSize);
  431. break;
  432. }
  433. }
  434. /**
  435. * Read Sheet
  436. */
  437. private function _readSheet() {
  438. $length = self::_GetInt2d($this->_data, $this->_pos + 2);
  439. $recordData = substr($this->_data, $this->_pos + 4, $length);
  440. $rec_offset = self::_GetInt4d($this->_data, $this->_pos + 4); // offset: 0; size: 4; absolute stream position of the BOF record of the sheet
  441. $this->_pos += 4 + $length; // move stream pointer to next record
  442. // offset: 6; size: var; sheet name
  443. if ($this->_version == self::XLS_BIFF8) {
  444. $string = self::_readUnicodeStringShort(substr($recordData, 6));
  445. $rec_name = $string['value'];
  446. } elseif ($this->_version == self::XLS_BIFF7) {
  447. $string = self::_readByteStringShort(substr($recordData, 6));
  448. $rec_name = $string['value'];
  449. }
  450. $this->_sheets[] = array(
  451. 'name' => $rec_name,
  452. 'offset' => $rec_offset
  453. );
  454. }
  455. /**
  456. * Reads a general type of BIFF record. Does nothing except for moving stream pointer forward to next record.
  457. */
  458. private function _readDefault() {
  459. $length = self::_GetInt2d($this->_data, $this->_pos + 2);
  460. $this->_pos += 4 + $length; // move stream pointer to next record
  461. }
  462. /**
  463. * CODEPAGE
  464. *
  465. * This record stores the text encoding used to write byte
  466. * strings, stored as MS Windows code page identifier.
  467. */
  468. private function _readCodepage() {
  469. $length = self::_GetInt2d($this->_data, $this->_pos + 2);
  470. $recordData = substr($this->_data, $this->_pos + 4, $length);
  471. $this->_pos += 4 + $length;
  472. $codepage = self::_GetInt2d($recordData, 0);
  473. $this->_codepage = self::NumberToName($codepage);
  474. }
  475. /**
  476. * DATEMODE
  477. *
  478. * This record specifies the base date for displaying date values. All dates are stored as count of days past this base date.
  479. * In BIFF2-BIFF4 this record is part of the Calculation Settings Block. In BIFF5-BIFF8 it is stored in the Workbook Globals Substream.
  480. */
  481. private function _readDateMode() {
  482. $length = self::_GetInt2d($this->_data, $this->_pos + 2);
  483. $recordData = substr($this->_data, $this->_pos + 4, $length);
  484. $this->_pos += 4 + $length;
  485. if (ord($recordData{0}) == 1) {
  486. $this->_excelBaseDate = 1904;
  487. }
  488. else{
  489. $this->_excelBaseDate = 1900;
  490. }
  491. }
  492. /**
  493. * data format
  494. */
  495. private function _readFormat(){
  496. $length = self::_GetInt2d($this->_data, $this->_pos + 2);
  497. $indexCode = self::_GetInt2d($this->_data, $this->_pos + 4);
  498. if ($this->_version == self::XLS_BIFF8) {
  499. $numchars = self::_GetInt2d($this->_data, $this->_pos + 6);
  500. if (ord($this->_data[$this->_pos + 8]) == 0){
  501. $formatString = substr($this->_data, $this->_pos + 9, $numchars);
  502. } else {
  503. $formatString = substr($this->_data, $this->_pos + 9, $numchars*2);
  504. }
  505. }
  506. else {
  507. $numchars = ord($this->_data[$this->_pos + 6]);
  508. $formatString = substr($this->_data, $this->_pos + 7, $numchars*2);
  509. }
  510. $this->formatRecords[$indexCode] = $formatString;
  511. $this->_pos += 4 + $length;
  512. }
  513. /**
  514. * XF - Extended Format
  515. *
  516. * This record contains formatting information for cells, rows, columns or styles.
  517. * According to http://support.microsoft.com/kb/147732 there are always at least 15 cell style XF and 1 cell XF.
  518. * Inspection of Excel files generated by MS Office Excel shows that XF records 0-14 are cell style XF and XF record 15 is a cell XF
  519. * We only read the first cell style XF and skip the remaining cell style XF records
  520. */
  521. private function _readXf() {
  522. $length = self::_GetInt2d($this->_data, $this->_pos + 2);
  523. $indexCode = self::_GetInt2d($this->_data, $this->_pos + 6);
  524. $this->_pos += 4 + $length;
  525. $xf = array('formatIndex' => $indexCode);
  526. if (isset($this->dateFormats[$indexCode])) {
  527. $xf['type'] = 'date';
  528. $xf['format'] = $this->dateFormats[$indexCode];
  529. }
  530. elseif (isset($this->numberFormats[$indexCode])) {
  531. $xf['type'] = 'number';
  532. $xf['format'] = $this->numberFormats[$indexCode];
  533. }
  534. else {
  535. if ($indexCode > 0){
  536. if (isset($this->formatRecords[$indexCode])) {
  537. $formatStr = $this->formatRecords[$indexCode];
  538. }
  539. if ($formatStr) {
  540. $tmp = preg_replace("/\;.*/", "", $formatStr);
  541. $tmp = preg_replace("/^\[[^\]]*\]/", "", $tmp);
  542. if (preg_match("/[^hmsday\/\-:\s\\\,AMP]/i", $tmp) == 0) { // found day and time format
  543. $isDate = TRUE;
  544. $formatStr = $tmp;
  545. $formatStr = str_replace(array('AM/PM','mmmm','mmm'), array('a','F','M'), $formatStr);
  546. // m/mm are used for both minutes and months - oh SNAP!
  547. // This mess tries to fix for that.
  548. // 'm' == minutes only if following h/hh or preceding s/ss
  549. $formatstr = preg_replace("/(h:?)mm?/","$1i", $formatStr);
  550. $formatstr = preg_replace("/mm?(:?s)/","i$1", $formatStr);
  551. // A single 'm' = n in PHP
  552. $formatStr = preg_replace("/(^|[^m])m([^m]|$)/", '$1n$2', $formatStr);
  553. $formatStr = preg_replace("/(^|[^m])m([^m]|$)/", '$1n$2', $formatStr);
  554. // else it's months
  555. $formatStr = str_replace('mm', 'm', $formatStr);
  556. // Convert single 'd' to 'j'
  557. $formatStr = preg_replace("/(^|[^d])d([^d]|$)/", '$1j$2', $formatStr);
  558. $formatStr = str_replace(array('dddd','ddd','dd','yyyy','yy','hh','h'), array('l','D','d','Y','y','H','g'), $formatStr);
  559. $formatStr = preg_replace("/ss?/", 's', $formatStr);
  560. }
  561. }
  562. }
  563. if ($isDate){
  564. $xf['type'] = 'date';
  565. $xf['format'] = $formatStr;
  566. }else{
  567. if (preg_match("/[0#]/", $formatStr)) {
  568. $xf['type'] = 'number';
  569. $xf['format'] = $formatStr;
  570. }
  571. else {
  572. $xf['type'] = 'other';
  573. $xf['format'] = 0;
  574. }
  575. }
  576. }
  577. $this->xfRecords[] = $xf;
  578. }
  579. /**
  580. * SST - Shared String Table
  581. *
  582. * This record contains a list of all strings used anywherein the workbook. Each string occurs only once. The
  583. * workbook uses indexes into the list to reference the strings.
  584. **/
  585. private function _readSst() {
  586. $pos = 0; // offset within (spliced) record data
  587. $splicedRecordData = $this->_getSplicedRecordData(); // get spliced record data
  588. $recordData = $splicedRecordData['recordData'];
  589. $spliceOffsets = $splicedRecordData['spliceOffsets'];
  590. $pos += 4; // offset: 0; size: 4; total number of strings in the workbook
  591. $nm = self::_GetInt4d($recordData, 4); // offset: 4; size: 4; number of following strings ($nm)
  592. $pos += 4;
  593. for ($i = 0; $i < $nm; ++$i) { // loop through the Unicode strings (16-bit length)
  594. $numChars = self::_GetInt2d($recordData, $pos); // number of characters in the Unicode string
  595. $pos += 2;
  596. $optionFlags = ord($recordData{$pos}); // option flags
  597. ++$pos;
  598. $isCompressed = (($optionFlags & 0x01) == 0) ; // bit: 0; mask: 0x01; 0 = compressed; 1 = uncompressed
  599. $hasAsian = (($optionFlags & 0x04) != 0); // bit: 2; mask: 0x02; 0 = ordinary; 1 = Asian phonetic
  600. $hasRichText = (($optionFlags & 0x08) != 0); // bit: 3; mask: 0x03; 0 = ordinary; 1 = Rich-Text
  601. if ($hasRichText) {
  602. $formattingRuns = self::_GetInt2d($recordData, $pos); // number of Rich-Text formatting runs
  603. $pos += 2;
  604. }
  605. if ($hasAsian) {
  606. $extendedRunLength = self::_GetInt4d($recordData, $pos); // size of Asian phonetic setting
  607. $pos += 4;
  608. }
  609. $len = ($isCompressed) ? $numChars : $numChars * 2; // expected byte length of character array if not split
  610. foreach ($spliceOffsets as $spliceOffset) { // look up limit position
  611. if ($pos <= $spliceOffset) { // it can happen that the string is empty, therefore we need. <= and not just <
  612. $limitpos = $spliceOffset;
  613. break;
  614. }
  615. }
  616. if ($pos + $len <= $limitpos) {
  617. $retstr = substr($recordData, $pos, $len); // character array is not split between records
  618. $pos += $len;
  619. } else {
  620. $retstr = substr($recordData, $pos, $limitpos - $pos); // character array is split between records. first part of character array
  621. $bytesRead = $limitpos - $pos;
  622. $charsLeft = $numChars - (($isCompressed) ? $bytesRead : ($bytesRead / 2)); // remaining characters in Unicode string
  623. $pos = $limitpos;
  624. // keep reading the characters
  625. while ($charsLeft > 0) {
  626. // look up next limit position, in case the string span more than one continue record
  627. foreach ($spliceOffsets as $spliceOffset) {
  628. if ($pos < $spliceOffset) {
  629. $limitpos = $spliceOffset;
  630. break;
  631. }
  632. }
  633. // repeated option flags. OpenOffice.org documentation 5.21
  634. $option = ord($recordData{$pos});
  635. ++$pos;
  636. if ($isCompressed && ($option == 0)) {
  637. // 1st fragment compressed. this fragment compressed
  638. $len = min($charsLeft, $limitpos - $pos);
  639. $retstr .= substr($recordData, $pos, $len);
  640. $charsLeft -= $len;
  641. $isCompressed = true;
  642. } elseif (!$isCompressed && ($option != 0)) {
  643. // 1st fragment uncompressed. this fragment uncompressed
  644. $len = min($charsLeft * 2, $limitpos - $pos);
  645. $retstr .= substr($recordData, $pos, $len);
  646. $charsLeft -= $len / 2;
  647. $isCompressed = false;
  648. } elseif (!$isCompressed && ($option == 0)) {
  649. // 1st fragment uncompressed. this fragment compressed
  650. $len = min($charsLeft, $limitpos - $pos);
  651. for ($j = 0; $j < $len; ++$j) {
  652. $retstr .= $recordData{$pos + $j} . chr(0);
  653. }
  654. $charsLeft -= $len;
  655. $isCompressed = false;
  656. } else {
  657. // 1st fragment compressed. this fragment uncompressed
  658. $newstr = '';
  659. for ($j = 0; $j < strlen($retstr); ++$j) {
  660. $newstr .= $retstr[$j] . chr(0);
  661. }
  662. $retstr = $newstr;
  663. $len = min($charsLeft * 2, $limitpos - $pos);
  664. $retstr .= substr($recordData, $pos, $len);
  665. $charsLeft -= $len / 2;
  666. $isCompressed = false;
  667. }
  668. $pos += $len;
  669. }
  670. }
  671. $retstr = self::_encodeUTF16($retstr, $isCompressed); // convert to UTF-8
  672. $fmtRuns = array(); // read additional Rich-Text information, if any
  673. if ($hasRichText) {
  674. // list of formatting runs
  675. for ($j = 0; $j < $formattingRuns; ++$j) {
  676. $charPos = self::_GetInt2d($recordData, $pos + $j * 4); // first formatted character; zero-based
  677. $fontIndex = self::_GetInt2d($recordData, $pos + 2 + $j * 4); // index to font record
  678. $fmtRuns[] = array(
  679. 'charPos' => $charPos,
  680. 'fontIndex' => $fontIndex
  681. );
  682. }
  683. $pos += 4 * $formattingRuns;
  684. }
  685. // read additional Asian phonetics information, if any
  686. if ($hasAsian) {
  687. $pos += $extendedRunLength; // For Asian phonetic settings, we skip the extended string data
  688. }
  689. // store the shared sting
  690. $this->_sst[] = array(
  691. 'value' => $retstr,
  692. 'fmtRuns' => $fmtRuns
  693. );
  694. }
  695. }
  696. /**
  697. * Read RK record
  698. * This record represents a cell that contains an RK value (encoded integer or floating-point value). If a floating-point value
  699. * cannot be encoded to an RK value, a NUMBER record will be written. This record replaces the record INTEGER written in BIFF2.
  700. */
  701. private function _readRk() {
  702. $length = self::_GetInt2d($this->_data, $this->_pos + 2);
  703. $recordData = substr($this->_data, $this->_pos + 4, $length);
  704. $this->_pos += 4 + $length;
  705. $row = self::_GetInt2d($recordData, 0);
  706. $column = self::_GetInt2d($recordData, 2);
  707. $rknum = self::_GetInt4d($recordData, 6);
  708. $numValue = self::_GetIEEE754($rknum);
  709. // add cell
  710. $this->_addCell($row, $column, $numValue, 'NUMERIC');
  711. }
  712. /**
  713. * Read LABELSST record This record represents a cell that contains a string. It
  714. * replaces the LABEL record and RSTRING record used in BIFF2-BIFF5.
  715. */
  716. private function _readLabelSst() {
  717. $length = self::_GetInt2d($this->_data, $this->_pos + 2);
  718. $recordData = substr($this->_data, $this->_pos + 4, $length);
  719. $this->_pos += 4 + $length;
  720. $row = self::_GetInt2d($recordData, 0);
  721. $column = self::_GetInt2d($recordData, 2);
  722. // offset: 6; size: 4; index to SST record
  723. $index = self::_GetInt4d($recordData, 6);
  724. $this->_addCell($row, $column, $this->_sst[$index]['value'], 'STRING');
  725. }
  726. /**
  727. * Read MULRK record
  728. * This record represents a cell range containing RK value cells. All cells are located in the same row.
  729. */
  730. private function _readMulRk() {
  731. $length = self::_GetInt2d($this->_data, $this->_pos + 2);
  732. $recordData = substr($this->_data, $this->_pos + 4, $length);
  733. $this->_pos += 4 + $length;
  734. $row = self::_GetInt2d($recordData, 0);
  735. $colFirst = self::_GetInt2d($recordData, 2);
  736. $colLast = self::_GetInt2d($recordData, $length - 2);
  737. $columns = $colLast - $colFirst + 1;
  738. // offset within record data
  739. $offset = 4;
  740. for ($i = 0; $i < $columns; ++$i) {
  741. $numValue = self::_GetIEEE754(self::_GetInt4d($recordData, $offset + 2));
  742. $info = $this->_getFormatDetail($recordData, $offset - 4, $numValue, $colFirst + $i + 1);
  743. $this->_addCell($row, $colFirst + $i, $info['value'], $info['type']);
  744. $offset += 6;
  745. }
  746. }
  747. /**
  748. * Read NUMBER record
  749. * This record represents a cell that contains a floating-point value.
  750. */
  751. private function _readNumber() {
  752. $length = self::_GetInt2d($this->_data, $this->_pos + 2);
  753. $recordData = substr($this->_data, $this->_pos + 4, $length);
  754. $this->_pos += 4 + $length;
  755. $row = self::_GetInt2d($recordData, 0);
  756. $column = self::_GetInt2d($recordData, 2);
  757. $numValue = self::_extractNumber(substr($recordData, 6, 8));
  758. $this->_addCell($row, $column, $numValue, 'NUMERIC');
  759. }
  760. /**
  761. * Read FORMULA record + perhaps a following STRING record if formula result is a string
  762. * This record contains the token array and the result of a formula cell.
  763. */
  764. private function _readFormula() {
  765. $length = self::_GetInt2d($this->_data, $this->_pos + 2);
  766. $recordData = substr($this->_data, $this->_pos + 4, $length);
  767. $this->_pos += 4 + $length;
  768. $row = self::_GetInt2d($recordData, 0);
  769. $column = self::_GetInt2d($recordData, 2);
  770. if ((ord($recordData{6}) == 0) && (ord($recordData{12}) == 255) && (ord($recordData{13}) == 255)) {
  771. $this->_preRow = $row;
  772. $this->_preColumn = $column;
  773. return false;
  774. }
  775. elseif ((ord($recordData{6}) == 1) && (ord($recordData{12}) == 255) && (ord($recordData{13}) == 255)) {
  776. // Boolean formula. Result is in +2; 0=false, 1=true
  777. $dataType = 'BOOL';
  778. $value = (bool) ord($recordData{8});
  779. }
  780. elseif ((ord($recordData{6}) == 2) && (ord($recordData{12}) == 255) && (ord($recordData{13}) == 255)) {
  781. // Error formula. Error code is in +2
  782. $dataType = 'ERROR';
  783. $value = self::_mapErrorCode(ord($recordData{8}));
  784. }
  785. elseif ((ord($recordData{6}) == 3) && (ord($recordData{12}) == 255) && (ord($recordData{13}) == 255)) {
  786. // Formula result is a null string
  787. $dataType = 'NULL';
  788. $value = '';
  789. }
  790. else {
  791. // forumla result is a number, first 14 bytes like _NUMBER record
  792. $dataType = 'NUMERIC';
  793. $value = self::_extractNumber(substr($recordData, 6, 8));
  794. }
  795. $this->_addCell($row, $column, $value, $dataType);
  796. }
  797. /**
  798. * Read a STRING record from current stream position and advance the stream pointer to next record
  799. * This record is used for storing result from FORMULA record when it is a string, and it occurs directly after the FORMULA record
  800. *
  801. * @return string The string contents as UTF-8
  802. */
  803. private function _readString() {
  804. $length = self::_GetInt2d($this->_data, $this->_pos + 2);
  805. $recordData = substr($this->_data, $this->_pos + 4, $length);
  806. $this->_pos += 4 + $length;
  807. if ($this->_version == self::XLS_BIFF8) {
  808. $string = self::_readUnicodeStringLong($recordData);
  809. $value = $string['value'];
  810. } else {
  811. $string = $this->_readByteStringLong($recordData);
  812. $value = $string['value'];
  813. }
  814. $this->_addCell($this->_preRow, $this->_preColumn, $value, 'STRING');
  815. }
  816. /**
  817. * Read BOOLERR record
  818. * This record represents a Boolean value or error value cell.
  819. */
  820. private function _readBoolErr() {
  821. $length = self::_GetInt2d($this->_data, $this->_pos + 2);
  822. $recordData = substr($this->_data, $this->_pos + 4, $length);
  823. $this->_pos += 4 + $length;
  824. $row = self::_GetInt2d($recordData, 0);
  825. $column = self::_GetInt2d($recordData, 2);
  826. // offset: 6; size: 1; the boolean value or error value
  827. $boolErr = ord($recordData{6});
  828. // offset: 7; size: 1; 0=boolean; 1=error
  829. $isError = ord($recordData{7});
  830. switch ($isError) {
  831. case 0: // boolean
  832. $value = (bool) $boolErr;
  833. // add cell value
  834. $this->_addCell($row, $column, $value, 'BOOL');
  835. break;
  836. case 1: // error type
  837. $value = self::_mapErrorCode($boolErr);
  838. // add cell value
  839. $this->_addCell($row, $column, $value, 'ERROR');
  840. break;
  841. }
  842. }
  843. /**
  844. * Read LABEL record
  845. * This record represents a cell that contains a string. In BIFF8 it is usually replaced by the LABELSST record.
  846. * Excel still uses this record, if it copies unformatted text cells to the clipboard.
  847. */
  848. private function _readLabel() {
  849. $length = self::_GetInt2d($this->_data, $this->_pos + 2);
  850. $recordData = substr($this->_data, $this->_pos + 4, $length);
  851. $this->_pos += 4 + $length;
  852. $row = self::_GetInt2d($recordData, 0);
  853. $column = self::_GetInt2d($recordData, 2);
  854. if ($this->_version == self::XLS_BIFF8) {
  855. $string = self::_readUnicodeStringLong(substr($recordData, 6));
  856. $value = $string['value'];
  857. } else {
  858. $string = $this->_readByteStringLong(substr($recordData, 6));
  859. $value = $string['value'];
  860. }
  861. $this->_addCell($row, $column, $value, 'STRING');
  862. }
  863. /**
  864. * Read BLANK record
  865. */
  866. private function _readBlank() {
  867. $length = self::_GetInt2d($this->_data, $this->_pos + 2);
  868. $recordData = substr($this->_data, $this->_pos + 4, $length);
  869. $this->_pos += 4 + $length;
  870. $row = self::_GetInt2d($recordData, 0);
  871. $column = self::_GetInt2d($recordData, 2);
  872. $this->_addCell($row, $column, '', 'NULL');
  873. }
  874. /**
  875. * Reads a record from current position in data stream and continues reading data as long as CONTINUE
  876. * records are found. Splices the record data pieces and returns the combined string as if record data is in one piece.
  877. * Moves to next current position in data stream to start of next record different from a CONtINUE record
  878. *
  879. * @return array
  880. */
  881. private function _getSplicedRecordData() {
  882. $data = '';
  883. $spliceOffsets = array();
  884. $i = 0;
  885. $spliceOffsets[0] = 0;
  886. do {
  887. ++$i;
  888. $identifier = self::_GetInt2d($this->_data, $this->_pos); // offset: 0; size: 2; identifier
  889. $length = self::_GetInt2d($this->_data, $this->_pos + 2); // offset: 2; size: 2; length
  890. $data .= substr($this->_data, $this->_pos + 4, $length);
  891. $spliceOffsets[$i] = $spliceOffsets[$i - 1] + $length;
  892. $this->_pos += 4 + $length;
  893. $nextIdentifier = self::_GetInt2d($this->_data, $this->_pos);
  894. } while ($nextIdentifier == self::XLS_Type_CONTINUE);
  895. $splicedData = array(
  896. 'recordData' => $data,
  897. 'spliceOffsets' => $spliceOffsets,
  898. );
  899. return $splicedData;
  900. }
  901. /**
  902. * Read byte string (16-bit string length)
  903. * OpenOffice documentation: 2.5.2
  904. *
  905. * @param string $subData
  906. * @return array
  907. */
  908. private function _readByteStringLong($subData) {
  909. // offset: 0; size: 2; length of the string (character count)
  910. $ln = self::_GetInt2d($subData, 0);
  911. // offset: 2: size: var; character array (8-bit characters)
  912. $value = $this->_decodeCodepage(substr($subData, 2));
  913. //return $string;
  914. return array(
  915. 'value' => $value,
  916. 'size' => 2 + $ln, // size in bytes of data structure
  917. );
  918. }
  919. /**
  920. * Map error code, e.g. '#N/A'
  921. *
  922. * @param int $subData
  923. * @return string
  924. */
  925. private static function _mapErrorCode($subData) {
  926. switch ($subData) {
  927. case 0x00: return '#NULL!'; break;
  928. case 0x07: return '#DIV/0!'; break;
  929. case 0x0F: return '#VALUE!'; break;
  930. case 0x17: return '#REF!'; break;
  931. case 0x1D: return '#NAME?'; break;
  932. case 0x24: return '#NUM!'; break;
  933. case 0x2A: return '#N/A'; break;
  934. default: return false;
  935. }
  936. }
  937. /**
  938. * Convert Microsoft Code Page Identifier to Code Page Name which iconv
  939. * and mbstring understands
  940. *
  941. * @param integer $codePage Microsoft Code Page Indentifier
  942. * @return string Code Page Name
  943. */
  944. private static function NumberToName($codePage = 1252) {
  945. switch ($codePage) {
  946. case 367: return 'ASCII'; break; // ASCII
  947. case 437: return 'CP437'; break; // OEM US
  948. //case 720: throw new PHPExcel_Exception('Code page 720 not supported.'); break; // OEM Arabic
  949. case 737: return 'CP737'; break; // OEM Greek
  950. case 775: return 'CP775'; break; // OEM Baltic
  951. case 850: return 'CP850'; break; // OEM Latin I
  952. case 852: return 'CP852'; break; // OEM Latin II (Central European)
  953. case 855: return 'CP855'; break; // OEM Cyrillic
  954. case 857: return 'CP857'; break; // OEM Turkish
  955. case 858: return 'CP858'; break; // OEM Multilingual Latin I with Euro
  956. case 860: return 'CP860'; break; // OEM Portugese
  957. case 861: return 'CP861'; break; // OEM Icelandic
  958. case 862: return 'CP862'; break; // OEM Hebrew
  959. case 863: return 'CP863'; break; // OEM Canadian (French)
  960. case 864: return 'CP864'; break; // OEM Arabic
  961. case 865: return 'CP865'; break; // OEM Nordic
  962. case 866: return 'CP866'; break; // OEM Cyrillic (Russian)
  963. case 869: return 'CP869'; break; // OEM Greek (Modern)
  964. case 874: return 'CP874'; break; // ANSI Thai
  965. case 932: return 'CP932'; break; // ANSI Japanese Shift-JIS
  966. case 936: return 'CP936'; break; // ANSI Chinese Simplified GBK
  967. case 949: return 'CP949'; break; // ANSI Korean (Wansung)
  968. case 950: return 'CP950'; break; // ANSI Chinese Traditional BIG5
  969. case 1200: return 'UTF-16LE'; break; // UTF-16 (BIFF8)
  970. case 1250: return 'CP1250'; break; // ANSI Latin II (Central European)
  971. case 1251: return 'CP1251'; break; // ANSI Cyrillic
  972. case 0: // CodePage is not always correctly set when the xls file was saved by Apple's Numbers program
  973. case 1252: return 'CP1252'; break; // ANSI Latin I (BIFF4-BIFF7)
  974. case 1253: return 'CP1253'; break; // ANSI Greek
  975. case 1254: return 'CP1254'; break; // ANSI Turkish
  976. case 1255: return 'CP1255'; break; // ANSI Hebrew
  977. case 1256: return 'CP1256'; break; // ANSI Arabic
  978. case 1257: return 'CP1257'; break; // ANSI Baltic
  979. case 1258: return 'CP1258'; break; // ANSI Vietnamese
  980. case 1361: return 'CP1361'; break; // ANSI Korean (Johab)
  981. case 10000: return 'MAC'; break; // Apple Roman
  982. case 10006: return 'MACGREEK'; break; // Macintosh Greek
  983. case 10007: return 'MACCYRILLIC'; break; // Macintosh Cyrillic
  984. case 10008: return 'CP936'; break; // Macintosh - Simplified Chinese (GB 2312)
  985. case 10029: return 'MACCENTRALEUROPE'; break; // Macintosh Central Europe
  986. case 10079: return 'MACICELAND'; break; // Macintosh Icelandic
  987. case 10081: return 'MACTURKISH'; break; // Macintosh Turkish
  988. case 32768: return 'MAC'; break; // Apple Roman
  989. //case 32769: throw new PHPExcel_Exception('Code page 32769 not supported.'); break; // ANSI Latin I (BIFF2-BIFF3)
  990. case 65000: return 'UTF-7'; break; // Unicode (UTF-7)
  991. case 65001: return 'UTF-8'; break; // Unicode (UTF-8)
  992. default: return 'UTF-8'; break;
  993. }
  994. }
  995. /**
  996. * String from columnindex
  997. *
  998. * @param int $pColumnIndex
  999. * @return string
  1000. */
  1001. private static function _stringFromColumnIndex($pColumnIndex = 0) {
  1002. static $_indexCache = array();
  1003. if ( ! isset($_indexCache[$pColumnIndex])) {
  1004. if ($pColumnIndex < 26) {
  1005. $_indexCache[$pColumnIndex] = chr(65 + $pColumnIndex);
  1006. } elseif ($pColumnIndex < 702) {
  1007. $_indexCache[$pColumnIndex] = chr(64 + ($pColumnIndex / 26)) . chr(65 + $pColumnIndex % 26);
  1008. } else {
  1009. $_indexCache[$pColumnIndex] = chr(64 + (($pColumnIndex - 26) / 676)) . chr(65 + ((($pColumnIndex - 26) % 676) / 26)) . chr(65 + $pColumnIndex % 26);
  1010. }
  1011. }
  1012. return $_indexCache[$pColumnIndex];
  1013. }
  1014. /**
  1015. * Extracts an Excel Unicode short string (8-bit string length)
  1016. * OpenOffice documentation: 2.5.3
  1017. * function will automatically find out where the Unicode string ends.
  1018. *
  1019. * @param string $subData
  1020. * @return array
  1021. */
  1022. private static function _readUnicodeStringShort($subData) {
  1023. $characterCount = ord($subData[0]); // offset: 0: size: 1; length of the string (character count)
  1024. $string = self::_readUnicodeString(substr($subData, 1), $characterCount);
  1025. $string['size'] += 1; // add 1 for the string length
  1026. return $string;
  1027. }
  1028. /**
  1029. * Read byte string (8-bit string length)
  1030. * OpenOffice documentation: 2.5.2
  1031. *
  1032. * @param string $subData
  1033. * @return array
  1034. */
  1035. private static function _readByteStringShort($subData) {
  1036. $ln = ord($subData[0]); // offset: 0; size: 1; length of the string (character count)
  1037. $value = self::_decodeCodepage(substr($subData, 1, $ln)); // offset: 1: size: var; character array (8-bit characters)
  1038. return array(
  1039. 'value' => $value,
  1040. 'size' => 1 + $ln, // size in bytes of data structure
  1041. );
  1042. }
  1043. /**
  1044. * Extracts an Excel Unicode long string (16-bit string length)
  1045. * OpenOffice documentation: 2.5.3. this function is under construction, needs to support rich text, and Asian phonetic settings
  1046. *
  1047. * @param string $subData
  1048. * @return array
  1049. */
  1050. private static function _readUnicodeStringLong($subData) {
  1051. $value = '';
  1052. // offset: 0: size: 2; length of the string (character count)
  1053. $characterCount = self::_GetInt2d($subData, 0);
  1054. $string = self::_readUnicodeString(substr($subData, 2), $characterCount);
  1055. // add 2 for the string length
  1056. $string['size'] += 2;
  1057. return $string;
  1058. }
  1059. /**
  1060. * Read Unicode string with no string length field, but with known character count
  1061. * this function is under construction, needs to support rich text, and Asian phonetic settings
  1062. * OpenOffice.org's Documentation of the Microsoft Excel File Format, section 2.5.3
  1063. *
  1064. * @param string $subData
  1065. * @param int $characterCount
  1066. * @return array
  1067. */
  1068. private static function _readUnicodeString($subData, $characterCount) {
  1069. $isCompressed = !((0x01 & ord($subData[0])) >> 0); // bit: 0; mask: 0x01; character compression (0 = compressed 8-bit, 1 = uncompressed 16-bit)
  1070. $hasAsian = (0x04) & ord($subData[0]) >> 2; // bit: 2; mask: 0x04; Asian phonetic settings
  1071. $hasRichText = (0x08) & ord($subData[0]) >> 3; // bit: 3; mask: 0x08; Rich-Text settings
  1072. // offset: 1: size: var; character array
  1073. // this offset assumes richtext and Asian phonetic settings are off which is generally wrong
  1074. // needs to be fixed
  1075. $value = self::_encodeUTF16(substr($subData, 1, $isCompressed ? $characterCount : 2 * $characterCount), $isCompressed);
  1076. return array(
  1077. 'value' => $value,
  1078. 'size' => $isCompressed ? 1 + $characterCount : 1 + 2 * $characterCount, // the size in bytes including the option flags
  1079. );
  1080. }
  1081. /**
  1082. * Get UTF-8 string from (compressed or uncompressed) UTF-16 string
  1083. *
  1084. * @param string $string
  1085. * @param bool $compressed
  1086. * @return string
  1087. */
  1088. private static function _encodeUTF16($string, $compressed = '') {
  1089. if ($compressed) {
  1090. $string = self::_uncompressByteString($string);
  1091. }
  1092. return mb_convert_encoding($string, 'UTF-8', 'UTF-16LE');
  1093. }
  1094. /**
  1095. * Convert string to UTF-8. Only used for BIFF5.
  1096. *
  1097. * @param string $string
  1098. * @return string
  1099. */
  1100. private static function _decodeCodepage($string) {
  1101. return mb_convert_encoding($string, 'UTF-8', $this->_codepage);
  1102. }
  1103. /**
  1104. * Convert UTF-16 string in compressed notation to uncompressed form. Only used for BIFF8.
  1105. *
  1106. * @param string $string
  1107. * @return string
  1108. */
  1109. private static function _uncompressByteString($string) {
  1110. $uncompressedString = '';
  1111. $strLen = strlen($string);
  1112. for ($i = 0; $i < $strLen; ++$i) {
  1113. $uncompressedString .= $string[$i] . "\0";
  1114. }
  1115. return $uncompressedString;
  1116. }
  1117. /**
  1118. * Read 16-bit unsigned integer
  1119. *
  1120. * @param string $data
  1121. * @param int $pos
  1122. * @return int
  1123. */
  1124. private static function _GetInt2d($data, $pos) {
  1125. return ord($data[$pos]) | (ord($data[$pos+1]) << 8);
  1126. }
  1127. /**
  1128. * Read 32-bit signed integer
  1129. * FIX: represent numbers correctly on 64-bit system. Hacked by Andreas Rehm 2006 to ensure correct result of the <<24 block on 32 and 64bit systems
  1130. * http://sourceforge.net/tracker/index.php?func=detail&aid=1487372&group_id=99160&atid=623334
  1131. *
  1132. * @param string $data
  1133. * @param int $pos
  1134. * @return int
  1135. */
  1136. private static function _GetInt4d($data, $pos) {
  1137. $_or_24 = ord($data[$pos + 3]);
  1138. if ($_or_24 >= 128) {
  1139. $_ord_24 = -abs((256 - $_or_24) << 24); // negative number
  1140. } else {
  1141. $_ord_24 = ($_or_24 & 127) << 24;
  1142. }
  1143. return ord($data[$pos]) | (ord($data[$pos+1]) << 8) | (ord($data[$pos+2]) << 16) | $_ord_24;
  1144. }
  1145. /**
  1146. * Reads first 8 bytes of a string and return IEEE 754 float
  1147. *
  1148. * @param string $data Binary string that is at least 8 bytes long
  1149. * @return float
  1150. */
  1151. private static function _extractNumber($data) {
  1152. $rknumhigh = self::_GetInt4d($data, 4);
  1153. $rknumlow = self::_GetInt4d($data, 0);
  1154. $sign = ($rknumhigh & 0x80000000) >> 31;
  1155. $exp = (($rknumhigh & 0x7ff00000) >> 20) - 1023;
  1156. $mantissa = (0x100000 | ($rknumhigh & 0x000fffff));
  1157. $mantissalow1 = ($rknumlow & 0x80000000) >> 31;
  1158. $mantissalow2 = ($rknumlow & 0x7fffffff);
  1159. $value = $mantissa / pow( 2 , (20 - $exp));
  1160. if ($mantissalow1 != 0) {
  1161. $value += 1 / pow (2 , (21 - $exp));
  1162. }
  1163. $value += $mantissalow2 / pow (2 , (52 - $exp));
  1164. if ($sign) {
  1165. $value *= -1;
  1166. }
  1167. return $value;
  1168. }
  1169. private static function _GetIEEE754($rknum) {
  1170. if (($rknum & 0x02) != 0) {
  1171. $value = $rknum >> 2;
  1172. } else {
  1173. // changes by mmp, info on IEEE754 encoding from
  1174. // research.microsoft.com/~hollasch/cgindex/coding/ieeefloat.html
  1175. // The RK format calls for using only the most significant 30 bits
  1176. // of the 64 bit floating point value. The other 34 bits are assumed
  1177. // to be 0 so we use the upper 30 bits of $rknum as follows...
  1178. $sign = ($rknum & 0x80000000) >> 31;
  1179. $exp = ($rknum & 0x7ff00000) >> 20;
  1180. $mantissa = (0x100000 | ($rknum & 0x000ffffc));
  1181. $value = $mantissa / pow( 2 , (20- ($exp - 1023)));
  1182. if ($sign) {
  1183. $value = -1 * $value;
  1184. }
  1185. //end of changes by mmp
  1186. }
  1187. if (($rknum & 0x01) != 0) {
  1188. $value /= 100;
  1189. }
  1190. return $value;
  1191. }
  1192. /**
  1193. * load OLERead class
  1194. */
  1195. private static function _loadClass() {
  1196. if ( ! class_exists('PHPExcel_Shared_OLERead', false)) {
  1197. require 'OLERead.php';
  1198. }
  1199. }
  1200. }