'#NULL!',
0x07 => '#DIV/0!',
0x0F => '#VALUE!',
0x17 => '#REF!',
0x1D => '#NAME?',
0x24 => '#NUM!',
0x2A => '#N/A'
];
/**
* Base calendar year to use for calculations
*
* @var int
*/
private static $excelBaseDate = Format::CALENDAR_WINDOWS_1900;
/**
* Decimal separator
*
* @var string
*/
private static $decimalSeparator;
/**
* Thousands separator
*
* @var string
*/
private static $thousandsSeparator;
/**
* Currency code
*
* @var string
*/
private static $currencyCode;
/**
* Workbook stream data
*
* @var string
*/
private $data;
/**
* Size in bytes of $this->data
*
* @var int
*/
private $dataSize;
/**
* Current position in stream
*
* @var integer
*/
private $pos;
/**
* Worksheets
*
* @var array
*/
private $sheets;
/**
* BIFF version
*
* @var int
*/
private $version;
/**
* Codepage set in the Excel file being read. Only important for BIFF5 (Excel 5.0 - Excel 95)
* For BIFF8 (Excel 97 - Excel 2003) this will always have the value 'UTF-16LE'
*
* @var string
*/
private $codePage;
/**
* Row data
*
* @var array
*/
private $row;
/**
* Shared formats
*
* @var array
*/
private $formats;
/**
* The current sheet of the file
*
* @var int
*/
private $sheetIndex = 0;
/**
* Ignore empty row
*
* @var bool
*/
private $ignoreEmpty = false;
/**
* The current row index of the sheet
*
* @var int
*/
private $rowIndex = 0;
/**
* Max column number
*
* @var int
*/
private $columnLimit = 0;
/**
* Whether to the end of the row
*
* @var bool
*/
private $eor = false;
/**
* Extended format record
*
* @var array
*/
private $xfRecords = [];
/**
* Shared strings. Only applies to BIFF8.
*
* @var array
*/
private $sst = [];
/**
* The type of encryption in use
*
* @var int
*/
private $encryption = 0;
/**
* The position in the stream after which contents are encrypted
*
* @var int
*/
private $encryptionStartPos = false;
/**
* The current RC4 decryption object
*
* @var RC4
*/
private $rc4Key = null;
/**
* The position in the stream that the RC4 decryption object was left at
*
* @var int
*/
private $rc4Pos = 0;
/**
* The current MD5 context state
*
* @var string
*/
private $md5Ctxt = null;
/**
* Use OLE reader to extract the relevant data streams from the OLE file
*
* @param string $file
*/
public function loadOLE($file) {
$oleRead = new OLERead();
$oleRead->read($file);
$this->data = $oleRead->getStream($oleRead->workbook);
}
/**
* Ignore empty row
*
* @param bool $ignoreEmpty
*
* @return $this
*/
public function ignoreEmptyRow($ignoreEmpty) {
$this->ignoreEmpty = $ignoreEmpty;
return $this;
}
/**
* Whether is ignore empty row
*
* @return bool
*/
public function isIgnoreEmptyRow() {
return $this->ignoreEmpty;
}
/**
* Set sheet index
*
* @param int $index
*
* @return $this
*/
public function setSheetIndex($index) {
$this->sheetIndex = $index;
return $this;
}
/**
* Get sheet index
*
* @return int
*/
public function getSheetIndex() {
return $this->sheetIndex;
}
/**
* Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns)
*
* @throws ParserException
* @return array
*/
public function parseWorksheetInfo() {
if ($this->sheets === null) {
// total byte size of Excel data (workbook global substream + sheet substreams)
$this->dataSize = strlen($this->data);
$this->pos = 0;
$this->codePage = 'CP1252';
$this->sheets = [];
// Parse Workbook Global Substream
while ($this->pos < $this->dataSize) {
$code = Format::getUInt2d($this->data, $this->pos);
switch ($code) {
case self::XLS_TYPE_BOF:
$this->readBof();
break;
case self::XLS_TYPE_FILEPASS:
$this->readFilepass();
break;
case self::XLS_TYPE_CODEPAGE:
$this->readCodepage();
break;
case self::XLS_TYPE_DATEMODE:
$this->readDateMode();
break;
case self::XLS_TYPE_FORMAT:
$this->readFormat();
break;
case self::XLS_TYPE_XF:
$this->readXf();
break;
case self::XLS_TYPE_SST:
$this->readSst();
break;
case self::XLS_TYPE_SHEET:
$this->readSheet();
break;
case self::XLS_TYPE_EOF:
$this->readDefault();
break 2;
default:
$this->readDefault();
break;
}
}
// Parse the individual sheets
foreach ($this->sheets as $key => $sheet) {
if ($sheet['sheetType'] != 0x00) {
// 0x00: Worksheet
// 0x02: Chart
// 0x06: Visual Basic module
continue;
}
$sheet['lastColumnLetter'] = '';
$sheet['lastColumnIndex'] = null;
$sheet['totalRows'] = 0;
$sheet['totalColumns'] = 0;
$lastRowIndex = 0;
$this->pos = $sheet['offset'];
while ($this->pos <= $this->dataSize - 4) {
$code = Format::getUInt2d($this->data, $this->pos);
switch ($code) {
case self::XLS_TYPE_RK:
case self::XLS_TYPE_LABELSST:
case self::XLS_TYPE_NUMBER:
case self::XLS_TYPE_FORMULA:
case self::XLS_TYPE_BOOLERR:
case self::XLS_TYPE_LABEL:
$length = Format::getUInt2d($this->data, $this->pos + 2);
$recordData = substr($this->data, $this->pos + 4, $length);
// move stream pointer to next record
$this->pos += 4 + $length;
$rowIndex = Format::getUInt2d($recordData, 0) + 1;
$columnIndex = Format::getUInt2d($recordData, 2);
if ($this->ignoreEmpty) {
if ($lastRowIndex < $rowIndex) {
$sheet['totalRows']++;
}
$lastRowIndex = $rowIndex;
} else {
$sheet['totalRows'] = max($sheet['totalRows'], $rowIndex);
}
$sheet['lastColumnIndex'] = max($columnIndex, $sheet['lastColumnIndex']);
break;
case self::XLS_TYPE_BOF:
$this->readBof();
break;
case self::XLS_TYPE_EOF:
$this->readDefault();
break 2;
default:
$this->readDefault();
break;
}
}
if ($sheet['lastColumnIndex'] !== null) {
$sheet['lastColumnLetter'] = Format::stringFromColumnIndex($sheet['lastColumnIndex']);
} else {
$sheet['lastColumnIndex'] = 0;
}
if ($sheet['lastColumnLetter']) {
$sheet['totalColumns'] = $sheet['lastColumnIndex'] + 1;
}
$this->sheets[$key] = $sheet;
}
$this->pos = 0;
}
return $this->sheets;
}
/**
* Get row data
*
* @param int $rowIndex
* @param int $columnLimit
*
* @throws ParserException
* @return array|bool
*/
public function getRow($rowIndex, $columnLimit = 0) {
$this->parseWorksheetInfo();
// Rewind or change sheet
if ($rowIndex === 0 || $this->pos < $this->sheets[$this->sheetIndex]['offset']) {
$this->pos = $this->sheets[$this->sheetIndex]['offset'];
}
$endPos = $this->dataSize - 4;
if (isset($this->sheets[$this->sheetIndex + 1]['offset'])) {
$endPos = $this->sheets[$this->sheetIndex + 1]['offset'] - 4;
}
if ($this->pos >= $endPos) {
return false;
}
$this->rowIndex = $rowIndex;
$this->columnLimit = $columnLimit;
$this->eor = false;
$this->row = $columnLimit ? array_fill(0, $columnLimit, '') : [];
while ($this->pos <= $endPos) {
// Remember last position
$lastPos = $this->pos;
$code = Format::getUInt2d($this->data, $this->pos);
switch ($code) {
case self::XLS_TYPE_BOF:
$this->readBof();
break;
case self::XLS_TYPE_RK:
$this->readRk();
break;
case self::XLS_TYPE_LABELSST:
$this->readLabelSst();
break;
case self::XLS_TYPE_MULRK:
$this->readMulRk();
break;
case self::XLS_TYPE_NUMBER:
$this->readNumber();
break;
case self::XLS_TYPE_FORMULA:
$this->readFormula();
break;
case self::XLS_TYPE_BOOLERR:
$this->readBoolErr();
break;
case self::XLS_TYPE_MULBLANK:
case self::XLS_TYPE_BLANK:
$this->readBlank();
break;
case self::XLS_TYPE_LABEL:
$this->readLabel();
break;
case self::XLS_TYPE_EOF:
$this->readDefault();
break 2;
default:
$this->readDefault();
break;
}
//End of row
if ($this->eor) {
//Recover current position
$this->pos = $lastPos;
break;
}
}
return $this->row;
}
/**
* Add cell data
*
* @param int $row
* @param int $column
* @param mixed $value
* @param int $xfIndex
* @return bool
*/
private function addCell($row, $column, $value, $xfIndex) {
if ($this->rowIndex != $row) {
$this->eor = true;
return false;
}
if (!$this->columnLimit || $column < $this->columnLimit) {
$xfRecord = $this->xfRecords[$xfIndex];
$this->row[$column] = self::toFormattedString($value, $xfRecord['format']);
}
return true;
}
/**
* Read BOF
*
* @throws ParserException
*/
private function readBof() {
$length = Format::getUInt2d($this->data, $this->pos + 2);
$recordData = substr($this->data, $this->pos + 4, $length);
// move stream pointer to next record
$this->pos += 4 + $length;
// offset: 2; size: 2; type of the following data
$substreamType = Format::getUInt2d($recordData, 2);
switch ($substreamType) {
case self::XLS_WORKBOOKGLOBALS:
$version = Format::getUInt2d($recordData, 0);
if (($version != self::XLS_BIFF8) && ($version != self::XLS_BIFF7)) {
throw new ParserException('Cannot read this Excel file. Version is too old.', 1);
}
$this->version = $version;
break;
case self::XLS_WORKSHEET:
// do not use this version information for anything
// it is unreliable (OpenOffice doc, 5.8), use only version information from the global stream
break;
default:
// substream, e.g. chart
// just skip the entire substream
do {
$code = Format::getUInt2d($this->data, $this->pos);
$this->readDefault();
} while ($code != self::XLS_TYPE_EOF && $this->pos < $this->dataSize);
break;
}
}
/**
* SHEET
*
* This record is located in the Workbook Globals Substream and represents a sheet inside the workbook.
* One SHEET record is written for each sheet. It stores the sheet name and a stream offset to the BOF
* record of the respective Sheet Substream within the Workbook Stream.
*/
private function readSheet() {
$length = Format::getUInt2d($this->data, $this->pos + 2);
$recordData = substr($this->data, $this->pos + 4, $length);
// offset: 0; size: 4; absolute stream position of the BOF record of the sheet
// NOTE: not encrypted
$offset = Format::getInt4d($this->data, $this->pos + 4);
// move stream pointer to next record
$this->pos += 4 + $length;
// offset: 4; size: 1; sheet state
switch (ord($recordData{4})) {
case 0x00:
$sheetState = self::SHEETSTATE_VISIBLE;
break;
case 0x01:
$sheetState = self::SHEETSTATE_HIDDEN;
break;
case 0x02:
$sheetState = self::SHEETSTATE_VERYHIDDEN;
break;
default:
$sheetState = self::SHEETSTATE_VISIBLE;
break;
}
// offset: 5; size: 1; sheet type
$sheetType = ord($recordData{5});
// offset: 6; size: var; sheet name
$name = '';
if ($this->version == self::XLS_BIFF8) {
$string = self::readUnicodeStringShort(substr($recordData, 6));
$name = $string['value'];
} elseif ($this->version == self::XLS_BIFF7) {
$string = $this->readByteStringShort(substr($recordData, 6));
$name = $string['value'];
}
// ignore hidden sheet
if ($sheetState == self::SHEETSTATE_VISIBLE) {
$this->sheets[] = [
'name' => $name, 'offset' => $offset, 'sheetState' => $sheetState, 'sheetType' => $sheetType
];
}
}
/**
* Reads a general type of BIFF record.
* Does nothing except for moving stream pointer forward to next record.
*/
private function readDefault() {
$length = Format::getUInt2d($this->data, $this->pos + 2);
//$recordData = $this->readRecordData($this->data, $this->pos + 4, $length);
// move stream pointer to next record
$this->pos += 4 + $length;
}
/**
* FILEPASS
*
* This record is part of the File Protection Block. It contains information about the read/write password of
* the file. All record contents following this record will be encrypted.
* The decryption functions and objects used from here on in are based on the source of Spreadsheet-ParseExcel:
* http://search.cpan.org/~jmcnamara/Spreadsheet-ParseExcel/
*
* @throws ParserException
*/
private function readFilepass() {
$length = Format::getUInt2d($this->data, $this->pos + 2);
if ($length != 54) {
throw new ParserException('Unexpected file pass record length', 2);
}
$recordData = $this->readRecordData($this->data, $this->pos + 4, $length);
// move stream pointer to next record
$this->pos += 4 + $length;
if (!$this->verifyPassword('VelvetSweatshop', substr($recordData, 6, 16), substr($recordData, 22, 16),
substr($recordData, 38, 16), $this->md5Ctxt)) {
throw new ParserException('Decryption password incorrect', 3);
}
$this->encryption = self::MS_BIFF_CRYPTO_RC4;
// Decryption required from the record after next onwards
$this->encryptionStartPos = $this->pos + Format::getUInt2d($this->data, $this->pos + 2);
}
/**
* Read record data from stream, decrypting as required
*
* @param string $data Data stream to read from
* @param int $pos Position to start reading from
* @param int $len Record data length
*
* @throws ParserException
* @return string Record data
*/
private function readRecordData($data, $pos, $len) {
$data = substr($data, $pos, $len);
// File not encrypted, or record before encryption start point
if ($this->encryption == self::MS_BIFF_CRYPTO_NONE || $pos < $this->encryptionStartPos) {
return $data;
}
$recordData = '';
if ($this->encryption == self::MS_BIFF_CRYPTO_RC4) {
$oldBlock = floor($this->rc4Pos / self::REKEY_BLOCK);
$block = floor($pos / self::REKEY_BLOCK);
$endBlock = floor(($pos + $len) / self::REKEY_BLOCK);
// Spin an RC4 decryptor to the right spot. If we have a decryptor sitting
// at a point earlier in the current block, re-use it as we can save some time.
if ($block != $oldBlock || $pos < $this->rc4Pos || !$this->rc4Key) {
$this->rc4Key = $this->makeKey($block, $this->md5Ctxt);
$step = $pos % self::REKEY_BLOCK;
} else {
$step = $pos - $this->rc4Pos;
}
$this->rc4Key->RC4(str_repeat("\0", $step));
// Decrypt record data (re-keying at the end of every block)
while ($block != $endBlock) {
$step = self::REKEY_BLOCK - ($pos % self::REKEY_BLOCK);
$recordData .= $this->rc4Key->RC4(substr($data, 0, $step));
$data = substr($data, $step);
$pos += $step;
$len -= $step;
$block++;
$this->rc4Key = $this->makeKey($block, $this->md5Ctxt);
}
$recordData .= $this->rc4Key->RC4(substr($data, 0, $len));
// Keep track of the position of this decryptor.
// We'll try and re-use it later if we can to speed things up
$this->rc4Pos = $pos + $len;
} elseif ($this->encryption == self::MS_BIFF_CRYPTO_XOR) {
throw new ParserException('XOr encryption not supported', 4);
}
return $recordData;
}
/**
* Make an RC4 decryptor for the given block
*
* @param int $block Block for which to create decrypto
* @param string $valContext MD5 context state
*
* @return RC4
*/
private function makeKey($block, $valContext) {
$pw = str_repeat("\0", 64);
for ($i = 0; $i < 5; $i++) {
$pw[$i] = $valContext[$i];
}
$pw[5] = chr($block & 0xff);
$pw[6] = chr(($block >> 8) & 0xff);
$pw[7] = chr(($block >> 16) & 0xff);
$pw[8] = chr(($block >> 24) & 0xff);
$pw[9] = "\x80";
$pw[56] = "\x48";
return new RC4(md5($pw));
}
/**
* Verify RC4 file password
*
* @var string $password Password to check
* @var string $docid Document id
* @var string $salt_data Salt data
* @var string $hashedsalt_data Hashed salt data
* @var string &$valContext Set to the MD5 context of the value
*
* @return bool Success
*/
private function verifyPassword($password, $docid, $salt_data, $hashedsalt_data, &$valContext) {
$pw = str_repeat("\0", 64);
for ($i = 0; $i < strlen($password); $i++) {
$o = ord(substr($password, $i, 1));
$pw[2 * $i] = chr($o & 0xff);
$pw[2 * $i + 1] = chr(($o >> 8) & 0xff);
}
$pw[2 * $i] = chr(0x80);
$pw[56] = chr(($i << 4) & 0xff);
$mdContext1 = md5($pw);
$offset = 0;
$keyOffset = 0;
$toCopy = 5;
while ($offset != 16) {
if ((64 - $offset) < 5) {
$toCopy = 64 - $offset;
}
for ($i = 0; $i <= $toCopy; $i++) {
$pw[$offset + $i] = $mdContext1[$keyOffset + $i];
}
$offset += $toCopy;
if ($offset == 64) {
$keyOffset = $toCopy;
$toCopy = 5 - $toCopy;
$offset = 0;
continue;
}
$keyOffset = 0;
$toCopy = 5;
for ($i = 0; $i < 16; $i++) {
$pw[$offset + $i] = $docid[$i];
}
$offset += 16;
}
$pw[16] = "\x80";
for ($i = 0; $i < 47; $i++) {
$pw[17 + $i] = "\0";
}
$pw[56] = "\x80";
$pw[57] = "\x0a";
$valContext = md5($pw);
$key = $this->makeKey(0, $valContext);
$salt = $key->RC4($salt_data);
$hashedsalt = $key->RC4($hashedsalt_data);
$salt .= "\x80" . str_repeat("\0", 47);
$salt[56] = "\x80";
$mdContext2 = md5($salt);
return $mdContext2 == $hashedsalt;
}
/**
* CODEPAGE
*
* This record stores the text encoding used to write byte strings, stored as MS Windows code page identifier.
*
* @throws ParserException
*/
private function readCodepage() {
$length = Format::getUInt2d($this->data, $this->pos + 2);
$recordData = substr($this->data, $this->pos + 4, $length);
// move stream pointer to next record
$this->pos += 4 + $length;
// offset: 0; size: 2; code page identifier
$codePage = Format::getUInt2d($recordData, 0);
$this->codePage = self::NumberToName($codePage);
}
/**
* DATEMODE
* This record specifies the base date for displaying date values. All dates are stored as count of days
* past this base date. In BIFF2-BIFF4 this record is part of the Calculation Settings Block. In BIFF5-BIFF8
* it is stored in the Workbook Globals Substream.
*/
private function readDateMode() {
$length = Format::getUInt2d($this->data, $this->pos + 2);
$recordData = substr($this->data, $this->pos + 4, $length);
// move stream pointer to next record
$this->pos += 4 + $length;
// offset: 0; size: 2; 0 = base 1900, 1 = base 1904
self::$excelBaseDate = Format::CALENDAR_WINDOWS_1900;
if (ord($recordData{0}) == 1) {
self::$excelBaseDate = Format::CALENDAR_MAC_1904;
}
}
/**
* FORMAT
*
* This record contains information about a number format. All FORMAT records occur together in a sequential list.
* In BIFF2-BIFF4 other records referencing a FORMAT record contain a zero-based index into this list. From BIFF5
* on the FORMAT record contains the index itself that will be used by other records.
*/
private function readFormat() {
$length = Format::getUInt2d($this->data, $this->pos + 2);
$recordData = substr($this->data, $this->pos + 4, $length);
// move stream pointer to next record
$this->pos += 4 + $length;
$indexCode = Format::getUInt2d($recordData, 0);
if ($this->version == self::XLS_BIFF8) {
$string = self::readUnicodeStringLong(substr($recordData, 2));
} else {
// BIFF7
$string = $this->readByteStringShort(substr($recordData, 2));
}
$formatString = $string['value'];
$this->formats[$indexCode] = $formatString;
}
/**
* XF - Extended Format
*
* This record contains formatting information for cells, rows, columns or styles.
* According to http://support.microsoft.com/kb/147732 there are always at least 15 cell style XF and 1 cell XF.
* Inspection of Excel files generated by MS Office Excel shows that XF records 0-14 are cell style XF and XF
* record 15 is a cell XF. We only read the first cell style XF and skip the remaining cell style XF records
* We read all cell XF records.
*/
private function readXf() {
$length = Format::getUInt2d($this->data, $this->pos + 2);
$recordData = substr($this->data, $this->pos + 4, $length);
// move stream pointer to next record
$this->pos += 4 + $length;
// offset: 2; size: 2; Index to FORMAT record
$numberFormatIndex = Format::getUInt2d($recordData, 2);
if (isset($this->formats[$numberFormatIndex])) {
// then we have user-defined format code
$numberFormat = $this->formats[$numberFormatIndex];
} elseif (isset(Format::$buildInFormats[$numberFormatIndex])) {
// then we have built-in format code
$numberFormat = Format::$buildInFormats[$numberFormatIndex];
} else {
// we set the general format code
$numberFormat = Format::FORMAT_GENERAL;
}
$this->xfRecords[] = ['index' => $numberFormatIndex, 'format' => $numberFormat];
}
/**
* SST - Shared String Table
*
* This record contains a list of all strings used anywhere in the workbook. Each string occurs only once.
* The workbook uses indexes into the list to reference the strings.
**/
private function readSst() {
// offset within (spliced) record data
$pos = 0;
// get spliced record data
$splicedRecordData = $this->getSplicedRecordData();
$recordData = $splicedRecordData['recordData'];
$spliceOffsets = $splicedRecordData['spliceOffsets'];
// offset: 0; size: 4; total number of strings in the workbook
$pos += 4;
// offset: 4; size: 4; number of following strings ($nm)
$nm = Format::getInt4d($recordData, 4);
$pos += 4;
// loop through the Unicode strings (16-bit length)
for ($i = 0; $i < $nm; ++$i) {
if (!isset($recordData[$pos + 2])) {
break;
}
// number of characters in the Unicode string
$numChars = Format::getUInt2d($recordData, $pos);
$pos += 2;
// option flags
$optionFlags = ord($recordData[$pos]);
++$pos;
// bit: 0; mask: 0x01; 0 = compressed; 1 = uncompressed
$isCompressed = (($optionFlags & 0x01) == 0) ;
// bit: 2; mask: 0x02; 0 = ordinary; 1 = Asian phonetic
$hasAsian = (($optionFlags & 0x04) != 0);
// bit: 3; mask: 0x03; 0 = ordinary; 1 = Rich-Text
$formattingRuns = 0;
$hasRichText = (($optionFlags & 0x08) != 0);
if ($hasRichText && isset($recordData[$pos])) {
// number of Rich-Text formatting runs
$formattingRuns = Format::getUInt2d($recordData, $pos);
$pos += 2;
}
$extendedRunLength = 0;
if ($hasAsian && isset($recordData[$pos])) {
// size of Asian phonetic setting
$extendedRunLength = Format::getInt4d($recordData, $pos);
$pos += 4;
}
// expected byte length of character array if not split
$len = ($isCompressed) ? $numChars : $numChars * 2;
// look up limit position
$limitPos = 0;
foreach ($spliceOffsets as $spliceOffset) {
// it can happen that the string is empty, therefore we need
// <= and not just <
if ($pos <= $spliceOffset) {
$limitPos = $spliceOffset;
break;
}
}
if ($pos + $len <= $limitPos) {
// character array is not split between records
$retStr = substr($recordData, $pos, $len);
$pos += $len;
} else {
// character array is split between records
// first part of character array
$retStr = substr($recordData, $pos, $limitPos - $pos);
$bytesRead = $limitPos - $pos;
// remaining characters in Unicode string
$charsLeft = $numChars - (($isCompressed) ? $bytesRead : ($bytesRead / 2));
$pos = $limitPos;
// keep reading the characters
while ($charsLeft > 0) {
// look up next limit position, in case the string span more than one continue record
foreach ($spliceOffsets as $spliceOffset) {
if ($pos < $spliceOffset) {
$limitPos = $spliceOffset;
break;
}
}
if (!isset($recordData[$pos])) {
break;
}
// repeated option flags
// OpenOffice.org documentation 5.21
$option = ord($recordData[$pos]);
++$pos;
if ($isCompressed && ($option == 0)) {
// 1st fragment compressed
// this fragment compressed
$len = min($charsLeft, $limitPos - $pos);
$retStr .= substr($recordData, $pos, $len);
$charsLeft -= $len;
$isCompressed = true;
} elseif (!$isCompressed && ($option != 0)) {
// 1st fragment uncompressed
// this fragment uncompressed
$len = min($charsLeft * 2, $limitPos - $pos);
$retStr .= substr($recordData, $pos, $len);
$charsLeft -= $len / 2;
$isCompressed = false;
} elseif (!$isCompressed && ($option == 0)) {
// 1st fragment uncompressed
// this fragment compressed
$len = min($charsLeft, $limitPos - $pos);
for ($j = 0; $j < $len; ++$j) {
if (!isset($recordData[$pos + $j])) {
break;
}
$retStr .= $recordData[$pos + $j] . chr(0);
}
$charsLeft -= $len;
$isCompressed = false;
} else {
// 1st fragment compressed
// this fragment uncompressed
$newStr = '';
$jMax = strlen($retStr);
for ($j = 0; $j < $jMax; ++$j) {
$newStr .= $retStr[$j] . chr(0);
}
$retStr = $newStr;
$len = min($charsLeft * 2, $limitPos - $pos);
$retStr .= substr($recordData, $pos, $len);
$charsLeft -= $len / 2;
$isCompressed = false;
}
$pos += $len;
}
}
// convert to UTF-8
$retStr = self::encodeUTF16($retStr, $isCompressed);
// read additional Rich-Text information, if any
// $fmtRuns = [];
if ($hasRichText) {
// list of formatting runs
/*for ($j = 0; $j < $formattingRuns; ++$j) {
// first formatted character; zero-based
$charPos = Format::getUInt2d($recordData, $pos + $j * 4);
// index to font record
$fontIndex = Format::getUInt2d($recordData, $pos + 2 + $j * 4);
$fmtRuns[] = ['charPos' => $charPos, 'fontIndex' => $fontIndex];
}*/
$pos += 4 * $formattingRuns;
}
// read additional Asian phonetics information, if any
if ($hasAsian) {
// For Asian phonetic settings, we skip the extended string data
$pos += $extendedRunLength;
}
// store the shared sting
$this->sst[] = ['value' => $retStr];
}
}
/**
* Read RK record
*
* This record represents a cell that contains an RK value (encoded integer or floating-point value). If a
* floating-point value cannot be encoded to an RK value, a NUMBER record will be written. This record replaces
* the record INTEGER written in BIFF2.
*/
private function readRk() {
$length = Format::getUInt2d($this->data, $this->pos + 2);
$recordData = substr($this->data, $this->pos + 4, $length);
// move stream pointer to next record
$this->pos += 4 + $length;
// offset: 0; size: 2; index to row
$row = Format::getUInt2d($recordData, 0);
// offset: 2; size: 2; index to column
$column = Format::getUInt2d($recordData, 2);
// offset: 4; size: 2; index to XF record
$xfIndex = Format::getUInt2d($recordData, 4);
// offset: 6; size: 4; RK value
$rkNum = Format::getInt4d($recordData, 6);
$numValue = self::getIEEE754($rkNum);
// add cell
$this->addCell($row, $column, $numValue, $xfIndex);
}
/**
* Read LABELSST record
*
* This record represents a cell that contains a string. It replaces the LABEL record and RSTRING record used in
* BIFF2-BIFF5.
*/
private function readLabelSst() {
$length = Format::getUInt2d($this->data, $this->pos + 2);
$recordData = substr($this->data, $this->pos + 4, $length);
$this->pos += 4 + $length;
$xfIndex = Format::getUInt2d($recordData, 4);
$row = Format::getUInt2d($recordData, 0);
$column = Format::getUInt2d($recordData, 2);
// offset: 6; size: 4; index to SST record
$index = Format::getInt4d($recordData, 6);
$this->addCell($row, $column, $this->sst[$index]['value'], $xfIndex);
}
/**
* Read MULRK record
*
* This record represents a cell range containing RK value cells. All cells are located in the same row.
*/
private function readMulRk() {
$length = Format::getUInt2d($this->data, $this->pos + 2);
$recordData = substr($this->data, $this->pos + 4, $length);
// move stream pointer to next record
$this->pos += 4 + $length;
// offset: 0; size: 2; index to row
$row = Format::getUInt2d($recordData, 0);
// offset: 2; size: 2; index to first column
$colFirst = Format::getUInt2d($recordData, 2);
// offset: var; size: 2; index to last column
$colLast = Format::getUInt2d($recordData, $length - 2);
$columns = $colLast - $colFirst + 1;
// offset within record data
$offset = 4;
for ($i = 0; $i < $columns; ++$i) {
// offset: var; size: 2; index to XF record
$xfIndex = Format::getUInt2d($recordData, $offset);
// offset: var; size: 4; RK value
$numValue = self::getIEEE754(Format::getInt4d($recordData, $offset + 2));
$this->addCell($row, $colFirst + $i, $numValue, $xfIndex);
$offset += 6;
}
}
/**
* Read NUMBER record
*
* This record represents a cell that contains a floating-point value.
*/
private function readNumber() {
$length = Format::getUInt2d($this->data, $this->pos + 2);
$recordData = substr($this->data, $this->pos + 4, $length);
// move stream pointer to next record
$this->pos += 4 + $length;
// offset: 0; size: 2; index to row
$row = Format::getUInt2d($recordData, 0);
// offset: 2; size 2; index to column
$column = Format::getUInt2d($recordData, 2);
// offset 4; size: 2; index to XF record
$xfIndex = Format::getUInt2d($recordData, 4);
$numValue = self::extractNumber(substr($recordData, 6, 8));
$this->addCell($row, $column, $numValue, $xfIndex);
}
/**
* Read FORMULA record + perhaps a following STRING record if formula result is a string
* This record contains the token array and the result of a formula cell.
*/
private function readFormula() {
$length = Format::getUInt2d($this->data, $this->pos + 2);
$recordData = substr($this->data, $this->pos + 4, $length);
// move stream pointer to next record
$this->pos += 4 + $length;
// offset: 0; size: 2; row index
$row = Format::getUInt2d($recordData, 0);
// offset: 2; size: 2; col index
$column = Format::getUInt2d($recordData, 2);
// offset 4; size: 2; index to XF record
$xfIndex = Format::getUInt2d($recordData, 4);
// offset: 6; size: 8; result of the formula
if ((ord($recordData{6}) == 0) && (ord($recordData{12}) == 255) && (ord($recordData{13}) == 255)) {
// read STRING record
$value = $this->readString();
} elseif ((ord($recordData{6}) == 1) && (ord($recordData{12}) == 255) && (ord($recordData{13}) == 255)) {
// Boolean formula. Result is in +2; 0=false, 1=true
$value = (bool) ord($recordData{8});
} elseif ((ord($recordData{6}) == 2) && (ord($recordData{12}) == 255) && (ord($recordData{13}) == 255)) {
// Error formula. Error code is in +2
$value = self::mapErrorCode(ord($recordData{8}));
} elseif ((ord($recordData{6}) == 3) && (ord($recordData{12}) == 255) && (ord($recordData{13}) == 255)) {
// Formula result is a null string
$value = '';
} else {
// forumla result is a number, first 14 bytes like _NUMBER record
$value = self::extractNumber(substr($recordData, 6, 8));
}
$this->addCell($row, $column, $value, $xfIndex);
}
/**
* Read a STRING record from current stream position and advance the stream pointer to next record.
* This record is used for storing result from FORMULA record when it is a string, and it occurs
* directly after the FORMULA record
*
* @return string The string contents as UTF-8
*/
private function readString() {
$length = Format::getUInt2d($this->data, $this->pos + 2);
$recordData = substr($this->data, $this->pos + 4, $length);
// move stream pointer to next record
$this->pos += 4 + $length;
if ($this->version == self::XLS_BIFF8) {
$string = self::readUnicodeStringLong($recordData);
$value = $string['value'];
} else {
$string = $this->readByteStringLong($recordData);
$value = $string['value'];
}
return $value;
}
/**
* Read BOOLERR record
*
* This record represents a Boolean value or error value cell.
*/
private function readBoolErr() {
$length = Format::getUInt2d($this->data, $this->pos + 2);
$recordData = substr($this->data, $this->pos + 4, $length);
// move stream pointer to next record
$this->pos += 4 + $length;
// offset: 0; size: 2; row index
$row = Format::getUInt2d($recordData, 0);
// offset: 2; size: 2; column index
$column = Format::getUInt2d($recordData, 2);
// offset: 4; size: 2; index to XF record
$xfIndex = Format::getUInt2d($recordData, 4);
// offset: 6; size: 1; the boolean value or error value
$boolError = ord($recordData{6});
// offset: 7; size: 1; 0=boolean; 1=error
$isError = ord($recordData{7});
switch ($isError) {
case 0: // boolean
$value = (bool)$boolError;
// add cell value
$this->addCell($row, $column, $value, $xfIndex);
break;
case 1: // error type
$value = self::mapErrorCode($boolError);
// add cell value
$this->addCell($row, $column, $value, $xfIndex);
break;
}
}
/**
* Read BLANK record
*/
private function readBlank() {
$length = Format::getUInt2d($this->data, $this->pos + 2);
$recordData = substr($this->data, $this->pos + 4, $length);
// move stream pointer to next record
$this->pos += 4 + $length;
// offset: 0; size: 2; row index
$row = Format::getUInt2d($recordData, 0);
// offset: 2; size: 2; col index
$column = Format::getUInt2d($recordData, 2);
// offset: 4; size: 2; XF index
$xfIndex = Format::getUInt2d($recordData, 4);
$this->addCell($row, $column, '', $xfIndex);
}
/**
* Read LABEL record
*
* This record represents a cell that contains a string. In BIFF8 it is usually replaced by the LABELSST record.
* Excel still uses this record, if it copies unformatted text cells to the clipboard.
*/
private function readLabel() {
$length = Format::getUInt2d($this->data, $this->pos + 2);
$recordData = substr($this->data, $this->pos + 4, $length);
// move stream pointer to next record
$this->pos += 4 + $length;
// offset: 0; size: 2; index to row
$row = Format::getUInt2d($recordData, 0);
// offset: 2; size: 2; index to column
$column = Format::getUInt2d($recordData, 2);
// offset: 4; size: 2; XF index
$xfIndex = Format::getUInt2d($recordData, 4);
// add cell value
if ($this->version == self::XLS_BIFF8) {
$string = self::readUnicodeStringLong(substr($recordData, 6));
$value = $string['value'];
} else {
$string = $this->readByteStringLong(substr($recordData, 6));
$value = $string['value'];
}
$this->addCell($row, $column, $value, $xfIndex);
}
/**
* Map error code, e.g. '#N/A'
*
* @param int $code
* @return string
*/
private static function mapErrorCode($code) {
if (isset(self::$errorCode[$code])) {
return self::$errorCode[$code];
}
return false;
}
/**
* Convert a value in a pre-defined format to a PHP string
*
* @param mixed $value Value to format
* @param string $format Format code
* @return string
*/
private static function toFormattedString($value = '0', $format = Format::FORMAT_GENERAL) {
// For now we do not treat strings although section 4 of a format code affects strings
if (!is_numeric($value)) {
return $value;
}
// For 'General' format code, we just pass the value although this is not entirely the way Excel does it,
// it seems to round numbers to a total of 10 digits.
if (($format === Format::FORMAT_GENERAL) || ($format === Format::FORMAT_TEXT)) {
return $value;
}
// Convert any other escaped characters to quoted strings, e.g. (\T to "T")
$format = preg_replace('/(\\\(.))(?=(?:[^"]|"[^"]*")*$)/u', '"${2}"', $format);
// Get the sections, there can be up to four sections, separated with a semi-colon (but only if not a quoted literal)
$sections = preg_split('/(;)(?=(?:[^"]|"[^"]*")*$)/u', $format);
// Extract the relevant section depending on whether number is positive, negative, or zero?
// Text not supported yet.
// Here is how the sections apply to various values in Excel:
// 1 section: [POSITIVE/NEGATIVE/ZERO/TEXT]
// 2 sections: [POSITIVE/ZERO/TEXT] [NEGATIVE]
// 3 sections: [POSITIVE/TEXT] [NEGATIVE] [ZERO]
// 4 sections: [POSITIVE] [NEGATIVE] [ZERO] [TEXT]
switch (count($sections)) {
case 1:
$format = $sections[0];
break;
case 2:
$format = ($value >= 0) ? $sections[0] : $sections[1];
$value = abs($value); // Use the absolute value
break;
case 3:
$format = ($value > 0) ? $sections[0] : ( ($value < 0) ? $sections[1] : $sections[2]);
$value = abs($value); // Use the absolute value
break;
case 4:
$format = ($value > 0) ? $sections[0] : ( ($value < 0) ? $sections[1] : $sections[2]);
$value = abs($value); // Use the absolute value
break;
default:
// something is wrong, just use first section
$format = $sections[0];
break;
}
// In Excel formats, "_" is used to add spacing,
// The following character indicates the size of the spacing, which we can't do in HTML, so we just use a standard space
$format = preg_replace('/_./', ' ', $format);
// Save format with color information for later use below
//$formatColor = $format;
// Strip color information
$colorRegex = '/^\\[[a-zA-Z]+\\]/';
$format = preg_replace($colorRegex, '', $format);
// Let's begin inspecting the format and converting the value to a formatted string
// Check for date/time characters (not inside quotes)
if (preg_match('/(\[\$[A-Z]*-[0-9A-F]*\])*[hmsdy](?=(?:[^"]|"[^"]*")*$)/miu', $format, $matches)) {
// datetime format
self::formatAsDate($value, $format);
} elseif (preg_match('/%$/', $format)) {
// % number format
self::formatAsPercentage($value, $format);
} else {
if ($format === Format::FORMAT_CURRENCY_EUR_SIMPLE) {
$value = 'EUR ' . sprintf('%1.2f', $value);
} else {
// Some non-number strings are quoted, so we'll get rid of the quotes, likewise any positional * symbols
$format = str_replace(['"', '*'], '', $format);
// Find out if we need thousands separator
// This is indicated by a comma enclosed by a digit placeholder:
// #,# or 0,0
$useThousands = preg_match('/(#,#|0,0)/', $format);
if ($useThousands) {
$format = preg_replace('/0,0/', '00', $format);
$format = preg_replace('/#,#/', '##', $format);
}
// Scale thousands, millions,...
// This is indicated by a number of commas after a digit placeholder:
// #, or 0.0,,
$scale = 1; // same as no scale
$matches = [];
if (preg_match('/(#|0)(,+)/', $format, $matches)) {
$scale = pow(1000, strlen($matches[2]));
// strip the commas
$format = preg_replace('/0,+/', '0', $format);
$format = preg_replace('/#,+/', '#', $format);
}
if (preg_match('/#?.*\?\/\?/', $format, $m)) {
//echo 'Format mask is fractional '.$format.'
';
if ($value != (int)$value) {
self::formatAsFraction($value, $format);
}
} else {
// Handle the number itself
// scale number
$value = $value / $scale;
// Strip #
$format = preg_replace('/\\#/', '0', $format);
$n = "/\[[^\]]+\]/";
$m = preg_replace($n, '', $format);
$numberRegex = "/(0+)(\.?)(0*)/";
if (preg_match($numberRegex, $m, $matches)) {
$left = $matches[1];
$dec = $matches[2];
$right = $matches[3];
// minimun width of formatted number (including dot)
$minWidth = strlen($left) + strlen($dec) + strlen($right);
if ($useThousands) {
$value = number_format(
$value,
strlen($right),
self::getDecimalSeparator(),
self::getThousandsSeparator()
);
$value = preg_replace($numberRegex, $value, $format);
} else {
if (preg_match('/[0#]E[+-]0/i', $format)) {
//Scientific format
$value = sprintf('%5.2E', $value);
} elseif (preg_match('/0([^\d\.]+)0/', $format)) {
$value = self::complexNumberFormatMask($value, $format);
} else {
$sprintfPattern = "%0$minWidth." . strlen($right) . "f";
$value = sprintf($sprintfPattern, $value);
$value = preg_replace($numberRegex, $value, $format);
}
}
}
}
if (preg_match('/\[\$(.*)\]/u', $format, $m)) {
// Currency or Accounting
//$currencyFormat = $m[0];
$currencyCode = $m[1];
list($currencyCode) = explode('-', $currencyCode);
if ($currencyCode == '') {
$currencyCode = self::getCurrencyCode();
}
$value = preg_replace('/\[\$([^\]]*)\]/u', $currencyCode, $value);
}
}
}
return $value;
}
/**
* Reads a record from current position in data stream and continues reading data as long as CONTINUE records
* are found. Splices the record data pieces and returns the combined string as if record data is in one piece.
* Moves to next current position in data stream to start of next record different from a CONtINUE record
*
* @return array
*/
private function getSplicedRecordData() {
$i = 0;
$data = '';
$spliceOffsets = [0];
do {
++$i;
// offset: 0; size: 2; identifier
//$identifier = Cell::getInt2d($this->data, $this->pos);
// offset: 2; size: 2; length
$length = Format::getUInt2d($this->data, $this->pos + 2);
$data .= substr($this->data, $this->pos + 4, $length);
$spliceOffsets[$i] = $spliceOffsets[$i - 1] + $length;
$this->pos += 4 + $length;
$nextIdentifier = Format::getUInt2d($this->data, $this->pos);
} while ($nextIdentifier == self::XLS_TYPE_CONTINUE);
return ['recordData' => $data, 'spliceOffsets' => $spliceOffsets];
}
/**
* Get the decimal separator. If it has not yet been set explicitly, try to obtain number formatting
* information from locale.
*
* @return string
*/
private static function getDecimalSeparator() {
if (!isset(self::$decimalSeparator)) {
$localeconv = localeconv();
self::$decimalSeparator = ($localeconv['decimal_point'] != '') ? $localeconv['decimal_point']
: $localeconv['mon_decimal_point'];
if (self::$decimalSeparator == '') {
// Default to .
self::$decimalSeparator = '.';
}
}
return self::$decimalSeparator;
}
/**
* Get the thousands separator. If it has not yet been set explicitly, try to obtain number formatting
* information from locale.
*
* @return string
*/
private static function getThousandsSeparator() {
if (!isset(self::$thousandsSeparator)) {
$localeconv = localeconv();
self::$thousandsSeparator = ($localeconv['thousands_sep'] != '') ? $localeconv['thousands_sep']
: $localeconv['mon_thousands_sep'];
if (self::$thousandsSeparator == '') {
// Default to .
self::$thousandsSeparator = ',';
}
}
return self::$thousandsSeparator;
}
/**
* Get the currency code. If it has not yet been set explicitly, try to obtain the symbol information from locale.
*
* @return string
*/
private static function getCurrencyCode() {
if (!isset(self::$currencyCode)) {
$localeconv = localeconv();
self::$currencyCode = ($localeconv['currency_symbol'] != '') ? $localeconv['currency_symbol']
: $localeconv['int_curr_symbol'];
if (self::$currencyCode == '') {
// Default to $
self::$currencyCode = '$';
}
}
return self::$currencyCode;
}
private static function complexNumberFormatMask($number, $mask) {
$sign = ($number < 0.0);
$number = abs($number);
if (strpos($mask, '.') !== false) {
$numbers = explode('.', $number . '.0');
$masks = explode('.', $mask . '.0');
$result1 = self::complexNumberFormatMask($numbers[0], $masks[0]);
$result2 = strrev(self::complexNumberFormatMask(strrev($numbers[1]), strrev($masks[1])));
return (($sign) ? '-' : '') . $result1 . '.' . $result2;
}
$r = preg_match_all('/0+/', $mask, $result, PREG_OFFSET_CAPTURE);
if ($r > 1) {
$result = array_reverse($result[0]);
$offset = 0;
foreach ($result as $block) {
$divisor = 1 . $block[0];
$size = strlen($block[0]);
$offset = $block[1];
$blockValue = sprintf('%0' . $size . 'd', fmod($number, $divisor));
$number = floor($number / $divisor);
$mask = substr_replace($mask, $blockValue, $offset, $size);
}
if ($number > 0) {
$mask = substr_replace($mask, $number, $offset, 0);
}
$result = $mask;
} else {
$result = $number;
}
return (($sign) ? '-' : '') . $result;
}
/**
* Convert Microsoft Code Page Identifier to Code Page Name which iconv and mbstring understands
*
* @param int $codePage Microsoft Code Page Indentifier
*
* @throws ParserException
* @return string Code Page Name
*/
private static function NumberToName($codePage = 1252) {
switch ($codePage) {
case 367:
return 'ASCII'; //ASCII
case 437:
return 'CP437'; //OEM US
case 720:
throw new ParserException('Code page 720 not supported.', 5); //OEM Arabic
case 737:
return 'CP737'; //OEM Greek
case 775:
return 'CP775'; //OEM Baltic
case 850:
return 'CP850'; //OEM Latin I
case 852:
return 'CP852'; //OEM Latin II (Central European)
case 855:
return 'CP855'; //OEM Cyrillic
case 857:
return 'CP857'; //OEM Turkish
case 858:
return 'CP858'; //OEM Multilingual Latin I with Euro
case 860:
return 'CP860'; //OEM Portugese
case 861:
return 'CP861'; //OEM Icelandic
case 862:
return 'CP862'; //OEM Hebrew
case 863:
return 'CP863'; //OEM Canadian (French)
case 864:
return 'CP864'; //OEM Arabic
case 865:
return 'CP865'; //OEM Nordic
case 866:
return 'CP866'; //OEM Cyrillic (Russian)
case 869:
return 'CP869'; //OEM Greek (Modern)
case 874:
return 'CP874'; //ANSI Thai
case 932:
return 'CP932'; //ANSI Japanese Shift-JIS
case 936:
return 'CP936'; //ANSI Chinese Simplified GBK
case 949:
return 'CP949'; //ANSI Korean (Wansung)
case 950:
return 'CP950'; //ANSI Chinese Traditional BIG5
case 1200:
return 'UTF-16LE'; //UTF-16 (BIFF8)
case 1250:
return 'CP1250'; //ANSI Latin II (Central European)
case 1251:
return 'CP1251'; //ANSI Cyrillic
case 0: //CodePage is not always correctly set when the xls file was saved by Apple's Numbers program
case 1252:
return 'CP1252'; //ANSI Latin I (BIFF4-BIFF7)
case 1253:
return 'CP1253'; //ANSI Greek
case 1254:
return 'CP1254'; //ANSI Turkish
case 1255:
return 'CP1255'; //ANSI Hebrew
case 1256:
return 'CP1256'; //ANSI Arabic
case 1257:
return 'CP1257'; //ANSI Baltic
case 1258:
return 'CP1258'; //ANSI Vietnamese
case 1361:
return 'CP1361'; //ANSI Korean (Johab)
case 10000:
return 'MAC'; //Apple Roman
case 10001:
return 'CP932'; //Macintosh Japanese
case 10002:
return 'CP950'; //Macintosh Chinese Traditional
case 10003:
return 'CP1361'; //Macintosh Korean
case 10004:
return 'MACARABIC'; // Apple Arabic
case 10005:
return 'MACHEBREW'; //Apple Hebrew
case 10006:
return 'MACGREEK'; //Macintosh Greek
case 10007:
return 'MACCYRILLIC'; //Macintosh Cyrillic
case 10008:
return 'CP936'; //Macintosh - Simplified Chinese (GB 2312)
case 10010:
return 'MACROMANIA'; //Macintosh Romania
case 10017:
return 'MACUKRAINE'; //Macintosh Ukraine
case 10021:
return 'MACTHAI'; //Macintosh Thai
case 10029:
return 'MACCENTRALEUROPE'; //Macintosh Central Europe
case 10079:
return 'MACICELAND'; //Macintosh Icelandic
case 10081:
return 'MACTURKISH'; //Macintosh Turkish
case 10082:
return 'MACCROATIAN'; //Macintosh Croatian
case 21010:
return 'UTF-16LE'; //UTF-16 (BIFF8) This isn't correct, but some Excel writer libraries erroneously
// use Codepage 21010 for UTF-16LE
case 32768:
return 'MAC'; //Apple Roman
case 32769:
throw new ParserException('Code page 32769 not supported.', 6); //ANSI Latin I (BIFF2-BIFF3)
case 65000:
return 'UTF-7'; //Unicode (UTF-7)
case 65001:
return 'UTF-8'; //Unicode (UTF-8)
}
throw new ParserException("Unknown codepage: $codePage", 7);
}
/**
* Read byte string (8-bit string length). OpenOffice documentation: 2.5.2
*
* @param string $subData
*
* @return array
*/
private function readByteStringShort($subData) {
// offset: 0; size: 1; length of the string (character count)
$ln = ord($subData[0]);
// offset: 1: size: var; character array (8-bit characters)
$value = $this->decodeCodepage(substr($subData, 1, $ln));
// size in bytes of data structure
return ['value' => $value, 'size' => 1 + $ln];
}
/**
* Read byte string (16-bit string length). OpenOffice documentation: 2.5.2
*
* @param string $subData
* @return array
*/
private function readByteStringLong($subData) {
// offset: 0; size: 2; length of the string (character count)
$ln = Format::getUInt2d($subData, 0);
// offset: 2: size: var; character array (8-bit characters)
$value = $this->decodeCodepage(substr($subData, 2));
// size in bytes of data structure
return ['value' => $value, 'size' => 2 + $ln];
}
private static function formatAsDate(&$value, &$format) {
// strip off first part containing e.g. [$-F800] or [$USD-409]
// general syntax: [$-]
// language info is in hexadecimal
$format = preg_replace('/^(\[\$[A-Z]*-[0-9A-F]*\])/i', '', $format);
// OpenOffice.org uses upper-case number formats, e.g. 'YYYY', convert to lower-case;
// but we don't want to change any quoted strings
$format = preg_replace_callback('/(?:^|")([^"]*)(?:$|")/', ['self', 'setLowercaseCallback'], $format);
// Only process the non-quoted blocks for date format characters
$blocks = explode('"', $format);
foreach($blocks as $key => &$block) {
if ($key % 2 == 0) {
$block = strtr($block, Format::$dateFormatReplacements);
if (strpos($block, 'A') === false) {
// 24-hour time format
$block = strtr($block, Format::$dateFormatReplacements24);
} else {
// 12-hour time format
$block = strtr($block, Format::$dateFormatReplacements12);
}
}
}
$format = implode('"', $blocks);
// escape any quoted characters so that DateTime format() will render them correctly
$format = preg_replace_callback('/"(.*)"/U', ['self', 'escapeQuotesCallback'], $format);
$dateObj = self::ExcelToPHPObject($value);
$value = $dateObj->format($format);
}
private static function setLowercaseCallback($matches) {
return mb_strtolower($matches[0]);
}
private static function escapeQuotesCallback($matches) {
return '\\' . implode('\\', str_split($matches[1]));
}
/**
* Convert a date from Excel to a PHP Date/Time object
*
* @param int $dateValue Excel date/time value
*
* @return \DateTime PHP date/time object
*/
private static function ExcelToPHPObject($dateValue = 0) {
$dateTime = self::ExcelToPHP($dateValue);
$days = floor($dateTime / 86400);
$time = round((($dateTime / 86400) - $days) * 86400);
$hours = round($time / 3600);
$minutes = round($time / 60) - ($hours * 60);
$seconds = round($time) - ($hours * 3600) - ($minutes * 60);
$dateObj = new \DateTime("1-Jan-1970+$days days");
$dateObj->setTime($hours, $minutes, $seconds);
return $dateObj;
}
/**
* Convert a date from Excel to PHP
*
* @param int $dateValue Excel date/time value
*
* @return int PHP serialized date/time
*/
private static function ExcelToPHP($dateValue = 0) {
if (self::$excelBaseDate == Format::CALENDAR_WINDOWS_1900) {
$excelBaseDate = 25569;
//Adjust for the spurious 29-Feb-1900 (Day 60)
if ($dateValue < 60) {
--$excelBaseDate;
}
} else {
$excelBaseDate = 24107;
}
// Perform conversion
if ($dateValue >= 1) {
$utcDays = $dateValue - $excelBaseDate;
$returnValue = round($utcDays * 86400);
if (($returnValue <= PHP_INT_MAX) && ($returnValue >= -PHP_INT_MAX)) {
$returnValue = (integer) $returnValue;
}
} else {
$hours = round($dateValue * 24);
$mins = round($dateValue * 1440) - round($hours * 60);
$secs = round($dateValue * 86400) - round($hours * 3600) - round($mins * 60);
$returnValue = (integer) gmmktime($hours, $mins, $secs);
}
return $returnValue;
}
private static function formatAsPercentage(&$value, &$format) {
if ($format === Format::FORMAT_PERCENTAGE) {
$value = round((100 * $value), 0) . '%';
} else {
if (preg_match('/\.[#0]+/i', $format, $m)) {
$s = substr($m[0], 0, 1) . (strlen($m[0]) - 1);
$format = str_replace($m[0], $s, $format);
}
if (preg_match('/^[#0]+/', $format, $m)) {
$format = str_replace($m[0], strlen($m[0]), $format);
}
$format = '%' . str_replace('%', 'f%%', $format);
$value = sprintf($format, 100 * $value);
}
}
private static function formatAsFraction(&$value, &$format) {
$sign = ($value < 0) ? '-' : '';
$integerPart = floor(abs($value));
$decimalPart = trim(fmod(abs($value), 1), '0.');
$decimalLength = strlen($decimalPart);
$decimalDivisor = pow(10, $decimalLength);
$GCD = self::GCD([$decimalPart, $decimalDivisor]);
$adjustedDecimalPart = $decimalPart/$GCD;
$adjustedDecimalDivisor = $decimalDivisor/$GCD;
if ((strpos($format, '0') !== false) || (strpos($format, '#') !== false) || (substr($format, 0, 3) == '? ?')) {
if ($integerPart == 0) {
$integerPart = '';
}
$value = "$sign$integerPart $adjustedDecimalPart/$adjustedDecimalDivisor";
} else {
$adjustedDecimalPart += $integerPart * $adjustedDecimalDivisor;
$value = "$sign$adjustedDecimalPart/$adjustedDecimalDivisor";
}
}
/**
* GCD
*
* Returns the greatest common divisor of a series of numbers. The greatest common divisor is the largest
* integer that divides both number1 and number2 without a remainder.
* Excel Function:
* GCD(number1[,number2[, ...]])
*
* @param array $params
*
* @return integer Greatest Common Divisor
*/
private static function GCD($params) {
$returnValue = 1;
$allValuesFactors = [];
// Loop through arguments
$flattenArr = self::flattenArray($params);
foreach ($flattenArr as $value) {
if (!is_numeric($value)) {
return '#VALUE!';
} elseif ($value == 0) {
continue;
} elseif ($value < 0) {
return '#NULL!';
}
$factors = self::factors($value);
$countedFactors = array_count_values($factors);
$allValuesFactors[] = $countedFactors;
}
$allValuesCount = count($allValuesFactors);
if ($allValuesCount == 0) {
return 0;
}
$mergedArray = $allValuesFactors[0];
for ($i=1; $i < $allValuesCount; ++$i) {
$mergedArray = array_intersect_key($mergedArray, $allValuesFactors[$i]);
}
$mergedArrayValues = count($mergedArray);
if ($mergedArrayValues == 0) {
return $returnValue;
} elseif ($mergedArrayValues > 1) {
foreach ($mergedArray as $mergedKey => $mergedValue) {
foreach ($allValuesFactors as $highestPowerTest) {
foreach ($highestPowerTest as $testKey => $testValue) {
if (($testKey == $mergedKey) && ($testValue < $mergedValue)) {
$mergedArray[$mergedKey] = $testValue;
$mergedValue = $testValue;
}
}
}
}
$returnValue = 1;
foreach ($mergedArray as $key => $value) {
$returnValue *= pow($key, $value);
}
return $returnValue;
} else {
$keys = array_keys($mergedArray);
$key = $keys[0];
$value = $mergedArray[$key];
foreach ($allValuesFactors as $testValue) {
foreach ($testValue as $mergedKey => $mergedValue) {
if (($mergedKey == $key) && ($mergedValue < $value)) {
$value = $mergedValue;
}
}
}
return pow($key, $value);
}
}
/**
* Convert a multi-dimensional array to a simple 1-dimensional array
*
* @param array $array Array to be flattened
*
* @return array Flattened array
*/
private static function flattenArray($array) {
if (!is_array($array)) {
return (array) $array;
}
$arrayValues = [];
foreach ($array as $value) {
if (is_array($value)) {
foreach ($value as $val) {
if (is_array($val)) {
foreach ($val as $v) {
$arrayValues[] = $v;
}
} else {
$arrayValues[] = $val;
}
}
} else {
$arrayValues[] = $value;
}
}
return $arrayValues;
}
/**
* Return an array of the factors of the input value
*
* @param int $value
*
* @return array
*/
private static function factors($value) {
$startVal = floor(sqrt($value));
$factorArray = [];
for ($i = $startVal; $i > 1; --$i) {
if (($value % $i) == 0) {
$factorArray = array_merge($factorArray, self::factors($value / $i));
$factorArray = array_merge($factorArray, self::factors($i));
if ($i <= sqrt($value)) {
break;
}
}
}
if (!empty($factorArray)) {
rsort($factorArray);
return $factorArray;
}
return [(int) $value];
}
/**
* Read Unicode string with no string length field, but with known character count this function is under
* construction, needs to support rich text, and Asian phonetic settings
*
* @param string $subData
* @param int $characterCount
*
* @return array
*/
private static function readUnicodeString($subData, $characterCount) {
// offset: 0: size: 1; option flags
// bit: 0; mask: 0x01; character compression (0 = compressed 8-bit, 1 = uncompressed 16-bit)
$isCompressed = !((0x01 & ord($subData[0])) >> 0);
// offset: 1: size: var; character array
// this offset assumes richtext and Asian phonetic settings are off which is generally wrong
// needs to be fixed
$value = self::encodeUTF16(
substr($subData, 1, $isCompressed ? $characterCount : 2 * $characterCount), $isCompressed
);
// the size in bytes including the option flags
return ['value' => $value, 'size' => $isCompressed ? 1 + $characterCount : 1 + 2 * $characterCount];
}
/**
* Extracts an Excel Unicode short string (8-bit string length), this function will automatically find out
* where the Unicode string ends.
*
* @param string $subData
*
* @return array
*/
private static function readUnicodeStringShort($subData) {
// offset: 0: size: 1; length of the string (character count)
$characterCount = ord($subData[0]);
$string = self::readUnicodeString(substr($subData, 1), $characterCount);
// add 1 for the string length
$string['size'] += 1;
return $string;
}
/**
* Extracts an Excel Unicode long string (16-bit string length), this function is under construction,
* needs to support rich text, and Asian phonetic settings
*
* @param string $subData
*
* @return array
*/
private static function readUnicodeStringLong($subData) {
// offset: 0: size: 2; length of the string (character count)
$characterCount = Format::getUInt2d($subData, 0);
$string = self::readUnicodeString(substr($subData, 2), $characterCount);
// add 2 for the string length
$string['size'] += 2;
return $string;
}
private static function getIEEE754($rkNum) {
if (($rkNum & 0x02) != 0) {
$value = $rkNum >> 2;
} else {
// changes by mmp, info on IEEE754 encoding from
// research.microsoft.com/~hollasch/cgindex/coding/ieeefloat.html
// The RK format calls for using only the most significant 30 bits of the 64 bit floating point value.
// The other 34 bits are assumed to be 0 so we use the upper 30 bits of $rknum as follows...
$sign = ($rkNum & 0x80000000) >> 31;
$exp = ($rkNum & 0x7ff00000) >> 20;
$mantissa = (0x100000 | ($rkNum & 0x000ffffc));
$value = $mantissa / pow(2, (20- ($exp - 1023)));
if ($sign) {
$value = -1 * $value;
}
//end of changes by mmp
}
if (($rkNum & 0x01) != 0) {
$value /= 100;
}
return $value;
}
/**
* Get UTF-8 string from (compressed or uncompressed) UTF-16 string
*
* @param string $string
* @param bool $compressed
*
* @return string
*/
private static function encodeUTF16($string, $compressed = false) {
if ($compressed) {
$string = self::uncompressByteString($string);
}
return mb_convert_encoding($string, 'UTF-8', 'UTF-16LE');
}
/**
* Convert string to UTF-8. Only used for BIFF5.
*
* @param string $string
*
* @return string
*/
private function decodeCodepage($string) {
return mb_convert_encoding($string, 'UTF-8', $this->codePage);
}
/**
* Convert UTF-16 string in compressed notation to uncompressed form. Only used for BIFF8.
*
* @param string $string
*
* @return string
*/
private static function uncompressByteString($string) {
$uncompressedString = '';
$strLen = strlen($string);
for ($i = 0; $i < $strLen; ++$i) {
$uncompressedString .= $string[$i] . "\0";
}
return $uncompressedString;
}
/**
* Reads first 8 bytes of a string and return IEEE 754 float
*
* @param string $data Binary string that is at least 8 bytes long
*
* @return float
*/
private static function extractNumber($data) {
$rkNumHigh = Format::getInt4d($data, 4);
$rkNumLow = Format::getInt4d($data, 0);
$sign = ($rkNumHigh & 0x80000000) >> 31;
$exp = (($rkNumHigh & 0x7ff00000) >> 20) - 1023;
$mantissa = (0x100000 | ($rkNumHigh & 0x000fffff));
$mantissaLow1 = ($rkNumLow & 0x80000000) >> 31;
$mantissaLow2 = ($rkNumLow & 0x7fffffff);
$value = $mantissa / pow(2, (20 - $exp));
if ($mantissaLow1 != 0) {
$value += 1 / pow(2, (21 - $exp));
}
$value += $mantissaLow2 / pow(2, (52 - $exp));
if ($sign) {
$value *= -1;
}
return $value;
}
}