Excel2007.php 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843
  1. <?php
  2. /**
  3. * Excel2017 Parser
  4. *
  5. * @author Janson
  6. * @create 2017-12-02
  7. */
  8. namespace Asan\PHPExcel\Parser;
  9. use Asan\PHPExcel\Exception\ParserException;
  10. use Asan\PHPExcel\Exception\ReaderException;
  11. class Excel2007 {
  12. const CELL_TYPE_SHARED_STR = 's';
  13. /**
  14. * Temporary directory
  15. *
  16. * @var string
  17. */
  18. protected $tmpDir;
  19. /**
  20. * ZipArchive reader
  21. *
  22. * @var \ZipArchive
  23. */
  24. protected $zip;
  25. /**
  26. * Worksheet reader
  27. *
  28. * @var \XMLReader
  29. */
  30. protected $worksheetXML;
  31. /**
  32. * SharedStrings reader
  33. *
  34. * @var \XMLReader
  35. */
  36. protected $sharedStringsXML;
  37. /**
  38. * SharedStrings position
  39. *
  40. * @var array
  41. */
  42. private $sharedStringsPosition = -1;
  43. /**
  44. * The current sheet of the file
  45. *
  46. * @var int
  47. */
  48. private $sheetIndex = 0;
  49. /**
  50. * Ignore empty row
  51. *
  52. * @var bool
  53. */
  54. private $ignoreEmpty = false;
  55. /**
  56. * Style xfs
  57. *
  58. * @var array
  59. */
  60. private $styleXfs;
  61. /**
  62. * Number formats
  63. *
  64. * @var array
  65. */
  66. private $formats;
  67. /**
  68. * Parsed number formats
  69. *
  70. * @var array
  71. */
  72. private $parsedFormats;
  73. /**
  74. * Worksheets
  75. *
  76. * @var array
  77. */
  78. private $sheets;
  79. /**
  80. * Default options for libxml loader
  81. *
  82. * @var int
  83. */
  84. private static $libXmlLoaderOptions;
  85. /**
  86. * Base date
  87. *
  88. * @var \DateTime
  89. */
  90. private static $baseDate;
  91. private static $decimalSeparator = '.';
  92. private static $thousandSeparator = ',';
  93. private static $currencyCode = '';
  94. private static $runtimeInfo = ['GMPSupported' => false];
  95. private $_sharedStrings = [];
  96. /**
  97. * Use ZipArchive reader to extract the relevant data streams from the ZipArchive file
  98. *
  99. * @throws ParserException|ReaderException
  100. * @param string $file
  101. */
  102. public function loadZip($file) {
  103. $this->openFile($file);
  104. // Setting base date
  105. if (!self::$baseDate) {
  106. self::$baseDate = new \DateTime;
  107. self::$baseDate->setTimezone(new \DateTimeZone('UTC'));
  108. self::$baseDate->setDate(1900, 1, 0);
  109. self::$baseDate->setTime(0, 0, 0);
  110. }
  111. if (function_exists('gmp_gcd')) {
  112. self::$runtimeInfo['GMPSupported'] = true;
  113. }
  114. }
  115. /**
  116. * Ignore empty row
  117. *
  118. * @param bool $ignoreEmpty
  119. *
  120. * @return $this
  121. */
  122. public function ignoreEmptyRow($ignoreEmpty) {
  123. $this->ignoreEmpty = $ignoreEmpty;
  124. return $this;
  125. }
  126. /**
  127. * Whether is ignore empty row
  128. *
  129. * @return bool
  130. */
  131. public function isIgnoreEmptyRow() {
  132. return $this->ignoreEmpty;
  133. }
  134. /**
  135. * Set sheet index
  136. *
  137. * @param int $index
  138. *
  139. * @return $this
  140. */
  141. public function setSheetIndex($index) {
  142. if ($index != $this->sheetIndex) {
  143. $this->sheetIndex = $index;
  144. $this->getWorksheetXML();
  145. }
  146. return $this;
  147. }
  148. /**
  149. * Get sheet index
  150. *
  151. * @return int
  152. */
  153. public function getSheetIndex() {
  154. return $this->sheetIndex;
  155. }
  156. /**
  157. * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns)
  158. *
  159. * @throws ReaderException
  160. * @return array
  161. */
  162. public function parseWorksheetInfo() {
  163. if ($this->sheets === null) {
  164. $workbookXML = simplexml_load_string(
  165. $this->securityScan($this->zip->getFromName('xl/workbook.xml')), 'SimpleXMLElement', self::getLibXmlLoaderOptions()
  166. );
  167. $this->sheets = [];
  168. if (isset($workbookXML->sheets) && $workbookXML->sheets) {
  169. $xml = new \XMLReader();
  170. $index = 0;
  171. foreach ($workbookXML->sheets->sheet as $sheet) {
  172. $info = [
  173. 'name' => (string)$sheet['name'], 'lastColumnLetter' => '', 'lastColumnIndex' => 0,
  174. 'totalRows' => 0, 'totalColumns' => 0
  175. ];
  176. $this->zip->extractTo($this->tmpDir, $file = 'xl/worksheets/sheet' . (++$index) . '.xml');
  177. $xml->open($this->tmpDir . '/' . $file, null, self::getLibXmlLoaderOptions());
  178. $xml->setParserProperty(\XMLReader::DEFAULTATTRS, true);
  179. $nonEmpty = false;
  180. $columnLetter = '';
  181. while ($xml->read()) {
  182. if ($xml->name == 'row') {
  183. if (!$this->ignoreEmpty && $xml->nodeType == \XMLReader::ELEMENT) {
  184. $info['totalRows'] = (int)$xml->getAttribute('r');
  185. } elseif ($xml->nodeType == \XMLReader::END_ELEMENT) {
  186. if ($this->ignoreEmpty && $nonEmpty) {
  187. $info['totalRows']++;
  188. $nonEmpty = false;
  189. }
  190. if ($columnLetter > $info['lastColumnLetter']) {
  191. $info['lastColumnLetter'] = $columnLetter;
  192. }
  193. }
  194. } elseif ($xml->name == 'c' && $xml->nodeType == \XMLReader::ELEMENT) {
  195. $columnLetter = preg_replace('{[^[:alpha:]]}S', '', $xml->getAttribute('r'));
  196. } elseif ($this->ignoreEmpty && !$nonEmpty && $xml->name == 'v'
  197. && $xml->nodeType == \XMLReader::ELEMENT && trim($xml->readString()) !== '') {
  198. $nonEmpty = true;
  199. }
  200. }
  201. if ($info['lastColumnLetter']) {
  202. $info['totalColumns'] = Format::columnIndexFromString($info['lastColumnLetter']);
  203. $info['lastColumnIndex'] = $info['totalColumns'] - 1;
  204. }
  205. $this->sheets[] = $info;
  206. }
  207. $xml->close();
  208. }
  209. }
  210. return $this->sheets;
  211. }
  212. /**
  213. * Get shared string
  214. *
  215. * @param int $position
  216. * @return string
  217. */
  218. protected function getSharedString($position) {
  219. $value = '';
  220. $file = 'xl/sharedStrings.xml';
  221. if ($this->sharedStringsXML === null) {
  222. $this->sharedStringsXML = new \XMLReader();
  223. $this->zip->extractTo($this->tmpDir, $file);
  224. }
  225. if ($this->sharedStringsPosition < 0 || $position < $this->sharedStringsPosition) {
  226. $this->sharedStringsXML->open($this->tmpDir . '/' . $file, null, self::getLibXmlLoaderOptions());
  227. $this->sharedStringsPosition = -1;
  228. }
  229. while ($this->sharedStringsXML->read()) {
  230. $name = $this->sharedStringsXML->name;
  231. $nodeType = $this->sharedStringsXML->nodeType;
  232. if ($name == 'si') {
  233. if ($nodeType == \XMLReader::ELEMENT) {
  234. $this->sharedStringsPosition++;
  235. } elseif ($position == $this->sharedStringsPosition && $nodeType == \XMLReader::END_ELEMENT) {
  236. break;
  237. }
  238. } elseif ($name == 't' && $position == $this->sharedStringsPosition && $nodeType == \XMLReader::ELEMENT) {
  239. $value .= trim($this->sharedStringsXML->readString());
  240. $this->_sharedStrings[$position] = $value;
  241. }
  242. }
  243. if(strlen($value) == 0 && array_key_exists($position, $this->_sharedStrings)){
  244. $value .= $this->_sharedStrings[$position];
  245. }
  246. return $value;
  247. }
  248. /**
  249. * Parse styles info
  250. *
  251. * @throws ReaderException
  252. */
  253. protected function parseStyles() {
  254. if ($this->styleXfs === null) {
  255. $stylesXML = simplexml_load_string(
  256. $this->securityScan($this->zip->getFromName('xl/styles.xml')), 'SimpleXMLElement', self::getLibXmlLoaderOptions()
  257. );
  258. $this->styleXfs = $this->formats = [];
  259. if ($stylesXML) {
  260. if (isset($stylesXML->cellXfs->xf) && $stylesXML->cellXfs->xf) {
  261. foreach ($stylesXML->cellXfs->xf as $xf) {
  262. $numFmtId = isset($xf['numFmtId']) ? (int)$xf['numFmtId'] : 0;
  263. if (isset($xf['applyNumberFormat']) || $numFmtId == 0) {
  264. // If format ID >= 164, it is a custom format and should be read from styleSheet\numFmts
  265. $this->styleXfs[] = $numFmtId;
  266. } else {
  267. // 0 for "General" format
  268. $this->styleXfs[] = Format::FORMAT_GENERAL;
  269. }
  270. }
  271. }
  272. if (isset($stylesXML->numFmts->numFmt) && $stylesXML->numFmts->numFmt) {
  273. foreach ($stylesXML->numFmts->numFmt as $numFmt) {
  274. if (isset($numFmt['numFmtId'], $numFmt['formatCode'])) {
  275. $this->formats[(int)$numFmt['numFmtId']] = (string)$numFmt['formatCode'];
  276. }
  277. }
  278. }
  279. }
  280. }
  281. }
  282. /**
  283. * Get worksheet XMLReader
  284. */
  285. protected function getWorksheetXML() {
  286. if ($this->worksheetXML === null) {
  287. $this->worksheetXML = new \XMLReader();
  288. }
  289. $this->worksheetXML->open(
  290. $this->tmpDir . '/xl/worksheets/sheet' . ($this->getSheetIndex() + 1) . '.xml',
  291. null, self::getLibXmlLoaderOptions()
  292. );
  293. }
  294. /**
  295. * Get row data
  296. *
  297. * @param int $rowIndex
  298. * @param int $columnLimit
  299. *
  300. * @throws ReaderException
  301. * @return array|bool
  302. */
  303. public function getRow($rowIndex, $columnLimit = 0) {
  304. $this->parseStyles();
  305. $rowIndex === 0 && $this->getWorksheetXML();
  306. $sharedString = false;
  307. $index = $styleId = 0;
  308. $row = $columnLimit ? array_fill(0, $columnLimit, '') : [];
  309. while ($canRead = $this->worksheetXML->read()) {
  310. $name = $this->worksheetXML->name;
  311. $type = $this->worksheetXML->nodeType;
  312. // End of row
  313. if ($name == 'row') {
  314. if (!$this->ignoreEmpty && $type == \XMLReader::ELEMENT
  315. && $rowIndex+1 != (int)$this->worksheetXML->getAttribute('r')) {
  316. $this->worksheetXML->moveToElement();
  317. break;
  318. }
  319. if ($type == \XMLReader::END_ELEMENT) {
  320. break;
  321. }
  322. }
  323. if ($columnLimit > 0 && $index >= $columnLimit) {
  324. continue;
  325. }
  326. switch ($name) {
  327. // Cell
  328. case 'c':
  329. if ($type == \XMLReader::END_ELEMENT) {
  330. continue;
  331. }
  332. $styleId = (int)$this->worksheetXML->getAttribute('s');
  333. $letter = preg_replace('{[^[:alpha:]]}S', '', $this->worksheetXML->getAttribute('r'));
  334. $index = Format::columnIndexFromString($letter) - 1;
  335. // Determine cell type
  336. $sharedString = false;
  337. if ($this->worksheetXML->getAttribute('t') == self::CELL_TYPE_SHARED_STR) {
  338. $sharedString = true;
  339. }
  340. break;
  341. // Cell value
  342. case 'v':
  343. case 'is':
  344. if ($type == \XMLReader::END_ELEMENT) {
  345. continue;
  346. }
  347. $value = $this->worksheetXML->readString();
  348. if ($sharedString) {
  349. $value = $this->getSharedString($value);
  350. }
  351. // Format value if necessary
  352. if ($value !== '' && $styleId && isset($this->styleXfs[$styleId])) {
  353. $value = $this->formatValue($value, $styleId);
  354. } elseif ($value && is_numeric($value)) {
  355. $value = (float)$value;
  356. }
  357. $row[$index] = $value;
  358. break;
  359. }
  360. }
  361. if ($canRead === false) {
  362. return false;
  363. }
  364. return $row;
  365. }
  366. /**
  367. * Close ZipArchive、XMLReader and remove temp dir
  368. */
  369. public function __destruct() {
  370. if ($this->zip && $this->tmpDir) {
  371. $this->zip->close();
  372. }
  373. if ($this->worksheetXML) {
  374. $this->worksheetXML->close();
  375. }
  376. if ($this->sharedStringsXML) {
  377. $this->sharedStringsXML->close();
  378. }
  379. $this->removeDir($this->tmpDir);
  380. $this->zip = null;
  381. $this->worksheetXML = null;
  382. $this->sharedStringsXML = null;
  383. $this->tmpDir = null;
  384. }
  385. /**
  386. * Remove dir
  387. *
  388. * @param string $dir
  389. */
  390. protected function removeDir($dir) {
  391. if($dir && is_dir($dir)) {
  392. $handle = opendir($dir);
  393. while($item = readdir($handle)) {
  394. if ($item != '.' && $item != '..') {
  395. is_file($item = $dir . '/' . $item) ? unlink($item) : $this->removeDir($item);
  396. }
  397. }
  398. closedir($handle);
  399. rmdir($dir);
  400. }
  401. }
  402. /**
  403. * Formats the value according to the index
  404. *
  405. * @param string $value
  406. * @param int $index Format index
  407. *
  408. * @throws \Exception
  409. * @return string Formatted cell value
  410. */
  411. private function formatValue($value, $index) {
  412. if (!is_numeric($value)) {
  413. return $value;
  414. }
  415. if (isset($this->styleXfs[$index]) && $this->styleXfs[$index] !== false) {
  416. $index = $this->styleXfs[$index];
  417. } else {
  418. return $value;
  419. }
  420. // A special case for the "General" format
  421. if ($index == 0) {
  422. return is_numeric($value) ? (float)$value : $value;
  423. }
  424. $format = $this->parsedFormats[$index] ?? [];
  425. if (empty($format)) {
  426. $format = [
  427. 'code' => false, 'type' => false, 'scale' => 1, 'thousands' => false, 'currency' => false
  428. ];
  429. if (isset(Format::$buildInFormats[$index])) {
  430. $format['code'] = Format::$buildInFormats[$index];
  431. } elseif (isset($this->formats[$index])) {
  432. $format['code'] = str_replace('"', '', $this->formats[$index]);
  433. }
  434. // Format code found, now parsing the format
  435. if ($format['code']) {
  436. $sections = explode(';', $format['code']);
  437. $format['code'] = $sections[0];
  438. switch (count($sections)) {
  439. case 2:
  440. if ($value < 0) {
  441. $format['code'] = $sections[1];
  442. }
  443. $value = abs($value);
  444. break;
  445. case 3:
  446. case 4:
  447. if ($value < 0) {
  448. $format['code'] = $sections[1];
  449. } elseif ($value == 0) {
  450. $format['code'] = $sections[2];
  451. }
  452. $value = abs($value);
  453. break;
  454. }
  455. }
  456. // Stripping colors
  457. $format['code'] = trim(preg_replace('/^\\[[a-zA-Z]+\\]/', '', $format['code']));
  458. // Percentages
  459. if (substr($format['code'], -1) == '%') {
  460. $format['type'] = 'Percentage';
  461. } elseif (preg_match('/(\[\$[A-Z]*-[0-9A-F]*\])*[hmsdy]/i', $format['code'])) {
  462. $format['type'] = 'DateTime';
  463. $format['code'] = trim(preg_replace('/^(\[\$[A-Z]*-[0-9A-F]*\])/i', '', $format['code']));
  464. $format['code'] = strtolower($format['code']);
  465. $format['code'] = strtr($format['code'], Format::$dateFormatReplacements);
  466. if (strpos($format['code'], 'A') === false) {
  467. $format['code'] = strtr($format['code'], Format::$dateFormatReplacements24);
  468. } else {
  469. $format['code'] = strtr($format['code'], Format::$dateFormatReplacements12);
  470. }
  471. } elseif ($format['code'] == '[$EUR ]#,##0.00_-') {
  472. $format['type'] = 'Euro';
  473. } else {
  474. // Removing skipped characters
  475. $format['code'] = preg_replace('/_./', '', $format['code']);
  476. // Removing unnecessary escaping
  477. $format['code'] = preg_replace("/\\\\/", '', $format['code']);
  478. // Removing string quotes
  479. $format['code'] = str_replace(['"', '*'], '', $format['code']);
  480. // Removing thousands separator
  481. if (strpos($format['code'], '0,0') !== false || strpos($format['code'], '#,#') !== false) {
  482. $format['thousands'] = true;
  483. }
  484. $format['code'] = str_replace(['0,0', '#,#'], ['00', '##'], $format['code']);
  485. // Scaling (Commas indicate the power)
  486. $scale = 1;
  487. $matches = [];
  488. if (preg_match('/(0|#)(,+)/', $format['code'], $matches)) {
  489. $scale = pow(1000, strlen($matches[2]));
  490. // Removing the commas
  491. $format['code'] = preg_replace(['/0,+/', '/#,+/'], ['0', '#'], $format['code']);
  492. }
  493. $format['scale'] = $scale;
  494. if (preg_match('/#?.*\?\/\?/', $format['code'])) {
  495. $format['type'] = 'Fraction';
  496. } else {
  497. $format['code'] = str_replace('#', '', $format['code']);
  498. $matches = [];
  499. if (preg_match('/(0+)(\.?)(0*)/', preg_replace('/\[[^\]]+\]/', '', $format['code']), $matches)) {
  500. list(, $integer, $decimalPoint, $decimal) = $matches;
  501. $format['minWidth'] = strlen($integer) + strlen($decimalPoint) + strlen($decimal);
  502. $format['decimals'] = $decimal;
  503. $format['precision'] = strlen($format['decimals']);
  504. $format['pattern'] = '%0' . $format['minWidth'] . '.' . $format['precision'] . 'f';
  505. }
  506. }
  507. $matches = [];
  508. if (preg_match('/\[\$(.*)\]/u', $format['code'], $matches)) {
  509. $currencyCode = explode('-', $matches[1]);
  510. if ($currencyCode) {
  511. $currencyCode = $currencyCode[0];
  512. }
  513. if (!$currencyCode) {
  514. $currencyCode = self::$currencyCode;
  515. }
  516. $format['currency'] = $currencyCode;
  517. }
  518. $format['code'] = trim($format['code']);
  519. }
  520. $this->parsedFormats[$index] = $format;
  521. }
  522. // Applying format to value
  523. if ($format) {
  524. if ($format['code'] == '@') {
  525. return (string)$value;
  526. } elseif ($format['type'] == 'Percentage') { // Percentages
  527. if ($format['code'] === '0%') {
  528. $value = round(100*$value, 0) . '%';
  529. } else {
  530. $value = sprintf('%.2f%%', round(100*$value, 2));
  531. }
  532. } elseif ($format['type'] == 'DateTime') { // Dates and times
  533. $days = (int)$value;
  534. // Correcting for Feb 29, 1900
  535. if ($days > 60) {
  536. $days--;
  537. }
  538. // At this point time is a fraction of a day
  539. $time = ($value - (int)$value);
  540. // Here time is converted to seconds
  541. // Some loss of precision will occur
  542. $seconds = $time ? (int)($time*86400) : 0;
  543. $value = clone self::$baseDate;
  544. $value->add(new \DateInterval('P' . $days . 'D' . ($seconds ? 'T' . $seconds . 'S' : '')));
  545. $value = $value->format($format['code']);
  546. } elseif ($format['type'] == 'Euro') {
  547. $value = 'EUR ' . sprintf('%1.2f', $value);
  548. } else {
  549. // Fractional numbers
  550. if ($format['type'] == 'Fraction' && ($value != (int)$value)) {
  551. $integer = floor(abs($value));
  552. $decimal = fmod(abs($value), 1);
  553. // Removing the integer part and decimal point
  554. $decimal *= pow(10, strlen($decimal) - 2);
  555. $decimalDivisor = pow(10, strlen($decimal));
  556. if (self::$runtimeInfo['GMPSupported']) {
  557. $GCD = gmp_strval(gmp_gcd($decimal, $decimalDivisor));
  558. } else {
  559. $GCD = self::GCD($decimal, $decimalDivisor);
  560. }
  561. $adjDecimal = $decimal/$GCD;
  562. $adjDecimalDivisor = $decimalDivisor/$GCD;
  563. if (strpos($format['code'], '0') !== false || strpos($format['code'], '#') !== false
  564. || substr($format['code'], 0, 3) == '? ?') {
  565. // The integer part is shown separately apart from the fraction
  566. $value = ($value < 0 ? '-' : '') . $integer ? $integer . ' '
  567. : '' . $adjDecimal . '/' . $adjDecimalDivisor;
  568. } else {
  569. // The fraction includes the integer part
  570. $adjDecimal += $integer * $adjDecimalDivisor;
  571. $value = ($value < 0 ? '-' : '') . $adjDecimal . '/' . $adjDecimalDivisor;
  572. }
  573. } else {
  574. // Scaling
  575. $value = $value/$format['scale'];
  576. if (!empty($format['minWidth']) && $format['decimals']) {
  577. if ($format['thousands']) {
  578. $value = number_format(
  579. $value, $format['precision'], self::$decimalSeparator, self::$thousandSeparator
  580. );
  581. $value = preg_replace('/(0+)(\.?)(0*)/', $value, $format['code']);
  582. } else {
  583. if (preg_match('/[0#]E[+-]0/i', $format['code'])) {
  584. // Scientific format
  585. $value = sprintf('%5.2E', $value);
  586. } else {
  587. $value = sprintf($format['pattern'], $value);
  588. $value = preg_replace('/(0+)(\.?)(0*)/', $value, $format['code']);
  589. }
  590. }
  591. }
  592. }
  593. // currency/Accounting
  594. if ($format['currency']) {
  595. $value = preg_replace('', $format['currency'], $value);
  596. }
  597. }
  598. }
  599. return $value;
  600. }
  601. /**
  602. * Greatest common divisor calculation in case GMP extension is not enabled
  603. *
  604. * @param int $number1
  605. * @param int $number2
  606. *
  607. * @return int
  608. */
  609. private static function GCD($number1, $number2) {
  610. $number1 = abs($number1);
  611. $number2 = abs($number2);
  612. if ($number1 + $number2 == 0) {
  613. return 0;
  614. }
  615. $number = 1;
  616. while ($number1 > 0) {
  617. $number = $number1;
  618. $number1 = $number2 % $number1;
  619. $number2 = $number;
  620. }
  621. return $number;
  622. }
  623. /**
  624. * Open file for reading
  625. *
  626. * @param string $file
  627. *
  628. * @throws ParserException|ReaderException
  629. */
  630. public function openFile($file) {
  631. // Check if file exists
  632. if (!file_exists($file) || !is_readable($file)) {
  633. throw new ReaderException("Could not open file [$file] for reading! File does not exist.");
  634. }
  635. $this->zip = new \ZipArchive();
  636. $xl = false;
  637. if ($this->zip->open($file) === true) {
  638. $this->tmpDir = sys_get_temp_dir() . '/' . uniqid();
  639. // check if it is an OOXML archive
  640. $rels = simplexml_load_string(
  641. $this->securityScan($this->zip->getFromName('_rels/.rels')),
  642. 'SimpleXMLElement', self::getLibXmlLoaderOptions()
  643. );
  644. if ($rels !== false) {
  645. foreach ($rels->Relationship as $rel) {
  646. switch ($rel["Type"]) {
  647. case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument":
  648. if ($rel["Target"] == 'xl/workbook.xml') {
  649. $xl = true;
  650. }
  651. break;
  652. }
  653. }
  654. }
  655. }
  656. if ($xl === false) {
  657. throw new ParserException("The file [$file] is not recognised as a zip archive");
  658. }
  659. }
  660. /**
  661. * Scan theXML for use of <!ENTITY to prevent XXE/XEE attacks
  662. *
  663. * @param string $xml
  664. *
  665. * @throws ReaderException
  666. * @return string
  667. */
  668. protected function securityScan($xml) {
  669. $pattern = sprintf('/\\0?%s\\0?/', implode('\\0?', str_split('<!DOCTYPE')));
  670. if (preg_match($pattern, $xml)) {
  671. throw new ReaderException(
  672. 'Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks'
  673. );
  674. }
  675. return $xml;
  676. }
  677. /**
  678. * Set default options for libxml loader
  679. *
  680. * @param int $options Default options for libxml loader
  681. */
  682. public static function setLibXmlLoaderOptions($options = null) {
  683. if (is_null($options) && defined(LIBXML_DTDLOAD)) {
  684. $options = LIBXML_DTDLOAD | LIBXML_DTDATTR;
  685. }
  686. if (version_compare(PHP_VERSION, '5.2.11') >= 0) {
  687. @libxml_disable_entity_loader($options == (LIBXML_DTDLOAD | LIBXML_DTDATTR));
  688. }
  689. self::$libXmlLoaderOptions = $options;
  690. }
  691. /**
  692. * Get default options for libxml loader.
  693. * Defaults to LIBXML_DTDLOAD | LIBXML_DTDATTR when not set explicitly.
  694. *
  695. * @return int Default options for libxml loader
  696. */
  697. public static function getLibXmlLoaderOptions() {
  698. if (is_null(self::$libXmlLoaderOptions) && defined(LIBXML_DTDLOAD)) {
  699. self::setLibXmlLoaderOptions(LIBXML_DTDLOAD | LIBXML_DTDATTR);
  700. }
  701. if (version_compare(PHP_VERSION, '5.2.11') >= 0) {
  702. @libxml_disable_entity_loader(self::$libXmlLoaderOptions == (LIBXML_DTDLOAD | LIBXML_DTDATTR));
  703. }
  704. return self::$libXmlLoaderOptions;
  705. }
  706. }