SpreadsheetReader_ODS.php 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. <?php
  2. class SpreadsheetReader_ODS implements Iterator, Countable {
  3. private $Options = array(
  4. 'TempDir' => '',
  5. 'ReturnDateTimeObjects' => false
  6. );
  7. /**
  8. * @var string Path to temporary content file
  9. */
  10. private $ContentPath = '';
  11. /**
  12. * @var XMLReader XML reader object
  13. */
  14. private $Content = false;
  15. /**
  16. * @var array Data about separate sheets in the file
  17. */
  18. private $Sheets = false;
  19. private $CurrentRow = null;
  20. /**
  21. * @var int Number of the sheet we're currently reading
  22. */
  23. private $CurrentSheet = 0;
  24. private $Index = 0;
  25. private $TableOpen = false;
  26. private $RowOpen = false;
  27. /**
  28. * @param string Path to file
  29. * @param array Options:
  30. * TempDir => string Temporary directory path
  31. * ReturnDateTimeObjects => bool True => dates and times will be returned as PHP DateTime objects, false => as strings
  32. */
  33. public function __construct($Filepath, array $Options = null)
  34. {
  35. if (!is_readable($Filepath))
  36. {
  37. throw new Exception('SpreadsheetReader_ODS: File not readable ('.$Filepath.')');
  38. }
  39. $this -> TempDir = isset($Options['TempDir']) && is_writable($Options['TempDir']) ?
  40. $Options['TempDir'] :
  41. sys_get_temp_dir();
  42. $this -> TempDir = rtrim($this -> TempDir, DIRECTORY_SEPARATOR);
  43. $this -> TempDir = $this -> TempDir.DIRECTORY_SEPARATOR.uniqid().DIRECTORY_SEPARATOR;
  44. $Zip = new ZipArchive;
  45. $Status = $Zip -> open($Filepath);
  46. if ($Status !== true)
  47. {
  48. throw new Exception('SpreadsheetReader_ODS: File not readable ('.$Filepath.') (Error '.$Status.')');
  49. }
  50. if ($Zip -> locateName('content.xml') !== false)
  51. {
  52. $Zip -> extractTo($this -> TempDir, 'content.xml');
  53. $this -> ContentPath = $this -> TempDir.'content.xml';
  54. }
  55. $Zip -> close();
  56. if ($this -> ContentPath && is_readable($this -> ContentPath))
  57. {
  58. $this -> Content = new XMLReader;
  59. $this -> Content -> open($this -> ContentPath);
  60. $this -> Valid = true;
  61. }
  62. }
  63. /**
  64. * Destructor, destroys all that remains (closes and deletes temp files)
  65. */
  66. public function __destruct()
  67. {
  68. if ($this -> Content && $this -> Content instanceof XMLReader)
  69. {
  70. $this -> Content -> close();
  71. unset($this -> Content);
  72. }
  73. if (file_exists($this -> ContentPath))
  74. {
  75. @unlink($this -> ContentPath);
  76. unset($this -> ContentPath);
  77. }
  78. }
  79. /**
  80. * Retrieves an array with information about sheets in the current file
  81. *
  82. * @return array List of sheets (key is sheet index, value is name)
  83. */
  84. public function Sheets()
  85. {
  86. if ($this -> Sheets === false)
  87. {
  88. $this -> Sheets = array();
  89. if ($this -> Valid)
  90. {
  91. $this -> SheetReader = new XMLReader;
  92. $this -> SheetReader -> open($this -> ContentPath);
  93. while ($this -> SheetReader -> read())
  94. {
  95. if ($this -> SheetReader -> name == 'table:table')
  96. {
  97. $this -> Sheets[] = $this -> SheetReader -> getAttribute('table:name');
  98. $this -> SheetReader -> next();
  99. }
  100. }
  101. $this -> SheetReader -> close();
  102. }
  103. }
  104. return $this -> Sheets;
  105. }
  106. /**
  107. * Changes the current sheet in the file to another
  108. *
  109. * @param int Sheet index
  110. *
  111. * @return bool True if sheet was successfully changed, false otherwise.
  112. */
  113. public function ChangeSheet($Index)
  114. {
  115. $Index = (int)$Index;
  116. $Sheets = $this -> Sheets();
  117. if (isset($Sheets[$Index]))
  118. {
  119. $this -> CurrentSheet = $Index;
  120. $this -> rewind();
  121. return true;
  122. }
  123. return false;
  124. }
  125. // !Iterator interface methods
  126. /**
  127. * Rewind the Iterator to the first element.
  128. * Similar to the reset() function for arrays in PHP
  129. */
  130. public function rewind()
  131. {
  132. if ($this -> Index > 0)
  133. {
  134. // If the worksheet was already iterated, XML file is reopened.
  135. // Otherwise it should be at the beginning anyway
  136. $this -> Content -> close();
  137. $this -> Content -> open($this -> ContentPath);
  138. $this -> Valid = true;
  139. $this -> TableOpen = false;
  140. $this -> RowOpen = false;
  141. $this -> CurrentRow = null;
  142. }
  143. $this -> Index = 0;
  144. }
  145. /**
  146. * Return the current element.
  147. * Similar to the current() function for arrays in PHP
  148. *
  149. * @return mixed current element from the collection
  150. */
  151. public function current()
  152. {
  153. if ($this -> Index == 0 && is_null($this -> CurrentRow))
  154. {
  155. $this -> next();
  156. $this -> Index--;
  157. }
  158. return $this -> CurrentRow;
  159. }
  160. /**
  161. * Move forward to next element.
  162. * Similar to the next() function for arrays in PHP
  163. */
  164. public function next()
  165. {
  166. $this -> Index++;
  167. $this -> CurrentRow = array();
  168. if (!$this -> TableOpen)
  169. {
  170. $TableCounter = 0;
  171. $SkipRead = false;
  172. while ($this -> Valid = ($SkipRead || $this -> Content -> read()))
  173. {
  174. if ($SkipRead)
  175. {
  176. $SkipRead = false;
  177. }
  178. if ($this -> Content -> name == 'table:table' && $this -> Content -> nodeType != XMLReader::END_ELEMENT)
  179. {
  180. if ($TableCounter == $this -> CurrentSheet)
  181. {
  182. $this -> TableOpen = true;
  183. break;
  184. }
  185. $TableCounter++;
  186. $this -> Content -> next();
  187. $SkipRead = true;
  188. }
  189. }
  190. }
  191. if ($this -> TableOpen && !$this -> RowOpen)
  192. {
  193. while ($this -> Valid = $this -> Content -> read())
  194. {
  195. switch ($this -> Content -> name)
  196. {
  197. case 'table:table':
  198. $this -> TableOpen = false;
  199. $this -> Content -> next('office:document-content');
  200. $this -> Valid = false;
  201. break 2;
  202. case 'table:table-row':
  203. if ($this -> Content -> nodeType != XMLReader::END_ELEMENT)
  204. {
  205. $this -> RowOpen = true;
  206. break 2;
  207. }
  208. break;
  209. }
  210. }
  211. }
  212. if ($this -> RowOpen)
  213. {
  214. $LastCellContent = '';
  215. while ($this -> Valid = $this -> Content -> read())
  216. {
  217. switch ($this -> Content -> name)
  218. {
  219. case 'table:table-cell':
  220. if ($this -> Content -> nodeType == XMLReader::END_ELEMENT || $this -> Content -> isEmptyElement)
  221. {
  222. if ($this -> Content -> nodeType == XMLReader::END_ELEMENT)
  223. {
  224. $CellValue = $LastCellContent;
  225. }
  226. elseif ($this -> Content -> isEmptyElement)
  227. {
  228. $LastCellContent = '';
  229. $CellValue = $LastCellContent;
  230. }
  231. $this -> CurrentRow[] = $LastCellContent;
  232. if ($this -> Content -> getAttribute('table:number-columns-repeated') !== null)
  233. {
  234. $RepeatedColumnCount = $this -> Content -> getAttribute('table:number-columns-repeated');
  235. // Checking if larger than one because the value is already added to the row once before
  236. if ($RepeatedColumnCount > 1)
  237. {
  238. $this -> CurrentRow = array_pad($this -> CurrentRow, count($this -> CurrentRow) + $RepeatedColumnCount - 1, $LastCellContent);
  239. }
  240. }
  241. }
  242. else
  243. {
  244. $LastCellContent = '';
  245. }
  246. case 'text:p':
  247. if ($this -> Content -> nodeType != XMLReader::END_ELEMENT)
  248. {
  249. $LastCellContent = $this -> Content -> readString();
  250. }
  251. break;
  252. case 'table:table-row':
  253. $this -> RowOpen = false;
  254. break 2;
  255. }
  256. }
  257. }
  258. return $this -> CurrentRow;
  259. }
  260. /**
  261. * Return the identifying key of the current element.
  262. * Similar to the key() function for arrays in PHP
  263. *
  264. * @return mixed either an integer or a string
  265. */
  266. public function key()
  267. {
  268. return $this -> Index;
  269. }
  270. /**
  271. * Check if there is a current element after calls to rewind() or next().
  272. * Used to check if we've iterated to the end of the collection
  273. *
  274. * @return boolean FALSE if there's nothing more to iterate over
  275. */
  276. public function valid()
  277. {
  278. return $this -> Valid;
  279. }
  280. // !Countable interface method
  281. /**
  282. * Ostensibly should return the count of the contained items but this just returns the number
  283. * of rows read so far. It's not really correct but at least coherent.
  284. */
  285. public function count()
  286. {
  287. return $this -> Index + 1;
  288. }
  289. }