SpreadsheetReader_CSV.php 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. <?php
  /**
   * Row-by-row reader for delimiter-separated text (CSV) files.
   *
   * The file is exposed as a single "sheet" and is read lazily through the
   * Iterator interface: each row is an array of cell strings converted to UTF-8.
   * A byte-order mark at the start of the file, when present, is skipped and
   * overrides the encoding passed to the constructor.
   */
  class SpreadsheetReader_CSV implements Iterator, Countable {
      /**
       * Parsing options.
       *  - 'Delimiter': field separator; an empty value requests auto-detection
       *                 from the first row (semicolon, tab or comma).
       *  - 'Enclosure': field enclosure character.
       * @var array
       */
      private $options = array(
          'Delimiter' => ';',
          'Enclosure' => '"'
      );

      /** @var string Source encoding of the file (constructor argument or BOM-detected). */
      private $encoding = 'UTF-8';

      /** @var int Number of BOM bytes at the start of the file (0 when there is no BOM). */
      private $bomLength = 0;

      /** @var string Path of the file being read. */
      private $filePath = '';

      /** @var resource|false Open read handle on the file. */
      private $handle = false;

      /** @var int Zero-based index of the current row. */
      private $index = 0;

      /** @var array|false|null Current row; null until the first row has been read. */
      private $currentRow = null;

      /** @var int|null Cached result of count(). */
      private $rowCount = null;

      /**
       * Opens the file, detects a byte-order mark and, if requested, the delimiter.
       *
       * @param string     $filePath Path to the CSV file.
       * @param array|null $options  Overrides for the 'Delimiter' / 'Enclosure' options.
       * @param string     $encoding Source encoding; ignored when the file carries a BOM.
       *
       * @throws Exception When the file cannot be read or opened.
       */
      public function __construct($filePath, $options = null, $encoding = '') {
          if ( ! is_readable($filePath)) {
              throw new Exception('SpreadsheetReader_CSV: File not readable (' . $filePath . ')');
          }
          $this->filePath = $filePath;

          // Ask PHP to recognise CR-only line endings. The directive is deprecated
          // as of PHP 8.1, so any notice it raises is suppressed.
          @ini_set('auto_detect_line_endings', true);

          // $options defaults to null, which array_merge() must never receive.
          if (is_array($options)) {
              $this->options = array_merge($this->options, $options);
          }
          if ($encoding) {
              $this->encoding = $encoding;
          }

          $this->handle = fopen($filePath, 'r');
          if ($this->handle === false) {
              throw new Exception('SpreadsheetReader_CSV: File not readable (' . $filePath . ')');
          }

          // The real content starts right after the BOM (offset 0 when there is none).
          $this->detectBom();
          fseek($this->handle, $this->bomLength);

          if ( ! $this->options['Delimiter']) {
              $this->options['Delimiter'] = $this->detectDelimiter();
          }
      }

      /**
       * Releases the file handle.
       */
      public function __destruct() {
          if (is_resource($this->handle)) {
              fclose($this->handle);
          }
      }

      /**
       * Inspects the first bytes of the file for a byte-order mark and, when one
       * is found, records its length and the encoding it implies.
       */
      private function detectBom() {
          // Longest signatures first: the UTF-32LE mark begins with the same two
          // bytes as the UTF-16LE mark and would otherwise never be recognised.
          $signatures = array(
              array("\x00\x00\xFE\xFF", 'UTF-32BE'),
              array("\xFF\xFE\x00\x00", 'UTF-32LE'),
              array("\xEF\xBB\xBF", 'UTF-8'),
              array("\xFE\xFF", 'UTF-16BE'),
              array("\xFF\xFE", 'UTF-16LE'),
          );

          fseek($this->handle, 0);
          $head = (string) fread($this->handle, 4);

          foreach ($signatures as $signature) {
              $length = strlen($signature[0]);
              if (strncmp($head, $signature[0], $length) === 0) {
                  $this->encoding = $signature[1];
                  $this->bomLength = $length;
                  return;
              }
          }
      }

      /**
       * Guesses the delimiter by parsing the first row with each candidate and
       * preferring the one that yields more columns; semicolon is the fallback.
       *
       * @return string Single-byte delimiter (fgetcsv() needs single-byte separators).
       */
      private function detectDelimiter() {
          $semicolonCount = $this->countFirstRowColumns(';');
          $tabCount = $this->countFirstRowColumns("\t");
          $commaCount = $this->countFirstRowColumns(',');

          if ($tabCount > $semicolonCount || $commaCount > $semicolonCount) {
              return $commaCount > $tabCount ? ',' : "\t";
          }
          return ';';
      }

      /**
       * Counts the columns of the first row when split on the given delimiter.
       * The file pointer is left at the start of the content.
       *
       * @param string $delimiter Candidate delimiter.
       * @return int Column count, or 0 when the file has no content.
       */
      private function countFirstRowColumns($delimiter) {
          fseek($this->handle, $this->bomLength);
          $row = $this->readRow($delimiter);
          fseek($this->handle, $this->bomLength);

          return is_array($row) ? count($row) : 0;
      }

      /**
       * Reads one CSV row from the current file position.
       *
       * @param string $delimiter Field separator to split on.
       * @return array|false Parsed cells, or false when no row could be read.
       */
      private function readRow($delimiter) {
          // Length 0 means "no line length limit"; the escape character is passed
          // explicitly because relying on its default is deprecated in PHP 8.4.
          return fgetcsv($this->handle, 0, $delimiter, $this->options['Enclosure'], "\\");
      }

      /**
       * Returns information about sheets in the file.
       * A CSV file always has exactly one sheet, named after the file.
       *
       * @return array
       */
      public function Sheets() {
          return array(0 => basename($this->filePath));
      }

      /**
       * Changes sheet to another. Only sheet 0 exists; selecting it rewinds the reader.
       *
       * @param int $index Sheet index.
       * @return bool True when the sheet exists, false otherwise.
       */
      public function ChangeSheet($index) {
          if ($index == 0) {
              $this->rewind();
              return true;
          }
          return false;
      }

      /**
       * Rewind the Iterator to the first element.
       * The first row is not read here; it is loaded on demand by current().
       */
      public function rewind() {
          // Start after the BOM so its bytes do not end up in the first cell.
          fseek($this->handle, $this->bomLength);
          $this->currentRow = null;
          $this->index = 0;
      }

      /**
       * Return the current element, loading the first row if nothing was read yet.
       *
       * @return mixed Array of cells, or false when there is no row.
       */
      public function current() {
          if ($this->index == 0 && is_null($this->currentRow)) {
              $this->rewind();
              $this->next();
              $this->index = 0;
          }
          return $this->currentRow;
      }

      /**
       * Move forward to next element.
       *
       * @return mixed The row that was read (array of cells), or false at end of file.
       */
      public function next() {
          $this->currentRow = array();

          if ($this->encoding == 'UTF-16LE' || $this->encoding == 'UTF-16BE') {
              $this->skipUtf16LineBreaks();
          }

          $this->index++;
          $this->currentRow = $this->readRow($this->options['Delimiter']);

          if ($this->currentRow && $this->encoding != 'ASCII' && $this->encoding != 'UTF-8') {
              foreach ($this->currentRow as $key => $value) {
                  // A blank line yields a single null cell, hence the string cast.
                  $this->currentRow[$key] = trim(trim(
                      mb_convert_encoding((string) $value, 'UTF-8', $this->encoding),
                      $this->options['Enclosure']
                  ));
              }
          }
          return $this->currentRow;
      }

      /**
       * Skips NUL, LF and CR bytes left over between rows of a UTF-16 file and
       * positions the pointer at the start of the next significant character.
       *
       * NOTE(review): this is a byte-wise heuristic; for UTF-16BE it steps back two
       * bytes, which presumes the first significant character has a zero high byte.
       */
      private function skipUtf16LineBreaks() {
          while ( ! feof($this->handle)) {
              $byte = fgetc($this->handle);
              if ($byte === false) {
                  break;
              }

              $code = ord($byte);
              if ( ! $code || $code == 10 || $code == 13) {
                  continue; // Insignificant byte, keep skipping
              }

              // Significant byte found: step back to the start of its code unit.
              $stepBack = $this->encoding == 'UTF-16LE' ? 1 : 2;
              fseek($this->handle, -$stepBack, SEEK_CUR);
              break;
          }
      }

      /**
       * Return the identifying key of the current element.
       *
       * @return mixed Zero-based row index.
       */
      public function key() {
          return $this->index;
      }

      /**
       * Check if there is a current element after calls to rewind() or next().
       *
       * @return bool
       */
      public function valid() {
          if (is_null($this->currentRow)) {
              // Nothing has been read yet: peek one byte (without consuming it) so
              // that a file with no content is reported as having no rows.
              $position = ftell($this->handle);
              $hasContent = fgetc($this->handle) !== false;
              fseek($this->handle, $position);
              return $hasContent;
          }
          return ($this->currentRow || ! feof($this->handle));
      }

      /**
       * Return the count of the contained items. The result is cached, and the
       * read position of an ongoing iteration is preserved.
       *
       * @return int Number of rows in the file.
       */
      public function count() {
          if (is_null($this->rowCount)) {
              $wasAtEnd = feof($this->handle);
              $position = ftell($this->handle);

              $total = 0;
              fseek($this->handle, $this->bomLength);
              while (is_array($this->readRow($this->options['Delimiter']))) {
                  $total++;
              }
              $this->rowCount = $total;

              // Seeking clears the end-of-file flag that valid() relies on, so the
              // pointer is only restored when the iteration had not finished yet.
              if ( ! $wasAtEnd) {
                  fseek($this->handle, $position);
              }
          }
          return $this->rowCount;
      }
  }