Xlsx.php 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Document
  18. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. */
  21. /** Zend_Search_Lucene_Document_OpenXml */
  22. require_once 'Zend/Search/Lucene/Document/OpenXml.php';
  23. if (class_exists('ZipArchive', false)) {
  /**
   * Xlsx document.
   *
   * Builds a Lucene document from an Office Open XML spreadsheet (.xlsx).
   * The text of every worksheet cell is collected into a single 'body' field,
   * and the package meta data returned by extractMetaData() is added as
   * individual fields.
   *
   * @category   Zend
   * @package    Zend_Search_Lucene
   * @subpackage Document
   * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
   * @license    http://framework.zend.com/license/new-bsd     New BSD License
   */
  class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenXml
  {
      /**
       * Xml Schema - SpreadsheetML
       *
       * @var string
       */
      const SCHEMA_SPREADSHEETML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';

      /**
       * Xml Schema - DrawingML
       *
       * @var string
       */
      const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';

      /**
       * Xml Schema - Shared Strings
       *
       * @var string
       */
      const SCHEMA_SHAREDSTRINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings';

      /**
       * Xml Schema - Worksheet relation
       *
       * @var string
       */
      const SCHEMA_WORKSHEETRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet';

      /**
       * Xml Schema - Slide notes relation
       *
       * Not referenced by this class; kept so existing references to the
       * constant keep working.
       *
       * @var string
       */
      const SCHEMA_SLIDENOTESRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide';

      /**
       * Object constructor
       *
       * Opens the package, extracts the cell contents and the meta data, and
       * populates the 'filename', 'body', meta data and 'title' fields.
       *
       * @param string  $fileName     Path of the .xlsx file to index
       * @param boolean $storeContent Whether the body text is stored in the index
       *                              (Text field) or only indexed (UnStored field)
       * @throws Zend_Search_Lucene_Exception If the archive cannot be opened or
       *                                      lacks the mandatory relationship parts
       */
      private function __construct($fileName, $storeContent)
      {
          // Open OpenXML package; ZipArchive::open() returns true on success
          // and an error code otherwise.
          $package    = new ZipArchive();
          $openResult = $package->open($fileName);
          if ($openResult !== true) {
              throw $this->_createException(
                  "Unable to open Xlsx file '" . $fileName . "' (ZipArchive error code " . $openResult . ').'
              );
          }

          // Make sure the archive handle is released even when extraction fails
          // (no "finally" here: the file targets PHP 5 syntax).
          try {
              $documentBody   = $this->_extractBody($package);
              $coreProperties = $this->extractMetaData($package);
          } catch (Exception $e) {
              $package->close();
              throw $e;
          }

          // Close file
          $package->close();

          // Store filename
          $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));

          // Store contents
          $bodyText = implode(' ', $documentBody);
          if ($storeContent) {
              $this->addField(Zend_Search_Lucene_Field::Text('body', $bodyText, 'UTF-8'));
          } else {
              $this->addField(Zend_Search_Lucene_Field::UnStored('body', $bodyText, 'UTF-8'));
          }

          // Store meta data properties
          foreach ($coreProperties as $key => $value) {
              $this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
          }

          // Store title (if not present in meta data)
          if (!isset($coreProperties['title'])) {
              $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
          }
      }

      /**
       * Collect the values of all cells of all worksheets in the package.
       *
       * Worksheets are visited in the order of their relationship id with the
       * 'rId' prefix removed.
       *
       * @param ZipArchive $package Opened OpenXML package
       * @return array Cell values in reading order
       * @throws Zend_Search_Lucene_Exception If a mandatory relationship part is missing
       */
      private function _extractBody(ZipArchive $package)
      {
          // Read relations and search for officeDocument
          $relations = $this->_loadXml($package, '_rels/.rels');
          if ($relations === false) {
              throw $this->_createException('Invalid archive or corrupted .xlsx file.');
          }

          $sharedStrings = array();
          $worksheets    = array();

          foreach ($relations->Relationship as $rel) {
              if ((string)$rel['Type'] !== Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
                  continue;
              }

              // Found office document! Read relations for workbook...
              $workbookTarget = (string)$rel['Target'];
              $workbookDir    = dirname($workbookTarget);

              $workbookRelations = $this->_loadXml(
                  $package,
                  $this->absoluteZipPath($workbookDir . '/_rels/' . basename($workbookTarget) . '.rels')
              );
              if ($workbookRelations === false) {
                  throw $this->_createException('Corrupted .xlsx file: workbook relationships are missing.');
              }
              $workbookRelations->registerXPathNamespace(
                  'rel',
                  Zend_Search_Lucene_Document_OpenXml::SCHEMA_RELATIONSHIP
              );

              // Read shared strings
              $sharedStrings = $this->_readSharedStrings($package, $workbookRelations, $workbookDir);

              // Loop relations for workbook and extract worksheets...
              foreach ($workbookRelations->Relationship as $workbookRelation) {
                  if ((string)$workbookRelation['Type'] !== self::SCHEMA_WORKSHEETRELATION) {
                      continue;
                  }

                  $sheetTarget = (string)$workbookRelation['Target'];
                  $worksheet   = $this->_loadXml(
                      $package,
                      $this->absoluteZipPath(
                          $workbookDir . '/' . dirname($sheetTarget) . '/' . basename($sheetTarget)
                      )
                  );

                  // A sheet part that is missing or unparsable is skipped
                  // instead of being dereferenced later on.
                  if ($worksheet !== false) {
                      $worksheets[str_replace('rId', '', (string)$workbookRelation['Id'])] = $worksheet;
                  }
              }

              // Only the first office document relationship is processed
              break;
          }

          // Sort worksheets
          ksort($worksheets);

          // Extract contents from worksheets
          $documentBody = array();
          foreach ($worksheets as $worksheet) {
              if (!isset($worksheet->sheetData->row)) {
                  continue;
              }
              foreach ($worksheet->sheetData->row as $row) {
                  foreach ($row->c as $c) {
                      $documentBody[] = $this->_extractCellValue($c, $sharedStrings);
                  }
              }
          }

          return $documentBody;
      }

      /**
       * Read the shared string table referenced by the workbook relations.
       *
       * Exactly one entry is produced per <si> element, so the array index
       * always matches the index stored in cells of type "s".
       *
       * @param ZipArchive       $package           Opened OpenXML package
       * @param SimpleXMLElement $workbookRelations Workbook relations with the "rel" XPath namespace registered
       * @param string           $workbookDir       Directory of the workbook part inside the package
       * @return array Shared strings (empty when the workbook has no string table)
       */
      private function _readSharedStrings(ZipArchive $package, SimpleXMLElement $workbookRelations, $workbookDir)
      {
          $sharedStrings = array();

          // The shared strings relationship is optional.
          $matches = $workbookRelations->xpath(
              "rel:Relationship[@Type='" . self::SCHEMA_SHAREDSTRINGS . "']"
          );
          if (empty($matches)) {
              return $sharedStrings;
          }

          $xmlStrings = $this->_loadXml(
              $package,
              $this->absoluteZipPath($workbookDir . '/' . (string)$matches[0]['Target'])
          );
          if ($xmlStrings === false || !isset($xmlStrings->si)) {
              return $sharedStrings;
          }

          foreach ($xmlStrings->si as $item) {
              // _parseRichText() handles plain <t>, rich text <r> runs and
              // yields '' for an item with neither, keeping the indexes aligned.
              $sharedStrings[] = $this->_parseRichText($item);
          }

          return $sharedStrings;
      }

      /**
       * Convert one worksheet cell into the value that is added to the body.
       *
       * @param SimpleXMLElement $c             Cell (<c>) element
       * @param array            $sharedStrings Shared string table of the workbook
       * @return string|int|float|bool Cell value
       */
      private function _extractCellValue(SimpleXMLElement $c, array $sharedStrings)
      {
          // Determine data type
          $dataType = (string)$c['t'];

          switch ($dataType) {
              case 's':
                  // Value is a shared string index
                  $rawIndex = (string)$c->v;
                  if ($rawIndex === '') {
                      return '';
                  }
                  $index = intval($rawIndex);
                  // An index outside the table yields an empty string
                  return isset($sharedStrings[$index]) ? $sharedStrings[$index] : '';

              case 'b':
                  // Value is boolean
                  $value = (string)$c->v;
                  if ($value == '0') {
                      return false;
                  }
                  if ($value == '1') {
                      return true;
                  }
                  return (bool)$c->v;

              case 'inlineStr':
                  // Value is rich text inline
                  return $this->_parseRichText($c->is);

              case 'e':
                  // Value is an error message ('' when <v> is absent)
                  return (string)$c->v;

              default:
                  // Value is a string; numeric strings are narrowed to int or float
                  $value = (string)$c->v;
                  if (is_numeric($value)) {
                      if ($value == (int)$value) {
                          return (int)$value;
                      }
                      return (float)$value;
                  }
                  return $value;
          }
      }

      /**
       * Load a package part and parse it as XML.
       *
       * NOTE(review): the XML comes straight from the archive; when indexing
       * files from untrusted sources, confirm that external entity loading is
       * disabled in the deployed PHP/libxml.
       *
       * @param ZipArchive $package   Opened OpenXML package
       * @param string     $entryName Name of the entry inside the archive
       * @return SimpleXMLElement|false Parsed document, or false when the entry
       *                                is missing, empty or not well-formed
       */
      private function _loadXml(ZipArchive $package, $entryName)
      {
          $contents = $package->getFromName($entryName);
          if ($contents === false || $contents === '') {
              return false;
          }

          return simplexml_load_string($contents);
      }

      /**
       * Create the exception used to report unreadable or corrupted files.
       *
       * @param string $message Error message
       * @return Zend_Search_Lucene_Exception
       */
      private function _createException($message)
      {
          require_once 'Zend/Search/Lucene/Exception.php';

          return new Zend_Search_Lucene_Exception($message);
      }

      /**
       * Parse rich text XML
       *
       * Returns the text of the <t> child when present, otherwise the
       * concatenated <t> texts of all <r> runs.
       *
       * @param SimpleXMLElement $is Rich text container (<si> or <is> element)
       * @return string Plain text ('' when $is is null or contains no text)
       */
      private function _parseRichText($is = null) {
          if ($is === null) {
              return '';
          }

          if (isset($is->t)) {
              return (string)$is->t;
          }

          $value = array();
          foreach ($is->r as $run) {
              $value[] = (string)$run->t;
          }

          return implode('', $value);
      }

      /**
       * Load Xlsx document from a file
       *
       * @param string  $fileName     Path of the .xlsx file to index
       * @param boolean $storeContent Whether the body text is stored in the index
       * @return Zend_Search_Lucene_Document_Xlsx
       * @throws Zend_Search_Lucene_Exception If the file cannot be opened or is corrupted
       */
      public static function loadXlsxFile($fileName, $storeContent = false)
      {
          return new Zend_Search_Lucene_Document_Xlsx($fileName, $storeContent);
      }
  }
  218. } // end if (class_exists('ZipArchive'))