Xlsx.php 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Document
  18. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /** Zend_Search_Lucene_Document_OpenXml */
  23. require_once 'Zend/Search/Lucene/Document/OpenXml.php';
  24. if (class_exists('ZipArchive', false)) {
  25. /**
  26. * Xlsx document.
  27. *
  28. * @category Zend
  29. * @package Zend_Search_Lucene
  30. * @subpackage Document
  31. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  32. * @license http://framework.zend.com/license/new-bsd New BSD License
  33. */
  34. class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenXml
  35. {
  36. /**
  37. * Xml Schema - SpreadsheetML
  38. *
  39. * @var string
  40. */
  41. const SCHEMA_SPREADSHEETML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
  42. /**
  43. * Xml Schema - DrawingML
  44. *
  45. * @var string
  46. */
  47. const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';
  48. /**
  49. * Xml Schema - Shared Strings
  50. *
  51. * @var string
  52. */
  53. const SCHEMA_SHAREDSTRINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings';
  54. /**
  55. * Xml Schema - Worksheet relation
  56. *
  57. * @var string
  58. */
  59. const SCHEMA_WORKSHEETRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet';
  60. /**
  61. * Xml Schema - Slide notes relation
  62. *
  63. * @var string
  64. */
  65. const SCHEMA_SLIDENOTESRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide';
  66. /**
  67. * Object constructor
  68. *
  69. * @param string $fileName
  70. * @param boolean $storeContent
  71. */
  72. private function __construct($fileName, $storeContent)
  73. {
  74. // Document data holders
  75. $sharedStrings = array();
  76. $worksheets = array();
  77. $documentBody = array();
  78. $coreProperties = array();
  79. // Open OpenXML package
  80. $package = new ZipArchive();
  81. $package->open($fileName);
  82. // Read relations and search for officeDocument
  83. $relations = simplexml_load_string($package->getFromName("_rels/.rels"));
  84. foreach ($relations->Relationship as $rel) {
  85. if ($rel["Type"] == Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
  86. // Found office document! Read relations for workbook...
  87. $workbookRelations = simplexml_load_string($package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/_rels/" . basename($rel["Target"]) . ".rels")) );
  88. $workbookRelations->registerXPathNamespace("rel", Zend_Search_Lucene_Document_OpenXml::SCHEMA_RELATIONSHIP);
  89. // Read shared strings
  90. $sharedStringsPath = $workbookRelations->xpath("rel:Relationship[@Type='" . Zend_Search_Lucene_Document_Xlsx::SCHEMA_SHAREDSTRINGS . "']");
  91. $sharedStringsPath = (string)$sharedStringsPath[0]['Target'];
  92. $xmlStrings = simplexml_load_string($package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . $sharedStringsPath)) );
  93. if (isset($xmlStrings) && isset($xmlStrings->si)) {
  94. foreach ($xmlStrings->si as $val) {
  95. if (isset($val->t)) {
  96. $sharedStrings[] = (string)$val->t;
  97. } elseif (isset($val->r)) {
  98. $sharedStrings[] = $this->_parseRichText($val);
  99. }
  100. }
  101. }
  102. // Loop relations for workbook and extract worksheets...
  103. foreach ($workbookRelations->Relationship as $workbookRelation) {
  104. if ($workbookRelation["Type"] == Zend_Search_Lucene_Document_Xlsx::SCHEMA_WORKSHEETRELATION) {
  105. $worksheets[ str_replace( 'rId', '', (string)$workbookRelation["Id"]) ] = simplexml_load_string(
  106. $package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . dirname($workbookRelation["Target"]) . "/" . basename($workbookRelation["Target"])) )
  107. );
  108. }
  109. }
  110. break;
  111. }
  112. }
  113. // Sort worksheets
  114. ksort($worksheets);
  115. // Extract contents from worksheets
  116. foreach ($worksheets as $sheetKey => $worksheet) {
  117. foreach ($worksheet->sheetData->row as $row) {
  118. foreach ($row->c as $c) {
  119. // Determine data type
  120. $dataType = (string)$c["t"];
  121. switch ($dataType) {
  122. case "s":
  123. // Value is a shared string
  124. if ((string)$c->v != '') {
  125. $value = $sharedStrings[intval($c->v)];
  126. } else {
  127. $value = '';
  128. }
  129. break;
  130. case "b":
  131. // Value is boolean
  132. $value = (string)$c->v;
  133. if ($value == '0') {
  134. $value = false;
  135. } else if ($value == '1') {
  136. $value = true;
  137. } else {
  138. $value = (bool)$c->v;
  139. }
  140. break;
  141. case "inlineStr":
  142. // Value is rich text inline
  143. $value = $this->_parseRichText($c->is);
  144. break;
  145. case "e":
  146. // Value is an error message
  147. if ((string)$c->v != '') {
  148. $value = (string)$c->v;
  149. } else {
  150. $value = '';
  151. }
  152. break;
  153. default:
  154. // Value is a string
  155. $value = (string)$c->v;
  156. // Check for numeric values
  157. if (is_numeric($value) && $dataType != 's') {
  158. if ($value == (int)$value) $value = (int)$value;
  159. elseif ($value == (float)$value) $value = (float)$value;
  160. elseif ($value == (double)$value) $value = (double)$value;
  161. }
  162. }
  163. $documentBody[] = $value;
  164. }
  165. }
  166. }
  167. // Read core properties
  168. $coreProperties = $this->extractMetaData($package);
  169. // Close file
  170. $package->close();
  171. // Store filename
  172. $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));
  173. // Store contents
  174. if ($storeContent) {
  175. $this->addField(Zend_Search_Lucene_Field::Text('body', implode(' ', $documentBody), 'UTF-8'));
  176. } else {
  177. $this->addField(Zend_Search_Lucene_Field::UnStored('body', implode(' ', $documentBody), 'UTF-8'));
  178. }
  179. // Store meta data properties
  180. foreach ($coreProperties as $key => $value)
  181. {
  182. $this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
  183. }
  184. // Store title (if not present in meta data)
  185. if (!isset($coreProperties['title']))
  186. {
  187. $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
  188. }
  189. }
  190. /**
  191. * Parse rich text XML
  192. *
  193. * @param SimpleXMLElement $is
  194. * @return string
  195. */
  196. private function _parseRichText($is = null) {
  197. $value = array();
  198. if (isset($is->t)) {
  199. $value[] = (string)$is->t;
  200. } else {
  201. foreach ($is->r as $run) {
  202. $value[] = (string)$run->t;
  203. }
  204. }
  205. return implode('', $value);
  206. }
  207. /**
  208. * Load Xlsx document from a file
  209. *
  210. * @param string $fileName
  211. * @param boolean $storeContent
  212. * @return Zend_Search_Lucene_Document_Xlsx
  213. */
  214. public static function loadXlsxFile($fileName, $storeContent = false)
  215. {
  216. return new Zend_Search_Lucene_Document_Xlsx($fileName, $storeContent);
  217. }
  218. }
  219. } // end if (class_exists('ZipArchive'))