Pptx.php 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Document
  18. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /** Zend_Search_Lucene_Document_OpenXml */
  23. require_once 'Zend/Search/Lucene/Document/OpenXml.php';
  if (class_exists('ZipArchive', false)) {

  /**
   * Pptx document.
   *
   * Turns a PresentationML (.pptx) OpenXML package into a Lucene document.
   * The text runs (DrawingML "a:t" elements) of every slide, each followed by
   * the text runs of that slide's notes part, are joined into the 'body' field.
   * The document also gets a 'filename' field, one field per entry returned by
   * extractMetaData(), and a 'title' field that falls back to the file name.
   *
   * The class is only declared when the ZipArchive class is already loaded.
   *
   * @category   Zend
   * @package    Zend_Search_Lucene
   * @subpackage Document
   * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
   * @license    http://framework.zend.com/license/new-bsd     New BSD License
   */
  class Zend_Search_Lucene_Document_Pptx extends Zend_Search_Lucene_Document_OpenXml
  {
      /**
       * Xml Schema - PresentationML
       *
       * @var string
       */
      const SCHEMA_PRESENTATIONML = 'http://schemas.openxmlformats.org/presentationml/2006/main';

      /**
       * Xml Schema - DrawingML
       *
       * @var string
       */
      const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';

      /**
       * Xml Schema - Slide relation
       *
       * @var string
       */
      const SCHEMA_SLIDERELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide';

      /**
       * Xml Schema - Slide notes relation
       *
       * @var string
       */
      const SCHEMA_SLIDENOTESRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide';

      /**
       * Object constructor
       *
       * Private: instances are created through loadPptxFile().
       *
       * @param string  $fileName     Path of the .pptx package to read
       * @param boolean $storeContent Whether the 'body' field is stored (Text)
       *                              or only indexed (UnStored)
       * @throws Zend_Search_Lucene_Exception When the file cannot be opened as a
       *                                      zip archive or has no readable
       *                                      "_rels/.rels" part
       */
      private function __construct($fileName, $storeContent)
      {
          // Document data holders
          $slides       = array();
          $slideNotes   = array();
          $documentBody = array();

          // Open OpenXML package. ZipArchive::open() returns true on success and
          // an error code otherwise, so the result must be compared strictly.
          $package = new ZipArchive();
          if ($package->open($fileName) !== true) {
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception("Unable to open '{$fileName}' as an OpenXML package.");
          }

          // Read relations and search for officeDocument
          $relations = $this->_loadXmlPart($package, '_rels/.rels');
          if (!$relations instanceof SimpleXMLElement) {
              $package->close();
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .pptx file.');
          }

          foreach ($relations->Relationship as $rel) {
              if ((string) $rel['Type'] !== Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
                  continue;
              }

              // Found office document! Search for slides...
              $documentTarget = (string) $rel['Target'];
              $documentDir    = dirname($documentTarget);

              $slideRelations = $this->_loadXmlPart(
                  $package,
                  $this->absoluteZipPath($documentDir . '/_rels/' . basename($documentTarget) . '.rels')
              );

              if ($slideRelations instanceof SimpleXMLElement) {
                  foreach ($slideRelations->Relationship as $slideRel) {
                      if ((string) $slideRel['Type'] !== self::SCHEMA_SLIDERELATION) {
                          continue;
                      }

                      // Found slide! Slides and their notes share the key derived
                      // from the slide's relationship id.
                      $slideKey    = str_replace('rId', '', (string) $slideRel['Id']);
                      $slideTarget = (string) $slideRel['Target'];
                      $slideDir    = $documentDir . '/' . dirname($slideTarget);

                      $slide = $this->_loadXmlPart(
                          $package,
                          $this->absoluteZipPath($slideDir . '/' . basename($slideTarget))
                      );
                      if (!$slide instanceof SimpleXMLElement) {
                          // Missing or malformed slide part: there is nothing to
                          // index for it, and keeping "false" in the list would
                          // be a fatal error once text extraction starts.
                          continue;
                      }
                      $slides[$slideKey] = $slide;

                      // Search for slide notes; a slide may have no relations part.
                      $slideNotesRelations = $this->_loadXmlPart(
                          $package,
                          $this->absoluteZipPath($slideDir . '/_rels/' . basename($slideTarget) . '.rels')
                      );
                      if (!$slideNotesRelations instanceof SimpleXMLElement) {
                          continue;
                      }

                      foreach ($slideNotesRelations->Relationship as $slideNoteRel) {
                          if ((string) $slideNoteRel['Type'] !== self::SCHEMA_SLIDENOTESRELATION) {
                              continue;
                          }

                          // Found slide notes!
                          $noteTarget = (string) $slideNoteRel['Target'];
                          $slideNote  = $this->_loadXmlPart(
                              $package,
                              $this->absoluteZipPath(
                                  $slideDir . '/' . dirname($noteTarget) . '/' . basename($noteTarget)
                              )
                          );
                          if ($slideNote instanceof SimpleXMLElement) {
                              $slideNotes[$slideKey] = $slideNote;
                          }

                          // Only the first notes relation of a slide is used.
                          break;
                      }
                  }
              }

              // Only the first officeDocument relation is processed.
              break;
          }

          // Sort slides by key. Notes are looked up by that same key below, so
          // they need no sorting of their own.
          ksort($slides);

          // Extract contents from slides, each followed by its notes
          foreach ($slides as $slideKey => $slide) {
              foreach ($this->_extractText($slide) as $text) {
                  $documentBody[] = $text;
              }

              if (isset($slideNotes[$slideKey])) {
                  foreach ($this->_extractText($slideNotes[$slideKey]) as $text) {
                      $documentBody[] = $text;
                  }
              }
          }

          // Read core properties
          $coreProperties = $this->extractMetaData($package);

          // Close file
          $package->close();

          // Store filename
          $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));

          // Store contents
          $body = implode(' ', $documentBody);
          if ($storeContent) {
              $this->addField(Zend_Search_Lucene_Field::Text('body', $body, 'UTF-8'));
          } else {
              $this->addField(Zend_Search_Lucene_Field::UnStored('body', $body, 'UTF-8'));
          }

          // Store meta data properties
          foreach ($coreProperties as $key => $value) {
              $this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
          }

          // Store title (if not present in meta data)
          if (!isset($coreProperties['title'])) {
              $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
          }
      }

      /**
       * Read one part of the package and parse it as XML.
       *
       * Callers must test the result with "instanceof SimpleXMLElement" rather
       * than by truthiness: a SimpleXMLElement without children or attributes
       * evaluates to false in a boolean context.
       *
       * NOTE(review): parts are parsed with the default libxml options. If
       * untrusted files are indexed, confirm that external entity loading is
       * disabled on the target PHP/libxml version.
       *
       * @param ZipArchive $package Opened OpenXML package
       * @param string     $path    Name of the part inside the archive
       * @return SimpleXMLElement|false False when the part is missing or is not
       *                                well-formed XML
       */
      private function _loadXmlPart(ZipArchive $package, $path)
      {
          $xml = $package->getFromName($path);
          if ($xml === false) {
              return false;
          }

          return simplexml_load_string($xml);
      }

      /**
       * Collect the contents of all DrawingML text runs ("a:t") of a part.
       *
       * @param SimpleXMLElement $part Parsed slide or slide notes part
       * @return array List of strings, in the order xpath() returns the elements
       */
      private function _extractText(SimpleXMLElement $part)
      {
          // Register namespaces
          $part->registerXPathNamespace('p', self::SCHEMA_PRESENTATIONML);
          $part->registerXPathNamespace('a', self::SCHEMA_DRAWINGML);

          // Fetch all text; xpath() does not return an array on failure.
          $texts        = array();
          $textElements = $part->xpath('//a:t');
          if (is_array($textElements)) {
              foreach ($textElements as $textElement) {
                  $texts[] = (string) $textElement;
              }
          }

          return $texts;
      }

      /**
       * Load Pptx document from a file
       *
       * @param string  $fileName
       * @param boolean $storeContent
       * @return Zend_Search_Lucene_Document_Pptx
       * @throws Zend_Search_Lucene_Exception When the file is not a readable .pptx package
       */
      public static function loadPptxFile($fileName, $storeContent = false)
      {
          return new self($fileName, $storeContent);
      }
  }

  } // end if (class_exists('ZipArchive'))