Pptx.php 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Document
  18. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. */
  21. /** Zend_Search_Lucene_Document_OpenXml */
  22. require_once 'Zend/Search/Lucene/Document/OpenXml.php';
  if (class_exists('ZipArchive', false)) {

      /**
       * Pptx document.
       *
       * Builds a Lucene document from a PowerPoint OpenXML (.pptx) package: the
       * text runs of every slide and of its notes page become the 'body' field,
       * and the package meta data is added as additional fields.
       *
       * @category   Zend
       * @package    Zend_Search_Lucene
       * @subpackage Document
       * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
       * @license    http://framework.zend.com/license/new-bsd     New BSD License
       */
      class Zend_Search_Lucene_Document_Pptx extends Zend_Search_Lucene_Document_OpenXml
      {
          /**
           * Xml Schema - PresentationML
           *
           * @var string
           */
          const SCHEMA_PRESENTATIONML = 'http://schemas.openxmlformats.org/presentationml/2006/main';

          /**
           * Xml Schema - DrawingML
           *
           * @var string
           */
          const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';

          /**
           * Xml Schema - Slide relation
           *
           * @var string
           */
          const SCHEMA_SLIDERELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide';

          /**
           * Xml Schema - Slide notes relation
           *
           * @var string
           */
          const SCHEMA_SLIDENOTESRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide';

          /**
           * Object constructor
           *
           * Opens the package, follows the relationship parts from "_rels/.rels"
           * to the office document and from there to every slide and its notes
           * page, then registers the 'filename', 'body', meta data and 'title'
           * fields on this document.
           *
           * Slides or notes pages whose part is missing from the archive or
           * cannot be parsed are skipped instead of aborting the whole import.
           *
           * @param string  $fileName
           * @param boolean $storeContent  true: 'body' is added as a Text field,
           *                               false: 'body' is added as an UnStored field
           * @throws Zend_Search_Lucene_Exception  if the file cannot be opened as a
           *                                       zip archive or has no readable
           *                                       "_rels/.rels" part
           */
          private function __construct($fileName, $storeContent)
          {
              // Document data holders
              $slides       = array();
              $slideNotes   = array();
              $documentBody = array();

              // Open OpenXML package. ZipArchive::open() returns true on success
              // and an error code otherwise, so compare strictly against true.
              $package = new ZipArchive();
              if ($package->open($fileName) !== true) {
                  require_once 'Zend/Search/Lucene/Exception.php';
                  throw new Zend_Search_Lucene_Exception(
                      "Unable to open '" . $fileName . "' as an OpenXML package."
                  );
              }

              // Read relations and search for officeDocument
              $relations = self::_loadPptxPart($package, '_rels/.rels');
              if ($relations === false) {
                  $package->close();
                  require_once 'Zend/Search/Lucene/Exception.php';
                  throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .pptx file.');
              }

              foreach ($relations->Relationship as $rel) {
                  // Loose comparison kept for this one: the constant is declared in
                  // the parent class, which is not part of this file.
                  if ((string) $rel['Type'] != Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
                      continue;
                  }

                  // Found office document! Search for slides...
                  $documentDir  = dirname((string) $rel['Target']);
                  $documentName = basename((string) $rel['Target']);

                  $slideRelations = self::_loadPptxPart(
                      $package,
                      $this->absoluteZipPath($documentDir . '/_rels/' . $documentName . '.rels')
                  );

                  if ($slideRelations !== false) {
                      foreach ($slideRelations->Relationship as $slideRel) {
                          if ((string) $slideRel['Type'] !== self::SCHEMA_SLIDERELATION) {
                              continue;
                          }

                          // Found slide! It is keyed by the relationship id with
                          // the "rId" prefix removed; ksort() below orders on it.
                          $slideKey  = str_replace('rId', '', (string) $slideRel['Id']);
                          $slideDir  = $documentDir . '/' . dirname((string) $slideRel['Target']);
                          $slideName = basename((string) $slideRel['Target']);

                          $slide = self::_loadPptxPart(
                              $package,
                              $this->absoluteZipPath($slideDir . '/' . $slideName)
                          );
                          if ($slide === false) {
                              // Missing or unparseable slide part: nothing to index.
                              continue;
                          }
                          $slides[$slideKey] = $slide;

                          // Search for slide notes
                          $slideNotesRelations = self::_loadPptxPart(
                              $package,
                              $this->absoluteZipPath($slideDir . '/_rels/' . $slideName . '.rels')
                          );
                          if ($slideNotesRelations === false) {
                              continue;
                          }

                          foreach ($slideNotesRelations->Relationship as $slideNoteRel) {
                              if ((string) $slideNoteRel['Type'] !== self::SCHEMA_SLIDENOTESRELATION) {
                                  continue;
                              }

                              // Found slide notes! Stored under the slide's key so the
                              // extraction loop can pair them with their slide.
                              $slideNote = self::_loadPptxPart(
                                  $package,
                                  $this->absoluteZipPath(
                                      $slideDir . '/'
                                      . dirname((string) $slideNoteRel['Target']) . '/'
                                      . basename((string) $slideNoteRel['Target'])
                                  )
                              );
                              if ($slideNote !== false) {
                                  $slideNotes[$slideKey] = $slideNote;
                              }

                              // Only the first notes relation of a slide is used.
                              break;
                          }
                      }
                  }

                  // Only the first office document relation is processed.
                  break;
              }

              // Sort slides
              ksort($slides);
              ksort($slideNotes);

              // Extract contents from slides, each followed by its notes (if any)
              foreach ($slides as $slideKey => $slide) {
                  foreach (self::_collectPptxText($slide) as $text) {
                      $documentBody[] = $text;
                  }

                  if (isset($slideNotes[$slideKey])) {
                      foreach (self::_collectPptxText($slideNotes[$slideKey]) as $text) {
                          $documentBody[] = $text;
                      }
                  }
              }

              // Read core properties
              $coreProperties = $this->extractMetaData($package);

              // Close file
              $package->close();

              // Store filename
              $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));

              // Store contents
              if ($storeContent) {
                  $this->addField(Zend_Search_Lucene_Field::Text('body', implode(' ', $documentBody), 'UTF-8'));
              } else {
                  $this->addField(Zend_Search_Lucene_Field::UnStored('body', implode(' ', $documentBody), 'UTF-8'));
              }

              // Store meta data properties
              foreach ($coreProperties as $key => $value) {
                  $this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
              }

              // Store title (if not present in meta data)
              if (!isset($coreProperties['title'])) {
                  $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
              }
          }

          /**
           * Read one part from the package and parse it as XML.
           *
           * @param ZipArchive $package   opened package
           * @param string     $partName  name of the part inside the archive
           * @return SimpleXMLElement|false  false if the part is absent, empty or
           *                                 cannot be parsed
           */
          private static function _loadPptxPart(ZipArchive $package, $partName)
          {
              $contents = $package->getFromName($partName);
              if ($contents === false || $contents === '') {
                  return false;
              }

              // NOTE(review): the XML comes from the indexed file, i.e. potentially
              // untrusted input; confirm that external entity loading is disabled
              // in the target environment.
              return simplexml_load_string($contents);
          }

          /**
           * Collect the contents of all DrawingML text elements (a:t) of a part.
           *
           * @param SimpleXMLElement $part  parsed slide or slide notes part
           * @return array  list of strings in the order returned by the XPath query
           */
          private static function _collectPptxText(SimpleXMLElement $part)
          {
              // Register namespaces
              $part->registerXPathNamespace('p', self::SCHEMA_PRESENTATIONML);
              $part->registerXPathNamespace('a', self::SCHEMA_DRAWINGML);

              // Fetch all text; xpath() does not return an array on failure
              $textElements = $part->xpath('//a:t');
              if (!is_array($textElements)) {
                  return array();
              }

              $texts = array();
              foreach ($textElements as $textElement) {
                  $texts[] = (string) $textElement;
              }

              return $texts;
          }

          /**
           * Load Pptx document from a file
           *
           * @param string  $fileName
           * @param boolean $storeContent
           * @return Zend_Search_Lucene_Document_Pptx
           * @throws Zend_Search_Lucene_Exception  if the file is not a readable .pptx package
           */
          public static function loadPptxFile($fileName, $storeContent = false)
          {
              return new Zend_Search_Lucene_Document_Pptx($fileName, $storeContent);
          }
      }

  } // end if (class_exists('ZipArchive'))