OpenXml.php 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Document
  18. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /** Zend_Search_Lucene_Document */
  23. require_once 'Zend/Search/Lucene/Document.php';
  if (class_exists('ZipArchive', false)) {
      /**
       * OpenXML document.
       *
       * Abstract base for Lucene documents built from OpenXML (zip based)
       * packages. It provides helpers for reading the package's core
       * properties (metadata) and for normalizing part paths inside the zip.
       *
       * The class is only declared when the ZipArchive class is already
       * loaded (class_exists() is called with autoloading disabled).
       *
       * @category Zend
       * @package Zend_Search_Lucene
       * @subpackage Document
       * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
       * @license http://framework.zend.com/license/new-bsd New BSD License
       */
      abstract class Zend_Search_Lucene_Document_OpenXml extends Zend_Search_Lucene_Document
      {
          /**
           * Xml Schema - Relationships
           *
           * @var string
           */
          const SCHEMA_RELATIONSHIP = 'http://schemas.openxmlformats.org/package/2006/relationships';

          /**
           * Xml Schema - Office document
           *
           * @var string
           */
          const SCHEMA_OFFICEDOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument';

          /**
           * Xml Schema - Core properties
           *
           * This is the relationship *type* that identifies the core
           * properties part in "_rels/.rels". It is not the XML namespace of
           * the elements inside that part; see
           * SCHEMA_COREPROPERTIES_NAMESPACE for that.
           *
           * @var string
           */
          const SCHEMA_COREPROPERTIES = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties';

          /**
           * Xml Schema - Core properties element namespace
           *
           * XML namespace of the "cp:" elements (lastModifiedBy, keywords,
           * category, ...) inside the core properties part.
           *
           * @var string
           */
          const SCHEMA_COREPROPERTIES_NAMESPACE = 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties';

          /**
           * Xml Schema - Dublin Core
           *
           * @var string
           */
          const SCHEMA_DUBLINCORE = 'http://purl.org/dc/elements/1.1/';

          /**
           * Xml Schema - Dublin Core Terms
           *
           * @var string
           */
          const SCHEMA_DUBLINCORETERMS = 'http://purl.org/dc/terms/';

          /**
           * Extract metadata from document
           *
           * Reads "_rels/.rels", follows every relationship whose Type is
           * SCHEMA_COREPROPERTIES and collects the child elements of the
           * referenced part that belong to the Dublin Core, core properties
           * and Dublin Core Terms namespaces (in that order, so a later
           * namespace overwrites an equally named earlier element).
           *
           * A missing or unparseable relationships/properties part is
           * skipped; in that case the result simply lacks those entries.
           *
           * @param ZipArchive $package ZipArchive OpenXML package
           * @return array Key-value pairs containing document meta data
           *               (element local name => element text)
           */
          protected function extractMetaData(ZipArchive $package)
          {
              // Data holders
              $coreProperties = array();

              // Read relations and search for core properties
              $relations = $this->loadXmlPart($package, '_rels/.rels');
              if ($relations === null) {
                  return $coreProperties;
              }

              // Namespaces whose elements are exported as metadata.
              $propertyNamespaces = array(
                  self::SCHEMA_DUBLINCORE,
                  self::SCHEMA_COREPROPERTIES_NAMESPACE,
                  self::SCHEMA_DUBLINCORETERMS,
              );

              foreach ($relations->Relationship as $rel) {
                  if ((string) $rel['Type'] !== self::SCHEMA_COREPROPERTIES) {
                      continue;
                  }

                  // Found core properties! Normalize the target first so that
                  // "/docProps/core.xml" or a root level "core.xml" resolve to
                  // a real zip entry name (no leading "/" or "./").
                  $contents = $this->loadXmlPart(
                      $package,
                      $this->absoluteZipPath((string) $rel['Target'])
                  );
                  if ($contents === null) {
                      continue;
                  }

                  foreach ($propertyNamespaces as $namespace) {
                      foreach ($contents->children($namespace) as $child) {
                          $coreProperties[$child->getName()] = (string) $child;
                      }
                  }
              }

              return $coreProperties;
          }

          /**
           * Determine absolute zip path
           *
           * Splits the path on "/" and "\", drops empty and "." segments,
           * resolves ".." against the preceding segment (a ".." with nothing
           * before it is ignored) and joins the remainder with "/". The
           * result therefore never starts or ends with a slash.
           *
           * @param string $path
           * @return string
           */
          protected function absoluteZipPath($path) {
              $path = str_replace(array('/', '\\'), DIRECTORY_SEPARATOR, $path);
              // 'strlen' as the filter removes only empty segments; a segment
              // named "0" is kept, unlike with array_filter()'s default test.
              $parts = array_filter(explode(DIRECTORY_SEPARATOR, $path), 'strlen');
              $absolutes = array();
              foreach ($parts as $part) {
                  if ('.' === $part) {
                      continue;
                  }
                  if ('..' === $part) {
                      array_pop($absolutes);
                  } else {
                      $absolutes[] = $part;
                  }
              }
              return implode('/', $absolutes);
          }

          /**
           * Load one XML part of the package
           *
           * @param ZipArchive $package ZipArchive OpenXML package
           * @param string $name Entry name inside the zip archive
           * @return SimpleXMLElement|null Parsed part, or null when the entry
           *                               is missing, empty or not well-formed
           */
          private function loadXmlPart(ZipArchive $package, $name)
          {
              $data = $package->getFromName($name);
              if ($data === false || $data === '') {
                  return null;
              }

              // The package content is untrusted input: LIBXML_NONET keeps
              // the parser from accessing the network while loading it.
              $xml = simplexml_load_string($data, 'SimpleXMLElement', LIBXML_NONET);

              return ($xml === false) ? null : $xml;
          }
      }
  } // end if (class_exists('ZipArchive'))