OpenXml.php 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Document
  18. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /** Zend_Search_Lucene_Document */
  23. require_once 'Zend/Search/Lucene/Document.php';
  24. /** Zend_Xml_Security */
  25. require_once 'Zend/Xml/Security.php';
  26. /**
  27. * OpenXML document.
  28. *
  29. * @category Zend
  30. * @package Zend_Search_Lucene
  31. * @subpackage Document
  32. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  33. * @license http://framework.zend.com/license/new-bsd New BSD License
  34. */
  abstract class Zend_Search_Lucene_Document_OpenXml extends Zend_Search_Lucene_Document
  {
      /**
       * Xml Schema - Relationships
       *
       * @var string
       */
      const SCHEMA_RELATIONSHIP = 'http://schemas.openxmlformats.org/package/2006/relationships';

      /**
       * Xml Schema - Office document (relationship type URI)
       *
       * @var string
       */
      const SCHEMA_OFFICEDOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument';

      /**
       * Xml Schema - Core properties (relationship type URI)
       *
       * This is the value matched against the "Type" attribute of a
       * Relationship entry in _rels/.rels. It is NOT the namespace of the
       * elements inside the core properties part; see
       * SCHEMA_COREPROPERTIES_ELEMENTS for that.
       *
       * @var string
       */
      const SCHEMA_COREPROPERTIES = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties';

      /**
       * Xml Schema - Core properties elements (cp:* element namespace)
       *
       * Namespace of elements such as cp:keywords, cp:lastModifiedBy and
       * cp:revision inside the core properties part.
       *
       * @var string
       */
      const SCHEMA_COREPROPERTIES_ELEMENTS = 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties';

      /**
       * Xml Schema - Dublin Core
       *
       * @var string
       */
      const SCHEMA_DUBLINCORE = 'http://purl.org/dc/elements/1.1/';

      /**
       * Xml Schema - Dublin Core Terms
       *
       * @var string
       */
      const SCHEMA_DUBLINCORETERMS = 'http://purl.org/dc/terms/';

      /**
       * Extract metadata from document
       *
       * Reads _rels/.rels, follows every relationship whose type is
       * SCHEMA_COREPROPERTIES and collects the direct children of the target
       * part that belong to the Dublin Core, core properties and Dublin Core
       * Terms namespaces. Keys are the local element names; when the same
       * name occurs more than once, the value read last wins.
       *
       * A package without a readable _rels/.rels, or a core properties part
       * that is missing or cannot be read as XML, yields no entries instead
       * of dereferencing a non-object.
       *
       * @param ZipArchive $package ZipArchive OpenXML package
       * @return array Key-value pairs containing document meta data
       */
      protected function extractMetaData(ZipArchive $package)
      {
          // Data holders
          $coreProperties = array();

          // Read relations and search for core properties
          $relations = $this->_loadXmlPart($package, '_rels/.rels');
          if ($relations === null) {
              return $coreProperties;
          }

          // Namespaces to harvest, in the order they are applied. The
          // relationship type URI is kept for backward compatibility with the
          // previous implementation; the real cp:* namespace follows it.
          $namespaces = array(
              self::SCHEMA_DUBLINCORE,
              self::SCHEMA_COREPROPERTIES,
              self::SCHEMA_COREPROPERTIES_ELEMENTS,
              self::SCHEMA_DUBLINCORETERMS,
          );

          foreach ($relations->Relationship as $rel) {
              // Attributes are SimpleXMLElement objects; cast before comparing.
              if ((string) $rel['Type'] !== self::SCHEMA_COREPROPERTIES) {
                  continue;
              }

              // Found core properties! Normalise the target so that "./x",
              // "/x" and "a/../x" all resolve to a plain ZIP entry name.
              $target   = $this->absoluteZipPath((string) $rel['Target']);
              $contents = $this->_loadXmlPart($package, $target);
              if ($contents === null) {
                  continue;
              }

              foreach ($namespaces as $namespace) {
                  foreach ($contents->children($namespace) as $child) {
                      $coreProperties[$child->getName()] = (string) $child;
                  }
              }
          }

          return $coreProperties;
      }

      /**
       * Determine absolute zip path
       *
       * Treats both "/" and "\" as separators, drops empty and "." segments,
       * and lets ".." remove the previously kept segment (a ".." with nothing
       * left to remove is ignored). The result is joined with "/" and has no
       * leading or trailing separator.
       *
       * @param string $path
       * @return string
       */
      protected function absoluteZipPath($path)
      {
          $segments = explode('/', str_replace('\\', '/', $path));
          $resolved = array();

          foreach ($segments as $segment) {
              if ($segment === '' || $segment === '.') {
                  continue;
              }

              if ($segment === '..') {
                  array_pop($resolved);
                  continue;
              }

              $resolved[] = $segment;
          }

          return implode('/', $resolved);
      }

      /**
       * Read one part of the package and scan it as XML
       *
       * @param ZipArchive $package ZipArchive OpenXML package
       * @param string     $name    Entry name inside the archive
       * @return SimpleXMLElement|null Null when the entry is missing, empty
       *                               or the scan did not produce an element
       */
      private function _loadXmlPart(ZipArchive $package, $name)
      {
          if ($name === '') {
              return null;
          }

          // getFromName() returns false when the entry cannot be read.
          $xml = $package->getFromName($name);
          if ($xml === false || $xml === '') {
              return null;
          }

          $document = Zend_Xml_Security::scan($xml);

          return ($document instanceof SimpleXMLElement) ? $document : null;
      }
  }