Query.php 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Dom
  17. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. * @version $Id$
  20. */
  21. /**
  22. * @see Zend_Dom_Query_Css2Xpath
  23. */
  24. require_once 'Zend/Dom/Query/Css2Xpath.php';
  25. /**
  26. * @see Zend_Dom_Query_Result
  27. */
  28. require_once 'Zend/Dom/Query/Result.php';
  29. /** @see Zend_Xml_Security */
  30. require_once 'Zend/Xml/Security.php';
  31. /** @see Zend_Xml_Exception */
  32. require_once 'Zend/Xml/Exception.php';
  33. /**
  34. * Query DOM structures based on CSS selectors and/or XPath
  35. *
  36. * @package Zend_Dom
  37. * @subpackage Query
  38. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  39. * @license http://framework.zend.com/license/new-bsd New BSD License
  40. */
  41. class Zend_Dom_Query
  42. {
  43. /**#@+
  44. * Document types
  45. */
  46. const DOC_DOM = 'docDom';
  47. const DOC_XML = 'docXml';
  48. const DOC_HTML = 'docHtml';
  49. const DOC_XHTML = 'docXhtml';
  50. /**#@-*/
  51. /**
  52. * @var string|DOMDocument
  53. */
  54. protected $_document;
  55. /**
  56. * DOMDocument errors, if any
  57. * @var false|array
  58. */
  59. protected $_documentErrors = false;
  60. /**
  61. * Document type
  62. * @var string
  63. */
  64. protected $_docType;
  65. /**
  66. * Document encoding
  67. * @var null|string
  68. */
  69. protected $_encoding;
  70. /**
  71. * XPath namespaces
  72. * @var array
  73. */
  74. protected $_xpathNamespaces = array();
  75. /**
  76. * Constructor
  77. *
  78. * @param null|string|DOMDocument $document
  79. * @param null|string $encoding
  80. */
  81. public function __construct($document = null, $encoding = null)
  82. {
  83. $this->setEncoding($encoding);
  84. $this->setDocument($document);
  85. }
  86. /**
  87. * Set document encoding
  88. *
  89. * @param string $encoding
  90. * @return Zend_Dom_Query
  91. */
  92. public function setEncoding($encoding)
  93. {
  94. $this->_encoding = (null === $encoding) ? null : (string) $encoding;
  95. return $this;
  96. }
  97. /**
  98. * Get document encoding
  99. *
  100. * @return null|string
  101. */
  102. public function getEncoding()
  103. {
  104. return $this->_encoding;
  105. }
  106. /**
  107. * Set document to query
  108. *
  109. * @param string|DOMDocument $document
  110. * @param null|string $encoding Document encoding
  111. * @return Zend_Dom_Query
  112. */
  113. public function setDocument($document, $encoding = null)
  114. {
  115. if ($document instanceof DOMDocument) {
  116. return $this->setDocumentDom($document);
  117. }
  118. if (0 === strlen($document)) {
  119. return $this;
  120. }
  121. // breaking XML declaration to make syntax highlighting work
  122. if ('<' . '?xml' == substr(trim($document), 0, 5)) {
  123. if (preg_match('/<html[^>]*xmlns="([^"]+)"[^>]*>/i', $document, $matches)) {
  124. $this->_xpathNamespaces[] = $matches[1];
  125. return $this->setDocumentXhtml($document, $encoding);
  126. }
  127. return $this->setDocumentXml($document, $encoding);
  128. }
  129. if (strstr($document, 'DTD XHTML')) {
  130. return $this->setDocumentXhtml($document, $encoding);
  131. }
  132. return $this->setDocumentHtml($document, $encoding);
  133. }
  134. /**
  135. * @param DOMDocument $document
  136. * @param string $encoding
  137. */
  138. public function setDocumentDom(DOMDocument $document)
  139. {
  140. $this->_document = $document;
  141. $this->_docType = self::DOC_DOM;
  142. if (null !== $document->encoding) {
  143. $this->setEncoding($document->encoding);
  144. }
  145. return $this;
  146. }
  147. /**
  148. * Register HTML document
  149. *
  150. * @param string $document
  151. * @param null|string $encoding Document encoding
  152. * @return Zend_Dom_Query
  153. */
  154. public function setDocumentHtml($document, $encoding = null)
  155. {
  156. $this->_document = (string) $document;
  157. $this->_docType = self::DOC_HTML;
  158. if (null !== $encoding) {
  159. $this->setEncoding($encoding);
  160. }
  161. return $this;
  162. }
  163. /**
  164. * Register XHTML document
  165. *
  166. * @param string $document
  167. * @param null|string $encoding Document encoding
  168. * @return Zend_Dom_Query
  169. */
  170. public function setDocumentXhtml($document, $encoding = null)
  171. {
  172. $this->_document = (string) $document;
  173. $this->_docType = self::DOC_XHTML;
  174. if (null !== $encoding) {
  175. $this->setEncoding($encoding);
  176. }
  177. return $this;
  178. }
  179. /**
  180. * Register XML document
  181. *
  182. * @param string $document
  183. * @param null|string $encoding Document encoding
  184. * @return Zend_Dom_Query
  185. */
  186. public function setDocumentXml($document, $encoding = null)
  187. {
  188. $this->_document = (string) $document;
  189. $this->_docType = self::DOC_XML;
  190. if (null !== $encoding) {
  191. $this->setEncoding($encoding);
  192. }
  193. return $this;
  194. }
  195. /**
  196. * Retrieve current document
  197. *
  198. * @return string|DOMDocument
  199. */
  200. public function getDocument()
  201. {
  202. return $this->_document;
  203. }
  204. /**
  205. * Get document type
  206. *
  207. * @return string
  208. */
  209. public function getDocumentType()
  210. {
  211. return $this->_docType;
  212. }
  213. /**
  214. * Get any DOMDocument errors found
  215. *
  216. * @return false|array
  217. */
  218. public function getDocumentErrors()
  219. {
  220. return $this->_documentErrors;
  221. }
  222. /**
  223. * Perform a CSS selector query
  224. *
  225. * @param string $query
  226. * @return Zend_Dom_Query_Result
  227. */
  228. public function query($query)
  229. {
  230. $xpathQuery = Zend_Dom_Query_Css2Xpath::transform($query);
  231. return $this->queryXpath($xpathQuery, $query);
  232. }
  233. /**
  234. * Perform an XPath query
  235. *
  236. * @param string|array $xpathQuery
  237. * @param string $query CSS selector query
  238. * @throws Zend_Dom_Exception
  239. * @return Zend_Dom_Query_Result
  240. */
  241. public function queryXpath($xpathQuery, $query = null)
  242. {
  243. if (null === ($document = $this->getDocument())) {
  244. require_once 'Zend/Dom/Exception.php';
  245. throw new Zend_Dom_Exception('Cannot query; no document registered');
  246. }
  247. $encoding = $this->getEncoding();
  248. libxml_use_internal_errors(true);
  249. if (null === $encoding) {
  250. $domDoc = new DOMDocument('1.0');
  251. } else {
  252. $domDoc = new DOMDocument('1.0', $encoding);
  253. }
  254. $type = $this->getDocumentType();
  255. switch ($type) {
  256. case self::DOC_DOM:
  257. $domDoc = $this->_document;
  258. $success = true;
  259. break;
  260. case self::DOC_XML:
  261. try {
  262. $domDoc = Zend_Xml_Security::scan($document, $domDoc);
  263. $success = ($domDoc !== false);
  264. } catch (Zend_Xml_Exception $e) {
  265. require_once 'Zend/Dom/Exception.php';
  266. throw new Zend_Dom_Exception(
  267. $e->getMessage()
  268. );
  269. }
  270. break;
  271. case self::DOC_HTML:
  272. case self::DOC_XHTML:
  273. default:
  274. $success = $domDoc->loadHTML($document);
  275. break;
  276. }
  277. $errors = libxml_get_errors();
  278. if (!empty($errors)) {
  279. $this->_documentErrors = $errors;
  280. libxml_clear_errors();
  281. }
  282. libxml_use_internal_errors(false);
  283. if (!$success) {
  284. require_once 'Zend/Dom/Exception.php';
  285. throw new Zend_Dom_Exception(sprintf('Error parsing document (type == %s)', $type));
  286. }
  287. $nodeList = $this->_getNodeList($domDoc, $xpathQuery);
  288. return new Zend_Dom_Query_Result($query, $xpathQuery, $domDoc, $nodeList);
  289. }
  290. /**
  291. * Register XPath namespaces
  292. *
  293. * @param array $xpathNamespaces
  294. * @return void
  295. */
  296. public function registerXpathNamespaces($xpathNamespaces)
  297. {
  298. $this->_xpathNamespaces = $xpathNamespaces;
  299. }
  300. /**
  301. * Prepare node list
  302. *
  303. * @param DOMDocument $document
  304. * @param string|array $xpathQuery
  305. * @return array
  306. */
  307. protected function _getNodeList($document, $xpathQuery)
  308. {
  309. $xpath = new DOMXPath($document);
  310. foreach ($this->_xpathNamespaces as $prefix => $namespaceUri) {
  311. $xpath->registerNamespace($prefix, $namespaceUri);
  312. }
  313. $xpathQuery = (string) $xpathQuery;
  314. if (preg_match_all('|\[contains\((@[a-z0-9_-]+),\s?\' |i', $xpathQuery, $matches)) {
  315. foreach ($matches[1] as $attribute) {
  316. $queryString = '//*[' . $attribute . ']';
  317. $attributeName = substr($attribute, 1);
  318. $nodes = $xpath->query($queryString);
  319. foreach ($nodes as $node) {
  320. $attr = $node->attributes->getNamedItem($attributeName);
  321. $attr->value = ' ' . $attr->value . ' ';
  322. }
  323. }
  324. }
  325. return $xpath->query($xpathQuery);
  326. }
  327. }