Query.php 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Dom
  17. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. * @version $Id$
  20. */
  21. /**
  22. * @see Zend_Dom_Query_Css2Xpath
  23. */
  24. require_once 'Zend/Dom/Query/Css2Xpath.php';
  25. /**
  26. * @see Zend_Dom_Query_Result
  27. */
  28. require_once 'Zend/Dom/Query/Result.php';
  29. /** @see Zend_Xml_Security */
  30. require_once 'Zend/Xml/Security.php';
  31. /** @see Zend_Xml_Exception */
  32. require_once 'Zend/Xml/Exception.php';
  33. /**
  34. * Query DOM structures based on CSS selectors and/or XPath
  35. *
  36. * @package Zend_Dom
  37. * @subpackage Query
  38. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  39. * @license http://framework.zend.com/license/new-bsd New BSD License
  40. */
  class Zend_Dom_Query
  {
      /**#@+
       * Document types
       */
      const DOC_DOM   = 'docDom';
      const DOC_XML   = 'docXml';
      const DOC_HTML  = 'docHtml';
      const DOC_XHTML = 'docXhtml';
      /**#@-*/

      /**
       * Registered document: raw markup, an already-built DOMDocument, or
       * null while nothing has been registered.
       *
       * @var null|string|DOMDocument
       */
      protected $_document;

      /**
       * libxml errors collected by the most recent query that produced any;
       * false until a query reports errors.
       *
       * @var false|array
       */
      protected $_documentErrors = false;

      /**
       * Document type (one of the DOC_* constants), null until a document
       * has been registered.
       *
       * @var null|string
       */
      protected $_docType;

      /**
       * Document encoding
       *
       * @var null|string
       */
      protected $_encoding;

      /**
       * XPath namespaces, iterated as prefix => namespace URI when a query
       * is executed.
       *
       * @var array
       */
      protected $_xpathNamespaces = array();

      /**
       * Constructor
       *
       * @param null|string|DOMDocument $document Document to query, if any
       * @param null|string             $encoding Document encoding, if known
       */
      public function __construct($document = null, $encoding = null)
      {
          $this->setEncoding($encoding);
          $this->setDocument($document);
      }

      /**
       * Set document encoding
       *
       * @param  null|string $encoding Null clears the encoding; anything
       *                               else is cast to string
       * @return Zend_Dom_Query
       */
      public function setEncoding($encoding)
      {
          $this->_encoding = (null === $encoding) ? null : (string) $encoding;
          return $this;
      }

      /**
       * Get document encoding
       *
       * @return null|string
       */
      public function getEncoding()
      {
          return $this->_encoding;
      }

      /**
       * Set document to query
       *
       * A DOMDocument is registered as-is. String input is sniffed: markup
       * starting with an XML declaration is treated as XHTML when its <html>
       * element carries an xmlns attribute and as XML otherwise; markup
       * mentioning "DTD XHTML" is treated as XHTML; everything else as HTML.
       * Null or empty input leaves the current document untouched.
       *
       * @param  null|string|DOMDocument $document
       * @param  null|string             $encoding Document encoding (ignored
       *                                           for DOMDocument input)
       * @return Zend_Dom_Query
       */
      public function setDocument($document, $encoding = null)
      {
          if ($document instanceof DOMDocument) {
              return $this->setDocumentDom($document);
          }

          // The constructor passes null by default; check it explicitly so
          // strlen() is never handed null (deprecated as of PHP 8.1).
          if (null === $document || 0 === strlen($document)) {
              return $this;
          }

          // breaking XML declaration to make syntax highlighting work
          if ('<' . '?xml' === substr(trim($document), 0, 5)) {
              if (preg_match('/<html[^>]*xmlns="([^"]+)"[^>]*>/i', $document, $matches)) {
                  // NOTE(review): the URI is appended under a numeric key,
                  // which _getNodeList() later uses as the namespace prefix.
                  // Kept as-is because changing it would alter query
                  // behaviour for existing callers.
                  $this->_xpathNamespaces[] = $matches[1];
                  return $this->setDocumentXhtml($document, $encoding);
              }
              return $this->setDocumentXml($document, $encoding);
          }

          if (false !== strpos($document, 'DTD XHTML')) {
              return $this->setDocumentXhtml($document, $encoding);
          }

          return $this->setDocumentHtml($document, $encoding);
      }

      /**
       * Register an already-built DOMDocument
       *
       * @param  DOMDocument $document
       * @return Zend_Dom_Query
       */
      public function setDocumentDom(DOMDocument $document)
      {
          $this->_document = $document;
          $this->_docType  = self::DOC_DOM;
          return $this;
      }

      /**
       * Register HTML document
       *
       * @param  string      $document
       * @param  null|string $encoding Document encoding
       * @return Zend_Dom_Query
       */
      public function setDocumentHtml($document, $encoding = null)
      {
          $this->_document = (string) $document;
          $this->_docType  = self::DOC_HTML;
          if (null !== $encoding) {
              $this->setEncoding($encoding);
          }
          return $this;
      }

      /**
       * Register XHTML document
       *
       * @param  string      $document
       * @param  null|string $encoding Document encoding
       * @return Zend_Dom_Query
       */
      public function setDocumentXhtml($document, $encoding = null)
      {
          $this->_document = (string) $document;
          $this->_docType  = self::DOC_XHTML;
          if (null !== $encoding) {
              $this->setEncoding($encoding);
          }
          return $this;
      }

      /**
       * Register XML document
       *
       * @param  string      $document
       * @param  null|string $encoding Document encoding
       * @return Zend_Dom_Query
       */
      public function setDocumentXml($document, $encoding = null)
      {
          $this->_document = (string) $document;
          $this->_docType  = self::DOC_XML;
          if (null !== $encoding) {
              $this->setEncoding($encoding);
          }
          return $this;
      }

      /**
       * Retrieve current document
       *
       * @return null|string|DOMDocument Null when nothing is registered
       */
      public function getDocument()
      {
          return $this->_document;
      }

      /**
       * Get document type
       *
       * @return null|string One of the DOC_* constants, or null when no
       *                     document is registered
       */
      public function getDocumentType()
      {
          return $this->_docType;
      }

      /**
       * Get any DOMDocument errors found
       *
       * @return false|array
       */
      public function getDocumentErrors()
      {
          return $this->_documentErrors;
      }

      /**
       * Perform a CSS selector query
       *
       * The selector is translated to XPath and handed to queryXpath().
       *
       * @param  string $query CSS selector
       * @return Zend_Dom_Query_Result
       */
      public function query($query)
      {
          $xpathQuery = Zend_Dom_Query_Css2Xpath::transform($query);
          return $this->queryXpath($xpathQuery, $query);
      }

      /**
       * Perform an XPath query
       *
       * Builds (or, for DOC_DOM, reuses) a DOMDocument for the registered
       * document, records any libxml errors raised while doing so, and runs
       * the XPath expression against it. The libxml internal-error mode that
       * was active on entry is restored before returning or throwing.
       *
       * @param  string|array $xpathQuery
       * @param  string       $query      CSS selector query
       * @throws Zend_Dom_Exception When no document is registered or the
       *                            document cannot be parsed
       * @return Zend_Dom_Query_Result
       */
      public function queryXpath($xpathQuery, $query = null)
      {
          if (null === ($document = $this->getDocument())) {
              require_once 'Zend/Dom/Exception.php';
              throw new Zend_Dom_Exception('Cannot query; no document registered');
          }

          $encoding = $this->getEncoding();

          // Remember the caller's libxml error mode so it can be restored
          // instead of being unconditionally switched off afterwards.
          $previousErrorMode = libxml_use_internal_errors(true);

          if (null === $encoding) {
              $domDoc = new DOMDocument('1.0');
          } else {
              $domDoc = new DOMDocument('1.0', $encoding);
          }

          $type    = $this->getDocumentType();
          $success = false;
          switch ($type) {
              case self::DOC_DOM:
                  // Already parsed by the caller: nothing to load, so this
                  // branch cannot fail.
                  $domDoc  = $document;
                  $success = true;
                  break;
              case self::DOC_XML:
                  try {
                      $domDoc  = Zend_Xml_Security::scan($document, $domDoc);
                      $success = ($domDoc !== false);
                  } catch (Zend_Xml_Exception $e) {
                      libxml_use_internal_errors($previousErrorMode);
                      require_once 'Zend/Dom/Exception.php';
                      throw new Zend_Dom_Exception(
                          $e->getMessage()
                      );
                  }
                  break;
              case self::DOC_HTML:
              case self::DOC_XHTML:
              default:
                  $success = $domDoc->loadHTML($document);
                  break;
          }

          $errors = libxml_get_errors();
          if (!empty($errors)) {
              $this->_documentErrors = $errors;
              libxml_clear_errors();
          }
          libxml_use_internal_errors($previousErrorMode);

          if (!$success) {
              require_once 'Zend/Dom/Exception.php';
              throw new Zend_Dom_Exception(sprintf('Error parsing document (type == %s)', $type));
          }

          $nodeList = $this->_getNodeList($domDoc, $xpathQuery);
          return new Zend_Dom_Query_Result($query, $xpathQuery, $domDoc, $nodeList);
      }

      /**
       * Register XPath namespaces
       *
       * Replaces any previously registered namespaces.
       *
       * @param  array $xpathNamespaces Map of prefix => namespace URI
       * @return void
       */
      public function registerXpathNamespaces($xpathNamespaces)
      {
          $this->_xpathNamespaces = $xpathNamespaces;
      }

      /**
       * Prepare node list
       *
       * Registers the configured namespaces on a new DOMXPath and evaluates
       * the query. When the query contains "[contains(@attr, ' ..." tests,
       * the value of every such attribute in the document is first wrapped
       * in single spaces - presumably so that space-delimited tokens also
       * match at the very start or end of the value. Note that this edits
       * the attribute values of the supplied document in place.
       *
       * @param  DOMDocument  $document
       * @param  string|array $xpathQuery Cast to string before evaluation
       * @return DOMNodeList|false Result of DOMXPath::query()
       */
      protected function _getNodeList($document, $xpathQuery)
      {
          $xpath = new DOMXPath($document);
          foreach ($this->_xpathNamespaces as $prefix => $namespaceUri) {
              $xpath->registerNamespace($prefix, $namespaceUri);
          }

          $xpathQuery = (string) $xpathQuery;
          if (preg_match_all('|\[contains\((@[a-z0-9_-]+),\s?\' |i', $xpathQuery, $matches)) {
              foreach ($matches[1] as $attribute) {
                  // $attribute still carries the leading "@"; strip it to
                  // get the plain attribute name for the DOM lookup.
                  $queryString   = '//*[' . $attribute . ']';
                  $attributeName = substr($attribute, 1);
                  $nodes         = $xpath->query($queryString);
                  foreach ($nodes as $node) {
                      $attr        = $node->attributes->getNamedItem($attributeName);
                      $attr->value = ' ' . $attr->value . ' ';
                  }
              }
          }

          return $xpath->query($xpathQuery);
      }
  }