Security.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Xml
  17. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. * @version $Id$
  20. */
  21. /**
  22. * @category Zend
  23. * @package Zend_Xml_SecurityScan
  24. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  25. * @license http://framework.zend.com/license/new-bsd New BSD License
  26. */
  class Zend_Xml_Security
  {
      /**
       * Message carried by the exception raised when an entity declaration is found.
       */
      const ENTITY_DETECT = 'Detected use of ENTITY in XML, disabled to prevent XXE/XEE attacks';

      /**
       * Heuristic scan to detect entity declarations in the raw XML bytes.
       *
       * Searches the input for the byte sequence of "<!ENTITY" in every
       * candidate encoding reported by getEntityComparison().
       *
       * @param string $xml
       * @throws Zend_Xml_Exception If an entity declaration was discovered.
       * @return void
       */
      protected static function heuristicScan($xml)
      {
          foreach (self::getEntityComparison($xml) as $needle) {
              if (false !== strpos($xml, $needle)) {
                  throw self::createException(self::ENTITY_DETECT);
              }
          }
      }

      /**
       * Error handler installed around DOMDocument::loadXML().
       *
       * Reports a warning as handled (true) only when its message mentions
       * DOMDocument::loadXML(); anything else is passed on (false).
       *
       * @param integer $errno
       * @param string $errstr
       * @param string $errfile
       * @param integer $errline
       * @return bool
       */
      public static function loadXmlErrorHandler($errno, $errstr, $errfile, $errline)
      {
          return false !== strpos($errstr, 'DOMDocument::loadXML()');
      }

      /**
       * Scan XML string for potential XXE and XEE attacks
       *
       * When no DOMDocument is supplied a fresh one is created and the result
       * is converted to a SimpleXMLElement; otherwise the supplied document is
       * populated and returned.
       *
       * The implicit-nullable "DOMDocument $dom = null" signature is kept on
       * purpose: this file still targets PHP versions without "?Type" syntax.
       *
       * @param string $xml
       * @param DOMDocument $dom
       * @throws Zend_Xml_Exception If the document declares entities.
       * @return SimpleXMLElement|DOMDocument|boolean False when the XML is empty or cannot be loaded.
       */
      public static function scan($xml, DOMDocument $dom = null)
      {
          // An empty payload can never load. Bail out before touching any
          // global state: on PHP 8 DOMDocument::loadXML() throws a ValueError
          // for an empty string instead of returning false.
          if (null === $xml || false === $xml || '' === $xml) {
              return false;
          }

          // If running with a vulnerable PHP-FPM we perform an heuristic scan.
          // We cannot use libxml_disable_entity_loader because of this bug
          // @see https://bugs.php.net/bug.php?id=64938
          $useHeuristics = self::isPhpFpm();
          if ($useHeuristics) {
              self::heuristicScan($xml);
          }

          $returnSimpleXml = (null === $dom);
          if ($returnSimpleXml) {
              $dom = new DOMDocument();
          }

          $libxmlState = $useHeuristics ? null : self::hardenLibxml();

          // Load XML with network access disabled (LIBXML_NONET). Warnings
          // raised by loadXML() itself are swallowed by loadXmlErrorHandler().
          set_error_handler(array('Zend_Xml_Security', 'loadXmlErrorHandler'), E_WARNING);
          $loaded = $dom->loadXML($xml, LIBXML_NONET);
          restore_error_handler();

          // Scan for potential XEE attacks using ENTITY, if not PHP-FPM
          $hasEntities = $loaded && !$useHeuristics && self::hasEntityDeclarations($dom);

          // Put libxml back the way we found it on EVERY path, including the
          // one that throws below.
          if (null !== $libxmlState) {
              self::restoreLibxml($libxmlState);
          }

          if (!$loaded) {
              return false;
          }

          if ($hasEntities) {
              throw self::createException(self::ENTITY_DETECT);
          }

          if (!$returnSimpleXml) {
              return $dom;
          }

          $simpleXml = simplexml_import_dom($dom);

          return ($simpleXml instanceof SimpleXMLElement) ? $simpleXml : false;
      }

      /**
       * Scan XML file for potential XXE/XEE attacks
       *
       * @param string $file
       * @param DOMDocument $dom
       * @throws Zend_Xml_Exception If the file does not exist or declares entities.
       * @return SimpleXMLElement|DOMDocument|boolean False when the file cannot be read or parsed.
       */
      public static function scanFile($file, DOMDocument $dom = null)
      {
          if (!file_exists($file)) {
              throw self::createException("The file $file specified doesn't exist");
          }

          // file_get_contents() yields false when the read fails; scan()
          // treats that like any other empty payload and returns false.
          return self::scan(file_get_contents($file), $dom);
      }

      /**
       * Return true if PHP is running with PHP-FPM
       *
       * This method is mainly used to determine whether or not heuristic checks
       * (vs libxml checks) should be made, due to threading issues in libxml;
       * under php-fpm, threading becomes a concern.
       *
       * However, PHP versions 5.5.22+ and 5.6.6+ contain a patch to the
       * libxml support in PHP that makes the libxml checks viable; in such
       * versions, this method will return false to enforce those checks, which
       * are more strict and accurate than the heuristic checks.
       *
       * @return boolean
       */
      public static function isPhpFpm()
      {
          if (0 !== strncmp(php_sapi_name(), 'fpm', 3)) {
              return false;
          }

          if (version_compare(PHP_VERSION, '5.5.22', 'lt')) {
              return true;
          }

          // 'ge' is the valid operator name; 'gte' is rejected by
          // version_compare() (null before PHP 8, ValueError from PHP 8 on).
          return version_compare(PHP_VERSION, '5.6', 'ge')
              && version_compare(PHP_VERSION, '5.6.6', 'lt');
      }

      /**
       * Determine and return the string(s) to use for the <!ENTITY comparison.
       *
       * One needle is produced per candidate encoding; encodings missing from
       * the generator map fall back to the UTF-8 (plain ASCII) representation.
       *
       * @param string $xml
       * @return string[]
       */
      protected static function getEntityComparison($xml)
      {
          $encodingMap = self::getAsciiEncodingMap();
          $candidates  = self::detectXmlEncoding($xml, self::detectStringEncoding($xml));
          $needles     = array();

          foreach ($candidates as $encoding) {
              // XML encoding names are case-insensitive, the map keys are not.
              $key       = strtoupper($encoding);
              $generator = isset($encodingMap[$key]) ? $encodingMap[$key] : $encodingMap['UTF-8'];
              $needles[] = $generator('<!ENTITY');
          }

          return $needles;
      }

      /**
       * Determine the string encoding.
       *
       * Determines string encoding from either a detected BOM or, when no BOM
       * matches, from the encoded form of the XML declaration prefix.
       *
       * @param string $xml
       * @return string File encoding
       */
      protected static function detectStringEncoding($xml)
      {
          $fromBom = self::detectBom($xml);
          if ($fromBom) {
              return $fromBom;
          }

          return self::detectXmlStringEncoding($xml);
      }

      /**
       * Attempt to match a known BOM.
       *
       * Iterates through the return of getBomMap(), comparing the initial bytes
       * of the provided string to the BOM of each; the first match wins.
       *
       * @param string $string
       * @return false|string Returns encoding on success.
       */
      protected static function detectBom($string)
      {
          foreach (self::getBomMap() as $candidate) {
              if (0 === strncmp($string, $candidate['bom'], $candidate['length'])) {
                  return $candidate['encoding'];
              }
          }

          return false;
      }

      /**
       * Attempt to detect the string encoding of an XML string.
       *
       * Checks whether the string starts with "<?xml" as encoded by each
       * generator of getAsciiEncodingMap(), in map order.
       *
       * @param string $xml
       * @return string Encoding ("UTF-8" when nothing matches)
       */
      protected static function detectXmlStringEncoding($xml)
      {
          foreach (self::getAsciiEncodingMap() as $encoding => $generator) {
              $prefix = $generator('<' . '?xml');
              if (0 === strncmp($xml, $prefix, strlen($prefix))) {
                  return $encoding;
              }
          }

          // Fallback
          return 'UTF-8';
      }

      /**
       * Attempt to detect the specified XML encoding.
       *
       * Using the file's encoding, determines if a double-quoted "encoding"
       * attribute is present before the first ">" of the document; if so, it
       * returns a list with both the ASCII representation of the declared
       * encoding and the original file encoding.
       *
       * If not, a list containing only the provided file encoding is returned.
       *
       * @param string $xml
       * @param string $fileEncoding
       * @return string[] Potential XML encodings
       */
      protected static function detectXmlEncoding($xml, $fileEncoding)
      {
          $encodingMap = self::getAsciiEncodingMap();

          // Unknown file encodings are treated as ASCII-compatible instead of
          // ending in a call on an undefined map entry.
          $generator = isset($encodingMap[$fileEncoding])
              ? $encodingMap[$fileEncoding]
              : $encodingMap['UTF-8'];

          $encAttr = $generator('encoding="');
          $quote   = $generator('"');
          $close   = $generator('>');

          $closePos = strpos($xml, $close);
          if (false === $closePos) {
              return array($fileEncoding);
          }

          $encPos = strpos($xml, $encAttr);
          if (false === $encPos || $encPos > $closePos) {
              return array($fileEncoding);
          }

          $encPos  += strlen($encAttr);
          $quotePos = strpos($xml, $quote, $encPos);
          if (false === $quotePos) {
              return array($fileEncoding);
          }

          $encoding = self::substr($xml, $encPos, $quotePos);

          return array(
              // Strip the NUL padding bytes of UTF-16/32 to get the ASCII name.
              // This needs a double-quoted "\0": in single quotes it would be
              // the two characters backslash and zero.
              str_replace("\0", '', $encoding), // detected encoding
              $fileEncoding, // file encoding
          );
      }

      /**
       * Return a list of BOM maps.
       *
       * Returns a list of common encoding -> BOM maps, along with the number
       * of leading bytes to compare against. Four-byte BOMs are listed before
       * the two-byte ones that share their prefix.
       *
       * @link https://en.wikipedia.org/wiki/Byte_order_mark
       * @return array
       */
      protected static function getBomMap()
      {
          return array(
              array(
                  'encoding' => 'UTF-32BE',
                  'bom'      => pack('CCCC', 0x00, 0x00, 0xfe, 0xff),
                  'length'   => 4,
              ),
              array(
                  'encoding' => 'UTF-32LE',
                  'bom'      => pack('CCCC', 0xff, 0xfe, 0x00, 0x00),
                  'length'   => 4,
              ),
              array(
                  'encoding' => 'GB-18030',
                  'bom'      => pack('CCCC', 0x84, 0x31, 0x95, 0x33),
                  'length'   => 4,
              ),
              array(
                  'encoding' => 'UTF-16BE',
                  'bom'      => pack('CC', 0xfe, 0xff),
                  'length'   => 2,
              ),
              array(
                  'encoding' => 'UTF-16LE',
                  'bom'      => pack('CC', 0xff, 0xfe),
                  'length'   => 2,
              ),
              array(
                  'encoding' => 'UTF-8',
                  'bom'      => pack('CCC', 0xef, 0xbb, 0xbf),
                  'length'   => 3,
              ),
          );
      }

      /**
       * Return a map of encoding => generator pairs.
       *
       * Returns a map of encoding => generator pairs, where the generator is a
       * callable that accepts an ASCII string and returns the byte sequence of
       * that string in the given encoding (NUL padding around every character
       * for the UTF-16/32 variants, the string itself for the 8-bit ones).
       *
       * @return array
       */
      protected static function getAsciiEncodingMap()
      {
          return array(
              'UTF-32BE' => function ($ascii) {
                  return preg_replace('/(.)/', "\0\0\0\\1", $ascii);
              },
              'UTF-32LE' => function ($ascii) {
                  return preg_replace('/(.)/', "\\1\0\0\0", $ascii);
              },
              'UTF-32odd1' => function ($ascii) {
                  return preg_replace('/(.)/', "\0\\1\0\0", $ascii);
              },
              'UTF-32odd2' => function ($ascii) {
                  return preg_replace('/(.)/', "\0\0\\1\0", $ascii);
              },
              'UTF-16BE' => function ($ascii) {
                  return preg_replace('/(.)/', "\0\\1", $ascii);
              },
              'UTF-16LE' => function ($ascii) {
                  return preg_replace('/(.)/', "\\1\0", $ascii);
              },
              'UTF-8' => function ($ascii) {
                  return $ascii;
              },
              'GB-18030' => function ($ascii) {
                  return $ascii;
              },
          );
      }

      /**
       * Byte-wise substring between two offsets.
       *
       * Copies the string one offset at a time from $start (inclusive) to
       * $end (exclusive) rather than calling the native substr().
       * NOTE(review): presumably this guards against an overloaded substr()
       * (mbstring function overloading) - confirm before simplifying.
       *
       * @param string $string
       * @param int $start
       * @param int $end
       * @return string
       */
      protected static function substr($string, $start, $end)
      {
          $substr = '';
          for ($i = $start; $i < $end; $i += 1) {
              $substr .= $string[$i];
          }

          return $substr;
      }

      /**
       * Switch libxml into its locked-down mode and remember the old settings.
       *
       * @return array Previous settings, to be handed to restoreLibxml().
       */
      private static function hardenLibxml()
      {
          $state = array(
              'entityLoader'   => null,
              'internalErrors' => libxml_use_internal_errors(true),
          );

          // libxml_disable_entity_loader() is deprecated as of PHP 8.0;
          // only call it on the versions where it is not.
          if (PHP_VERSION_ID < 80000) {
              $state['entityLoader'] = libxml_disable_entity_loader(true);
          }

          return $state;
      }

      /**
       * Restore the libxml settings captured by hardenLibxml().
       *
       * @param array $state
       * @return void
       */
      private static function restoreLibxml(array $state)
      {
          if (null !== $state['entityLoader']) {
              libxml_disable_entity_loader($state['entityLoader']);
          }

          libxml_use_internal_errors($state['internalErrors']);
      }

      /**
       * Tell whether a loaded document carries a DOCTYPE that declares entities.
       *
       * @param DOMDocument $dom
       * @return bool
       */
      private static function hasEntityDeclarations(DOMDocument $dom)
      {
          foreach ($dom->childNodes as $child) {
              if (XML_DOCUMENT_TYPE_NODE === $child->nodeType && $child->entities->length > 0) {
                  return true;
              }
          }

          return false;
      }

      /**
       * Build a Zend_Xml_Exception, loading the class first when necessary.
       *
       * @param string $message
       * @return Zend_Xml_Exception
       */
      private static function createException($message)
      {
          if (!class_exists('Zend_Xml_Exception')) {
              require_once 'Exception.php';
          }

          return new Zend_Xml_Exception($message);
      }
  }