Feed.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Feed
  17. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. * @version $Id$
  20. */
  21. /**
  22. * Feed utility class
  23. *
  24. * Base Zend_Feed class, containing constants and the Zend_Http_Client instance
  25. * accessor.
  26. *
  27. * @category Zend
  28. * @package Zend_Feed
  29. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  30. * @license http://framework.zend.com/license/new-bsd New BSD License
  31. */
  32. class Zend_Feed
  33. {
  34. /**
  35. * HTTP client object to use for retrieving feeds
  36. *
  37. * @var Zend_Http_Client
  38. */
  39. protected static $_httpClient = null;
  40. /**
  41. * Override HTTP PUT and DELETE request methods?
  42. *
  43. * @var boolean
  44. */
  45. protected static $_httpMethodOverride = false;
  46. /**
  47. * @var array
  48. */
  49. protected static $_namespaces = array(
  50. 'opensearch' => 'http://a9.com/-/spec/opensearchrss/1.0/',
  51. 'atom' => 'http://www.w3.org/2005/Atom',
  52. 'rss' => 'http://blogs.law.harvard.edu/tech/rss',
  53. );
  54. /**
  55. * Set the HTTP client instance
  56. *
  57. * Sets the HTTP client object to use for retrieving the feeds.
  58. *
  59. * @param Zend_Http_Client $httpClient
  60. * @return void
  61. */
  62. public static function setHttpClient(Zend_Http_Client $httpClient)
  63. {
  64. self::$_httpClient = $httpClient;
  65. }
  66. /**
  67. * Gets the HTTP client object. If none is set, a new Zend_Http_Client will be used.
  68. *
  69. * @return Zend_Http_Client_Abstract
  70. */
  71. public static function getHttpClient()
  72. {
  73. if (!self::$_httpClient instanceof Zend_Http_Client) {
  74. /**
  75. * @see Zend_Http_Client
  76. */
  77. require_once 'Zend/Http/Client.php';
  78. self::$_httpClient = new Zend_Http_Client();
  79. }
  80. return self::$_httpClient;
  81. }
  82. /**
  83. * Toggle using POST instead of PUT and DELETE HTTP methods
  84. *
  85. * Some feed implementations do not accept PUT and DELETE HTTP
  86. * methods, or they can't be used because of proxies or other
  87. * measures. This allows turning on using POST where PUT and
  88. * DELETE would normally be used; in addition, an
  89. * X-Method-Override header will be sent with a value of PUT or
  90. * DELETE as appropriate.
  91. *
  92. * @param boolean $override Whether to override PUT and DELETE.
  93. * @return void
  94. */
  95. public static function setHttpMethodOverride($override = true)
  96. {
  97. self::$_httpMethodOverride = $override;
  98. }
  99. /**
  100. * Get the HTTP override state
  101. *
  102. * @return boolean
  103. */
  104. public static function getHttpMethodOverride()
  105. {
  106. return self::$_httpMethodOverride;
  107. }
  108. /**
  109. * Get the full version of a namespace prefix
  110. *
  111. * Looks up a prefix (atom:, etc.) in the list of registered
  112. * namespaces and returns the full namespace URI if
  113. * available. Returns the prefix, unmodified, if it's not
  114. * registered.
  115. *
  116. * @return string
  117. */
  118. public static function lookupNamespace($prefix)
  119. {
  120. return isset(self::$_namespaces[$prefix]) ?
  121. self::$_namespaces[$prefix] :
  122. $prefix;
  123. }
  124. /**
  125. * Add a namespace and prefix to the registered list
  126. *
  127. * Takes a prefix and a full namespace URI and adds them to the
  128. * list of registered namespaces for use by
  129. * Zend_Feed::lookupNamespace().
  130. *
  131. * @param string $prefix The namespace prefix
  132. * @param string $namespaceURI The full namespace URI
  133. * @return void
  134. */
  135. public static function registerNamespace($prefix, $namespaceURI)
  136. {
  137. self::$_namespaces[$prefix] = $namespaceURI;
  138. }
  139. /**
  140. * Imports a feed located at $uri.
  141. *
  142. * @param string $uri
  143. * @throws Zend_Feed_Exception
  144. * @return Zend_Feed_Abstract
  145. */
  146. public static function import($uri)
  147. {
  148. $client = self::getHttpClient();
  149. $client->setUri($uri);
  150. $response = $client->request('GET');
  151. if ($response->getStatus() !== 200) {
  152. /**
  153. * @see Zend_Feed_Exception
  154. */
  155. require_once 'Zend/Feed/Exception.php';
  156. throw new Zend_Feed_Exception('Feed failed to load, got response code ' . $response->getStatus());
  157. }
  158. $feed = $response->getBody();
  159. return self::importString($feed);
  160. }
  161. /**
  162. * Imports a feed represented by $string.
  163. *
  164. * @param string $string
  165. * @throws Zend_Feed_Exception
  166. * @return Zend_Feed_Abstract
  167. */
  168. public static function importString($string)
  169. {
  170. // Load the feed as an XML DOMDocument object
  171. $libxml_errflag = libxml_use_internal_errors(true);
  172. $libxml_entity_loader = libxml_disable_entity_loader(true);
  173. $doc = new DOMDocument;
  174. if (trim($string) == '') {
  175. require_once 'Zend/Feed/Exception.php';
  176. throw new Zend_Feed_Exception('Document/string being imported'
  177. . ' is an Empty string or comes from an empty HTTP response');
  178. }
  179. $status = $doc->loadXML($string);
  180. libxml_disable_entity_loader($libxml_entity_loader);
  181. libxml_use_internal_errors($libxml_errflag);
  182. if (!$status) {
  183. // prevent the class to generate an undefined variable notice (ZF-2590)
  184. // Build error message
  185. $error = libxml_get_last_error();
  186. if ($error && $error->message) {
  187. $errormsg = "DOMDocument cannot parse XML: {$error->message}";
  188. } else {
  189. $errormsg = "DOMDocument cannot parse XML";
  190. }
  191. /**
  192. * @see Zend_Feed_Exception
  193. */
  194. require_once 'Zend/Feed/Exception.php';
  195. throw new Zend_Feed_Exception($errormsg);
  196. }
  197. // Try to find the base feed element or a single <entry> of an Atom feed
  198. if ($doc->getElementsByTagName('feed')->item(0) ||
  199. $doc->getElementsByTagName('entry')->item(0)) {
  200. /**
  201. * @see Zend_Feed_Atom
  202. */
  203. require_once 'Zend/Feed/Atom.php';
  204. // return a newly created Zend_Feed_Atom object
  205. return new Zend_Feed_Atom(null, $string);
  206. }
  207. // Try to find the base feed element of an RSS feed
  208. if ($doc->getElementsByTagName('channel')->item(0)) {
  209. /**
  210. * @see Zend_Feed_Rss
  211. */
  212. require_once 'Zend/Feed/Rss.php';
  213. // return a newly created Zend_Feed_Rss object
  214. return new Zend_Feed_Rss(null, $string);
  215. }
  216. // $string does not appear to be a valid feed of the supported types
  217. /**
  218. * @see Zend_Feed_Exception
  219. */
  220. require_once 'Zend/Feed/Exception.php';
  221. throw new Zend_Feed_Exception('Invalid or unsupported feed format');
  222. }
  223. /**
  224. * Imports a feed from a file located at $filename.
  225. *
  226. * @param string $filename
  227. * @throws Zend_Feed_Exception
  228. * @return Zend_Feed_Abstract
  229. */
  230. public static function importFile($filename)
  231. {
  232. @ini_set('track_errors', 1);
  233. $feed = @file_get_contents($filename);
  234. @ini_restore('track_errors');
  235. if ($feed === false) {
  236. /**
  237. * @see Zend_Feed_Exception
  238. */
  239. require_once 'Zend/Feed/Exception.php';
  240. throw new Zend_Feed_Exception("File could not be loaded: $php_errormsg");
  241. }
  242. return self::importString($feed);
  243. }
  244. /**
  245. * Attempts to find feeds at $uri referenced by <link ... /> tags. Returns an
  246. * array of the feeds referenced at $uri.
  247. *
  248. * @todo Allow findFeeds() to follow one, but only one, code 302.
  249. *
  250. * @param string $uri
  251. * @throws Zend_Feed_Exception
  252. * @return array
  253. */
  254. public static function findFeeds($uri)
  255. {
  256. // Get the HTTP response from $uri and save the contents
  257. $client = self::getHttpClient();
  258. $client->setUri($uri);
  259. $response = $client->request();
  260. if ($response->getStatus() !== 200) {
  261. /**
  262. * @see Zend_Feed_Exception
  263. */
  264. require_once 'Zend/Feed/Exception.php';
  265. throw new Zend_Feed_Exception("Failed to access $uri, got response code " . $response->getStatus());
  266. }
  267. $contents = $response->getBody();
  268. // Parse the contents for appropriate <link ... /> tags
  269. @ini_set('track_errors', 1);
  270. $pattern = '~(<link[^>]+)/?>~i';
  271. $result = @preg_match_all($pattern, $contents, $matches);
  272. @ini_restore('track_errors');
  273. if ($result === false) {
  274. /**
  275. * @see Zend_Feed_Exception
  276. */
  277. require_once 'Zend/Feed/Exception.php';
  278. throw new Zend_Feed_Exception("Internal error: $php_errormsg");
  279. }
  280. // Try to fetch a feed for each link tag that appears to refer to a feed
  281. $feeds = array();
  282. if (isset($matches[1]) && count($matches[1]) > 0) {
  283. foreach ($matches[1] as $link) {
  284. // force string to be an utf-8 one
  285. if (!mb_check_encoding($link, 'UTF-8')) {
  286. $link = mb_convert_encoding($link, 'UTF-8');
  287. }
  288. $xml = @simplexml_load_string(rtrim($link, ' /') . ' />');
  289. if ($xml === false) {
  290. continue;
  291. }
  292. $attributes = $xml->attributes();
  293. if (!isset($attributes['rel']) || !@preg_match('~^(?:alternate|service\.feed)~i', $attributes['rel'])) {
  294. continue;
  295. }
  296. if (!isset($attributes['type']) ||
  297. !@preg_match('~^application/(?:atom|rss|rdf)\+xml~', $attributes['type'])) {
  298. continue;
  299. }
  300. if (!isset($attributes['href'])) {
  301. continue;
  302. }
  303. try {
  304. // checks if we need to canonize the given uri
  305. try {
  306. $uri = Zend_Uri::factory((string) $attributes['href']);
  307. } catch (Zend_Uri_Exception $e) {
  308. // canonize the uri
  309. $path = (string) $attributes['href'];
  310. $query = $fragment = '';
  311. if (substr($path, 0, 1) != '/') {
  312. // add the current root path to this one
  313. $path = rtrim($client->getUri()->getPath(), '/') . '/' . $path;
  314. }
  315. if (strpos($path, '?') !== false) {
  316. list($path, $query) = explode('?', $path, 2);
  317. }
  318. if (strpos($query, '#') !== false) {
  319. list($query, $fragment) = explode('#', $query, 2);
  320. }
  321. $uri = Zend_Uri::factory($client->getUri(true));
  322. $uri->setPath($path);
  323. $uri->setQuery($query);
  324. $uri->setFragment($fragment);
  325. }
  326. $feed = self::import($uri);
  327. } catch (Exception $e) {
  328. continue;
  329. }
  330. $feeds[$uri->getUri()] = $feed;
  331. }
  332. }
  333. // Return the fetched feeds
  334. return $feeds;
  335. }
  336. /**
  337. * Construct a new Zend_Feed_Abstract object from a custom array
  338. *
  339. * @param array $data
  340. * @param string $format (rss|atom) the requested output format
  341. * @return Zend_Feed_Abstract
  342. */
  343. public static function importArray(array $data, $format = 'atom')
  344. {
  345. $obj = 'Zend_Feed_' . ucfirst(strtolower($format));
  346. if (!class_exists($obj)) {
  347. require_once 'Zend/Loader.php';
  348. Zend_Loader::loadClass($obj);
  349. }
  350. /**
  351. * @see Zend_Feed_Builder
  352. */
  353. require_once 'Zend/Feed/Builder.php';
  354. return new $obj(null, null, new Zend_Feed_Builder($data));
  355. }
  356. /**
  357. * Construct a new Zend_Feed_Abstract object from a Zend_Feed_Builder_Interface data source
  358. *
  359. * @param Zend_Feed_Builder_Interface $builder this object will be used to extract the data of the feed
  360. * @param string $format (rss|atom) the requested output format
  361. * @return Zend_Feed_Abstract
  362. */
  363. public static function importBuilder(Zend_Feed_Builder_Interface $builder, $format = 'atom')
  364. {
  365. $obj = 'Zend_Feed_' . ucfirst(strtolower($format));
  366. if (!class_exists($obj)) {
  367. require_once 'Zend/Loader.php';
  368. Zend_Loader::loadClass($obj);
  369. }
  370. return new $obj(null, null, $builder);
  371. }
  372. }