Reader.php 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Feed_Reader
  17. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. * @version $Id$
  20. */
  21. /**
  22. * @see Zend_Feed
  23. */
  24. require_once 'Zend/Feed.php';
  25. /**
  26. * @see Zend_Feed_Reader_Feed_Rss
  27. */
  28. require_once 'Zend/Feed/Reader/Feed/Rss.php';
  29. /**
  30. * @see Zend_Feed_Reader_Feed_Atom
  31. */
  32. require_once 'Zend/Feed/Reader/Feed/Atom.php';
  33. /**
  34. * @see Zend_Feed_Reader_FeedSet
  35. */
  36. require_once 'Zend/Feed/Reader/FeedSet.php';
  37. /**
  38. * @category Zend
  39. * @package Zend_Feed_Reader
  40. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  41. * @license http://framework.zend.com/license/new-bsd New BSD License
  42. */
  43. class Zend_Feed_Reader
  44. {
  45. /**
  46. * Namespace constants
  47. */
  48. const NAMESPACE_ATOM_03 = 'http://purl.org/atom/ns#';
  49. const NAMESPACE_ATOM_10 = 'http://www.w3.org/2005/Atom';
  50. const NAMESPACE_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
  51. const NAMESPACE_RSS_090 = 'http://my.netscape.com/rdf/simple/0.9/';
  52. const NAMESPACE_RSS_10 = 'http://purl.org/rss/1.0/';
  53. /**
  54. * Feed type constants
  55. */
  56. const TYPE_ANY = 'any';
  57. const TYPE_ATOM_03 = 'atom-03';
  58. const TYPE_ATOM_10 = 'atom-10';
  59. const TYPE_ATOM_ANY = 'atom';
  60. const TYPE_RSS_090 = 'rss-090';
  61. const TYPE_RSS_091 = 'rss-091';
  62. const TYPE_RSS_091_NETSCAPE = 'rss-091n';
  63. const TYPE_RSS_091_USERLAND = 'rss-091u';
  64. const TYPE_RSS_092 = 'rss-092';
  65. const TYPE_RSS_093 = 'rss-093';
  66. const TYPE_RSS_094 = 'rss-094';
  67. const TYPE_RSS_10 = 'rss-10';
  68. const TYPE_RSS_20 = 'rss-20';
  69. const TYPE_RSS_ANY = 'rss';
  70. /**
  71. * Cache instance
  72. *
  73. * @var Zend_Cache_Core
  74. */
  75. protected static $_cache = null;
  76. /**
  77. * HTTP client object to use for retrieving feeds
  78. *
  79. * @var Zend_Http_Client
  80. */
  81. protected static $_httpClient = null;
  82. /**
  83. * Override HTTP PUT and DELETE request methods?
  84. *
  85. * @var boolean
  86. */
  87. protected static $_httpMethodOverride = false;
  88. protected static $_httpConditionalGet = false;
  89. protected static $_pluginLoader = null;
  90. protected static $_prefixPaths = array();
  91. protected static $_extensions = array(
  92. 'feed' => array(
  93. 'DublinCore_Feed',
  94. 'Atom_Feed'
  95. ),
  96. 'entry' => array(
  97. 'Content_Entry',
  98. 'DublinCore_Entry',
  99. 'Atom_Entry'
  100. ),
  101. 'core' => array(
  102. 'DublinCore_Feed',
  103. 'Atom_Feed',
  104. 'Content_Entry',
  105. 'DublinCore_Entry',
  106. 'Atom_Entry'
  107. )
  108. );
  109. /**
  110. * Get the Feed cache
  111. *
  112. * @return Zend_Cache_Core
  113. */
  114. public static function getCache()
  115. {
  116. return self::$_cache;
  117. }
  118. /**
  119. * Set the feed cache
  120. *
  121. * @param Zend_Cache_Core $cache
  122. * @return void
  123. */
  124. public static function setCache(Zend_Cache_Core $cache)
  125. {
  126. self::$_cache = $cache;
  127. }
  128. /**
  129. * Set the HTTP client instance
  130. *
  131. * Sets the HTTP client object to use for retrieving the feeds.
  132. *
  133. * @param Zend_Http_Client $httpClient
  134. * @return void
  135. */
  136. public static function setHttpClient(Zend_Http_Client $httpClient)
  137. {
  138. self::$_httpClient = $httpClient;
  139. }
  140. /**
  141. * Gets the HTTP client object. If none is set, a new Zend_Http_Client will be used.
  142. *
  143. * @return Zend_Http_Client_Abstract
  144. */
  145. public static function getHttpClient()
  146. {
  147. if (!self::$_httpClient instanceof Zend_Http_Client) {
  148. /**
  149. * @see Zend_Http_Client
  150. */
  151. require_once 'Zend/Http/Client.php';
  152. self::$_httpClient = new Zend_Http_Client();
  153. }
  154. return self::$_httpClient;
  155. }
  156. /**
  157. * Toggle using POST instead of PUT and DELETE HTTP methods
  158. *
  159. * Some feed implementations do not accept PUT and DELETE HTTP
  160. * methods, or they can't be used because of proxies or other
  161. * measures. This allows turning on using POST where PUT and
  162. * DELETE would normally be used; in addition, an
  163. * X-Method-Override header will be sent with a value of PUT or
  164. * DELETE as appropriate.
  165. *
  166. * @param boolean $override Whether to override PUT and DELETE.
  167. * @return void
  168. */
  169. public static function setHttpMethodOverride($override = true)
  170. {
  171. self::$_httpMethodOverride = $override;
  172. }
  173. /**
  174. * Get the HTTP override state
  175. *
  176. * @return boolean
  177. */
  178. public static function getHttpMethodOverride()
  179. {
  180. return self::$_httpMethodOverride;
  181. }
  182. /**
  183. * Set the flag indicating whether or not to use HTTP conditional GET
  184. *
  185. * @param bool $bool
  186. * @return void
  187. */
  188. public static function useHttpConditionalGet($bool = true)
  189. {
  190. self::$_httpConditionalGet = $bool;
  191. }
  192. /**
  193. * Import a feed by providing a URL
  194. *
  195. * @param string $url The URL to the feed
  196. * @param string $etag OPTIONAL Last received ETag for this resource
  197. * @param string $lastModified OPTIONAL Last-Modified value for this resource
  198. * @return Zend_Feed_Reader_FeedInterface
  199. */
  200. public static function import($uri, $etag = null, $lastModified = null)
  201. {
  202. $cache = self::getCache();
  203. $feed = null;
  204. $responseXml = '';
  205. $client = self::getHttpClient();
  206. $client->resetParameters();
  207. $client->setHeaders('If-None-Match', null);
  208. $client->setHeaders('If-Modified-Since', null);
  209. $client->setUri($uri);
  210. $cacheId = 'Zend_Feed_Reader_' . md5($uri);
  211. if (self::$_httpConditionalGet && $cache) {
  212. $data = $cache->load($cacheId);
  213. if ($data) {
  214. if (is_null($etag)) {
  215. $etag = $cache->load($cacheId.'_etag');
  216. }
  217. if (is_null($lastModified)) {
  218. $lastModified = $cache->load($cacheId.'_lastmodified');;
  219. }
  220. if ($etag) {
  221. $client->setHeaders('If-None-Match', $etag);
  222. }
  223. if ($lastModified) {
  224. $client->setHeaders('If-Modified-Since', $lastModified);
  225. }
  226. }
  227. $response = $client->request('GET');
  228. if ($response->getStatus() !== 200 && $response->getStatus() !== 304) {
  229. require_once 'Zend/Feed/Exception.php';
  230. throw new Zend_Feed_Exception('Feed failed to load, got response code ' . $response->getStatus());
  231. }
  232. if ($response->getStatus() == 304) {
  233. $responseXml = $data;
  234. } else {
  235. $responseXml = $response->getBody();
  236. $cache->save($responseXml, $cacheId);
  237. if ($response->getHeader('ETag')) {
  238. $cache->save($response->getHeader('ETag'), $cacheId.'_etag');
  239. }
  240. if ($response->getHeader('Last-Modified')) {
  241. $cache->save($response->getHeader('Last-Modified'), $cacheId.'_lastmodified');
  242. }
  243. }
  244. return self::importString($responseXml);
  245. } elseif ($cache) {
  246. $data = $cache->load($cacheId);
  247. if ($data !== false) {
  248. return self::importString($data);
  249. }
  250. $response = $client->request('GET');
  251. if ($response->getStatus() !== 200) {
  252. require_once 'Zend/Feed/Exception.php';
  253. throw new Zend_Feed_Exception('Feed failed to load, got response code ' . $response->getStatus());
  254. }
  255. $responseXml = $response->getBody();
  256. $cache->save($responseXml, $cacheId);
  257. return self::importString($responseXml);
  258. } else {
  259. $response = $client->request('GET');
  260. if ($response->getStatus() !== 200) {
  261. require_once 'Zend/Feed/Exception.php';
  262. throw new Zend_Feed_Exception('Feed failed to load, got response code ' . $response->getStatus());
  263. }
  264. return self::importString($response->getBody());
  265. }
  266. }
  267. /**
  268. * Import a feed by providing a Zend_Feed_Abstract object
  269. *
  270. * @param Zend_Feed_Abstract $feed A fully instantiated Zend_Feed object
  271. * @return Zend_Feed_Reader_FeedInterface
  272. */
  273. public static function importFeed(Zend_Feed_Abstract $feed)
  274. {
  275. $dom = $feed->getDOM()->ownerDocument;
  276. $type = self::detectType($dom);
  277. self::_registerCoreExtensions();
  278. if (substr($type, 0, 3) == 'rss') {
  279. $reader = new Zend_Feed_Reader_Feed_Rss($dom, $type);
  280. } else {
  281. $reader = new Zend_Feed_Reader_Feed_Atom($dom, $type);
  282. }
  283. return $reader;
  284. }
  285. /**
  286. * Import a feed froma string
  287. *
  288. * @param string $string
  289. * @return Zend_Feed_Reader_FeedInterface
  290. */
  291. public static function importString($string)
  292. {
  293. $libxml_errflag = libxml_use_internal_errors(true);
  294. $dom = new DOMDocument;
  295. $status = $dom->loadXML($string);
  296. libxml_use_internal_errors($libxml_errflag);
  297. if (!$status) {
  298. // Build error message
  299. $error = libxml_get_last_error();
  300. if ($error && $error->message) {
  301. $errormsg = "DOMDocument cannot parse XML: {$error->message}";
  302. } else {
  303. $errormsg = "DOMDocument cannot parse XML: Please check the XML document's validity";
  304. }
  305. require_once 'Zend/Feed/Exception.php';
  306. throw new Zend_Feed_Exception($errormsg);
  307. }
  308. $type = self::detectType($dom);
  309. self::_registerCoreExtensions();
  310. if (substr($type, 0, 3) == 'rss') {
  311. $reader = new Zend_Feed_Reader_Feed_Rss($dom, $type);
  312. } elseif (substr($type, 0, 4) == 'atom') {
  313. $reader = new Zend_Feed_Reader_Feed_Atom($dom, $type);
  314. } else {
  315. require_once 'Zend/Feed/Exception.php';
  316. throw new Zend_Feed_Exception('The URI used does not point to a '
  317. . 'valid Atom, RSS or RDF feed that Zend_Feed_Reader can parse.');
  318. }
  319. return $reader;
  320. }
  321. /**
  322. * Imports a feed from a file located at $filename.
  323. *
  324. * @param string $filename
  325. * @throws Zend_Feed_Exception
  326. * @return Zend_Feed_Reader_FeedInterface
  327. */
  328. public static function importFile($filename)
  329. {
  330. @ini_set('track_errors', 1);
  331. $feed = @file_get_contents($filename);
  332. @ini_restore('track_errors');
  333. if ($feed === false) {
  334. /**
  335. * @see Zend_Feed_Exception
  336. */
  337. require_once 'Zend/Feed/Exception.php';
  338. throw new Zend_Feed_Exception("File could not be loaded: $php_errormsg");
  339. }
  340. return self::importString($feed);
  341. }
  342. public static function findFeedLinks($uri)
  343. {
  344. // Get the HTTP response from $uri and save the contents
  345. $client = self::getHttpClient();
  346. $client->setUri($uri);
  347. $response = $client->request();
  348. if ($response->getStatus() !== 200) {
  349. /**
  350. * @see Zend_Feed_Exception
  351. */
  352. require_once 'Zend/Feed/Exception.php';
  353. throw new Zend_Feed_Exception("Failed to access $uri, got response code " . $response->getStatus());
  354. }
  355. $responseHtml = $response->getBody();
  356. $libxml_errflag = libxml_use_internal_errors(true);
  357. $dom = new DOMDocument;
  358. $status = $dom->loadHTML($responseHtml);
  359. libxml_use_internal_errors($libxml_errflag);
  360. if (!$status) {
  361. // Build error message
  362. $error = libxml_get_last_error();
  363. if ($error && $error->message) {
  364. $errormsg = "DOMDocument cannot parse HTML: {$error->message}";
  365. } else {
  366. $errormsg = "DOMDocument cannot parse HTML: Please check the XML document's validity";
  367. }
  368. require_once 'Zend/Feed/Exception.php';
  369. throw new Zend_Feed_Exception($errormsg);
  370. }
  371. $feedSet = new Zend_Feed_Reader_FeedSet;
  372. $links = $dom->getElementsByTagName('link');
  373. $feedSet->addLinks($links);
  374. return $feedSet;
  375. }
  376. /**
  377. * Detect the feed type of the provided feed
  378. *
  379. * @param Zend_Feed_Abstract $feed A fully instantiated Zend_Feed object
  380. * @return string
  381. */
  382. public static function detectType($feed)
  383. {
  384. if ($feed instanceof Zend_Feed_Reader_FeedInterface) {
  385. $dom = $feed->getDomDocument();
  386. } elseif($feed instanceof DomDocument) {
  387. $dom = $feed;
  388. } elseif(is_string($feed) && !empty($feed)) {
  389. @ini_set('track_errors', 1);
  390. $dom = new DOMDocument;
  391. $status = @$doc->loadXML($string);
  392. @ini_restore('track_errors');
  393. if (!$status) {
  394. if (!isset($php_errormsg)) {
  395. if (function_exists('xdebug_is_enabled')) {
  396. $php_errormsg = '(error message not available, when XDebug is running)';
  397. } else {
  398. $php_errormsg = '(error message not available)';
  399. }
  400. }
  401. require_once 'Zend/Feed/Exception.php';
  402. throw new Zend_Feed_Exception("DOMDocument cannot parse XML: $php_errormsg");
  403. }
  404. } else {
  405. require_once 'Zend/Feed/Exception.php';
  406. throw new Zend_Feed_Exception('Invalid object/scalar provided: must be of type Zend_Feed_Reader_FeedInterface, DomDocument or string');
  407. }
  408. $xpath = new DOMXPath($dom);
  409. if ($xpath->query('/rss')->length) {
  410. $type = self::TYPE_RSS_ANY;
  411. $version = $xpath->evaluate('string(/rss/@version)');
  412. if (strlen($version) > 0) {
  413. switch($version) {
  414. case '2.0':
  415. $type = self::TYPE_RSS_20;
  416. break;
  417. case '0.94':
  418. $type = self::TYPE_RSS_094;
  419. break;
  420. case '0.93':
  421. $type = self::TYPE_RSS_093;
  422. break;
  423. case '0.92':
  424. $type = self::TYPE_RSS_092;
  425. break;
  426. case '0.91':
  427. $type = self::TYPE_RSS_091;
  428. break;
  429. }
  430. }
  431. return $type;
  432. }
  433. $xpath->registerNamespace('rdf', self::NAMESPACE_RDF);
  434. if ($xpath->query('/rdf:RDF')->length) {
  435. $xpath->registerNamespace('rss', self::NAMESPACE_RSS_10);
  436. if ($xpath->query('/rdf:RDF/rss:channel')->length
  437. || $xpath->query('/rdf:RDF/rss:image')->length
  438. || $xpath->query('/rdf:RDF/rss:item')->length
  439. || $xpath->query('/rdf:RDF/rss:textinput')->length
  440. ) {
  441. return self::TYPE_RSS_10;
  442. }
  443. $xpath->registerNamespace('rss', self::NAMESPACE_RSS_090);
  444. if ($xpath->query('/rdf:RDF/rss:channel')->length
  445. || $xpath->query('/rdf:RDF/rss:image')->length
  446. || $xpath->query('/rdf:RDF/rss:item')->length
  447. || $xpath->query('/rdf:RDF/rss:textinput')->length
  448. ) {
  449. return self::TYPE_RSS_090;
  450. }
  451. }
  452. $type = self::TYPE_ATOM_ANY;
  453. $xpath->registerNamespace('atom', self::NAMESPACE_ATOM_10);
  454. if ($xpath->query('//atom:feed')->length) {
  455. return self::TYPE_ATOM_10;
  456. }
  457. $xpath->registerNamespace('atom', self::NAMESPACE_ATOM_03);
  458. if ($xpath->query('//atom:feed')->length) {
  459. return self::TYPE_ATOM_03;
  460. }
  461. return self::TYPE_ANY;
  462. }
  463. /**
  464. * Set plugin loader for use with Extensions
  465. *
  466. * @param Zend_Loader_PluginLoader_Interface $loader
  467. */
  468. public static function setPluginLoader(Zend_Loader_PluginLoader_Interface $loader)
  469. {
  470. self::$_pluginLoader = $loader;
  471. }
  472. /**
  473. * Get plugin loader for use with Extensions
  474. *
  475. * @return Zend_Loader_PluginLoader_Interface $loader
  476. */
  477. public static function getPluginLoader()
  478. {
  479. if (!isset(self::$_pluginLoader)) {
  480. require_once 'Zend/Loader/PluginLoader.php';
  481. self::$_pluginLoader = new Zend_Loader_PluginLoader(array(
  482. 'Zend_Feed_Reader_Extension_' => 'Zend/Feed/Reader/Extension/',
  483. ));
  484. }
  485. return self::$_pluginLoader;
  486. }
  487. /**
  488. * Add prefix path for loading Extensions
  489. *
  490. * @param string $prefix
  491. * @param string $path
  492. * @return void
  493. */
  494. public static function addPrefixPath($prefix, $path)
  495. {
  496. $prefix = rtrim($prefix, '_');
  497. $path = rtrim($path, DIRECTORY_SEPARATOR);
  498. self::getPluginLoader()->addPrefixPath($prefix, $path);
  499. }
  500. /**
  501. * Add multiple Extension prefix paths at once
  502. *
  503. * @param array $spec
  504. * @return void
  505. */
  506. public static function addPrefixPaths(array $spec)
  507. {
  508. if (isset($spec['prefix']) && isset($spec['path'])) {
  509. self::addPrefixPath($spec['prefix'], $spec['path']);
  510. }
  511. foreach ($spec as $prefixPath) {
  512. if (isset($prefixPath['prefix']) && isset($prefixPath['path'])) {
  513. self::addPrefixPath($prefixPath['prefix'], $prefixPath['path']);
  514. }
  515. }
  516. }
  517. /**
  518. * Register an Extension by name
  519. *
  520. * @param string $name
  521. * @return void
  522. * @throws Zend_Feed_Exception if unable to resolve Extension class
  523. */
  524. public static function registerExtension($name)
  525. {
  526. $feedName = $name . '_Feed';
  527. $entryName = $name . '_Entry';
  528. if (self::isRegistered($name)) {
  529. if (self::getPluginLoader()->isLoaded($feedName) ||
  530. self::getPluginLoader()->isLoaded($entryName)) {
  531. return;
  532. }
  533. }
  534. try {
  535. self::getPluginLoader()->load($feedName);
  536. self::$_extensions['feed'][] = $feedName;
  537. } catch (Zend_Loader_PluginLoader_Exception $e) {
  538. }
  539. try {
  540. self::getPluginLoader()->load($entryName);
  541. self::$_extensions['entry'][] = $entryName;
  542. } catch (Zend_Loader_PluginLoader_Exception $e) {
  543. }
  544. if (!self::getPluginLoader()->isLoaded($feedName)
  545. && !self::getPluginLoader()->isLoaded($entryName)
  546. ) {
  547. require_once 'Zend/Feed/Exception.php';
  548. throw new Zend_Feed_Exception('Could not load extension: ' . $name
  549. . 'using Plugin Loader. Check prefix paths are configured and extension exists.');
  550. }
  551. }
  552. /**
  553. * Is a given named Extension registered?
  554. *
  555. * @param string $extensionName
  556. * @return boolean
  557. */
  558. public static function isRegistered($extensionName)
  559. {
  560. $feedName = $extensionName . '_Feed';
  561. $entryName = $extensionName . '_Entry';
  562. if (in_array($feedName, self::$_extensions['feed'])
  563. || in_array($entryName, self::$_extensions['entry'])
  564. ) {
  565. return true;
  566. }
  567. return false;
  568. }
  569. /**
  570. * Get a list of extensions
  571. *
  572. * @return array
  573. */
  574. public static function getExtensions()
  575. {
  576. return self::$_extensions;
  577. }
  578. /**
  579. * Reset class state to defaults
  580. *
  581. * @return void
  582. */
  583. public static function reset()
  584. {
  585. self::$_cache = null;
  586. self::$_httpClient = null;
  587. self::$_httpMethodOverride = false;
  588. self::$_httpConditionalGet = false;
  589. self::$_pluginLoader = null;
  590. self::$_prefixPaths = array();
  591. self::$_extensions = array(
  592. 'feed' => array(
  593. 'DublinCore_Feed',
  594. 'Atom_Feed'
  595. ),
  596. 'entry' => array(
  597. 'Content_Entry',
  598. 'DublinCore_Entry',
  599. 'Atom_Entry'
  600. ),
  601. 'core' => array(
  602. 'DublinCore_Feed',
  603. 'Atom_Feed',
  604. 'Content_Entry',
  605. 'DublinCore_Entry',
  606. 'Atom_Entry'
  607. )
  608. );
  609. }
  610. /**
  611. * Register core (default) extensions
  612. *
  613. * @return void
  614. */
  615. protected static function _registerCoreExtensions()
  616. {
  617. self::registerExtension('DublinCore');
  618. self::registerExtension('Content');
  619. self::registerExtension('Atom');
  620. self::registerExtension('Slash');
  621. self::registerExtension('WellFormedWeb');
  622. self::registerExtension('Thread');
  623. self::registerExtension('Podcast');
  624. }
  625. }