Reader.php 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Feed_Reader
  17. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. * @version $Id$
  20. */
  21. /**
  22. * @see Zend_Feed
  23. */
  24. require_once 'Zend/Feed.php';
  25. /**
  26. * @see Zend_Feed_Reader_Feed_Rss
  27. */
  28. require_once 'Zend/Feed/Reader/Feed/Rss.php';
  29. /**
  30. * @see Zend_Feed_Reader_Feed_Atom
  31. */
  32. require_once 'Zend/Feed/Reader/Feed/Atom.php';
  33. /**
  34. * @see Zend_Feed_Reader_FeedSet
  35. */
  36. require_once 'Zend/Feed/Reader/FeedSet.php';
  37. /**
  38. * @category Zend
  39. * @package Zend_Feed_Reader
  40. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  41. * @license http://framework.zend.com/license/new-bsd New BSD License
  42. */
  class Zend_Feed_Reader
  {
      /**
       * Namespace constants
       */
      const NAMESPACE_ATOM_03 = 'http://purl.org/atom/ns#';
      const NAMESPACE_ATOM_10 = 'http://www.w3.org/2005/Atom';
      const NAMESPACE_RDF     = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
      const NAMESPACE_RSS_090 = 'http://my.netscape.com/rdf/simple/0.9/';
      const NAMESPACE_RSS_10  = 'http://purl.org/rss/1.0/';

      /**
       * Feed type constants
       */
      const TYPE_ANY              = 'any';
      const TYPE_ATOM_03          = 'atom-03';
      const TYPE_ATOM_10          = 'atom-10';
      const TYPE_ATOM_ANY         = 'atom';
      const TYPE_RSS_090          = 'rss-090';
      const TYPE_RSS_091          = 'rss-091';
      const TYPE_RSS_091_NETSCAPE = 'rss-091n';
      const TYPE_RSS_091_USERLAND = 'rss-091u';
      const TYPE_RSS_092          = 'rss-092';
      const TYPE_RSS_093          = 'rss-093';
      const TYPE_RSS_094          = 'rss-094';
      const TYPE_RSS_10           = 'rss-10';
      const TYPE_RSS_20           = 'rss-20';
      const TYPE_RSS_ANY          = 'rss';

      /**
       * Cache instance
       *
       * @var Zend_Cache_Core
       */
      protected static $_cache = null;

      /**
       * HTTP client object to use for retrieving feeds
       *
       * @var Zend_Http_Client
       */
      protected static $_httpClient = null;

      /**
       * Override HTTP PUT and DELETE request methods?
       *
       * @var boolean
       */
      protected static $_httpMethodOverride = false;

      /**
       * Whether import() sends conditional GET headers (If-None-Match /
       * If-Modified-Since) built from cached values.
       *
       * @var boolean
       */
      protected static $_httpConditionalGet = false;

      /**
       * Plugin loader used to resolve Extension classes; created on demand
       * by getPluginLoader().
       *
       * @var Zend_Loader_PluginLoader_Interface
       */
      protected static $_pluginLoader = null;

      /**
       * Extension prefix paths. Within this class the property is only
       * cleared by reset(); it is not read here.
       *
       * @var array
       */
      protected static $_prefixPaths = array();

      /**
       * Registered Extension class-name suffixes, grouped under the keys
       * 'feed', 'entry' and 'core'.
       *
       * @var array
       */
      protected static $_extensions = array(
          'feed' => array(
              'DublinCore_Feed',
              'Atom_Feed'
          ),
          'entry' => array(
              'Content_Entry',
              'DublinCore_Entry',
              'Atom_Entry'
          ),
          'core' => array(
              'DublinCore_Feed',
              'Atom_Feed',
              'Content_Entry',
              'DublinCore_Entry',
              'Atom_Entry'
          )
      );

      /**
       * Get the Feed cache
       *
       * @return Zend_Cache_Core|null Null when no cache has been set
       */
      public static function getCache()
      {
          return self::$_cache;
      }

      /**
       * Set the feed cache
       *
       * @param Zend_Cache_Core $cache
       * @return void
       */
      public static function setCache(Zend_Cache_Core $cache)
      {
          self::$_cache = $cache;
      }

      /**
       * Set the HTTP client instance
       *
       * Sets the HTTP client object to use for retrieving the feeds.
       *
       * @param Zend_Http_Client $httpClient
       * @return void
       */
      public static function setHttpClient(Zend_Http_Client $httpClient)
      {
          self::$_httpClient = $httpClient;
      }

      /**
       * Gets the HTTP client object. If none is set, a new Zend_Http_Client
       * is created, stored and returned.
       *
       * @return Zend_Http_Client
       */
      public static function getHttpClient()
      {
          if (!self::$_httpClient instanceof Zend_Http_Client) {
              /**
               * @see Zend_Http_Client
               */
              require_once 'Zend/Http/Client.php';
              self::$_httpClient = new Zend_Http_Client();
          }
          return self::$_httpClient;
      }

      /**
       * Toggle using POST instead of PUT and DELETE HTTP methods
       *
       * Some feed implementations do not accept PUT and DELETE HTTP
       * methods, or they can't be used because of proxies or other
       * measures. This allows turning on using POST where PUT and
       * DELETE would normally be used; in addition, an
       * X-Method-Override header will be sent with a value of PUT or
       * DELETE as appropriate.
       *
       * @param boolean $override Whether to override PUT and DELETE.
       * @return void
       */
      public static function setHttpMethodOverride($override = true)
      {
          self::$_httpMethodOverride = $override;
      }

      /**
       * Get the HTTP override state
       *
       * @return boolean
       */
      public static function getHttpMethodOverride()
      {
          return self::$_httpMethodOverride;
      }

      /**
       * Set the flag indicating whether or not to use HTTP conditional GET
       *
       * @param bool $bool
       * @return void
       */
      public static function useHttpConditionalGet($bool = true)
      {
          self::$_httpConditionalGet = $bool;
      }

      /**
       * Import a feed by providing a URI
       *
       * Three modes, depending on configuration:
       *  - conditional GET enabled and a cache set: validators from the cache
       *    (or the arguments) are sent, and a 304 response reuses the cached body;
       *  - cache only: a cached body is returned without any HTTP request;
       *  - no cache: the feed is always fetched.
       *
       * @param string $uri The URI to the feed
       * @param string $etag OPTIONAL Last received ETag for this resource
       * @param string $lastModified OPTIONAL Last-Modified value for this resource
       * @return Zend_Feed_Reader_FeedInterface
       * @throws Zend_Feed_Exception on an unexpected HTTP status or unparseable body
       */
      public static function import($uri, $etag = null, $lastModified = null)
      {
          $cache  = self::getCache();
          $client = self::getHttpClient();

          // Start from a clean request so validators left over from an
          // earlier import are not sent again.
          $client->resetParameters();
          $client->setHeaders('If-None-Match', null);
          $client->setHeaders('If-Modified-Since', null);
          $client->setUri($uri);

          $cacheId = 'Zend_Feed_Reader_' . md5($uri);

          if (self::$_httpConditionalGet && $cache) {
              $data = $cache->load($cacheId);
              // Validators are only sent when a cached body exists to fall
              // back on if the server answers 304.
              if ($data) {
                  if (is_null($etag)) {
                      $etag = $cache->load($cacheId . '_etag');
                  }
                  if (is_null($lastModified)) {
                      $lastModified = $cache->load($cacheId . '_lastmodified');
                  }
                  if ($etag) {
                      $client->setHeaders('If-None-Match', $etag);
                  }
                  if ($lastModified) {
                      $client->setHeaders('If-Modified-Since', $lastModified);
                  }
              }

              $response = $client->request('GET');
              if ($response->getStatus() !== 200 && $response->getStatus() !== 304) {
                  require_once 'Zend/Feed/Exception.php';
                  throw new Zend_Feed_Exception('Feed failed to load, got response code ' . $response->getStatus());
              }

              if ($response->getStatus() == 304) {
                  $responseXml = $data;
              } else {
                  $responseXml = $response->getBody();
                  $cache->save($responseXml, $cacheId);
                  if ($response->getHeader('ETag')) {
                      $cache->save($response->getHeader('ETag'), $cacheId . '_etag');
                  }
                  if ($response->getHeader('Last-Modified')) {
                      $cache->save($response->getHeader('Last-Modified'), $cacheId . '_lastmodified');
                  }
              }
              return self::importString($responseXml);
          }

          if ($cache) {
              $data = $cache->load($cacheId);
              if ($data !== false) {
                  return self::importString($data);
              }

              $response = $client->request('GET');
              if ($response->getStatus() !== 200) {
                  require_once 'Zend/Feed/Exception.php';
                  throw new Zend_Feed_Exception('Feed failed to load, got response code ' . $response->getStatus());
              }
              $responseXml = $response->getBody();
              $cache->save($responseXml, $cacheId);
              return self::importString($responseXml);
          }

          $response = $client->request('GET');
          if ($response->getStatus() !== 200) {
              require_once 'Zend/Feed/Exception.php';
              throw new Zend_Feed_Exception('Feed failed to load, got response code ' . $response->getStatus());
          }
          return self::importString($response->getBody());
      }

      /**
       * Import a feed by providing a Zend_Feed_Abstract object
       *
       * @param Zend_Feed_Abstract $feed A fully instantiated Zend_Feed object
       * @return Zend_Feed_Reader_FeedInterface
       */
      public static function importFeed(Zend_Feed_Abstract $feed)
      {
          return self::_buildReader($feed->getDOM()->ownerDocument);
      }

      /**
       * Import a feed from a string
       *
       * @param string $string
       * @return Zend_Feed_Reader_FeedInterface
       * @throws Zend_Feed_Exception if the string cannot be parsed as XML
       */
      public static function importString($string)
      {
          $previousErrorMode = libxml_use_internal_errors(true);
          $dom    = new DOMDocument;
          $status = $dom->loadXML($string);
          // Read the error before restoring the previous mode: switching
          // internal error handling off discards the collected errors.
          $error  = libxml_get_last_error();
          libxml_use_internal_errors($previousErrorMode);

          if (!$status) {
              if ($error && $error->message) {
                  $errormsg = "DOMDocument cannot parse XML: {$error->message}";
              } else {
                  $errormsg = "DOMDocument cannot parse XML: Please check the XML document's validity";
              }
              require_once 'Zend/Feed/Exception.php';
              throw new Zend_Feed_Exception($errormsg);
          }

          return self::_buildReader($dom);
      }

      /**
       * Imports a feed from a file located at $filename.
       *
       * @param string $filename
       * @throws Zend_Feed_Exception if the file cannot be read or parsed
       * @return Zend_Feed_Reader_FeedInterface
       */
      public static function importFile($filename)
      {
          $feed = @file_get_contents($filename);
          if ($feed === false) {
              // error_get_last() replaces track_errors/$php_errormsg, which
              // is deprecated and no longer available in current PHP.
              $lastError = error_get_last();
              $reason    = '(error message not available)';
              if (is_array($lastError) && isset($lastError['message'])) {
                  $reason = $lastError['message'];
              }
              /**
               * @see Zend_Feed_Exception
               */
              require_once 'Zend/Feed/Exception.php';
              throw new Zend_Feed_Exception("File could not be loaded: $reason");
          }
          return self::importString($feed);
      }

      /**
       * Fetch $uri over HTTP, parse the response body as HTML and hand every
       * <link> element found to a new Zend_Feed_Reader_FeedSet.
       *
       * @param string $uri
       * @return Zend_Feed_Reader_FeedSet
       * @throws Zend_Feed_Exception on a non-200 response or unparseable HTML
       */
      public static function findFeedLinks($uri)
      {
          // Get the HTTP response from $uri and save the contents
          $client = self::getHttpClient();
          $client->setUri($uri);
          $response = $client->request();
          if ($response->getStatus() !== 200) {
              /**
               * @see Zend_Feed_Exception
               */
              require_once 'Zend/Feed/Exception.php';
              throw new Zend_Feed_Exception("Failed to access $uri, got response code " . $response->getStatus());
          }
          $responseHtml = $response->getBody();

          $previousErrorMode = libxml_use_internal_errors(true);
          $dom    = new DOMDocument;
          $status = $dom->loadHTML($responseHtml);
          // Read the error before the previous libxml mode is restored.
          $error  = libxml_get_last_error();
          libxml_use_internal_errors($previousErrorMode);

          if (!$status) {
              if ($error && $error->message) {
                  $errormsg = "DOMDocument cannot parse HTML: {$error->message}";
              } else {
                  $errormsg = "DOMDocument cannot parse HTML: Please check the XML document's validity";
              }
              require_once 'Zend/Feed/Exception.php';
              throw new Zend_Feed_Exception($errormsg);
          }

          $feedSet = new Zend_Feed_Reader_FeedSet;
          $feedSet->addLinks($dom->getElementsByTagName('link'));
          return $feedSet;
      }

      /**
       * Detect the feed type of the provided feed
       *
       * @param Zend_Feed_Reader_FeedInterface|DOMDocument|string $feed A reader
       *        feed object, a parsed document, or a non-empty XML string
       * @return string One of the TYPE_* constants; TYPE_ANY when nothing matched
       * @throws Zend_Feed_Exception if $feed has an unsupported type or is a
       *         string that cannot be parsed as XML
       */
      public static function detectType($feed)
      {
          if ($feed instanceof Zend_Feed_Reader_FeedInterface) {
              $dom = $feed->getDomDocument();
          } elseif ($feed instanceof DOMDocument) {
              $dom = $feed;
          } elseif (is_string($feed) && !empty($feed)) {
              $previousErrorMode = libxml_use_internal_errors(true);
              $dom    = new DOMDocument;
              $status = $dom->loadXML($feed);
              $error  = libxml_get_last_error();
              libxml_use_internal_errors($previousErrorMode);
              if (!$status) {
                  if ($error && $error->message) {
                      $reason = $error->message;
                  } else {
                      $reason = '(error message not available)';
                  }
                  require_once 'Zend/Feed/Exception.php';
                  throw new Zend_Feed_Exception("DOMDocument cannot parse XML: $reason");
              }
          } else {
              require_once 'Zend/Feed/Exception.php';
              throw new Zend_Feed_Exception('Invalid object/scalar provided: must be of type Zend_Feed_Reader_FeedInterface, DomDocument or string');
          }

          $xpath = new DOMXPath($dom);

          // Plain RSS: the root element is <rss>, refined by its version attribute.
          if ($xpath->query('/rss')->length) {
              $type    = self::TYPE_RSS_ANY;
              $version = $xpath->evaluate('string(/rss/@version)');
              if (strlen($version) > 0) {
                  switch ($version) {
                      case '2.0':
                          $type = self::TYPE_RSS_20;
                          break;
                      case '0.94':
                          $type = self::TYPE_RSS_094;
                          break;
                      case '0.93':
                          $type = self::TYPE_RSS_093;
                          break;
                      case '0.92':
                          $type = self::TYPE_RSS_092;
                          break;
                      case '0.91':
                          $type = self::TYPE_RSS_091;
                          break;
                  }
              }
              return $type;
          }

          // RDF-based RSS: told apart by the namespace of the child elements.
          $xpath->registerNamespace('rdf', self::NAMESPACE_RDF);
          if ($xpath->query('/rdf:RDF')->length) {
              $xpath->registerNamespace('rss', self::NAMESPACE_RSS_10);
              if ($xpath->query('/rdf:RDF/rss:channel')->length
                  || $xpath->query('/rdf:RDF/rss:image')->length
                  || $xpath->query('/rdf:RDF/rss:item')->length
                  || $xpath->query('/rdf:RDF/rss:textinput')->length
              ) {
                  return self::TYPE_RSS_10;
              }

              $xpath->registerNamespace('rss', self::NAMESPACE_RSS_090);
              if ($xpath->query('/rdf:RDF/rss:channel')->length
                  || $xpath->query('/rdf:RDF/rss:image')->length
                  || $xpath->query('/rdf:RDF/rss:item')->length
                  || $xpath->query('/rdf:RDF/rss:textinput')->length
              ) {
                  return self::TYPE_RSS_090;
              }
          }

          // Atom: a <feed> element anywhere in the document, by namespace.
          $xpath->registerNamespace('atom', self::NAMESPACE_ATOM_10);
          if ($xpath->query('//atom:feed')->length) {
              return self::TYPE_ATOM_10;
          }

          $xpath->registerNamespace('atom', self::NAMESPACE_ATOM_03);
          if ($xpath->query('//atom:feed')->length) {
              return self::TYPE_ATOM_03;
          }

          return self::TYPE_ANY;
      }

      /**
       * Set plugin loader for use with Extensions
       *
       * @param Zend_Loader_PluginLoader_Interface $loader
       * @return void
       */
      public static function setPluginLoader(Zend_Loader_PluginLoader_Interface $loader)
      {
          self::$_pluginLoader = $loader;
      }

      /**
       * Get plugin loader for use with Extensions
       *
       * When none has been set, a Zend_Loader_PluginLoader preconfigured with
       * the Zend_Feed_Reader_Extension_ prefix is created and stored.
       *
       * @return Zend_Loader_PluginLoader_Interface $loader
       */
      public static function getPluginLoader()
      {
          if (!isset(self::$_pluginLoader)) {
              require_once 'Zend/Loader/PluginLoader.php';
              self::$_pluginLoader = new Zend_Loader_PluginLoader(array(
                  'Zend_Feed_Reader_Extension_' => 'Zend/Feed/Reader/Extension/',
              ));
          }
          return self::$_pluginLoader;
      }

      /**
       * Add prefix path for loading Extensions
       *
       * Trailing underscores on the prefix and trailing directory separators
       * on the path are stripped before the pair is passed to the plugin loader.
       *
       * @param string $prefix
       * @param string $path
       * @return void
       */
      public static function addPrefixPath($prefix, $path)
      {
          $prefix = rtrim($prefix, '_');
          $path   = rtrim($path, DIRECTORY_SEPARATOR);
          self::getPluginLoader()->addPrefixPath($prefix, $path);
      }

      /**
       * Add multiple Extension prefix paths at once
       *
       * $spec may be a single array('prefix' => ..., 'path' => ...) pair, a
       * list of such pairs, or both combined.
       *
       * @param array $spec
       * @return void
       */
      public static function addPrefixPaths(array $spec)
      {
          if (isset($spec['prefix']) && isset($spec['path'])) {
              self::addPrefixPath($spec['prefix'], $spec['path']);
          }
          foreach ($spec as $prefixPath) {
              // Skip the scalar 'prefix'/'path' values of the flat form so they
              // are never indexed as if they were nested pairs.
              if (is_array($prefixPath)
                  && isset($prefixPath['prefix'])
                  && isset($prefixPath['path'])
              ) {
                  self::addPrefixPath($prefixPath['prefix'], $prefixPath['path']);
              }
          }
      }

      /**
       * Register an Extension by name
       *
       * Tries to load both "<name>_Feed" and "<name>_Entry" through the plugin
       * loader; either one alone is enough for the registration to succeed.
       *
       * @param string $name
       * @return void
       * @throws Zend_Feed_Exception if unable to resolve Extension class
       */
      public static function registerExtension($name)
      {
          $feedName  = $name . '_Feed';
          $entryName = $name . '_Entry';
          $loader    = self::getPluginLoader();

          if (self::isRegistered($name)
              && ($loader->isLoaded($feedName) || $loader->isLoaded($entryName))
          ) {
              return;
          }

          try {
              $loader->load($feedName);
              // The defaults already list the core names; do not append twice.
              if (!in_array($feedName, self::$_extensions['feed'])) {
                  self::$_extensions['feed'][] = $feedName;
              }
          } catch (Zend_Loader_PluginLoader_Exception $e) {
              // Deliberately ignored: an Extension may ship only an Entry class.
          }

          try {
              $loader->load($entryName);
              if (!in_array($entryName, self::$_extensions['entry'])) {
                  self::$_extensions['entry'][] = $entryName;
              }
          } catch (Zend_Loader_PluginLoader_Exception $e) {
              // Deliberately ignored: an Extension may ship only a Feed class.
          }

          if (!$loader->isLoaded($feedName) && !$loader->isLoaded($entryName)) {
              require_once 'Zend/Feed/Exception.php';
              throw new Zend_Feed_Exception('Could not load extension: ' . $name
                  . ' using Plugin Loader. Check prefix paths are configured and extension exists.');
          }
      }

      /**
       * Is a given named Extension registered?
       *
       * @param string $extensionName
       * @return boolean True when "<name>_Feed" or "<name>_Entry" is listed
       */
      public static function isRegistered($extensionName)
      {
          $feedName  = $extensionName . '_Feed';
          $entryName = $extensionName . '_Entry';

          return in_array($feedName, self::$_extensions['feed'])
              || in_array($entryName, self::$_extensions['entry']);
      }

      /**
       * Get a list of extensions
       *
       * @return array Names grouped under the keys 'feed', 'entry' and 'core'
       */
      public static function getExtensions()
      {
          return self::$_extensions;
      }

      /**
       * Reset class state to defaults
       *
       * @return void
       */
      public static function reset()
      {
          self::$_cache              = null;
          self::$_httpClient         = null;
          self::$_httpMethodOverride = false;
          self::$_httpConditionalGet = false;
          self::$_pluginLoader       = null;
          self::$_prefixPaths        = array();
          self::$_extensions         = array(
              'feed' => array(
                  'DublinCore_Feed',
                  'Atom_Feed'
              ),
              'entry' => array(
                  'Content_Entry',
                  'DublinCore_Entry',
                  'Atom_Entry'
              ),
              'core' => array(
                  'DublinCore_Feed',
                  'Atom_Feed',
                  'Content_Entry',
                  'DublinCore_Entry',
                  'Atom_Entry'
              )
          );
      }

      /**
       * Register core (default) extensions
       *
       * @return void
       */
      protected static function _registerCoreExtensions()
      {
          $coreNames = array(
              'DublinCore',
              'Content',
              'Atom',
              'Slash',
              'WellFormedWeb',
              'Thread',
              'Podcast',
          );
          foreach ($coreNames as $extensionName) {
              self::registerExtension($extensionName);
          }
      }

      /**
       * Detect the type of a parsed document, register the core extensions
       * and wrap the document in the matching reader.
       *
       * Any type that does not start with "rss" is handed to the Atom reader.
       *
       * @param DOMDocument $dom
       * @return Zend_Feed_Reader_FeedInterface
       */
      protected static function _buildReader($dom)
      {
          $type = self::detectType($dom);
          self::_registerCoreExtensions();

          if (substr($type, 0, 3) == 'rss') {
              return new Zend_Feed_Reader_Feed_Rss($dom, $type);
          }
          return new Zend_Feed_Reader_Feed_Atom($dom, $type);
      }
  }