| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
2771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608 |
- <?xml version="1.0" encoding="UTF-8"?>
- <!-- Reviewed: no -->
- <sect1 id="zend.feed.reader">
- <title>Zend_Feed_Reader</title>
- <sect2 id="zend.feed.reader.introduction">
- <title>Introduction</title>
- <para>
- <classname>Zend_Feed_Reader</classname> is a component used to
- consume <acronym>RSS</acronym> and Atom feeds of any version, including
- <acronym>RDF</acronym>/<acronym>RSS</acronym> 1.0,
- <acronym>RSS</acronym> 2.0, Atom 0.3 and Atom 1.0. The <acronym>API</acronym> for
- retrieving feed data is
- deliberately simple since <classname>Zend_Feed_Reader</classname> is
- capable of searching any feed of any type for the information
- requested through the <acronym>API</acronym>. If the typical elements containing this
- information are not present, it will adapt and fall back on a
- variety of alternative elements instead. This ability to choose from
- alternatives removes the need for users to create their own
- abstraction layer on top of the component to make it useful or have
- any in-depth knowledge of the underlying standards, current
- alternatives, and namespaced extensions.
- </para>
- <para>
- Internally, <classname>Zend_Feed_Reader</classname> works almost
- entirely on the basis of making XPath queries against the feed <acronym>XML</acronym>'s
- Document Object Model. The <acronym>DOM</acronym> is not exposed through a chained
- property <acronym>API</acronym> like <classname>Zend_Feed</classname> though the
- underlying DOMDocument, DOMElement and DOMXPath objects are exposed for external
- manipulation. This singular approach to parsing is consistent and
- the component offers a plugin system to add to the Feed and Entry
- level <acronym>API</acronym> by writing Extensions on a similar basis.
- </para>
- <para>
- Performance is assisted in three ways. First of all,
- <classname>Zend_Feed_Reader</classname> supports caching using
- <classname>Zend_Cache</classname> to maintain a copy of the original
- feed <acronym>XML</acronym>. This allows you to skip network requests for a feed
- <acronym>URI</acronym> if
- the cache is valid. Second, the Feed and Entry level <acronym>API</acronym> is backed
- by an internal cache (non-persistent) so repeat <acronym>API</acronym> calls for the
- same feed will avoid additional <acronym>DOM</acronym> or XPath use. Third, importing
- feeds from a <acronym>URI</acronym> can take advantage of
- <acronym>HTTP</acronym> Conditional <constant>GET</constant> requests
- which allow servers to issue an empty 304 response when the
- requested feed has not changed since the last time you requested it.
- In the final case, an instance of <classname>Zend_Cache</classname>
- will hold the last received feed along with the ETag and
- Last-Modified header values sent in the <acronym>HTTP</acronym> response.
- </para>
- <para>
- In relation to <classname>Zend_Feed</classname>,
- <classname>Zend_Feed_Reader</classname> was formulated as a free
- standing replacement for <classname>Zend_Feed</classname> but it is
- not backwards compatible with <classname>Zend_Feed</classname>.
- Rather it is an alternative following a different ideology focused
- on being simple to use, flexible, consistent and extendable through
- the plugin system. <classname>Zend_Feed_Reader</classname> is also
- not capable of constructing feeds and delegates this responsibility
- to <classname>Zend_Feed_Writer</classname>, its sibling in arms.
- </para>
- </sect2>
- <sect2 id="zend.feed.reader.import">
- <title>Importing Feeds</title>
- <para>
- Importing a feed with <classname>Zend_Feed_Reader</classname> is not
- that much different to <classname>Zend_Feed</classname>. Feeds can
- be imported from a string, file, <acronym>URI</acronym> or an instance of type
- <classname>Zend_Feed_Abstract</classname>. Importing from a <acronym>URI</acronym> can
- additionally utilise a <acronym>HTTP</acronym> Conditional <constant>GET</constant>
- request. If importing fails, an exception will be raised. The end result will be an
- object of type <classname>Zend_Feed_Reader_FeedInterface</classname>, the
- core implementations of which are
- <classname>Zend_Feed_Reader_Feed_Rss</classname> and
- <classname>Zend_Feed_Reader_Feed_Atom</classname>
- (<classname>Zend_Feed</classname> took all the short names!). Both
- objects support multiple (all existing) versions of these broad feed
- types.
- </para>
- <para>
- In the following example, we import an <acronym>RDF</acronym>/<acronym>RSS</acronym> 1.0
- feed and extract some basic information that can be saved to a database or
- elsewhere.
- </para>
- <programlisting language="php"><![CDATA[
- $feed = Zend_Feed_Reader::import('http://www.planet-php.net/rdf/');
- $data = array(
- 'title' => $feed->getTitle(),
- 'link' => $feed->getLink(),
- 'dateModified' => $feed->getDateModified(),
- 'description' => $feed->getDescription(),
- 'language' => $feed->getLanguage(),
- 'entries' => array(),
- );
- foreach ($feed as $entry) {
- $edata = array(
- 'title' => $entry->getTitle(),
- 'description' => $entry->getDescription(),
- 'dateModified' => $entry->getDateModified(),
- 'authors' => $entry->getAuthors(),
- 'link' => $entry->getLink(),
- 'content' => $entry->getContent()
- );
- $data['entries'][] = $edata;
- }
- ]]></programlisting>
- <para>
- The example above demonstrates
- <classname>Zend_Feed_Reader</classname>'s <acronym>API</acronym>, and it also
- demonstrates some of its internal operation. In reality, the <acronym>RDF</acronym>
- feed selected does not have any native date or author elements,
- however it does utilise the Dublin Core 1.1 module which offers
- namespaced creator and date elements.
- <classname>Zend_Feed_Reader</classname> falls back on these and
- similar options if no relevant native elements exist. If it
- absolutely cannot find an alternative it will return <constant>NULL</constant>,
- indicating the information could not be found in the feed. You
- should note that classes implementing
- <classname>Zend_Feed_Reader_FeedInterface</classname> also implement
- the <acronym>SPL</acronym> <classname>Iterator</classname> and
- <classname>Countable</classname> interfaces.
- </para>
- <para>
- Feeds can also be imported from strings, files, and even objects of
- type <classname>Zend_Feed_Abstract</classname>.
- </para>
- <programlisting language="php"><![CDATA[
- // from a URI
- $feed = Zend_Feed_Reader::import('http://www.planet-php.net/rdf/');
- // from a String
- $feed = Zend_Feed_Reader::importString($feedXmlString);
- // from a file
- $feed = Zend_Feed_Reader::importFile('./feed.xml');
- // from a Zend_Feed_Abstract object
- $zfeed = Zend_Feed::import('http://www.planet-php.net/atom/');
- $feed = Zend_Feed_Reader::importFeed($zfeed);
- ]]></programlisting>
- </sect2>
- <sect2 id="zend.feed.reader.sources">
- <title>Retrieving Underlying Feed and Entry Sources</title>
- <para>
- <classname>Zend_Feed_Reader</classname> does its best not to stick
- you in a narrow confine. If you need to work on a feed outside of
- <classname>Zend_Feed_Reader</classname>, you can extract the base
- DOMDocument or DOMElement objects from any class, or even an <acronym>XML</acronym>
- string containing these. Also provided are methods to extract the current DOMXPath
- object (with all core and Extension namespaces registered) and the correct prefix used
- in all XPath queries for the current Feed or Entry. The basic methods
- to use (on any object) are <methodname>saveXml()</methodname>,
- <methodname>getDomDocument()</methodname>,
- <methodname>getElement()</methodname>,
- <methodname>getXpath()</methodname> and
- <methodname>getXpathPrefix()</methodname>. These will let you break
- free of <classname>Zend_Feed_Reader</classname> and do whatever else
- you want.
- </para>
- <itemizedlist>
- <listitem>
- <para>
- <methodname>saveXml()</methodname> returns an <acronym>XML</acronym> string
- containing only the element representing the current object.
- </para>
- </listitem>
- <listitem>
- <para>
- <methodname>getDomDocument()</methodname> returns the DOMDocument object
- representing the entire feed (even if called from an Entry object).
- </para>
- </listitem>
- <listitem>
- <para>
- <methodname>getElement()</methodname> returns the
- DOMElement of the current object (i.e. the Feed or current Entry).
- </para>
- </listitem>
- <listitem>
- <para>
- <methodname>getXpath()</methodname> returns the DOMXPath object for the current
- feed (even if called from an Entry object) with the namespaces of
- the current feed type and all loaded Extensions
- pre-registered.
- </para>
- </listitem>
- <listitem>
- <para>
- <methodname>getXpathPrefix()</methodname> returns the query
- prefix for the current object (i.e. the Feed or current
- Entry) which includes the correct XPath query path for that
- specific Feed or Entry.
- </para>
- </listitem>
- </itemizedlist>
- <para>
- Here's an example where a feed might include an <acronym>RSS</acronym> Extension not
- supported by <classname>Zend_Feed_Reader</classname> out of the box.
- Notably, you could write and register an Extension (covered later)
- to do this, but that's not always warranted for a quick check. You must register any
- new namespaces on the DOMXPath object before use unless they are
- registered by <classname>Zend_Feed_Reader</classname> or an
- Extension beforehand.
- </para>
- <programlisting language="php"><![CDATA[
- $feed = Zend_Feed_Reader::import('http://www.planet-php.net/rdf/');
- $xpathPrefix = $feed->getXpathPrefix();
- $xpath = $feed->getXpath();
- $xpath->registerNamespace('admin', 'http://webns.net/mvcb/');
- $reportErrorsTo = $xpath->evaluate('string('
- . $xpathPrefix
- . '/admin:errorReportsTo)');
- ]]></programlisting>
- <warning>
- <para>
- If you register an already registered namespace with a different
- prefix name to that used internally by
- <classname>Zend_Feed_Reader</classname>, it will break the
- internal operation of this component.
- </para>
- </warning>
- </sect2>
- <sect2 id="zend.feed.reader.cache-request">
- <title>Cache Support and Intelligent Requests</title>
- <sect3 id="zend.feed.reader.cache-request.cache">
- <title>Adding Cache Support to Zend_Feed_Reader</title>
- <para>
- <classname>Zend_Feed_Reader</classname> supports using an
- instance of <classname>Zend_Cache</classname> to cache feeds (as
- <acronym>XML</acronym>) to avoid unnecessary network requests. Adding a cache is as
- simple here as it is for other Zend Framework components: create
- and configure your cache and then tell
- <classname>Zend_Feed_Reader</classname> to use it! The cache key
- used is "<classname>Zend_Feed_Reader_</classname>" followed by the
- <acronym>MD5</acronym> hash of the feed's <acronym>URI</acronym>.
- </para>
- <programlisting language="php"><![CDATA[
- $frontendOptions = array(
- 'lifetime' => 7200,
- 'automatic_serialization' => true
- );
- $backendOptions = array('cache_dir' => './tmp/');
- $cache = Zend_Cache::factory(
- 'Core', 'File', $frontendOptions, $backendOptions
- );
- Zend_Feed_Reader::setCache($cache);
- ]]></programlisting>
- <note>
- <para>
- While it's a little off track, you should also consider
- adding a cache to
- <classname>Zend_Loader_PluginLoader</classname> which is
- used by <classname>Zend_Feed_Reader</classname> to load
- Extensions.
- </para>
- </note>
- </sect3>
- <sect3 id="zend.feed.reader.cache-request.http-conditional-get">
- <title>HTTP Conditional GET Support</title>
- <para>
- The big question often asked when importing a feed frequently, is
- if it has even changed. With a cache enabled, you can add <acronym>HTTP</acronym>
- Conditional <constant>GET</constant> support to your arsenal to answer that
- question.
- </para>
- <para>
- Using this method, you can request feeds from <acronym>URI</acronym>s and include
- their last known ETag and Last-Modified response header values
- with the request (using the If-None-Match and If-Modified-Since
- headers). If the feed on the server remains unchanged, you
- should receive a 304 response which tells
- <classname>Zend_Feed_Reader</classname> to use the cached
- version. If a full feed is sent in a response with a status code
- of 200, this means the feed has changed and
- <classname>Zend_Feed_Reader</classname> will parse the new
- version and save it to the cache. It will also cache the new
- ETag and Last-Modified header values for future use.
- </para>
- <para>
- These "conditional" requests are not guaranteed to be supported
- by the server you request a <acronym>URI</acronym> of, but can be attempted
- regardless. Most common feed sources like blogs should however
- have this supported. To enable conditional requests, you will
- need to provide a cache to <classname>Zend_Feed_Reader</classname>.
- </para>
- <programlisting language="php"><![CDATA[
- $frontendOptions = array(
- 'lifetime' => 86400,
- 'automatic_serialization' => true
- );
- $backendOptions = array('cache_dir' => './tmp/');
- $cache = Zend_Cache::factory(
- 'Core', 'File', $frontendOptions, $backendOptions
- );
- Zend_Feed_Reader::setCache($cache);
- Zend_Feed_Reader::useHttpConditionalGet();
- $feed = Zend_Feed_Reader::import('http://www.planet-php.net/rdf/');
- ]]></programlisting>
- <para>
- In the example above, with <acronym>HTTP</acronym> Conditional
- <constant>GET</constant> requests enabled, the response header values for ETag and
- Last-Modified will be cached along with the feed. For the next 24hrs (the cache
- lifetime), feeds will only be updated on the cache if a non-304 response is received
- containing a valid <acronym>RSS</acronym> or Atom <acronym>XML</acronym> document.
- </para>
- <para>
- If you intend on managing request headers from outside
- <classname>Zend_Feed_Reader</classname>, you can set the
- relevant If-None-Match and If-Modified-Since request headers
- via the <acronym>URI</acronym> import method.
- </para>
- <programlisting language="php"><![CDATA[
- $lastEtagReceived = '5e6cefe7df5a7e95c8b1ba1a2ccaff3d';
- $lastModifiedDateReceived = 'Wed, 08 Jul 2009 13:37:22 GMT';
- $feed = Zend_Feed_Reader::import(
- $uri, $lastEtagReceived, $lastModifiedDateReceived
- );
- ]]></programlisting>
- </sect3>
- </sect2>
- <sect2 id="zend.feed.reader.locate">
- <title>Locating Feed URIs from Websites</title>
- <para>
- These days, many websites are aware that the location of their <acronym>XML</acronym>
- feeds is not always obvious. A small <acronym>RDF</acronym>, <acronym>RSS</acronym> or
- Atom graphic helps when the user is reading the page, but what about when a machine
- visits trying to identify where your feeds are located? To assist in
- this, websites may point to their feeds using &lt;link&gt; tags in
- the &lt;head&gt; section of their <acronym>HTML</acronym>. To take advantage of this,
- you can use <classname>Zend_Feed_Reader</classname> to locate these
- feeds using the static <methodname>findFeedLinks()</methodname>
- method.
- </para>
- <para>
- This method calls any <acronym>URI</acronym> and searches for the location of
- <acronym>RSS</acronym>, <acronym>RDF</acronym>
- and Atom feeds assuming the website's <acronym>HTML</acronym> contains the relevant
- links. It then returns a value object where you can check for the existence of an
- <acronym>RSS</acronym>, <acronym>RDF</acronym> or Atom feed <acronym>URI</acronym>.
- </para>
- <para>
- The returned object is an <classname>ArrayObject</classname> subclass
- called <classname>Zend_Feed_Reader_Collection_FeedLink</classname> so you can cast
- it to an array, or iterate over it, to access all the detected links.
- However, as a simple shortcut, you can just grab the first <acronym>RSS</acronym>,
- <acronym>RDF</acronym> or Atom link using its public properties as in the example below.
- Otherwise, each element of the <classname>ArrayObject</classname> is a simple array
- with the keys "type" and "uri" where the type is one of "rdf", "rss" or
- "atom".
- </para>
- <programlisting language="php"><![CDATA[
- $links = Zend_Feed_Reader::findFeedLinks('http://www.planet-php.net');
- if(isset($links->rdf)) {
- echo $links->rdf, "\n"; // http://www.planet-php.org/rdf/
- }
- if(isset($links->rss)) {
- echo $links->rss, "\n"; // http://www.planet-php.org/rss/
- }
- if(isset($links->atom)) {
- echo $links->atom, "\n"; // http://www.planet-php.org/atom/
- }
- ]]></programlisting>
- <para>
- Based on these links, you can then import from whichever source you
- wish in the usual manner.
- </para>
- <para>
- This quick method only gives you one link for each feed type, but
- websites may indicate many links of any type. Perhaps it's a news
- site with an <acronym>RSS</acronym> feed for each news category. You can iterate over
- all links using the ArrayObject's iterator.
- </para>
- <programlisting language="php"><![CDATA[
- $links = Zend_Feed_Reader::findFeedLinks('http://www.planet-php.net');
- foreach ($links as $link) {
- echo $link['uri'], "\n";
- }
- ]]></programlisting>
- </sect2>
- <sect2 id="zend.feed.reader.attribute-collections">
- <title>Attribute Collections</title>
- <para>
- In an attempt to simplify return types, with Zend Framework 1.10 return
- types from the various feed and entry level methods may include an object
- of type <classname>Zend_Feed_Reader_Collection_CollectionAbstract</classname>.
- Despite the special class name which I'll explain below, this is just a simple
- subclass of <acronym>SPL</acronym>'s <classname>ArrayObject</classname>.
- </para>
- <para>
- The main purpose here is to allow the presentation of as much data as possible
- from the requested elements, while still allowing access to the most relevant
- data as a simple array. This also enforces a standard approach to returning
- such data which previously may have wandered between arrays and objects.
- </para>
- <para>
- The new class type acts identically to <classname>ArrayObject</classname>
- with the sole addition being a new method <methodname>getValues()</methodname>
- which returns a simple flat array containing the most relevant information.
- </para>
- <para>
- A simple example of this is
- <methodname>Zend_Feed_Reader_FeedInterface::getCategories()</methodname>. When used with
- any <acronym>RSS</acronym> or Atom feed, this method will return category data as a
- container object called <classname>Zend_Feed_Reader_Collection_Category</classname>. The
- container object will contain, per category, three fields of data: term, scheme and
- label. The "term" is the basic category name, often machine readable (i.e. plays nice
- with <acronym>URI</acronym>s). The scheme represents a categorisation scheme (usually a
- <acronym>URI</acronym> identifier) also known as a "domain" in <acronym>RSS</acronym>
- 2.0. The "label" is a human readable category name which supports
- <acronym>HTML</acronym> entities. In <acronym>RSS</acronym> 2.0, there is no label
- attribute so it is always set to the same value as the term for convenience.
- </para>
- <para>
- To access category labels by themselves in a simple value array,
- you might commit to something like:
- </para>
- <programlisting language="php"><![CDATA[
- $feed = Zend_Feed_Reader::import('http://www.example.com/atom.xml');
- $categories = $feed->getCategories();
- $labels = array();
- foreach ($categories as $cat) {
- $labels[] = $cat['label'];
- }
- ]]></programlisting>
- <para>
- It's a contrived example, but the point is that the labels are tied up with
- other information.
- </para>
- <para>
- However, the container class allows you to access the "most relevant" data
- as a simple array using the <methodname>getValues()</methodname> method. The concept
- of "most relevant" is obviously a judgement call. For categories it means the category
- labels (not the terms or schemes) while for authors it would be the authors' names
- (not their email addresses or <acronym>URI</acronym>s). The simple array is flat (just
- values) and passed through <methodname>array_unique()</methodname> to remove
- duplication.
- </para>
- <programlisting language="php"><![CDATA[
- $feed = Zend_Feed_Reader::import('http://www.example.com/atom.xml');
- $categories = $feed->getCategories();
- $labels = $categories->getValues();
- ]]></programlisting>
- <para>
- The above example shows how to extract only labels and nothing else thus
- giving simple access to the category labels without any additional work to extract
- that data by itself.
- </para>
- </sect2>
- <sect2 id="zend.feed.reader.retrieve-info">
- <title>Retrieving Feed Information</title>
- <para>
- Retrieving information from a feed (we'll cover entries and items in the
- next section though they follow identical principles) uses a clearly
- defined <acronym>API</acronym> which is exactly the same regardless of whether the feed
- in question is <acronym>RSS</acronym>, <acronym>RDF</acronym> or Atom. The same goes for
- sub-versions of these standards and we've tested every single
- <acronym>RSS</acronym> and Atom version. While
- the underlying feed <acronym>XML</acronym> can differ substantially in terms of the
- tags and elements they present, they nonetheless are all trying to
- convey similar information and to reflect this all the differences
- and wrangling over alternative tags are handled internally by
- <classname>Zend_Feed_Reader</classname> presenting you with an
- identical interface for each. Ideally, you should not have to care
- whether a feed is <acronym>RSS</acronym> or Atom so long as you can extract the
- information you want.
- </para>
- <note>
- <para>
- While determining common ground between feed types is itself complex, it
- should be noted that <acronym>RSS</acronym> in particular is a constantly disputed
- "specification". This has its roots in the original <acronym>RSS</acronym> 2.0
- document which contains ambiguities and does not detail the correct treatment of all
- elements. As a result, this component rigorously applies the <acronym>RSS</acronym>
- 2.0.11 Specification published by the <acronym>RSS</acronym> Advisory Board and its
- accompanying <acronym>RSS</acronym> Best Practices Profile. No other interpretation
- of <acronym>RSS</acronym> 2.0 will be supported though exceptions may be allowed
- where it does not directly prevent the application of the two documents mentioned
- above.
- </para>
- </note>
- <para>
- Of course, we don't live in an ideal world so there may be times the
- <acronym>API</acronym> just does not cover what you're looking for. To assist you,
- <classname>Zend_Feed_Reader</classname> offers a plugin system which
- allows you to write Extensions to expand the core <acronym>API</acronym> and cover any
- additional data you are trying to extract from feeds. If writing
- another Extension is too much trouble, you can simply grab the
- underlying <acronym>DOM</acronym> or XPath objects and do it by hand in your
- application. Of course, we really do encourage writing an Extension
- simply to make it more portable and reusable, and useful Extensions may be proposed
- to the Framework for formal addition.
- </para>
- <para>
- Here's a summary of the Core <acronym>API</acronym> for Feeds. You should note it
- comprises not only the basic <acronym>RSS</acronym> and Atom standards, but also
- accounts for a number of included Extensions bundled with
- <classname>Zend_Feed_Reader</classname>. The naming of these
- Extension sourced methods remains fairly generic - all Extension
- methods operate at the same level as the Core <acronym>API</acronym> though we do allow
- you to retrieve any specific Extension object separately if required.
- </para>
- <table>
- <title>Feed Level API Methods</title>
- <tgroup cols="2">
- <tbody>
- <row>
- <entry><methodname>getId()</methodname></entry>
- <entry>Returns a unique ID associated with this feed</entry>
- </row>
- <row>
- <entry><methodname>getTitle()</methodname></entry>
- <entry>Returns the title of the feed</entry>
- </row>
- <row>
- <entry><methodname>getDescription()</methodname></entry>
- <entry>Returns the text description of the feed.</entry>
- </row>
- <row>
- <entry><methodname>getLink()</methodname></entry>
- <entry>
- Returns a <acronym>URI</acronym> to the <acronym>HTML</acronym> website
- containing the same or
- similar information as this feed (i.e. if the feed is from a blog,
- it should provide the blog's <acronym>URI</acronym> where the
- <acronym>HTML</acronym> version of the entries can be read).
- </entry>
- </row>
- <row>
- <entry><methodname>getFeedLink()</methodname></entry>
- <entry>
- Returns the <acronym>URI</acronym> of this feed, which may be the
- same as the <acronym>URI</acronym> used to import the feed. There
- are important cases where the feed link may differ because the source
- <acronym>URI</acronym> is being updated and is intended to be removed in
- the future.
- </entry>
- </row>
- <row>
- <entry><methodname>getAuthors()</methodname></entry>
- <entry>
- Returns an object of type
- <classname>Zend_Feed_Reader_Collection_Author</classname> which is an
- <classname>ArrayObject</classname> whose elements are each simple arrays
- containing any combination of the keys "name", "email" and "uri". Where
- irrelevant to the source data, some of these keys may be omitted.
- </entry>
- </row>
- <row>
- <entry><methodname>getAuthor(integer $index = 0)</methodname></entry>
- <entry>
- Returns either the first author known, or with the
- optional <varname>$index</varname> parameter any specific
- index on the array of Authors as described above (returning
- <constant>NULL</constant> if an invalid index).
- </entry>
- </row>
- <row>
- <entry><methodname>getDateCreated()</methodname></entry>
- <entry>
- Returns the date on which this feed was created. Generally
- only applicable to Atom where it represents the date the resource
- described by an Atom 1.0 document was created. The returned date
- will be a <classname>Zend_Date</classname> object.
- </entry>
- </row>
- <row>
- <entry><methodname>getDateModified()</methodname></entry>
- <entry>
- Returns the date on which this feed was last modified. The returned date
- will be a <classname>Zend_Date</classname> object.
- </entry>
- </row>
- <row>
- <entry><methodname>getLastBuildDate()</methodname></entry>
- <entry>
- Returns the date on which this feed was last built. The returned date
- will be a <classname>Zend_Date</classname> object. This is only
- supported by <acronym>RSS</acronym> - Atom feeds will always return
- <constant>NULL</constant>.
- </entry>
- </row>
- <row>
- <entry><methodname>getLanguage()</methodname></entry>
- <entry>
- Returns the language of the feed (if defined) or simply the
- language noted in the <acronym>XML</acronym> document.
- </entry>
- </row>
- <row>
- <entry><methodname>getGenerator()</methodname></entry>
- <entry>
- Returns the generator of the feed, e.g. the software which
- generated it. This may differ between <acronym>RSS</acronym> and Atom
- since Atom defines a different notation.
- </entry>
- </row>
- <row>
- <entry><methodname>getCopyright()</methodname></entry>
- <entry>Returns any copyright notice associated with the feed.</entry>
- </row>
- <row>
- <entry><methodname>getHubs()</methodname></entry>
- <entry>
- Returns an array of all Hub Server <acronym>URI</acronym> endpoints
- which are advertised by the feed for use with the Pubsubhubbub
- Protocol, allowing subscriptions to the feed for real-time updates.
- </entry>
- </row>
- <row>
- <entry><methodname>getCategories()</methodname></entry>
- <entry>
- Returns a <classname>Zend_Feed_Reader_Collection_Category</classname>
- object containing the details of any categories associated with the
- overall feed. The supported fields include "term" (the machine readable
- category name), "scheme" (the categorisation scheme and domain for this
- category), and "label" (an <acronym>HTML</acronym> decoded human readable
- category name). Where any of the three fields are absent from the feed,
- they are either set to the closest available alternative or, in the case
- of "scheme", set to <constant>NULL</constant>.
- </entry>
- </row>
- <row>
- <entry><methodname>getImage()</methodname></entry>
- <entry>
- Returns an array containing data relating to any feed image or logo,
- or <constant>NULL</constant> if no image found. The resulting array may
- contain the following keys: <property>uri</property>,
- <property>link</property>, <property>title</property>,
- <property>description</property>, <property>height</property>, and
- <property>width</property>. Atom logos only contain a
- <acronym>URI</acronym> so the remaining metadata is drawn from
- <acronym>RSS</acronym> feeds only.
- </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <para>
- Given the variety of feeds in the wild, some of these methods will
- undoubtedly return <constant>NULL</constant> indicating the relevant information
- couldn't be located. Where possible, <classname>Zend_Feed_Reader</classname>
- will fall back on alternative elements during its search. For
- example, searching an <acronym>RSS</acronym> feed for a modification date is more
- complicated than it looks. <acronym>RSS</acronym> 2.0 feeds should include a
- <command>&lt;lastBuildDate&gt;</command> tag and (or) a
- <command>&lt;pubDate&gt;</command> element. But what if a feed has neither, maybe
- because it is an <acronym>RSS</acronym> 1.0 feed? Perhaps it instead has an
- <command>&lt;atom:updated&gt;</command> element with identical information
- (Atom may be used to supplement <acronym>RSS</acronym>'s syntax)? Failing that, we
- could simply look at the entries, pick the most recent, and use its
- <command>&lt;pubDate&gt;</command> element. Assuming it exists... Many
- feeds also use Dublin Core 1.0 or 1.1 <command>&lt;dc:date&gt;</command>
- elements for feeds and entries. Or we could find Atom lurking again.
- </para>
- <para>
- The point is, <classname>Zend_Feed_Reader</classname> was designed
- to know this. When you ask for the modification date (or anything
- else), it will run off and search for all these alternatives until
- it either gives up and returns <constant>NULL</constant>, or finds an
- alternative that should have the right answer.
- </para>
- <para>
- In addition to the above methods, all Feed objects implement methods
- for retrieving the <acronym>DOM</acronym> and XPath objects for the current feeds as
- described earlier. Feed objects also implement the <acronym>SPL</acronym> Iterator and
- Countable interfaces. The extended <acronym>API</acronym> is summarised below.
- </para>
- <table>
- <title>Extended Feed Level API Methods</title>
- <tgroup cols="2">
- <tbody>
- <row>
- <entry><methodname>getDomDocument()</methodname></entry>
- <entry>
- Returns the parent DOMDocument object for the
- entire source <acronym>XML</acronym> document
- </entry>
- </row>
- <row>
- <entry><methodname>getElement()</methodname></entry>
- <entry>
- Returns the current feed level DOMElement object
- </entry>
- </row>
- <row>
- <entry><methodname>saveXml()</methodname></entry>
- <entry>
- Returns a string containing an <acronym>XML</acronym> document of the
- entire feed element (this is not the original
- document but a rebuilt version)
- </entry>
- </row>
- <row>
- <entry><methodname>getXpath()</methodname></entry>
- <entry>
- Returns the DOMXPath object used internally to run queries on the
- DOMDocument object (this includes core and Extension namespaces
- pre-registered)
- </entry>
- </row>
- <row>
- <entry><methodname>getXpathPrefix()</methodname></entry>
- <entry>
- Returns the valid <acronym>DOM</acronym> path prefix prepended
- to all XPath queries matching the feed being queried
- </entry>
- </row>
- <row>
- <entry><methodname>getEncoding()</methodname></entry>
- <entry>
- Returns the encoding of the source <acronym>XML</acronym> document
- (note: this cannot account for errors such as the
- server sending documents in a different encoding). Where not
- defined, the default <acronym>UTF-8</acronym> encoding of Unicode is
- applied.
- </entry>
- </row>
- <row>
- <entry><methodname>count()</methodname></entry>
- <entry>
- Returns a count of the entries or items this feed contains
- (implements <acronym>SPL</acronym> <classname>Countable</classname>
- interface)
- </entry>
- </row>
- <row>
- <entry><methodname>current()</methodname></entry>
- <entry>
- Returns the current entry (using the current index
- from <methodname>key()</methodname>)
- </entry>
- </row>
- <row>
- <entry><methodname>key()</methodname></entry>
- <entry>Returns the current entry index</entry>
- </row>
- <row>
- <entry><methodname>next()</methodname></entry>
- <entry>Increments the entry index value by one</entry>
- </row>
- <row>
- <entry><methodname>rewind()</methodname></entry>
- <entry>Resets the entry index to 0</entry>
- </row>
- <row>
- <entry><methodname>valid()</methodname></entry>
- <entry>
- Checks that the current entry index is valid, i.e.
- it does not fall below 0 and does not exceed the number
- of entries existing.
- </entry>
- </row>
- <row>
- <entry><methodname>getExtensions()</methodname></entry>
- <entry>
- Returns an array of all Extension objects loaded for
- the current feed (note: both feed-level and entry-level Extensions
- exist, and only feed-level Extensions are returned here).
- The array keys are of the form {ExtensionName}_Feed.
- </entry>
- </row>
- <row>
- <entry><methodname>getExtension(string $name)</methodname></entry>
- <entry>
- Returns an Extension object for the feed registered under the
- provided name. This allows more fine-grained access to
- Extensions which may otherwise be hidden within the implementation
- of the standard <acronym>API</acronym> methods.
- </entry>
- </row>
- <row>
- <entry><methodname>getType()</methodname></entry>
- <entry>
- Returns a static class constant (e.g.
- <constant>Zend_Feed_Reader::TYPE_ATOM_03</constant>,
- i.e. Atom 0.3) indicating exactly what kind of feed
- is being consumed.
- </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- </sect2>
- <sect2 id="zend.feed.reader.entry">
- <title>Retrieving Entry/Item Information</title>
- <para>
- Retrieving information for specific entries or items (depending on
- whether you speak Atom or <acronym>RSS</acronym>) is identical to feed level data.
- Accessing entries is simply a matter of iterating over a Feed object
- or using the <acronym>SPL</acronym> <classname>Iterator</classname> interface Feed
- objects implement and calling the appropriate method on each.
- </para>
- <table>
- <title>Entry Level API Methods</title>
- <tgroup cols="2">
- <tbody>
- <row>
- <entry><methodname>getId()</methodname></entry>
- <entry>Returns a unique ID for the current entry.</entry>
- </row>
- <row>
- <entry><methodname>getTitle()</methodname></entry>
- <entry>Returns the title of the current entry.</entry>
- </row>
- <row>
- <entry><methodname>getDescription()</methodname></entry>
- <entry>Returns a description of the current entry.</entry>
- </row>
- <row>
- <entry><methodname>getLink()</methodname></entry>
- <entry>
- Returns a <acronym>URI</acronym> to the <acronym>HTML</acronym> version
- of the current entry.
- </entry>
- </row>
- <row>
- <entry><methodname>getPermaLink()</methodname></entry>
- <entry>
- Returns the permanent link to the current entry. In most cases,
- this is the same as using <methodname>getLink()</methodname>.
- </entry>
- </row>
- <row>
- <entry><methodname>getAuthors()</methodname></entry>
- <entry>
- Returns an object of type
- <classname>Zend_Feed_Reader_Collection_Author</classname> which is an
- <classname>ArrayObject</classname> whose elements are each simple arrays
- containing any combination of the keys "name", "email" and "uri". Where
- irrelevant to the source data, some of these keys may be omitted.
- </entry>
- </row>
- <row>
- <entry><methodname>getAuthor(integer $index = 0)</methodname></entry>
- <entry>
- Returns either the first author known, or with the
- optional <varname>$index</varname> parameter any specific
- index on the array of Authors as described above (returning
- <constant>NULL</constant> if an invalid index).
- </entry>
- </row>
- <row>
- <entry><methodname>getDateCreated()</methodname></entry>
- <entry>
- Returns the date on which the current entry was
- created. Generally only applicable to Atom where it
- represents the date the resource described by an
- Atom 1.0 document was created.
- </entry>
- </row>
- <row>
- <entry><methodname>getDateModified()</methodname></entry>
- <entry>
- Returns the date on which the current entry was last
- modified
- </entry>
- </row>
- <row>
- <entry><methodname>getContent()</methodname></entry>
- <entry>
- Returns the content of the current entry (this has any
- entities reversed if possible assuming the content type is
- <acronym>HTML</acronym>). The description is returned if a
- separate content element does not exist.
- </entry>
- </row>
- <row>
- <entry><methodname>getEnclosure()</methodname></entry>
- <entry>
- Returns an array containing the value of all
- attributes from a multi-media &lt;enclosure&gt; element including
- as array keys: <emphasis>url</emphasis>,
- <emphasis>length</emphasis>, <emphasis>type</emphasis>.
- In accordance with the <acronym>RSS</acronym> Best Practices Profile of
- the <acronym>RSS</acronym> Advisory Board, no support is offered for
- multiple enclosures since such support forms no part of the
- <acronym>RSS</acronym> specification.
- </entry>
- </row>
- <row>
- <entry><methodname>getCommentCount()</methodname></entry>
- <entry>
- Returns the number of comments made on this entry at the
- time the feed was last generated
- </entry>
- </row>
- <row>
- <entry><methodname>getCommentLink()</methodname></entry>
- <entry>
- Returns a <acronym>URI</acronym> pointing to the <acronym>HTML</acronym>
- page where comments can be made on this entry
- </entry>
- </row>
- <row>
- <entry>
- <methodname>getCommentFeedLink([string $type =
- 'atom'|'rss'])</methodname>
- </entry>
- <entry>
- Returns a <acronym>URI</acronym> pointing to a feed of the provided type
- containing all comments for this entry (type defaults to
- Atom/<acronym>RSS</acronym> depending on current feed type).
- </entry>
- </row>
- <row>
- <entry><methodname>getCategories()</methodname></entry>
- <entry>
- Returns a <classname>Zend_Feed_Reader_Collection_Category</classname>
- object containing the details of any categories associated with the
- entry. The supported fields include "term" (the machine readable
- category name), "scheme" (the categorisation scheme and domain for this
- category), and "label" (an <acronym>HTML</acronym> decoded human readable
- category name). Where any of the three fields are absent from the entry,
- they are either set to the closest available alternative or, in the case
- of "scheme", set to <constant>NULL</constant>.
- </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <para>
- The extended <acronym>API</acronym> for entries is identical to that for feeds with the
- exception of the Iterator methods which are not needed here.
- </para>
- <caution>
- <para>
- There is often confusion over the concepts of modified and
- created dates. In Atom, these are two clearly defined concepts
- (so knock yourself out) but in <acronym>RSS</acronym> they are vague.
- <acronym>RSS</acronym> 2.0
- defines a single <emphasis>&lt;pubDate&gt;</emphasis> element
- which typically refers to the date this entry was published,
- i.e. a creation date of sorts. This is not always the case, and
- it may change with updates or not. As a result, if you really
- want to check whether an entry has changed, don't rely on the
- results of <methodname>getDateModified()</methodname>. Instead,
- consider tracking the <acronym>MD5</acronym> hash of three other elements
- concatenated, e.g. using <methodname>getTitle()</methodname>,
- <methodname>getDescription()</methodname> and
- <methodname>getContent()</methodname>. If the entry was truly
- updated, this hash computation will give a different result than
- previously saved hashes for the same entry. This is obviously
- content oriented, and will not assist in detecting changes to other
- relevant elements. Atom feeds should not require such steps.
- </para>
- <para>
- Further muddying the
- waters, dates in feeds may follow different standards. Atom and
- Dublin Core dates should follow <acronym>ISO</acronym> 8601,
- and <acronym>RSS</acronym> dates should
- follow <acronym>RFC</acronym> 822 or <acronym>RFC</acronym> 2822
- which is also common. Date methods
- will throw an exception if <classname>Zend_Date</classname>
- cannot load the date string using one of the above standards, or the
- <acronym>PHP</acronym> recognised possibilities for <acronym>RSS</acronym> dates.
- </para>
- </caution>
- <warning>
- <para>
- The values returned from these methods are not validated. This
- means users must perform validation on all retrieved data
- including the filtering of any <acronym>HTML</acronym> such as from
- <methodname>getContent()</methodname> before it is output from
- your application. Remember that most feeds come from external
- sources, and therefore the default assumption should be that
- they cannot be trusted.
- </para>
- </warning>
- <table>
- <title>Extended Entry Level API Methods</title>
- <tgroup cols="2">
- <tbody>
- <row>
- <entry><methodname>getDomDocument()</methodname></entry>
- <entry>
- Returns the parent DOMDocument object for the
- entire feed (not just the current entry)
- </entry>
- </row>
- <row>
- <entry><methodname>getElement()</methodname></entry>
- <entry>
- Returns the current entry level DOMElement object
- </entry>
- </row>
- <row>
- <entry><methodname>getXpath()</methodname></entry>
- <entry>
- Returns the DOMXPath object used internally to run queries on the
- DOMDocument object (this includes core and Extension namespaces
- pre-registered)
- </entry>
- </row>
- <row>
- <entry><methodname>getXpathPrefix()</methodname></entry>
- <entry>
- Returns the valid <acronym>DOM</acronym> path prefix prepended
- to all XPath queries matching the entry being queried
- </entry>
- </row>
- <row>
- <entry><methodname>getEncoding()</methodname></entry>
- <entry>
- Returns the encoding of the source <acronym>XML</acronym> document
- (note: this cannot account for errors such as the server sending
- documents in a different encoding). The default encoding applied
- in the absence of any other is the <acronym>UTF-8</acronym> encoding of
- Unicode.
- </entry>
- </row>
- <row>
- <entry><methodname>getExtensions()</methodname></entry>
- <entry>
- Returns an array of all Extension objects loaded for
- the current entry (note: both feed-level and entry-level
- Extensions exist, and only entry-level Extensions are returned
- here). The array keys are in the form {ExtensionName}_Entry.
- </entry>
- </row>
- <row>
- <entry><methodname>getExtension(string $name)</methodname></entry>
- <entry>
- Returns an Extension object for the entry registered under the
- provided name. This allows more fine-grained access to
- Extensions which may otherwise be hidden within the implementation
- of the standard <acronym>API</acronym> methods.
- </entry>
- </row>
- <row>
- <entry><methodname>getType()</methodname></entry>
- <entry>
- Returns a static class constant (e.g.
- <constant>Zend_Feed_Reader::TYPE_ATOM_03</constant>,
- i.e. Atom 0.3) indicating exactly what kind
- of feed is being consumed.
- </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- </sect2>
- <sect2 id="zend.feed.reader.extending">
- <title>Extending Feed and Entry APIs</title>
- <para>
- Extending <classname>Zend_Feed_Reader</classname> allows you to add
- methods at both the feed and entry level which cover the retrieval
- of information not already supported by
- <classname>Zend_Feed_Reader</classname>. Given the number of
- <acronym>RSS</acronym> and
- Atom extensions that exist, this is a good thing since
- <classname>Zend_Feed_Reader</classname> couldn't possibly add
- everything.
- </para>
- <para>
- There are two types of Extensions possible, those which retrieve
- information from elements which are immediate children of the root
- element (e.g. <command>&lt;channel&gt;</command> for <acronym>RSS</acronym> or
- <command>&lt;feed&gt;</command> for Atom) and those which retrieve
- information from child elements of an entry (e.g.
- <command>&lt;item&gt;</command> for <acronym>RSS</acronym> or
- <command>&lt;entry&gt;</command> for Atom). On the filesystem these are grouped as
- classes within a namespace based on the extension standard's name. For example,
- internally we have <classname>Zend_Feed_Reader_Extension_DublinCore_Feed</classname>
- and <classname>Zend_Feed_Reader_Extension_DublinCore_Entry</classname>
- classes which are two Extensions implementing Dublin Core
- 1.0 and 1.1 support.
- </para>
- <para>
- Extensions are loaded into <classname>Zend_Feed_Reader</classname>
- using <classname>Zend_Loader_PluginLoader</classname>, so their operation
- will be familiar from other Zend Framework components.
- <classname>Zend_Feed_Reader</classname> already bundles a number of
- these Extensions. Those which are used internally are registered by
- default (so called Core Extensions); all others must be registered
- to <classname>Zend_Feed_Reader</classname> before they are used. The
- bundled Extensions include:
- </para>
- <table>
- <title>Core Extensions (pre-registered)</title>
- <tgroup cols="2">
- <tbody>
- <row>
- <entry>DublinCore (Feed and Entry)</entry>
- <entry>
- Implements support for Dublin Core Metadata Element Set 1.0 and 1.1
- </entry>
- </row>
- <row>
- <entry>Content (Entry only)</entry>
- <entry>Implements support for Content 1.0</entry>
- </row>
- <row>
- <entry>Atom (Feed and Entry)</entry>
- <entry>Implements support for Atom 0.3 and Atom 1.0</entry>
- </row>
- <row>
- <entry>Slash</entry>
- <entry>
- Implements support for the Slash <acronym>RSS</acronym> 1.0 module
- </entry>
- </row>
- <row>
- <entry>WellFormedWeb</entry>
- <entry>Implements support for the Well Formed Web CommentAPI 1.0</entry>
- </row>
- <row>
- <entry>Thread</entry>
- <entry>
- Implements support for Atom Threading Extensions as described
- in <acronym>RFC</acronym> 4685
- </entry>
- </row>
- <row>
- <entry>Podcast</entry>
- <entry>
- Implements support for the Podcast 1.0 <acronym>DTD</acronym> from
- Apple
- </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <para>
- The Core Extensions are somewhat special since they are extremely
- common and multi-faceted. For example, we have a Core Extension for Atom.
- Atom is implemented as an Extension (not just a base class) because it
- doubles as a valid <acronym>RSS</acronym> module - you can insert
- Atom elements into <acronym>RSS</acronym> feeds. I've even seen
- <acronym>RDF</acronym> feeds which use a lot of Atom in place of more
- common Extensions like Dublin Core.
- </para>
- <table>
- <title>Non-Core Extensions (must register manually)</title>
- <tgroup cols="2">
- <tbody>
- <row>
- <entry>Syndication</entry>
- <entry>
- Implements Syndication 1.0 support for <acronym>RSS</acronym> feeds
- </entry>
- </row>
- <row>
- <entry>CreativeCommons</entry>
- <entry>
- An <acronym>RSS</acronym> module that adds an element at the
- &lt;channel&gt; or &lt;item&gt; level that specifies which Creative
- Commons license applies.
- </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <para>
- The additional non-Core Extensions are offered but not registered to
- <classname>Zend_Feed_Reader</classname> by default. If you want to
- use them, you'll need to tell
- <classname>Zend_Feed_Reader</classname> to load them in advance of
- importing a feed. Additional non-Core Extensions will be included
- in future iterations of the component.
- </para>
- <para>
- Registering an Extension with
- <classname>Zend_Feed_Reader</classname>, so it is loaded and its <acronym>API</acronym>
- is available to Feed and Entry objects, is a simple affair using the
- <classname>Zend_Loader_PluginLoader</classname>. Here we register
- the optional Syndication Extension, and discover that it can be directly
- called from the Entry level <acronym>API</acronym> without any effort. Note that
- Extension names are case sensitive and use camel casing for multiple
- terms.
- </para>
- <programlisting language="php"><![CDATA[
- Zend_Feed_Reader::registerExtension('Syndication');
- $feed = Zend_Feed_Reader::import('http://rss.slashdot.org/Slashdot/slashdot');
- $updatePeriod = $feed->current()->getUpdatePeriod();
- ]]></programlisting>
- <para>
- In the simple example above, we checked how frequently a feed is being updated
- using the <methodname>getUpdatePeriod()</methodname>
- method. Since it's not part of
- <classname>Zend_Feed_Reader</classname>'s core <acronym>API</acronym>, it could only be
- a method supported by the newly registered Syndication Extension.
- </para>
- <para>
- As you may also notice, the new methods from Extensions are accessible from the main
- <acronym>API</acronym> using <acronym>PHP</acronym>'s magic methods. As an alternative,
- you can also directly access any Extension object for a similar result as seen below.
- </para>
- <programlisting language="php"><![CDATA[
- Zend_Feed_Reader::registerExtension('Syndication');
- $feed = Zend_Feed_Reader::import('http://rss.slashdot.org/Slashdot/slashdot');
- $syndication = $feed->getExtension('Syndication');
- $updatePeriod = $syndication->getUpdatePeriod();
- ]]></programlisting>
- <sect3 id="zend.feed.reader.extending.feed">
- <title>Writing Zend_Feed_Reader Extensions</title>
- <para>
- Inevitably, there will be times when the
- <classname>Zend_Feed_Reader</classname> <acronym>API</acronym> is just not capable
- of getting something you need from a feed or entry. You can use
- the underlying source objects, like DOMDocument, to get these by hand; however,
- there is a more reusable method available by writing Extensions
- supporting these new queries.
- </para>
- <para>
- As an example, let's take the case of a purely fictitious
- corporation named Jungle Books. Jungle Books have been
- publishing a lot of reviews on books they sell (from external
- sources and customers), which are distributed as an <acronym>RSS</acronym> 2.0
- feed. Their marketing department realises that web applications
- using this feed cannot currently figure out exactly what book is
- being reviewed. To make life easier for everyone, they determine
- that the geek department needs to extend <acronym>RSS</acronym> 2.0 to include a
- new element per entry supplying the <acronym>ISBN</acronym>-10 or
- <acronym>ISBN</acronym>-13 number of
- the publication the entry concerns. They define the new
- <command>&lt;isbn&gt;</command> element quite simply with a standard
- name and namespace <acronym>URI</acronym>:
- </para>
- <programlisting language="php"><![CDATA[
- JungleBooks 1.0:
- http://example.com/junglebooks/rss/module/1.0/
- ]]></programlisting>
- <para>
- A snippet of <acronym>RSS</acronym> containing this extension in practice could be
- something similar to:
- </para>
- <programlisting language="php"><![CDATA[
- <?xml version="1.0" encoding="utf-8" ?>
- <rss version="2.0"
- xmlns:content="http://purl.org/rss/1.0/modules/content/"
- xmlns:jungle="http://example.com/junglebooks/rss/module/1.0/">
- <channel>
- <title>Jungle Books Customer Reviews</title>
- <link>http://example.com/junglebooks</link>
- <description>Many book reviews!</description>
- <pubDate>Fri, 26 Jun 2009 19:15:10 GMT</pubDate>
- <jungle:dayPopular>
- http://example.com/junglebooks/book/938
- </jungle:dayPopular>
- <item>
- <title>Review Of Flatland: A Romance of Many Dimensions</title>
- <link>http://example.com/junglebooks/review/987</link>
- <author>Confused Physics Student</author>
- <content:encoded>
- A romantic square?!
- </content:encoded>
- <pubDate>Thu, 25 Jun 2009 20:03:28 -0700</pubDate>
- <jungle:isbn>048627263X</jungle:isbn>
- </item>
- </channel>
- </rss>
- ]]></programlisting>
- <para>
- Implementing this new <acronym>ISBN</acronym> element as a simple entry level
- extension would require the following class (using your own class
- namespace outside of Zend).
- </para>
- <programlisting language="php"><![CDATA[
- class My_FeedReader_Extension_JungleBooks_Entry
- extends Zend_Feed_Reader_Extension_EntryAbstract
- {
- public function getIsbn()
- {
- if (isset($this->_data['isbn'])) {
- return $this->_data['isbn'];
- }
- $isbn = $this->_xpath->evaluate(
- 'string(' . $this->getXpathPrefix() . '/jungle:isbn)'
- );
- if (!$isbn) {
- $isbn = null;
- }
- $this->_data['isbn'] = $isbn;
- return $this->_data['isbn'];
- }
- protected function _registerNamespaces()
- {
- $this->_xpath->registerNamespace(
- 'jungle', 'http://example.com/junglebooks/rss/module/1.0/'
- );
- }
- }
- ]]></programlisting>
- <para>
- This extension is easy enough to follow. It creates a new method
- <methodname>getIsbn()</methodname> which runs an XPath query on
- the current entry to extract the <acronym>ISBN</acronym> number enclosed by the
- <command>&lt;jungle:isbn&gt;</command> element. It can optionally
- store this to the internal non-persistent cache (no need to keep
- querying the <acronym>DOM</acronym> if it's called again on the same entry). The
- value is returned to the caller. At the end we have a protected
- method (it's abstract so it must exist) which registers the
- Jungle Books namespace for their custom <acronym>RSS</acronym> module. While we
- call this an <acronym>RSS</acronym> module, there's nothing to prevent the same
- element being used in Atom feeds - and all Extensions which use
- the prefix provided by <methodname>getXpathPrefix()</methodname>
- are actually neutral and work on <acronym>RSS</acronym> or Atom feeds with no
- extra code.
- </para>
- <para>
- Since this Extension is stored outside of Zend Framework, you'll
- need to register the path prefix for your Extensions so
- <classname>Zend_Loader_PluginLoader</classname> can find them.
- After that, it's merely a matter of registering the Extension,
- if it's not already loaded, and using it in practice.
- </para>
- <programlisting language="php"><![CDATA[
- if(!Zend_Feed_Reader::isRegistered('JungleBooks')) {
- Zend_Feed_Reader::addPrefixPath(
- 'My_FeedReader_Extension', '/path/to/My/FeedReader/Extension'
- );
- Zend_Feed_Reader::registerExtension('JungleBooks');
- }
- $feed = Zend_Feed_Reader::import('http://example.com/junglebooks/rss');
- // ISBN for whatever book the first entry in the feed was concerned with
- $firstIsbn = $feed->current()->getIsbn();
- ]]></programlisting>
- <para>
- Writing a feed level Extension is not much different. The
- example feed from earlier included an unmentioned
- <command>&lt;jungle:dayPopular&gt;</command> element which Jungle
- Books have added to their standard to include a link to the
- day's most popular book (in terms of visitor traffic). Here's
- an Extension which adds a
- <methodname>getDaysPopularBookLink()</methodname> method to the
- feed level <acronym>API</acronym>.
- </para>
- <programlisting language="php"><![CDATA[
- class My_FeedReader_Extension_JungleBooks_Feed
- extends Zend_Feed_Reader_Extension_FeedAbstract
- {
- public function getDaysPopularBookLink()
- {
- if (isset($this->_data['dayPopular'])) {
- return $this->_data['dayPopular'];
- }
- $dayPopular = $this->_xpath->evaluate(
- 'string(' . $this->getXpathPrefix() . '/jungle:dayPopular)'
- );
- if (!$dayPopular) {
- $dayPopular = null;
- }
- $this->_data['dayPopular'] = $dayPopular;
- return $this->_data['dayPopular'];
- }
- protected function _registerNamespaces()
- {
- $this->_xpath->registerNamespace(
- 'jungle', 'http://example.com/junglebooks/rss/module/1.0/'
- );
- }
- }
- ]]></programlisting>
- <para>
- Let's repeat the last example using a custom Extension to show the
- method being used.
- </para>
- <programlisting language="php"><![CDATA[
- if(!Zend_Feed_Reader::isRegistered('JungleBooks')) {
- Zend_Feed_Reader::addPrefixPath(
- 'My_FeedReader_Extension', '/path/to/My/FeedReader/Extension'
- );
- Zend_Feed_Reader::registerExtension('JungleBooks');
- }
- $feed = Zend_Feed_Reader::import('http://example.com/junglebooks/rss');
- // URI to the information page of the day's most popular book with visitors
- $daysPopularBookLink = $feed->getDaysPopularBookLink();
- // ISBN for whatever book the first entry in the feed was concerned with
- $firstIsbn = $feed->current()->getIsbn();
- ]]></programlisting>
- <para>
- Going through these examples, you'll note that we don't register
- feed and entry Extensions separately. Extensions within the same
- standard may or may not include both a feed and entry class, so
- <classname>Zend_Feed_Reader</classname> only requires you to
- register the overall parent name, e.g. JungleBooks, DublinCore,
- Slash. Internally, it can check at what level Extensions exist
- and load them up if found. In our case, we have a full set of
- Extensions now: <classname>JungleBooks_Feed</classname> and
- <classname>JungleBooks_Entry</classname>.
- </para>
- </sect3>
- </sect2>
- </sect1>
|