| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591 |
- <?xml version="1.0" encoding="UTF-8"?>
- <!-- Reviewed: no -->
- <sect1 id="zend.feed.reader">
- <title>Zend_Feed_Reader</title>
- <sect2 id="zend.feed.reader.introduction">
- <title>Introduction</title>
- <para>
- <classname>Zend_Feed_Reader</classname> is a component used to
- consume <acronym>RSS</acronym> and Atom feeds of any version, including
- <acronym>RDF</acronym>/<acronym>RSS</acronym> 1.0,
- <acronym>RSS</acronym> 2.0 and Atom 0.3/1.0. The <acronym>API</acronym> for
- retrieving feed data is
- deliberately simple since <classname>Zend_Feed_Reader</classname> is
- capable of searching any feed of any type for the information
- requested through the <acronym>API</acronym>. If the typical elements containing this
- information are not present, it will adapt and fall back on a
- variety of alternative elements instead. This ability to choose from
- alternatives removes the need for users to create their own
- abstraction layer on top of the component to make it useful or have
- any in-depth knowledge of the underlying standards, current
- alternatives, and namespaced extensions.
- </para>
- <para>
- Internally, <classname>Zend_Feed_Reader</classname> works almost
- entirely on the basis of making XPath queries against the feed <acronym>XML</acronym>'s
- Document Object Model. The <acronym>DOM</acronym> is not exposed through a chained
- property <acronym>API</acronym> like <classname>Zend_Feed</classname> though the
- underlying <classname>DOMDocument</classname>,
- <classname>DOMElement</classname> and
- <classname>DOMXPath</classname> objects are exposed for external
- manipulation. This singular approach to parsing is consistent and
- the component offers a plugin system to add to the Feed and Entry
- level <acronym>API</acronym> by writing Extensions on a similar basis.
- </para>
- <para>
- Performance is assisted in three ways. First of all,
- <classname>Zend_Feed_Reader</classname> supports caching using
- <classname>Zend_Cache</classname> to maintain a copy of the original
- feed <acronym>XML</acronym>. This allows you to skip network requests for a feed
- <acronym>URI</acronym> if
- the cache is valid. Second, the Feed and Entry level <acronym>API</acronym> is backed
- by an internal cache (non-persistent) so repeat <acronym>API</acronym> calls for the
- same feed will avoid additional <acronym>DOM</acronym>/XPath use. Thirdly, importing
- feeds from a <acronym>URI</acronym> can take advantage of
- <acronym>HTTP</acronym> Conditional GET requests
- which allow servers to issue an empty 304 response when the
- requested feed has not changed since the last time you requested it.
- In the final case, an instance of <classname>Zend_Cache</classname>
- will hold the last received feed along with the ETag and
- Last-Modified header values sent in the <acronym>HTTP</acronym> response.
- </para>
- <para>
- In relation to <classname>Zend_Feed</classname>,
- <classname>Zend_Feed_Reader</classname> was formulated as a free
- standing replacement for <classname>Zend_Feed</classname> but it is
- not backwards compatible with <classname>Zend_Feed</classname>.
- Rather it is an alternative following a different ideology focused
- on being simple to use, flexible, consistent and extendable through
- the plugin system. <classname>Zend_Feed_Reader</classname> is also
- not capable of constructing feeds and delegates this responsibility
- to <classname>Zend_Feed_Writer</classname>, its sibling in arms.
- </para>
- </sect2>
- <sect2 id="zend.feed.reader.import">
- <title>Importing Feeds</title>
- <para>
- Importing a feed with <classname>Zend_Feed_Reader</classname> is not
- that much different to <classname>Zend_Feed</classname>. Feeds can
- be imported from a string, file, <acronym>URI</acronym> or an instance of type
- <classname>Zend_Feed_Abstract</classname>. Importing from a <acronym>URI</acronym> can
- additionally utilise an <acronym>HTTP</acronym> Conditional GET request. If importing
- fails, an exception will be raised. The end result will be an object
- of type <classname>Zend_Feed_Reader_FeedInterface</classname>, the
- core implementations of which are
- <classname>Zend_Feed_Reader_Feed_Rss</classname> and
- <classname>Zend_Feed_Reader_Feed_Atom</classname>
- (<classname>Zend_Feed</classname> took all the short names!). Both
- objects support multiple (all existing) versions of these broad feed
- types.
- </para>
- <para>
- In the following example, we import an <acronym>RDF</acronym>/<acronym>RSS</acronym> 1.0
- feed and extract some basic information that can be saved to a database or
- elsewhere.
- </para>
- <programlisting language="php"><![CDATA[
- $feed = Zend_Feed_Reader::import('http://www.planet-php.net/rdf/');
- $data = array(
- 'title' => $feed->getTitle(),
- 'link' => $feed->getLink(),
- 'dateModified' => $feed->getDateModified(),
- 'description' => $feed->getDescription(),
- 'language' => $feed->getLanguage(),
- 'entries' => array(),
- );
- foreach ($feed as $entry) {
- $edata = array(
- 'title' => $entry->getTitle(),
- 'description' => $entry->getDescription(),
- 'dateModified' => $entry->getDateModified(),
- 'authors' => $entry->getAuthors(),
- 'link' => $entry->getLink(),
- 'content' => $entry->getContent()
- );
- $data['entries'][] = $edata;
- }
- ]]></programlisting>
- <para>
- The example above demonstrates
- <classname>Zend_Feed_Reader</classname>'s <acronym>API</acronym>, and it also
- demonstrates some of its internal operation. In reality, the <acronym>RDF</acronym>
- feed selected does not have any native date or author elements,
- however it does utilise the Dublin Core 1.1 module which offers
- namespaced creator and date elements.
- <classname>Zend_Feed_Reader</classname> falls back on these and
- similar options if no relevant native elements exist. If it
- absolutely cannot find an alternative it will return <constant>NULL</constant>,
- indicating the information could not be found in the feed. You
- should note that classes implementing
- <classname>Zend_Feed_Reader_FeedInterface</classname> also implement
- the <acronym>SPL</acronym> <classname>Iterator</classname> and
- <classname>Countable</classname> interfaces.
- </para>
- <para>
- Feeds can also be imported from strings, files, and even objects of
- type <classname>Zend_Feed_Abstract</classname>.
- </para>
- <programlisting language="php"><![CDATA[
- // from a URI
- $feed = Zend_Feed_Reader::import('http://www.planet-php.net/rdf/');
- // from a String
- $feed = Zend_Feed_Reader::importString($feedXmlString);
- // from a file
- $feed = Zend_Feed_Reader::importFile('./feed.xml');
- // from a Zend_Feed_Abstract object
- $zfeed = Zend_Feed::import('http://www.planet-php.net/atom/');
- $feed = Zend_Feed_Reader::importFeed($zfeed);
- ]]></programlisting>
- </sect2>
- <sect2 id="zend.feed.reader.sources">
- <title>Retrieving Underlying Feed and Entry Sources</title>
- <para>
- <classname>Zend_Feed_Reader</classname> does its best not to stick
- you in a narrow confine. If you need to work on a feed outside of
- <classname>Zend_Feed_Reader</classname>, you can extract the base
- <classname>DOMDocument</classname> or
- <classname>DOMElement</classname> objects from any class, or even an
- <acronym>XML</acronym> string containing these. Also provided are methods to extract
- the current <classname>DOMXPath</classname> object (with all core
- and Extension namespaces registered) and the correct prefix used in
- all XPath queries for the current Feed or Entry. The basic methods
- to use (on any object) are <methodname>saveXml()</methodname>,
- <methodname>getDomDocument()</methodname>,
- <methodname>getElement()</methodname>,
- <methodname>getXpath()</methodname> and
- <methodname>getXpathPrefix()</methodname>. These will let you break
- free of <classname>Zend_Feed_Reader</classname> and do whatever else
- you want.
- </para>
- <itemizedlist>
- <listitem>
- <para>
- <methodname>saveXml()</methodname> returns an <acronym>XML</acronym> string
- containing only the element representing the current object.
- </para>
- </listitem>
- <listitem>
- <para>
- <methodname>getDomDocument()</methodname> returns the
- <classname>DOMDocument</classname> object representing the
- entire feed (even if called from an Entry object).
- </para>
- </listitem>
- <listitem>
- <para>
- <methodname>getElement()</methodname> returns the
- <classname>DOMElement</classname> of the current object
- (i.e. the Feed or current Entry).
- </para>
- </listitem>
- <listitem>
- <para>
- <methodname>getXpath()</methodname> returns the
- <classname>DOMXPath</classname> object for the current feed
- (even if called from an Entry object) with the namespaces of
- the current feed type and all loaded Extensions
- pre-registered.
- </para>
- </listitem>
- <listitem>
- <para>
- <methodname>getXpathPrefix()</methodname> returns the query
- prefix for the current object (i.e. the Feed or current
- Entry) which includes the correct XPath query path for that
- specific Feed or Entry.
- </para>
- </listitem>
- </itemizedlist>
- <para>
- Here's an example where a feed might include an <acronym>RSS</acronym> Extension not
- supported by <classname>Zend_Feed_Reader</classname> out of the box.
- Notably, you could write and register an Extension (covered later)
- to do this, but that's not always warranted for a quick check. You
- must register any new namespaces on the
- <classname>DOMXPath</classname> object before use unless they are
- registered by <classname>Zend_Feed_Reader</classname> or an
- Extension beforehand.
- </para>
- <programlisting language="php"><![CDATA[
- $feed = Zend_Feed_Reader::import('http://www.planet-php.net/rdf/');
- $xpathPrefix = $feed->getXpathPrefix();
- $xpath = $feed->getXpath();
- $xpath->registerNamespace('admin', 'http://webns.net/mvcb/');
- $reportErrorsTo = $xpath->evaluate('string('
- . $xpathPrefix
- . '/admin:errorReportsTo)');
- ]]></programlisting>
- <warning>
- <para>
- If you register an already registered namespace with a different
- prefix name to that used internally by
- <classname>Zend_Feed_Reader</classname>, it will break the
- internal operation of this component.
- </para>
- </warning>
- </sect2>
- <sect2 id="zend.feed.reader.cache-request">
- <title>Cache Support and Intelligent Requests</title>
- <sect3 id="zend.feed.reader.cache-request.cache">
- <title>Adding Cache Support to Zend_Feed_Reader</title>
- <para>
- <classname>Zend_Feed_Reader</classname> supports using an
- instance of <classname>Zend_Cache</classname> to cache feeds (as
- <acronym>XML</acronym>) to avoid unnecessary network requests. Adding a cache is as
- simple here as it is for other Zend Framework components: create
- and configure your cache and then tell
- <classname>Zend_Feed_Reader</classname> to use it! The cache key
- used is "<classname>Zend_Feed_Reader_</classname>" followed by the
- <acronym>MD5</acronym> hash of the feed's <acronym>URI</acronym>.
- </para>
- <programlisting language="php"><![CDATA[
- $frontendOptions = array(
- 'lifetime' => 7200,
- 'automatic_serialization' => true
- );
- $backendOptions = array('cache_dir' => './tmp/');
- $cache = Zend_Cache::factory(
- 'Core', 'File', $frontendOptions, $backendOptions
- );
- Zend_Feed_Reader::setCache($cache);
- ]]></programlisting>
- <note>
- <para>
- While it's a little off track, you should also consider
- adding a cache to
- <classname>Zend_Loader_PluginLoader</classname> which is
- used by <classname>Zend_Feed_Reader</classname> to load
- Extensions.
- </para>
- </note>
- </sect3>
- <sect3 id="zend.feed.reader.cache-request.http-conditional-get">
- <title>HTTP Conditional GET Support</title>
- <para>
- The big question often asked when importing a feed frequently is
- whether it has even changed. With a cache enabled, you can add <acronym>HTTP</acronym>
- Conditional GET support to your arsenal to answer that question.
- </para>
- <para>
- Using this method, you can request feeds from <acronym>URI</acronym>s and include
- their last known ETag and Last-Modified response header values
- with the request (using the If-None-Match and If-Modified-Since
- headers). If the feed on the server remains unchanged, you
- should receive a 304 response which tells
- <classname>Zend_Feed_Reader</classname> to use the cached
- version. If a full feed is sent in a response with a status code
- of 200, this means the feed has changed and
- <classname>Zend_Feed_Reader</classname> will parse the new
- version and save it to the cache. It will also cache the new
- ETag and Last-Modified header values for future use.
- </para>
- <para>
- These "conditional" requests are not guaranteed to be supported
- by the server you request a <acronym>URI</acronym> of, but can be attempted
- regardless. Most common feed sources like blogs should however
- have this supported. To enable conditional requests, you will
- need to provide a cache to <classname>Zend_Feed_Reader</classname>.
- </para>
- <programlisting language="php"><![CDATA[
- $frontendOptions = array(
- 'lifetime' => 86400,
- 'automatic_serialization' => true
- );
- $backendOptions = array('cache_dir' => './tmp/');
- $cache = Zend_Cache::factory(
- 'Core', 'File', $frontendOptions, $backendOptions
- );
- Zend_Feed_Reader::setCache($cache);
- Zend_Feed_Reader::useHttpConditionalGet();
- $feed = Zend_Feed_Reader::import('http://www.planet-php.net/rdf/');
- ]]></programlisting>
- <para>
- In the example above, with <acronym>HTTP</acronym> Conditional GET requests enabled,
- the response header values for ETag and Last-Modified will be cached
- along with the feed. For the next 24hrs (the cache lifetime), feeds will
- only be updated on the cache if a non-304 response is received
- containing a valid <acronym>RSS</acronym> or Atom <acronym>XML</acronym> document.
- </para>
- <para>
- If you intend on managing request headers from outside
- <classname>Zend_Feed_Reader</classname>, you can set the
- relevant If-None-Match and If-Modified-Since request headers
- via the <acronym>URI</acronym> import method.
- </para>
- <programlisting language="php"><![CDATA[
- $lastEtagReceived = '5e6cefe7df5a7e95c8b1ba1a2ccaff3d';
- $lastModifiedDateReceived = 'Wed, 08 Jul 2009 13:37:22 GMT';
- $feed = Zend_Feed_Reader::import(
- $uri, $lastEtagReceived, $lastModifiedDateReceived
- );
- ]]></programlisting>
- </sect3>
- </sect2>
- <sect2 id="zend.feed.reader.locate">
- <title>Locating Feed URIs from Websites</title>
- <para>
- These days, many websites are aware that the location of their <acronym>XML</acronym>
- feeds is not always obvious. A small <acronym>RDF</acronym>, <acronym>RSS</acronym> or
- Atom graphic helps when the user is reading the page, but what about when a machine
- visits trying to identify where your feeds are located? To assist in
- this, websites may point to their feeds using &lt;link&gt; tags in
- the &lt;head&gt; section of their <acronym>HTML</acronym>. To take advantage of this,
- you can use <classname>Zend_Feed_Reader</classname> to locate these
- feeds using the static <methodname>findFeedLinks()</methodname>
- method.
- </para>
- <para>
- This method calls any <acronym>URI</acronym> and searches for the location of
- <acronym>RSS</acronym>, <acronym>RDF</acronym>
- and Atom feeds assuming the website's <acronym>HTML</acronym> contains the relevant
- links. It then returns a value object where you can check for the existence of an
- <acronym>RSS</acronym>, <acronym>RDF</acronym> or Atom feed <acronym>URI</acronym>.
- </para>
- <para>
- The returned object is an <classname>ArrayObject</classname> subclass
- called <classname>Zend_Feed_Reader_Collection_FeedLink</classname> so you can cast
- it to an array, or iterate over it, to access all the detected links.
- However, as a simple shortcut, you can just grab the first RSS, RDF
- or Atom link using its public properties as in the example below. Otherwise,
- each element of the <classname>ArrayObject</classname> is a simple array
- with the keys "type" and "uri" where the type is one of "rdf", "rss" or
- "atom".
- </para>
- <programlisting language="php"><![CDATA[
- $links = Zend_Feed_Reader::findFeedLinks('http://www.planet-php.net');
- if(isset($links->rdf)) {
- echo $links->rdf, "\n"; // http://www.planet-php.net/rdf/
- }
- if(isset($links->rss)) {
- echo $links->rss, "\n"; // http://www.planet-php.net/rss/
- }
- if(isset($links->atom)) {
- echo $links->atom, "\n"; // http://www.planet-php.net/atom/
- }
- ]]></programlisting>
- <para>
- Based on these links, you can then import from whichever source you
- wish in the usual manner.
- </para>
- <para>
- This quick method only gives you one link for each feed type, but
- websites may indicate many links of any type. Perhaps it's a news
- site with an RSS feed for each news category. You can iterate over
- all links using the ArrayObject's iterator.
- </para>
- <programlisting language="php"><![CDATA[
- $links = Zend_Feed_Reader::findFeedLinks('http://www.planet-php.net');
- foreach ($links as $link) {
- echo $link['uri'], "\n";
- }
- ]]></programlisting>
- </sect2>
- <sect2 id="zend.feed.reader.attribute-collections">
- <title>Attribute Collections</title>
- <para>
- In an attempt to simplify return types, with Zend Framework 1.10 return
- types from the various feed and entry level methods may include an object
- of type <classname>Zend_Feed_Reader_Collection_CollectionAbstract</classname>.
- Despite the special class name which I'll explain below, this is just a simple
- subclass of SPL's <classname>ArrayObject</classname>.
- </para>
- <para>
- The main purpose here is to allow the presentation of as much data as possible
- from the requested elements, while still allowing access to the most relevant
- data as a simple array. This also enforces a standard approach to returning
- such data which previously may have wandered between arrays and objects.
- </para>
- <para>
- The new class type acts identically to <classname>ArrayObject</classname>
- with the sole addition being a new method <methodname>getValues()</methodname>
- which returns a simple flat array containing the most relevant information.
- </para>
- <para>
- A simple example of this is
- <methodname>Zend_Feed_Reader_FeedInterface::getCategories()</methodname>. When used with
- any RSS or Atom feed, this method will return category data as a container object called
- <classname>Zend_Feed_Reader_Collection_Category</classname>. The container object will
- contain, per category, three fields of data: term, scheme and label. The "term" is the
- basic category name, often machine readable (i.e. plays nice with URIs). The scheme
- represents a categorisation scheme (usually a URI identifier) also known as a "domain"
- in RSS 2.0. The "label" is a human readable category name which supports html entities.
- In RSS 2.0, there is no label attribute so it is always set to the same value as the
- term for convenience.
- </para>
- <para>
- To access category labels by themselves in a simple value array,
- you might commit to something like:
- </para>
- <programlisting language="php"><![CDATA[
- $feed = Zend_Feed_Reader::import('http://www.example.com/atom.xml');
- $categories = $feed->getCategories();
- $labels = array();
- foreach ($categories as $cat) {
- $labels[] = $cat['label'];
- }
- ]]></programlisting>
- <para>
- It's a contrived example, but the point is that the labels are tied up with
- other information.
- </para>
- <para>
- However, the container class allows you to access the "most relevant" data
- as a simple array using the <methodname>getValues()</methodname> method. The concept
- of "most relevant" is obviously a judgement call. For categories it means the category
- labels (not the terms or schemes) while for authors it would be the authors' names
- (not their email addresses or URIs). The simple array is flat (just values) and passed
- through <methodname>array_unique()</methodname> to remove duplication.
- </para>
- <programlisting language="php"><![CDATA[
- $feed = Zend_Feed_Reader::import('http://www.example.com/atom.xml');
- $categories = $feed->getCategories();
- $labels = $categories->getValues();
- ]]></programlisting>
- <para>
- The above example shows how to extract only labels and nothing else thus
- giving simple access to the category labels without any additional work to extract
- that data by itself.
- </para>
- </sect2>
- <sect2 id="zend.feed.reader.retrieve-info">
- <title>Retrieving Feed Information</title>
- <para>
- Retrieving information from a feed (we'll cover entries/items in the
- next section though they follow identical principles) uses a clearly
- defined <acronym>API</acronym> which is exactly the same regardless of whether the feed
- in question is <acronym>RSS</acronym>/<acronym>RDF</acronym>/Atom. The same goes for
- sub-versions of these standards and we've tested every single
- <acronym>RSS</acronym> and Atom version. While
- the underlying feed <acronym>XML</acronym> can differ substantially in terms of the
- tags and elements they present, they nonetheless are all trying to
- convey similar information and to reflect this all the differences
- and wrangling over alternative tags are handled internally by
- <classname>Zend_Feed_Reader</classname> presenting you with an
- identical interface for each. Ideally, you should not have to care
- whether a feed is <acronym>RSS</acronym> or Atom so long as you can extract the
- information you want.
- </para>
- <note>
- <para>
- While determining common ground between feed types is itself complex, it
- should be noted that RSS in particular is a constantly disputed "specification".
- This has its roots in the original RSS 2.0 document which contains ambiguities
- and does not detail the correct treatment of all elements. As a result, this
- component rigorously applies the RSS 2.0.11 Specification published by the
- RSS Advisory Board and its accompanying RSS Best Practices Profile. No
- other interpretation of RSS 2.0 will be supported though exceptions may
- be allowed where it does not directly prevent the application of the two
- documents mentioned above.
- </para>
- </note>
- <para>
- Of course, we don't live in an ideal world so there may be times the
- <acronym>API</acronym> just does not cover what you're looking for. To assist you,
- <classname>Zend_Feed_Reader</classname> offers a plugin system which
- allows you to write Extensions to expand the core <acronym>API</acronym> and cover any
- additional data you are trying to extract from feeds. If writing
- another Extension is too much trouble, you can simply grab the
- underlying <acronym>DOM</acronym> or XPath objects and do it by hand in your
- application. Of course, we really do encourage writing an Extension
- simply to make it more portable and reusable, and useful Extensions may be proposed
- to the Framework for formal addition.
- </para>
- <para>
- Here's a summary of the Core <acronym>API</acronym> for Feeds. You should note it
- comprises not only the basic <acronym>RSS</acronym> and Atom standards, but also
- accounts for a number of included Extensions bundled with
- <classname>Zend_Feed_Reader</classname>. The naming of these
- Extension sourced methods remains fairly generic - all Extension
- methods operate at the same level as the Core <acronym>API</acronym> though we do allow
- you to retrieve any specific Extension object separately if required.
- </para>
- <table>
- <title>Feed Level API Methods</title>
- <tgroup cols="2">
- <tbody>
- <row>
- <entry><methodname>getId()</methodname></entry>
- <entry>Returns a unique ID associated with this feed</entry>
- </row>
- <row>
- <entry><methodname>getTitle()</methodname></entry>
- <entry>Returns the title of the feed</entry>
- </row>
- <row>
- <entry><methodname>getDescription()</methodname></entry>
- <entry>Returns the text description of the feed.</entry>
- </row>
- <row>
- <entry><methodname>getLink()</methodname></entry>
- <entry>
- Returns a <acronym>URI</acronym> to the <acronym>HTML</acronym> website
- containing the same or
- similar information as this feed (i.e. if the feed is from a blog,
- it should provide the blog's <acronym>URI</acronym> where the
- <acronym>HTML</acronym> version of the entries can be read).
- </entry>
- </row>
- <row>
- <entry><methodname>getFeedLink()</methodname></entry>
- <entry>
- Returns the <acronym>URI</acronym> of this feed, which may be the
- same as the <acronym>URI</acronym> used to import the feed. There
- are important cases where the feed link may differ because the source
- URI is being updated and is intended to be removed in the future.
- </entry>
- </row>
- <row>
- <entry><methodname>getAuthors()</methodname></entry>
- <entry>
- Returns an object of type
- <classname>Zend_Feed_Reader_Collection_Author</classname> which is an
- <classname>ArrayObject</classname> whose elements are each simple arrays
- containing any combination of the keys "name", "email" and "uri". Where
- irrelevant to the source data, some of these keys may be omitted.
- </entry>
- </row>
- <row>
- <entry><methodname>getAuthor(integer $index = 0)</methodname></entry>
- <entry>
- Returns either the first author known, or with the
- optional <varname>$index</varname> parameter any specific
- index on the array of Authors as described above (returning
- <constant>NULL</constant> if an invalid index).
- </entry>
- </row>
- <row>
- <entry><methodname>getDateCreated()</methodname></entry>
- <entry>
- Returns the date on which this feed was created. Generally
- only applicable to Atom where it represents the date the resource
- described by an Atom 1.0 document was created. The returned date
- will be a <classname>Zend_Date</classname> object.
- </entry>
- </row>
- <row>
- <entry><methodname>getDateModified()</methodname></entry>
- <entry>
- Returns the date on which this feed was last modified. The returned date
- will be a <classname>Zend_Date</classname> object.
- </entry>
- </row>
- <row>
- <entry><methodname>getLanguage()</methodname></entry>
- <entry>
- Returns the language of the feed (if defined) or simply the
- language noted in the <acronym>XML</acronym> document.
- </entry>
- </row>
- <row>
- <entry><methodname>getGenerator()</methodname></entry>
- <entry>
- Returns the generator of the feed, e.g. the software which
- generated it. This may differ between <acronym>RSS</acronym> and Atom
- since Atom defines a different notation.
- </entry>
- </row>
- <row>
- <entry><methodname>getCopyright()</methodname></entry>
- <entry>Returns any copyright notice associated with the feed.</entry>
- </row>
- <row>
- <entry><methodname>getHubs()</methodname></entry>
- <entry>
- Returns an array of all Hub Server <acronym>URI</acronym> endpoints
- which are advertised by the feed for use with the Pubsubhubbub
- Protocol, allowing subscriptions to the feed for real-time updates.
- </entry>
- </row>
- <row>
- <entry><methodname>getCategories()</methodname></entry>
- <entry>
- Returns a <classname>Zend_Feed_Reader_Collection_Category</classname>
- object containing the details of any categories associated with the
- overall feed. The supported fields include "term" (the machine readable
- category name), "scheme" (the categorisation scheme/domain for this
- category), and "label" (an HTML decoded human readable category name).
- Where any of the three fields are absent from the feed, they are either
- set to the closest available alternative or, in the case of "scheme",
- set to <constant>NULL</constant>.
- </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <para>
- Given the variety of feeds in the wild, some of these methods will
- undoubtedly return <constant>NULL</constant> indicating the relevant information
- couldn't be located. Where possible, <classname>Zend_Feed_Reader</classname>
- will fall back on alternative elements during its search. For
- example, searching an <acronym>RSS</acronym> feed for a modification date is more
- complicated than it looks. <acronym>RSS</acronym> 2.0 feeds should include a
- <command>&lt;lastBuildDate&gt;</command> tag and (or) a
- <command>&lt;pubDate&gt;</command> element. But what if a feed has neither - maybe
- this is an <acronym>RSS</acronym> 1.0 feed? Perhaps it instead has an
- <command>&lt;atom:updated&gt;</command> element with identical information
- (Atom may be used to supplement <acronym>RSS</acronym>'s syntax)? Failing that, we
- could simply look at the entries, pick the most recent, and use its
- <command>&lt;pubDate&gt;</command> element. Assuming it exists... Many
- feeds also use Dublin Core 1.0/1.1 <command>&lt;dc:date&gt;</command>
- elements for feeds and entries. Or we could find Atom lurking again.
- </para>
- <para>
- The point is, <classname>Zend_Feed_Reader</classname> was designed
- to know this. When you ask for the modification date (or anything
- else), it will run off and search for all these alternatives until
- it either gives up and returns <constant>NULL</constant>, or finds an
- alternative that should have the right answer.
- </para>
- <para>
- In addition to the above methods, all Feed objects implement methods
- for retrieving the <acronym>DOM</acronym> and XPath objects for the current feeds as
- described earlier. Feed objects also implement the <acronym>SPL</acronym> Iterator and
- Countable interfaces. The extended <acronym>API</acronym> is summarised below.
- </para>
- <table>
- <title>Extended Feed Level API Methods</title>
- <tgroup cols="2">
- <tbody>
- <row>
- <entry><methodname>getDomDocument()</methodname></entry>
- <entry>
- Returns the parent
- <classname>DOMDocument</classname> object for the
- entire source <acronym>XML</acronym> document
- </entry>
- </row>
- <row>
- <entry><methodname>getElement()</methodname></entry>
- <entry>
- Returns the current feed level
- <classname>DOMElement</classname> object
- </entry>
- </row>
- <row>
- <entry><methodname>saveXml()</methodname></entry>
- <entry>
- Returns a string containing an <acronym>XML</acronym> document of the
- entire feed element (this is not the original
- document but a rebuilt version)
- </entry>
- </row>
- <row>
- <entry><methodname>getXpath()</methodname></entry>
- <entry>
- Returns the <classname>DOMXPath</classname> object
- used internally to run queries on the
- <classname>DOMDocument</classname> object (this
- includes core and Extension namespaces
- pre-registered)
- </entry>
- </row>
- <row>
- <entry><methodname>getXpathPrefix()</methodname></entry>
- <entry>
- Returns the valid <acronym>DOM</acronym> path prefix prepended
- to all XPath queries matching the feed being queried
- </entry>
- </row>
- <row>
- <entry><methodname>getEncoding()</methodname></entry>
- <entry>
- Returns the encoding of the source <acronym>XML</acronym> document
- (note: this cannot account for errors such as the
- server sending documents in a different encoding). Where not
- defined, the default UTF-8 encoding of Unicode is applied.
- </entry>
- </row>
- <row>
- <entry><methodname>count()</methodname></entry>
- <entry>
- Returns a count of the entries or items this feed contains
- (implements <acronym>SPL</acronym> <classname>Countable</classname>
- interface)
- </entry>
- </row>
- <row>
- <entry><methodname>current()</methodname></entry>
- <entry>
- Returns the current entry (using the current index
- from <methodname>key()</methodname>)
- </entry>
- </row>
- <row>
- <entry><methodname>key()</methodname></entry>
- <entry>Returns the current entry index</entry>
- </row>
- <row>
- <entry><methodname>next()</methodname></entry>
- <entry>Increments the entry index value by one</entry>
- </row>
- <row>
- <entry><methodname>rewind()</methodname></entry>
- <entry>Resets the entry index to 0</entry>
- </row>
- <row>
- <entry><methodname>valid()</methodname></entry>
- <entry>
- Checks that the current entry index is valid, i.e.
- it does not fall below 0 and does not exceed the number
- of entries existing.
- </entry>
- </row>
- <row>
- <entry><methodname>getExtensions()</methodname></entry>
- <entry>
- Returns an array of all Extension objects loaded for
- the current feed (note: both feed-level and entry-level Extensions
- exist, and only feed-level Extensions are returned here).
- The array keys are of the form {ExtensionName}_Feed.
- </entry>
- </row>
- <row>
- <entry><methodname>getExtension(string $name)</methodname></entry>
- <entry>
- Returns an Extension object for the feed registered under the
- provided name. This allows more fine-grained access to
- Extensions which may otherwise be hidden within the implementation
- of the standard <acronym>API</acronym> methods.
- </entry>
- </row>
- <row>
- <entry><methodname>getType()</methodname></entry>
- <entry>
- Returns a static class constant (e.g.
- <constant>Zend_Feed_Reader::TYPE_ATOM_03</constant>,
- i.e. Atom 0.3) indicating exactly what kind of feed
- is being consumed.
- </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- </sect2>
- <sect2 id="zend.feed.reader.entry">
- <title>Retrieving Entry/Item Information</title>
- <para>
- Retrieving information for specific entries or items (depending on
- whether you speak Atom or <acronym>RSS</acronym>) is identical to feed level data.
- Accessing entries is simply a matter of iterating over a Feed object
- or using the <acronym>SPL</acronym> <classname>Iterator</classname> interface Feed
- objects implement and calling the appropriate method on each.
- </para>
- <table>
- <title>Entry Level API Methods</title>
- <tgroup cols="2">
- <tbody>
- <row>
- <entry><methodname>getId()</methodname></entry>
- <entry>Returns a unique ID for the current entry.</entry>
- </row>
- <row>
- <entry><methodname>getTitle()</methodname></entry>
- <entry>Returns the title of the current entry.</entry>
- </row>
- <row>
- <entry><methodname>getDescription()</methodname></entry>
- <entry>Returns a description of the current entry.</entry>
- </row>
- <row>
- <entry><methodname>getLink()</methodname></entry>
- <entry>
- Returns a <acronym>URI</acronym> to the <acronym>HTML</acronym> version
- of the current entry.
- </entry>
- </row>
- <row>
- <entry><methodname>getPermaLink()</methodname></entry>
- <entry>
- Returns the permanent link to the current entry. In most cases,
- this is the same as using <methodname>getLink()</methodname>.
- </entry>
- </row>
- <row>
- <entry><methodname>getAuthors()</methodname></entry>
- <entry>
- Returns an object of type
- <classname>Zend_Feed_Reader_Collection_Author</classname> which is an
- <classname>ArrayObject</classname> whose elements are each simple arrays
- containing any combination of the keys "name", "email" and "uri". Where
- irrelevant to the source data, some of these keys may be omitted.
- </entry>
- </row>
- <row>
- <entry><methodname>getAuthor(integer $index = 0)</methodname></entry>
- <entry>
- Returns either the first author known, or with the
- optional <varname>$index</varname> parameter any specific
- index on the array of Authors as described above (returning
- <constant>NULL</constant> if an invalid index).
- </entry>
- </row>
- <row>
- <entry><methodname>getDateCreated()</methodname></entry>
- <entry>
- Returns the date on which the current entry was
- created. Generally only applicable to Atom where it
- represents the date the resource described by an
- Atom 1.0 document was created.
- </entry>
- </row>
- <row>
- <entry><methodname>getDateModified()</methodname></entry>
- <entry>
- Returns the date on which the current entry was last
- modified
- </entry>
- </row>
- <row>
- <entry><methodname>getContent()</methodname></entry>
- <entry>
- Returns the content of the current entry (this has any
- entities reversed if possible assuming the content type is
- <acronym>HTML</acronym>). The description is returned if a
- separate content element does not exist.
- </entry>
- </row>
- <row>
- <entry><methodname>getEnclosure()</methodname></entry>
- <entry>
- Returns an array containing the value of all
- attributes from a multi-media &lt;enclosure&gt; element including
- as array keys: <emphasis>url</emphasis>,
- <emphasis>length</emphasis>, <emphasis>type</emphasis>.
- In accordance with the RSS Best Practices Profile of the RSS
- Advisory Board, no support is offered for multiple enclosures
- since such support forms no part of the RSS specification.
- </entry>
- </row>
- <row>
- <entry><methodname>getCommentCount()</methodname></entry>
- <entry>
- Returns the number of comments made on this entry at the
- time the feed was last generated
- </entry>
- </row>
- <row>
- <entry><methodname>getCommentLink()</methodname></entry>
- <entry>
- Returns a <acronym>URI</acronym> pointing to the <acronym>HTML</acronym>
- page where comments can be made on this entry
- </entry>
- </row>
- <row>
- <entry>
- <methodname>getCommentFeedLink([string $type =
- 'atom'|'rss'])</methodname>
- </entry>
- <entry>
- Returns a <acronym>URI</acronym> pointing to a feed of the provided type
- containing all comments for this entry (type defaults to
- Atom/<acronym>RSS</acronym> depending on current feed type).
- </entry>
- </row>
- <row>
- <entry><methodname>getCategories()</methodname></entry>
- <entry>
- Returns a <classname>Zend_Feed_Reader_Collection_Category</classname>
- object containing the details of any categories associated with the
- entry. The supported fields include "term" (the machine readable
- category name), "scheme" (the categorisation scheme/domain for this
- category), and "label" (an HTML decoded human readable category name).
- Where any of the three fields are absent from the entry, they are either
- set to the closest available alternative or, in the case of "scheme",
- set to <constant>NULL</constant>.
- </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <para>
- The extended <acronym>API</acronym> for entries is identical to that for feeds with the
- exception of the Iterator methods which are not needed here.
- </para>
- <caution>
- <para>
- There is often confusion over the concepts of modified and
- created dates. In Atom, these are two clearly defined concepts
- (so knock yourself out) but in <acronym>RSS</acronym> they are vague.
- <acronym>RSS</acronym> 2.0
- defines a single <emphasis>&lt;pubDate&gt;</emphasis> element
- which typically refers to the date this entry was published,
- i.e. a creation date of sorts. This is not always the case, and
- it may change with updates or not. As a result, if you really
- want to check whether an entry has changed, don't rely on the
- results of <methodname>getDateModified()</methodname>. Instead,
- consider tracking the <acronym>MD5</acronym> hash of three other elements
- concatenated, e.g. using <methodname>getTitle()</methodname>,
- <methodname>getDescription()</methodname> and
- <methodname>getContent()</methodname>. If the entry was truly
- updated, this hash computation will give a different result than
- previously saved hashes for the same entry. This is obviously
- content oriented, and will not assist in detecting changes to other
- relevant elements. Atom feeds should not require such steps.
- </para>
- <para>
- Further muddying the
- waters, dates in feeds may follow different standards. Atom and
- Dublin Core dates should follow <acronym>ISO</acronym> 8601,
- and <acronym>RSS</acronym> dates should
- follow <acronym>RFC</acronym> 822 or <acronym>RFC</acronym> 2822
- which is also common. Date methods
- will throw an exception if <classname>Zend_Date</classname>
- cannot load the date string using one of the above standards, or the
- <acronym>PHP</acronym> recognised possibilities for <acronym>RSS</acronym> dates.
- </para>
- </caution>
- <warning>
- <para>
- The values returned from these methods are not validated. This
- means users must perform validation on all retrieved data
- including the filtering of any <acronym>HTML</acronym> such as from
- <methodname>getContent()</methodname> before it is output from
- your application. Remember that most feeds come from external
- sources, and therefore the default assumption should be that
- they cannot be trusted.
- </para>
- </warning>
- <table>
- <title>Extended Entry Level API Methods</title>
- <tgroup cols="2">
- <tbody>
- <row>
- <entry><methodname>getDomDocument()</methodname></entry>
- <entry>
- Returns the parent
- <classname>DOMDocument</classname> object for the
- entire feed (not just the current entry)
- </entry>
- </row>
- <row>
- <entry><methodname>getElement()</methodname></entry>
- <entry>
- Returns the current entry level
- <classname>DOMElement</classname> object
- </entry>
- </row>
- <row>
- <entry><methodname>getXpath()</methodname></entry>
- <entry>
- Returns the <classname>DOMXPath</classname> object
- used internally to run queries on the
- <classname>DOMDocument</classname> object (this
- includes core and Extension namespaces
- pre-registered)
- </entry>
- </row>
- <row>
- <entry><methodname>getXpathPrefix()</methodname></entry>
- <entry>
- Returns the valid <acronym>DOM</acronym> path prefix prepended
- to all XPath queries matching the entry being queried
- </entry>
- </row>
- <row>
- <entry><methodname>getEncoding()</methodname></entry>
- <entry>
- Returns the encoding of the source <acronym>XML</acronym> document
- (note: this cannot account for errors such as the server sending
- documents in a different encoding). The default encoding applied
- in the absence of any other is the UTF-8 encoding of Unicode.
- </entry>
- </row>
- <row>
- <entry><methodname>getExtensions()</methodname></entry>
- <entry>
- Returns an array of all Extension objects loaded for
- the current entry (note: both feed-level and entry-level
- Extensions exist, and only entry-level Extensions are returned
- here). The array keys are in the form {ExtensionName}_Entry.
- </entry>
- </row>
- <row>
- <entry><methodname>getExtension(string $name)</methodname></entry>
- <entry>
- Returns an Extension object for the entry registered under the
- provided name. This allows more fine-grained access to
- Extensions which may otherwise be hidden within the implementation
- of the standard <acronym>API</acronym> methods.
- </entry>
- </row>
- <row>
- <entry><methodname>getType()</methodname></entry>
- <entry>
- Returns a static class constant (e.g.
- <constant>Zend_Feed_Reader::TYPE_ATOM_03</constant>,
- i.e. Atom 0.3) indicating exactly what kind
- of feed is being consumed.
- </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- </sect2>
- <sect2 id="zend.feed.reader.extending">
- <title>Extending Feed and Entry APIs</title>
- <para>
- Extending <classname>Zend_Feed_Reader</classname> allows you to add
- methods at both the feed and entry level which cover the retrieval
- of information not already supported by
- <classname>Zend_Feed_Reader</classname>. Given the number of
- <acronym>RSS</acronym> and
- Atom extensions that exist, this is a good thing since
- <classname>Zend_Feed_Reader</classname> couldn't possibly add
- everything.
- </para>
- <para>
- There are two types of Extensions possible, those which retrieve
- information from elements which are immediate children of the root
- element (e.g. <command>&lt;channel&gt;</command> for <acronym>RSS</acronym> or
- <command>&lt;feed&gt;</command> for Atom) and those which retrieve
- information from child elements of an entry (e.g.
- <command>&lt;item&gt;</command> for <acronym>RSS</acronym> or
- <command>&lt;entry&gt;</command> for Atom). On the filesystem these are grouped as
- classes within a namespace based on the extension standard's name. For example,
- internally we have <classname>Zend_Feed_Reader_Extension_DublinCore_Feed</classname>
- and <classname>Zend_Feed_Reader_Extension_DublinCore_Entry</classname>
- classes which are two Extensions implementing Dublin Core
- 1.0 and 1.1 support.
- </para>
- <para>
- Extensions are loaded into <classname>Zend_Feed_Reader</classname>
- using <classname>Zend_Loader_PluginLoader</classname>, so their operation
- will be familiar from other Zend Framework components.
- <classname>Zend_Feed_Reader</classname> already bundles a number of
- these Extensions; however, only those used internally are registered
- by default (the so-called Core Extensions). Any other Extension must be registered
- to <classname>Zend_Feed_Reader</classname> before it is used. The
- bundled Extensions include:
- </para>
- <table>
- <title>Core Extensions (pre-registered)</title>
- <tgroup cols="2">
- <tbody>
- <row>
- <entry>DublinCore (Feed and Entry)</entry>
- <entry>
- Implements support for Dublin Core Metadata Element Set 1.0 and 1.1
- </entry>
- </row>
- <row>
- <entry>Content (Entry only)</entry>
- <entry>Implements support for Content 1.0</entry>
- </row>
- <row>
- <entry>Atom (Feed and Entry)</entry>
- <entry>Implements support for Atom 0.3 and Atom 1.0</entry>
- </row>
- <row>
- <entry>Slash</entry>
- <entry>
- Implements support for the Slash <acronym>RSS</acronym> 1.0 module
- </entry>
- </row>
- <row>
- <entry>WellFormedWeb</entry>
- <entry>Implements support for the Well Formed Web CommentAPI 1.0</entry>
- </row>
- <row>
- <entry>Thread</entry>
- <entry>
- Implements support for Atom Threading Extensions as described
- in <acronym>RFC</acronym> 4685
- </entry>
- </row>
- <row>
- <entry>Podcast</entry>
- <entry>
- Implements support for the Podcast 1.0 <acronym>DTD</acronym> from
- Apple
- </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <para>
- The Core Extensions are somewhat special since they are extremely
- common and multi-faceted. For example, we have a Core Extension for Atom.
- Atom is implemented as an Extension (not just a base class) because it
- doubles as a valid <acronym>RSS</acronym> module - you can insert
- Atom elements into <acronym>RSS</acronym> feeds. I've even seen
- <acronym>RDF</acronym> feeds which use a lot of Atom in place of more
- common Extensions like Dublin Core.
- </para>
- <table>
- <title>Non-Core Extensions (must register manually)</title>
- <tgroup cols="2">
- <tbody>
- <row>
- <entry>Syndication</entry>
- <entry>
- Implements Syndication 1.0 support for <acronym>RSS</acronym> feeds
- </entry>
- </row>
- <row>
- <entry>CreativeCommons</entry>
- <entry>
- An <acronym>RSS</acronym> module that adds an element at the
- &lt;channel&gt; or &lt;item&gt; level that specifies which Creative
- Commons license applies.
- </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <para>
- The additional non-Core Extensions are offered but not registered to
- <classname>Zend_Feed_Reader</classname> by default. If you want to
- use them, you'll need to tell
- <classname>Zend_Feed_Reader</classname> to load them in advance of
- importing a feed. Additional non-Core Extensions will be included
- in future iterations of the component.
- </para>
- <para>
- Registering an Extension with
- <classname>Zend_Feed_Reader</classname>, so it is loaded and its <acronym>API</acronym>
- is available to Feed and Entry objects, is a simple affair using the
- <classname>Zend_Loader_PluginLoader</classname>. Here we register
- the optional Syndication Extension, and discover that it can be directly
- called from the Entry level <acronym>API</acronym> without any effort. Note that
- Extension names are case sensitive and use camel casing for multiple
- terms.
- </para>
- <programlisting language="php"><![CDATA[
- Zend_Feed_Reader::registerExtension('Syndication');
- $feed = Zend_Feed_Reader::import('http://rss.slashdot.org/Slashdot/slashdot');
- $updatePeriod = $feed->current()->getUpdatePeriod();
- ]]></programlisting>
- <para>
- In the simple example above, we checked how frequently a feed is being updated
- using the <methodname>getUpdatePeriod()</methodname>
- method. Since it's not part of
- <classname>Zend_Feed_Reader</classname>'s core <acronym>API</acronym>, it could only be
- a method supported by the newly registered Syndication Extension.
- </para>
- <para>
- As you can also notice, the new methods from Extensions are accessible from the main
- <acronym>API</acronym> using <acronym>PHP</acronym>'s magic methods. As an alternative,
- you can also directly access any Extension object for a similar result as seen below.
- </para>
- <programlisting language="php"><![CDATA[
- Zend_Feed_Reader::registerExtension('Syndication');
- $feed = Zend_Feed_Reader::import('http://rss.slashdot.org/Slashdot/slashdot');
- $syndication = $feed->getExtension('Syndication');
- $updatePeriod = $syndication->getUpdatePeriod();
- ]]></programlisting>
- <sect3 id="zend.feed.reader.extending.feed">
- <title>Writing Zend_Feed_Reader Extensions</title>
- <para>
- Inevitably, there will be times when the
- <classname>Zend_Feed_Reader</classname> <acronym>API</acronym> is just not capable
- of getting something you need from a feed or entry. You can use
- the underlying source objects, like
- <classname>DOMDocument</classname>, to get these by hand; however,
- there is a more reusable method available by writing Extensions
- supporting these new queries.
- </para>
- <para>
- As an example, let's take the case of a purely fictitious
- corporation named Jungle Books. Jungle Books have been
- publishing a lot of reviews on books they sell (from external
- sources and customers), which are distributed as an <acronym>RSS</acronym> 2.0
- feed. Their marketing department realises that web applications
- using this feed cannot currently figure out exactly what book is
- being reviewed. To make life easier for everyone, they determine
- that the geek department needs to extend <acronym>RSS</acronym> 2.0 to include a
- new element per entry supplying the <acronym>ISBN</acronym>-10 or
- <acronym>ISBN</acronym>-13 number of
- the publication the entry concerns. They define the new
- <command>&lt;isbn&gt;</command> element quite simply with a standard
- name and namespace <acronym>URI</acronym>:
- </para>
- <programlisting language="php"><![CDATA[
- JungleBooks 1.0:
- http://example.com/junglebooks/rss/module/1.0/
- ]]></programlisting>
- <para>
- A snippet of <acronym>RSS</acronym> containing this extension in practice could be
- something similar to:
- </para>
- <programlisting language="xml"><![CDATA[
- <?xml version="1.0" encoding="utf-8" ?>
- <rss version="2.0"
- xmlns:content="http://purl.org/rss/1.0/modules/content/"
- xmlns:jungle="http://example.com/junglebooks/rss/module/1.0/">
- <channel>
- <title>Jungle Books Customer Reviews</title>
- <link>http://example.com/junglebooks</link>
- <description>Many book reviews!</description>
- <pubDate>Fri, 26 Jun 2009 19:15:10 GMT</pubDate>
- <jungle:dayPopular>
- http://example.com/junglebooks/book/938
- </jungle:dayPopular>
- <item>
- <title>Review Of Flatland: A Romance of Many Dimensions</title>
- <link>http://example.com/junglebooks/review/987</link>
- <author>Confused Physics Student</author>
- <content:encoded>
- A romantic square?!
- </content:encoded>
- <pubDate>Thu, 25 Jun 2009 20:03:28 -0700</pubDate>
- <jungle:isbn>048627263X</jungle:isbn>
- </item>
- </channel>
- </rss>
- ]]></programlisting>
- <para>
- Implementing this new <acronym>ISBN</acronym> element as a simple entry level
- extension would require the following class (using your own class
- namespace outside of Zend).
- </para>
- <programlisting language="php"><![CDATA[
- class My_FeedReader_Extension_JungleBooks_Entry
- extends Zend_Feed_Reader_Extension_EntryAbstract
- {
- public function getIsbn()
- {
- if (isset($this->_data['isbn'])) {
- return $this->_data['isbn'];
- }
- $isbn = $this->_xpath->evaluate(
- 'string(' . $this->getXpathPrefix() . '/jungle:isbn)'
- );
- if (!$isbn) {
- $isbn = null;
- }
- $this->_data['isbn'] = $isbn;
- return $this->_data['isbn'];
- }
- protected function _registerNamespaces()
- {
- $this->_xpath->registerNamespace(
- 'jungle', 'http://example.com/junglebooks/rss/module/1.0/'
- );
- }
- }
- ]]></programlisting>
- <para>
- This extension is easy enough to follow. It creates a new method
- <methodname>getIsbn()</methodname> which runs an XPath query on
- the current entry to extract the <acronym>ISBN</acronym> number enclosed by the
- <command>&lt;jungle:isbn&gt;</command> element. It can optionally
- store this to the internal non-persistent cache (no need to keep
- querying the <acronym>DOM</acronym> if it's called again on the same entry). The
- value is returned to the caller. At the end we have a protected
- method (it's abstract so it must exist) which registers the
- Jungle Books namespace for their custom <acronym>RSS</acronym> module. While we
- call this an <acronym>RSS</acronym> module, there's nothing to prevent the same
- element being used in Atom feeds - and all Extensions which use
- the prefix provided by <methodname>getXpathPrefix()</methodname>
- are actually neutral and work on <acronym>RSS</acronym> or Atom feeds with no
- extra code.
- </para>
- <para>
- Since this Extension is stored outside of Zend Framework, you'll
- need to register the path prefix for your Extensions so
- <classname>Zend_Loader_PluginLoader</classname> can find them.
- After that, it's merely a matter of registering the Extension,
- if it's not already loaded, and using it in practice.
- </para>
- <programlisting language="php"><![CDATA[
- if(!Zend_Feed_Reader::isRegistered('JungleBooks')) {
- Zend_Feed_Reader::addPrefixPath(
- '/path/to/My/FeedReader/Extension', 'My_FeedReader_Extension'
- );
- Zend_Feed_Reader::registerExtension('JungleBooks');
- }
- $feed = Zend_Feed_Reader::import('http://example.com/junglebooks/rss');
- // ISBN for whatever book the first entry in the feed was concerned with
- $firstIsbn = $feed->current()->getIsbn();
- ]]></programlisting>
- <para>
- Writing a feed level Extension is not much different. The
- example feed from earlier included an unmentioned
- <command>&lt;jungle:dayPopular&gt;</command> element which Jungle
- Books have added to their standard to include a link to the
- day's most popular book (in terms of visitor traffic). Here's
- an Extension which adds a
- <methodname>getDaysPopularBookLink()</methodname> method to the
- feed level <acronym>API</acronym>.
- </para>
- <programlisting language="php"><![CDATA[
- class My_FeedReader_Extension_JungleBooks_Feed
- extends Zend_Feed_Reader_Extension_FeedAbstract
- {
- public function getDaysPopularBookLink()
- {
- if (isset($this->_data['dayPopular'])) {
- return $this->_data['dayPopular'];
- }
- $dayPopular = $this->_xpath->evaluate(
- 'string(' . $this->getXpathPrefix() . '/jungle:dayPopular)'
- );
- if (!$dayPopular) {
- $dayPopular = null;
- }
- $this->_data['dayPopular'] = $dayPopular;
- return $this->_data['dayPopular'];
- }
- protected function _registerNamespaces()
- {
- $this->_xpath->registerNamespace(
- 'jungle', 'http://example.com/junglebooks/rss/module/1.0/'
- );
- }
- }
- ]]></programlisting>
- <para>
- Let's repeat the last example using a custom Extension to show the
- method being used.
- </para>
- <programlisting language="php"><![CDATA[
- if(!Zend_Feed_Reader::isRegistered('JungleBooks')) {
- Zend_Feed_Reader::addPrefixPath(
- '/path/to/My/FeedReader/Extension', 'My_FeedReader_Extension'
- );
- Zend_Feed_Reader::registerExtension('JungleBooks');
- }
- $feed = Zend_Feed_Reader::import('http://example.com/junglebooks/rss');
- // URI to the information page of the day's most popular book with visitors
- $daysPopularBookLink = $feed->getDaysPopularBookLink();
- // ISBN for whatever book the first entry in the feed was concerned with
- $firstIsbn = $feed->current()->getIsbn();
- ]]></programlisting>
- <para>
- Going through these examples, you'll note that we don't register
- feed and entry Extensions separately. Extensions within the same
- standard may or may not include both a feed and entry class, so
- <classname>Zend_Feed_Reader</classname> only requires you to
- register the overall parent name, e.g. JungleBooks, DublinCore,
- Slash. Internally, it can check at what level Extensions exist
- and load them up if found. In our case, we have a full set of
- Extensions now: <classname>JungleBooks_Feed</classname> and
- <classname>JungleBooks_Entry</classname>.
- </para>
- </sect3>
- </sect2>
- </sect1>
|