Przeglądaj źródła

Improved Atom Content parsing to support XHTML completely - fixes ZF-8789

git-svn-id: http://framework.zend.com/svn/framework/standard/trunk@20462 44c647ce-9c0f-0410-b52a-842ac1e357ba
padraic 16 lat temu
rodzic
commit
25d744b169

+ 42 - 6
library/Zend/Feed/Reader/Extension/Atom/Entry.php

@@ -127,21 +127,57 @@ class Zend_Feed_Reader_Extension_Atom_Entry
         if (array_key_exists('content', $this->_data)) {
             return $this->_data['content'];
         }
-
-        $content = $this->getXpath()->evaluate('string(' . $this->getXpathPrefix() . '/atom:content)');
-
-        if ($content) {
-            $content =  html_entity_decode($content, ENT_QUOTES, $this->getEncoding());
+        
+        $content = null;
+        
+        $el = $this->getXpath()->query($this->getXpathPrefix() . '/atom:content');
+        if($el->length > 0) {
+            $el = $el->item(0);
+            $type = $el->getAttribute('type');
+            switch ($type) {
+                case '':
+                case 'text':
+                case 'text/plain':
+                case 'html':
+                case 'text/html':
+                    $content = $el->nodeValue;
+                break;
+                case 'xhtml':
+                    $this->getXpath()->registerNamespace('xhtml', 'http://www.w3.org/1999/xhtml');
+                    $xhtml = $this->getXpath()->query(
+                        $this->getXpathPrefix() . '/atom:content/xhtml:div'
+                    )->item(0);
+                    $xhtml->setAttribute('xmlns', 'http://www.w3.org/1999/xhtml');
+                    $d = new DOMDocument('1.0', $this->getEncoding());
+                    $xhtmls = $d->importNode($xhtml, true);
+                    $d->appendChild($xhtmls);
+                    $content = $this->_collectXhtml($d->saveXML());
+                break;
+            }
         }
 
         if (!$content) {
             $content = $this->getDescription();
         }
 
-        $this->_data['content'] = $content;
+        $this->_data['content'] = trim($content);
 
         return $this->_data['content'];
     }
+    
+    /**
+     * Parse out XHTML to remove the namespacing
+     *
+     * Runs three regular expressions over the given markup with a single
+     * preg_replace() call, replacing every match with an empty string:
+     *  1. the leading XML declaration, up to (not including) the next "<";
+     *  2. an opening "<div" tag carrying an "xmlns=" attribute, up to (not
+     *     including) the next "<";
+     *  3. a closing "</div>" followed only by optional whitespace at the very
+     *     end of the string.
+     *
+     * NOTE(review): pattern 2 ends with a "one or more non-'<' characters"
+     * class, so any character data sitting directly between the opening div
+     * tag and its first child element is removed together with the tag -
+     * confirm this is acceptable for content that starts with plain text.
+     *
+     * NOTE(review): patterns 2 and 3 only match an unprefixed "div"; markup
+     * serialized with a namespace prefix on the wrapper (or on its children)
+     * does not appear to be handled here - verify against the XHTML fixture
+     * that binds the XHTML namespace to a prefix.
+     *
+     * @param  string $xhtml Serialized markup; the visible caller passes the
+     *                       result of DOMDocument::saveXML()
+     * @return string The markup after the three replacements (the raw
+     *                preg_replace() result is returned unchecked)
+     */
+    protected function _collectXhtml($xhtml)
+    {
+        $matches = array(
+            "/<\?xml[^<]+/",
+            "/<div.*xmlns=[^<]+/",
+            "/<\/div>\s*$/"
+        );
+        $cleaned = preg_replace($matches, '', $xhtml);
+        return $cleaned;
+    }
 
     /**
      * Get the entry creation date

+ 40 - 4
tests/Zend/Feed/Reader/Entry/AtomStandaloneEntryTest.php

@@ -215,17 +215,53 @@ class Zend_Feed_Reader_Entry_AtomStandaloneEntryTest extends PHPUnit_Framework_T
 
         $this->assertEquals($expected, $entry->getEnclosure());
     }
-
+    
     /**
-     * Get Content (Unencoded Text)
-     * @group ZFR002
+     * TEXT
+     * @group ZFRATOMCONTENT
      */
     public function testGetsContentFromAtom10()
     {
         $entry = Zend_Feed_Reader::importString(
             file_get_contents($this->_feedSamplePath . '/content/atom10.xml')
         );
-        $this->assertEquals('Entry Content', $entry->getContent());
+        $this->assertEquals('Entry Content &amp;', $entry->getContent());
+    }
+    
+    /**
+     * HTML Escaped
+     *
+     * Imports the standalone-entry fixture content/atom10_Html.xml and checks
+     * the string returned by getContent(). The expected value is the markup
+     * with a single level of entity escaping removed: the paragraph tags are
+     * literal while the ampersand is still written as an entity.
+     *
+     * @group ZFRATOMCONTENT
+     */
+    public function testGetsContentFromAtom10Html()
+    {
+        $entry = Zend_Feed_Reader::importString(
+            file_get_contents($this->_feedSamplePath . '/content/atom10_Html.xml')
+        );
+        $this->assertEquals('<p>Entry Content &amp;</p>', $entry->getContent());
+    }
+    
+    /**
+     * HTML CDATA Escaped
+     *
+     * Imports the standalone-entry fixture content/atom10_HtmlCdata.xml and
+     * checks the string returned by getContent(). The expected value equals
+     * the same paragraph markup asserted by the entity-escaped HTML test, so
+     * both ways of embedding HTML are expected to yield an identical result.
+     *
+     * @group ZFRATOMCONTENT
+     */
+    public function testGetsContentFromAtom10HtmlCdata()
+    {
+        $entry = Zend_Feed_Reader::importString(
+            file_get_contents($this->_feedSamplePath . '/content/atom10_HtmlCdata.xml')
+        );
+        $this->assertEquals('<p>Entry Content &amp;</p>', $entry->getContent());
+    }
+    
+    /**
+     * XHTML
+     *
+     * Imports the standalone-entry fixture content/atom10_Xhtml.xml and checks
+     * the string returned by getContent(). The expected value contains plain
+     * "p" and "em" elements with no wrapping div, while the literal "x:" text
+     * inside the class attribute and the element text is kept as-is.
+     * NOTE(review): the fixture presumably binds the XHTML namespace to the
+     * "x" prefix, making this a check that element prefixes are stripped
+     * without touching look-alike text - confirm against the fixture.
+     *
+     * @group ZFRATOMCONTENT
+     */
+    public function testGetsContentFromAtom10XhtmlNamespaced()
+    {
+        $entry = Zend_Feed_Reader::importString(
+            file_get_contents($this->_feedSamplePath . '/content/atom10_Xhtml.xml')
+        );
+        $this->assertEquals('<p class="x:"><em>Entry Content &amp;x:</em></p>', $entry->getContent());
     }
 
     /**

+ 44 - 1
tests/Zend/Feed/Reader/Entry/AtomTest.php

@@ -313,13 +313,56 @@ class Zend_Feed_Reader_Entry_AtomTest extends PHPUnit_Framework_TestCase
         $this->assertEquals('Entry Content', $entry->getContent());
     }
 
+    /**
+     * TEXT
+     * @group ZFRATOMCONTENT
+     */
     public function testGetsContentFromAtom10()
     {
         $feed = Zend_Feed_Reader::importString(
             file_get_contents($this->_feedSamplePath . '/content/plain/atom10.xml')
         );
         $entry = $feed->current();
-        $this->assertEquals('Entry Content', $entry->getContent());
+        $this->assertEquals('Entry Content &amp;', $entry->getContent());
+    }
+    
+    /**
+     * HTML Escaped
+     *
+     * Imports the feed fixture content/plain/atom10_Html.xml, takes the entry
+     * returned by current() and checks the string returned by getContent().
+     * The expected value is the markup with a single level of entity escaping
+     * removed: the paragraph tags are literal while the ampersand is still
+     * written as an entity.
+     *
+     * @group ZFRATOMCONTENT
+     */
+    public function testGetsContentFromAtom10Html()
+    {
+        $feed = Zend_Feed_Reader::importString(
+            file_get_contents($this->_feedSamplePath . '/content/plain/atom10_Html.xml')
+        );
+        $entry = $feed->current();
+        $this->assertEquals('<p>Entry Content &amp;</p>', $entry->getContent());
+    }
+    
+    /**
+     * HTML CDATA Escaped
+     *
+     * Imports the feed fixture content/plain/atom10_HtmlCdata.xml, takes the
+     * entry returned by current() and checks the string returned by
+     * getContent(). The expected value equals the same paragraph markup
+     * asserted by the entity-escaped HTML test, so both ways of embedding
+     * HTML are expected to yield an identical result.
+     *
+     * @group ZFRATOMCONTENT
+     */
+    public function testGetsContentFromAtom10HtmlCdata()
+    {
+        $feed = Zend_Feed_Reader::importString(
+            file_get_contents($this->_feedSamplePath . '/content/plain/atom10_HtmlCdata.xml')
+        );
+        $entry = $feed->current();
+        $this->assertEquals('<p>Entry Content &amp;</p>', $entry->getContent());
+    }
+    
+    /**
+     * XHTML
+     *
+     * Imports the feed fixture content/plain/atom10_Xhtml.xml, takes the entry
+     * returned by current() and checks the string returned by getContent().
+     * The expected value contains plain "p" and "em" elements with no wrapping
+     * div, while the literal "x:" text inside the class attribute and the
+     * element text is kept as-is.
+     * NOTE(review): the fixture presumably binds the XHTML namespace to the
+     * "x" prefix, making this a check that element prefixes are stripped
+     * without touching look-alike text - confirm against the fixture.
+     *
+     * @group ZFRATOMCONTENT
+     */
+    public function testGetsContentFromAtom10XhtmlNamespaced()
+    {
+        $feed = Zend_Feed_Reader::importString(
+            file_get_contents($this->_feedSamplePath . '/content/plain/atom10_Xhtml.xml')
+        );
+        $entry = $feed->current();
+        $this->assertEquals('<p class="x:"><em>Entry Content &amp;x:</em></p>', $entry->getContent());
     }
 
     /**

+ 2 - 2
tests/Zend/Feed/Reader/Entry/_files/Atom/content/plain/atom03.xml

@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="utf-8"?>
 <feed version="0.3" xmlns="http://purl.org/atom/ns#">
     <entry>
-        <content type="xhtml" xml:lang="en" xml:base="http://diveintomark.org/">Entry Content</content>
+        <content type="html" xml:lang="en" xml:base="http://diveintomark.org/">Entry Content</content>
     </entry>
-</feed>
+</feed>

+ 2 - 2
tests/Zend/Feed/Reader/Entry/_files/Atom/content/plain/atom10.xml

@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="utf-8"?>
 <feed xmlns="http://www.w3.org/2005/Atom">
     <entry>
-		<content type="xhtml" xml:lang="en" xml:base="http://diveintomark.org/">Entry Content</content>
+		<content type="text">Entry Content &amp;amp;</content>
     </entry>
-</feed>
+</feed>

+ 6 - 0
tests/Zend/Feed/Reader/Entry/_files/Atom/content/plain/atom10_Html.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="utf-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom">
+    <entry>
+		<content type="html">&lt;p&gt;Entry Content &amp;amp;&lt;/p&gt;</content>
+    </entry>
+</feed>

+ 6 - 0
tests/Zend/Feed/Reader/Entry/_files/Atom/content/plain/atom10_HtmlCdata.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="utf-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom">
+    <entry>
+		<content type="html"><![CDATA[<p>Entry Content &amp;</p>]]></content>
+    </entry>
+</feed>

+ 10 - 0
tests/Zend/Feed/Reader/Entry/_files/Atom/content/plain/atom10_Xhtml.xml

@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="utf-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom">
+    <entry>
+		<content type="xhtml" xmlns:x="http://www.w3.org/1999/xhtml">
+		    <x:div>
+		        <x:p class="x:"><x:em>Entry Content &amp;x:</x:em></x:p>
+		    </x:div>
+		</content>
+    </entry>
+</feed>

+ 1 - 1
tests/Zend/Feed/Reader/Entry/_files/AtomStandaloneEntry/content/atom10.xml

@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="utf-8"?>
     <entry xmlns="http://www.w3.org/2005/Atom">
-		<content type="xhtml" xml:lang="en" xml:base="http://diveintomark.org/">Entry Content</content>
+		<content type="text">Entry Content &amp;amp;</content>
     </entry>

+ 4 - 0
tests/Zend/Feed/Reader/Entry/_files/AtomStandaloneEntry/content/atom10_Html.xml

@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="utf-8"?>
+    <entry xmlns="http://www.w3.org/2005/Atom">
+		<content type="html">&lt;p&gt;Entry Content &amp;amp;&lt;/p&gt;</content>
+    </entry>

+ 4 - 0
tests/Zend/Feed/Reader/Entry/_files/AtomStandaloneEntry/content/atom10_HtmlCdata.xml

@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="utf-8"?>
+    <entry xmlns="http://www.w3.org/2005/Atom">
+		<content type="html"><![CDATA[<p>Entry Content &amp;</p>]]></content>
+    </entry>

+ 8 - 0
tests/Zend/Feed/Reader/Entry/_files/AtomStandaloneEntry/content/atom10_Xhtml.xml

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="utf-8"?>
+    <entry xmlns="http://www.w3.org/2005/Atom">
+		<content type="xhtml" xmlns:x="http://www.w3.org/1999/xhtml">
+		    <x:div>
+		        <x:p class="x:"><x:em>Entry Content &amp;x:</x:em></x:p>
+		    </x:div>
+		</content>
+    </entry>

+ 1 - 1
tests/Zend/Feed/Reader/Integration/WordpressAtom10Test.php

@@ -191,7 +191,7 @@ class Zend_Feed_Reader_Integration_WordpressAtom10Test extends PHPUnit_Framework
             file_get_contents($this->_feedSamplePath)
         );
         $entry = $feed->current();
-        $this->assertEquals('<p>Being in New Zealand does strange things to a person. Everybody who knows me, knows I don’t much like that crazy invention called a Book. However, being here I’ve already finished 4 books, all of which I can highly recommend.</p>'."\n\n".'<ul>'."\n".'<li><a href="http://www.amazon.com/Agile-Software-Development-Scrum/dp/0130676349/">Agile Software Development with Scrum, by Ken Schwaber and Mike Beedle</a></li>'."\n".'<li><a href="http://www.amazon.com/Domain-Driven-Design-Tackling-Complexity-Software/dp/0321125215/">Domain-Driven Design: Tackling Complexity in the Heart of Software, by Eric Evans</a></li>'."\n".'<li><a href="http://www.amazon.com/Enterprise-Application-Architecture-Addison-Wesley-Signature/dp/0321127420/">Patterns of Enterprise Application Architecture, by Martin Fowler</a></li>'."\n".'<li><a href="http://www.amazon.com/Refactoring-Improving-Existing-Addison-Wesley-Technology/dp/0201485672/">Refactoring: Improving the Design of Existing Code by Martin Fowler</a></li>'."\n\n".'</ul>'."\n".'<p>Next up: <a href="http://www.amazon.com/Design-Patterns-Object-Oriented-Addison-Wesley-Professional/dp/0201633612/">Design Patterns: Elements of Reusable Object-Oriented Software, by the Gang of Four</a>. Yes, talk about classics and shame on me for not having ordered it sooner! Also reading <a href="http://www.amazon.com/Implementation-Patterns-Addison-Wesley-Signature-Kent/dp/0321413091/">Implementation Patterns, by Kent Beck</a> at the moment.</p>'."\n", $entry->getContent());
+        $this->assertEquals('<p>Being in New Zealand does strange things to a person. Everybody who knows me, knows I don&#8217;t much like that crazy invention called a Book. However, being here I&#8217;ve already finished 4 books, all of which I can highly recommend.</p><ul><li><a href="http://www.amazon.com/Agile-Software-Development-Scrum/dp/0130676349/">Agile Software Development with Scrum, by Ken Schwaber and Mike Beedle</a></li><li><a href="http://www.amazon.com/Domain-Driven-Design-Tackling-Complexity-Software/dp/0321125215/">Domain-Driven Design: Tackling Complexity in the Heart of Software, by Eric Evans</a></li><li><a href="http://www.amazon.com/Enterprise-Application-Architecture-Addison-Wesley-Signature/dp/0321127420/">Patterns of Enterprise Application Architecture, by Martin Fowler</a></li><li><a href="http://www.amazon.com/Refactoring-Improving-Existing-Addison-Wesley-Technology/dp/0201485672/">Refactoring: Improving the Design of Existing Code by Martin Fowler</a></li></ul><p>Next up: <a href="http://www.amazon.com/Design-Patterns-Object-Oriented-Addison-Wesley-Professional/dp/0201633612/">Design Patterns: Elements of Reusable Object-Oriented Software, by the Gang of Four</a>. Yes, talk about classics and shame on me for not having ordered it sooner! Also reading <a href="http://www.amazon.com/Implementation-Patterns-Addison-Wesley-Signature-Kent/dp/0321413091/">Implementation Patterns, by Kent Beck</a> at the moment.</p>', str_replace("\n",'',$entry->getContent()));
     }
 
     public function testGetsEntryLinks()