Browse Source

Zend_Feed: Patched Atom content handling when XHTML to better deal with XHTML namespacing

git-svn-id: http://framework.zend.com/svn/framework/standard/trunk@20506 44c647ce-9c0f-0410-b52a-842ac1e357ba
padraic 16 years ago
parent
commit
40e895b5af

+ 16 - 8
library/Zend/Feed/Reader/Extension/Atom/Entry.php

@@ -147,14 +147,19 @@ class Zend_Feed_Reader_Extension_Atom_Entry
                     $xhtml = $this->getXpath()->query(
                         $this->getXpathPrefix() . '/atom:content/xhtml:div'
                     )->item(0);
-                    $xhtml->setAttribute('xmlns', 'http://www.w3.org/1999/xhtml');
+                    //$xhtml->setAttribute('xmlns', 'http://www.w3.org/1999/xhtml');
                     $d = new DOMDocument('1.0', $this->getEncoding());
                     $xhtmls = $d->importNode($xhtml, true);
                     $d->appendChild($xhtmls);
-                    $content = $this->_collectXhtml($d->saveXML());
+                    $content = $this->_collectXhtml(
+                        $d->saveXML(),
+                        $d->lookupPrefix('http://www.w3.org/1999/xhtml')
+                    );
                 break;
             }
         }
+        
+        //var_dump($content); exit;
 
         if (!$content) {
             $content = $this->getDescription();
@@ -168,15 +173,18 @@ class Zend_Feed_Reader_Extension_Atom_Entry
     /**
-     * Parse out XHTML to remove the namespacing
+     * Reduce a serialised XHTML content div to its inner markup: drop the
+     * XML declaration and the wrapping div, then remove the XHTML namespace
+     * prefix from the remaining tags.
+     *
+     * @param  string      $xhtml  Serialised document whose root is the XHTML div
+     * @param  string|null $prefix Prefix bound to the XHTML namespace, or
+     *                             null/empty when it is the default namespace
+     * @return string Inner markup with un-prefixed tag names
      */
-    protected function _collectXhtml($xhtml)
+    protected function _collectXhtml($xhtml, $prefix = null)
     {
+        // Escape the prefix before embedding it in a pattern; a prefix may
+        // legally contain "." or "-".
+        $prefix = empty($prefix) ? '' : preg_quote($prefix . ':', '/');
         $matches = array(
-            "/<\?xml[^<]+/",
-            "/<div.*xmlns=[^<]+/",
-            "/<\/div>\s*$/"
+            // Stop at the end of the opening tag so that text placed
+            // directly inside the div is kept.
+            "/<\?xml[^<]*>[^<]*<" . $prefix . "div[^>]*>/",
+            "/<\/" . $prefix . "div>\s*$/"
         );
-        $cleaned = preg_replace($matches, '', $xhtml);
-        return $cleaned;
+        $xhtml = preg_replace($matches, '', $xhtml);
+        if ($prefix !== '') {
+            $xhtml = preg_replace("/(<[\/]?)" . $prefix . "([a-zA-Z]+)/", '$1$2', $xhtml);
+        }
+        return $xhtml;
     }
 
     /**

+ 32 - 3
library/Zend/Feed/Writer/Renderer/Entry/Atom.php

@@ -320,13 +320,42 @@ class Zend_Feed_Writer_Renderer_Entry_Atom
             return;
         }
         $element = $dom->createElement('content');
-        $element->setAttribute('type', 'html');
-        $cdata = $dom->createCDATASection($content);
-        $element->appendChild($cdata);
+        $element->setAttribute('type', 'xhtml');
+        $xhtmlElement = $this->_loadXhtml($content);
+        $xhtml = $dom->importNode($xhtmlElement, true);
+        $element->appendChild($xhtml);
         $root->appendChild($element);
     }
     
     /**
+     * Load a HTML string and attempt to normalise it to namespaced XHTML.
+     *
+     * The markup is passed through tidy when that class is already loaded,
+     * every tag name is given the "xhtml:" prefix, and the result is parsed
+     * inside a wrapping xhtml:div element.
+     *
+     * @param  string $content Markup to use as the entry content
+     * @return DOMElement The wrapping xhtml:div element
+     * @throws Zend_Feed_Exception If the prefixed markup is not well-formed XML
+     */
+    protected function _loadXhtml($content)
+    {
+        if (class_exists('tidy', false)) {
+            $tidy = new tidy;
+            $config = array(
+                'output-xhtml' => true,
+                'show-body-only' => true
+            );
+            // tidy expects encoding names without hyphens (e.g. "UTF8")
+            $encoding = str_replace('-', '', $this->getEncoding());
+            $tidy->parseString($content, $config, $encoding);
+            $tidy->cleanRepair();
+            $xhtml = (string) $tidy;
+        } else {
+            $xhtml = $content;
+        }
+        // Prefix every opening and closing tag name with "xhtml:"
+        $xhtml = preg_replace("/(<[\/]?)([a-zA-Z]+)/", '$1xhtml:$2', $xhtml);
+        $dom = new DOMDocument('1.0', $this->getEncoding());
+        // Collect parser errors internally so that a failure surfaces as an
+        // exception here instead of a null documentElement further on.
+        $previous = libxml_use_internal_errors(true);
+        $loaded = $dom->loadXML('<xhtml:div xmlns:xhtml="http://www.w3.org/1999/xhtml">'
+            . $xhtml . '</xhtml:div>');
+        libxml_clear_errors();
+        libxml_use_internal_errors($previous);
+        if (!$loaded || $dom->documentElement === null) {
+            // NOTE(review): assumes Zend_Feed_Exception is this component's
+            // exception class, as elsewhere in Zend_Feed - confirm.
+            require_once 'Zend/Feed/Exception.php';
+            throw new Zend_Feed_Exception(
+                'Entry content could not be parsed as well-formed XHTML'
+            );
+        }
+        return $dom->documentElement;
+    }
+    
+    /**
+    
+    /**
      * Set entry categories
      * 
      * @param  DOMDocument $dom 

+ 6 - 3
tests/Zend/Feed/Writer/Renderer/Entry/AtomTest.php

@@ -60,7 +60,7 @@ class Zend_Feed_Writer_Renderer_Entry_AtomTest extends PHPUnit_Framework_TestCas
         $this->_validEntry->setDateCreated(1234567000);
         $this->_validEntry->setLink('http://www.example.com/1');
         $this->_validEntry->addAuthor('Jane', 'jane@example.com', 'http://www.example.com/jane');
-        $this->_validEntry->setContent('This is test entry content.');
+        $this->_validEntry->setContent('<p class="xhtml:">This is test content for <em>xhtml:</em></p>');
         $this->_validWriter->addEntry($this->_validEntry);
     }
 
@@ -123,12 +123,15 @@ class Zend_Feed_Writer_Renderer_Entry_AtomTest extends PHPUnit_Framework_TestCas
         $this->assertEquals('This is a test entry description.', $entry->getDescription());
     }
 
-    public function testEntryContentHasBeenSet()
+    /**
+     * Render the writer as an Atom feed, re-import the serialised XML with
+     * Zend_Feed_Reader and check that the current entry's content equals
+     * the expected XHTML string, including the literal "xhtml:" text in the
+     * attribute value and element text.
+     *
+     * @group ZFWATOMCONTENT
+     */
+    public function testEntryContentHasBeenSet_Xhtml()
     {
         $renderer = new Zend_Feed_Writer_Renderer_Feed_Atom($this->_validWriter);
         $feed = Zend_Feed_Reader::importString($renderer->render()->saveXml());
         $entry = $feed->current();
-        $this->assertEquals('This is test entry content.', $entry->getContent());
+        $this->assertEquals('<p class="xhtml:">This is test content for <em>xhtml:</em></p>', $entry->getContent());
     }
     
     /**