Explorar o código

ZF-11344: handle encoding mismatches in HtmlEntities filter

- Detect when encoding mismatches occur
  - Convert value to filter's encoding, ignoring unknown characters, and
    re-run htmlentities()

git-svn-id: http://framework.zend.com/svn/framework/standard/trunk@24004 44c647ce-9c0f-0410-b52a-842ac1e357ba
matthew hai 14 anos
pai
achega
532ad468bb

+ 11 - 1
library/Zend/Filter/HtmlEntities.php

@@ -197,6 +197,16 @@ class Zend_Filter_HtmlEntities implements Zend_Filter_Interface
      */
     public function filter($value)
     {
-        return htmlentities((string) $value, $this->getQuoteStyle(), $this->getEncoding(), $this->getDoubleQuote());
+        $filtered = htmlentities((string) $value, $this->getQuoteStyle(), $this->getEncoding(), $this->getDoubleQuote());
+        if (strlen((string) $value) && !strlen($filtered)) {
+            if (!function_exists('iconv')) {
+                require_once 'Zend/Filter/Exception.php';
+                throw new Zend_Filter_Exception(sprintf('Encoding mismatch has resulted in htmlentities errors'));
+            }
+            $enc      = $this->getEncoding();
+            $value    = iconv('', $enc . '//IGNORE', (string) $value);
+            $filtered = htmlentities($value, $this->getQuoteStyle(), $enc, $this->getDoubleQuote());
+        }
+        return $filtered;
     }
 }

+ 39 - 0
tests/Zend/Filter/HtmlEntitiesTest.php

@@ -208,4 +208,43 @@ class Zend_Filter_HtmlEntitiesTest extends PHPUnit_Framework_TestCase
         $this->_filter->setQuoteStyle(ENT_NOQUOTES);
         $this->assertEquals($result, $this->_filter->filter($input));
     }
+
+    /**
+     * @group ZF-11344
+     */
+    public function testCorrectsForEncodingMismatch()
+    {
+        $string = file_get_contents(dirname(__FILE__) . '/_files/latin-1-text.txt');
+
+        // filter() can emit an E_NOTICE or E_WARNING on mismatched input;
+        // let's ignore that, as we want to test the returned value
+        set_error_handler(array($this, 'errorHandler'), E_NOTICE | E_WARNING);
+        $result = $this->_filter->filter($string);
+        restore_error_handler();
+
+        $this->assertTrue(strlen($result) > 0);
+    }
+
+    /**
+     * @group ZF-11344
+     */
+    public function testStripsUnknownCharactersWhenEncodingMismatchDetected()
+    {
+        $string = file_get_contents(dirname(__FILE__) . '/_files/latin-1-text.txt');
+
+        // filter() can emit an E_NOTICE or E_WARNING on mismatched input;
+        // let's ignore that, as we want to test the returned value
+        set_error_handler(array($this, 'errorHandler'), E_NOTICE | E_WARNING);
+        $result = $this->_filter->filter($string);
+        restore_error_handler();
+
+        $this->assertContains('""', $result);
+    }
+
+    /**
+     * Null error handler; used when wanting to ignore specific error types
+     */
+    public function errorHandler($errno, $errstr)
+    {
+    }
 }

+ 1 - 0
tests/Zend/Filter/_files/latin-1-text.txt

@@ -0,0 +1 @@
+Here is a dash: "—"