瀏覽代碼

[ZF-10686] Zend_Search_Lucene

- Fixed loadHTML() so that an <html> tag with attributes is recognized


git-svn-id: http://framework.zend.com/svn/framework/standard/trunk@23391 44c647ce-9c0f-0410-b52a-842ac1e357ba
ramon 15 年之前
父節點
當前提交
d7fc70bac6
共有 2 個文件被更改，包括 11 次插入和 2 次刪除
  1. 1 1
      library/Zend/Search/Lucene/Document/Html.php
  2. 10 1
      tests/Zend/Search/Lucene/DocumentTest.php

+ 1 - 1
library/Zend/Search/Lucene/Document/Html.php

@@ -102,7 +102,7 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
             // Document encoding is not recognized
 
             /** @todo improve HTML vs HTML fragment recognition */
-            if (preg_match('/<html>/i', $htmlData, $matches, PREG_OFFSET_CAPTURE)) {
+            if (preg_match('/<html[^>]*>/i', $htmlData, $matches, PREG_OFFSET_CAPTURE)) {
                 // It's an HTML document
                 // Add additional HEAD section and recognize document
                 $htmlTagOffset = $matches[0][1] + strlen($matches[0][0]);

+ 10 - 1
tests/Zend/Search/Lucene/DocumentTest.php

@@ -209,7 +209,7 @@ class Zend_Search_Lucene_DocumentTest extends PHPUnit_Framework_TestCase
 
         $hits = $index->find('ZendFramework');
         $this->assertEquals(count($hits), 1);
-        
+
         unset($index);
         $this->_clearDirectory(dirname(__FILE__) . '/_index/_files');
     }
@@ -319,6 +319,15 @@ class Zend_Search_Lucene_DocumentTest extends PHPUnit_Framework_TestCase
         $this->assertTrue($xlsxDocument->getFieldValue('body') != '');
         $this->assertTrue( strpos($xlsxDocument->getFieldValue('body'), 'ipsum') !== false );
     }
+
+    /**
+     * @group ZF-10686
+     */
+    public function testLoadHtmlWithAttributesInTagHTML()
+    {
+        $doc = Zend_Search_Lucene_Document_Html::loadHTML('<HTML lang="en_US"><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>');
+        $this->assertEquals('Page title ', $doc->title);
+    }
 }