Browse Source

Zend_Search_Lucene: improved Document_Html::getLinks() functionality. Applied patch provided by John Nunez and Christopher Thomas.

git-svn-id: http://framework.zend.com/svn/framework/standard/trunk@21943 44c647ce-9c0f-0410-b52a-842ac1e357ba
alexander 15 năm trước
mục cha
commit
275b28348e

+ 8 - 0
library/Zend/Search/Lucene/Document/Html.php

@@ -166,6 +166,14 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
                 $this->_links[] = $href;
                 $this->_links[] = $href;
             }
             }
         }
         }
+        $linkNodes = $this->_doc->getElementsByTagName('area');
+        foreach ($linkNodes as $linkNode) {
+            if (($href = $linkNode->getAttribute('href')) != '' &&
+                (!self::$_excludeNoFollowLinks  ||  strtolower($linkNode->getAttribute('rel')) != 'nofollow' )
+               ) {
+                $this->_links[] = $href;
+            }
+        }
         $this->_links = array_unique($this->_links);
         $this->_links = array_unique($this->_links);
 
 
         $linkNodes = $xpath->query('/html/head/link');
         $linkNodes = $xpath->query('/html/head/link');

+ 28 - 0
tests/Zend/Search/Lucene/DocumentTest.php

@@ -214,6 +214,34 @@ class Zend_Search_Lucene_DocumentTest extends PHPUnit_Framework_TestCase
         $this->_clearDirectory(dirname(__FILE__) . '/_index/_files');
         $this->_clearDirectory(dirname(__FILE__) . '/_index/_files');
     }
     }
 
 
+    /**
+     * Checks that getLinks() reports the href of image-map <area> tags in
+     * addition to ordinary <a> links.
+     *
+     * The fixture contains two <area> links and two <a> links (one of them
+     * marked rel="nofollow"); the expected list places the <a> links before
+     * the <area> links.
+     *
+     * @group ZF-8740
+     */
+    public function testHtmlAreaTags()
+    {
+        $html = '<HTML>'
+                . '<HEAD><TITLE>Page title</TITLE></HEAD>'
+                . '<BODY>'
+                .   'Document body.'
+                .   '<img src="img.png" width="640" height="480" alt="some image" usemap="#some_map" />'
+                .   '<map name="some_map">'
+                .     '<area shape="rect" coords="0,0,100,100" href="link3.html" alt="Link 3" />'
+                .     '<area shape="rect" coords="200,200,300,300" href="link4.html" alt="Link 4" />'
+                .   '</map>'
+                .   '<a href="link1.html">Link 1</a>.'
+                .   '<a href="link2.html" rel="nofollow">Link 1</a>.'
+                . '</BODY>'
+              . '</HTML>';
+
+        // The nofollow switch is changed through a static setter, so remember
+        // the current value and put it back once this test is done.
+        $oldNoFollowValue = Zend_Search_Lucene_Document_Html::getExcludeNoFollowLinks();
+
+        // With exclusion switched off, the rel="nofollow" link (link2.html)
+        // is expected in the result as well.
+        Zend_Search_Lucene_Document_Html::setExcludeNoFollowLinks(false);
+
+        // Catch-and-rethrow (rather than "finally") restores the setting on
+        // failure while staying runnable on PHP versions that lack "finally".
+        try {
+            $doc1 = Zend_Search_Lucene_Document_Html::loadHTML($html);
+            $this->assertTrue($doc1 instanceof Zend_Search_Lucene_Document_Html);
+
+            $links = array('link1.html', 'link2.html', 'link3.html', 'link4.html');
+            // array_values() drops whatever keys getLinks() returns so that
+            // only the values and their order are compared.
+            $this->assertEquals($links, array_values($doc1->getLinks()));
+        } catch (Exception $e) {
+            Zend_Search_Lucene_Document_Html::setExcludeNoFollowLinks($oldNoFollowValue);
+            throw $e;
+        }
+
+        Zend_Search_Lucene_Document_Html::setExcludeNoFollowLinks($oldNoFollowValue);
+    }
+
+
     public function testHtmlNoFollowLinks()
     public function testHtmlNoFollowLinks()
     {
     {
         $html = '<HTML>'
         $html = '<HTML>'