|
|
@@ -68,6 +68,17 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
|
|
|
private static $_excludeNoFollowLinks = false;
|
|
|
|
|
|
/**
|
|
|
+ *
|
|
|
+ * List of inline tags (a text node whose parent is one of these gets no trailing space on extraction)
|
|
|
+ *
|
|
|
+ * @var array
|
|
|
+ */
|
|
|
+ private $_inlineTags = array('a', 'abbr', 'acronym', 'dfn', 'em', 'strong', 'code',
|
|
|
+ 'samp', 'kbd', 'var', 'b', 'i', 'big', 'small', 'strike',
|
|
|
+ 'tt', 'u', 'font', 'span', 'bdo', 'cite', 'del', 'ins',
|
|
|
+ 'q', 'sub', 'sup');
|
|
|
+
|
|
|
+ /**
|
|
|
* Object constructor
|
|
|
*
|
|
|
* @param string $data HTML string (may be HTML fragment, )
|
|
|
@@ -197,8 +208,10 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
|
|
|
private function _retrieveNodeText(DOMNode $node, &$text)
|
|
|
{
|
|
|
if ($node->nodeType == XML_TEXT_NODE) {
|
|
|
- $text .= $node->nodeValue ;
|
|
|
- $text .= ' ';
|
|
|
+ $text .= $node->nodeValue;
|
|
|
+ if(!in_array($node->parentNode->tagName, $this->_inlineTags)) {
|
|
|
+ $text .= ' ';
|
|
|
+ }
|
|
|
} else if ($node->nodeType == XML_ELEMENT_NODE && $node->nodeName != 'script') {
|
|
|
foreach ($node->childNodes as $childNode) {
|
|
|
$this->_retrieveNodeText($childNode, $text);
|