<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
/**
 * Query token: a piece of query text together with the lexeme type derived
 * from it and its position within the query.
 *
 * A token is constructed from a coarse token category (one of the TC_*
 * constants) and the token text; the constructor resolves the pair into one
 * of the fine-grained TT_* lexeme types.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_QueryToken
{
    /**
     * Token types.
     */
    const TT_WORD             = 0;  // Word
    const TT_PHRASE           = 1;  // Phrase (one or several quoted words)
    const TT_FIELD            = 2;  // Field name in 'field:word', field:<phrase> or field:(<subquery>) pairs
    const TT_FIELD_INDICATOR  = 3;  // ':'
    const TT_REQUIRED         = 4;  // '+'
    const TT_PROHIBITED       = 5;  // '-'
    const TT_FUZZY_PROX_MARK  = 6;  // '~'
    const TT_BOOSTING_MARK    = 7;  // '^'
    const TT_RANGE_INCL_START = 8;  // '['
    const TT_RANGE_INCL_END   = 9;  // ']'
    const TT_RANGE_EXCL_START = 10; // '{'
    const TT_RANGE_EXCL_END   = 11; // '}'
    const TT_SUBQUERY_START   = 12; // '('
    const TT_SUBQUERY_END     = 13; // ')'
    const TT_AND_LEXEME       = 14; // 'AND' or 'and' (also '&&')
    const TT_OR_LEXEME        = 15; // 'OR'  or 'or'  (also '||')
    const TT_NOT_LEXEME       = 16; // 'NOT' or 'not' (also '!')
    const TT_TO_LEXEME        = 17; // 'TO'  or 'to'
    const TT_NUMBER           = 18; // Number, like: 10, 0.8, .64, ....

    /**
     * Returns all possible lexeme types.
     * It's used for syntax analyzer state machine initialization
     *
     * @return array List of all TT_* constants, in declaration order
     */
    public static function getTypes()
    {
        return array(
            self::TT_WORD,
            self::TT_PHRASE,
            self::TT_FIELD,
            self::TT_FIELD_INDICATOR,
            self::TT_REQUIRED,
            self::TT_PROHIBITED,
            self::TT_FUZZY_PROX_MARK,
            self::TT_BOOSTING_MARK,
            self::TT_RANGE_INCL_START,
            self::TT_RANGE_INCL_END,
            self::TT_RANGE_EXCL_START,
            self::TT_RANGE_EXCL_END,
            self::TT_SUBQUERY_START,
            self::TT_SUBQUERY_END,
            self::TT_AND_LEXEME,
            self::TT_OR_LEXEME,
            self::TT_NOT_LEXEME,
            self::TT_TO_LEXEME,
            self::TT_NUMBER
        );
    }

    /**
     * Token categories.
     */
    const TC_WORD           = 0; // Word
    const TC_PHRASE         = 1; // Phrase (one or several quoted words)
    const TC_NUMBER         = 2; // Numbers, which are used with syntax elements. Ex. roam~0.8
    const TC_SYNTAX_ELEMENT = 3; // : + - ( ) [ ] { } ! || && ~ ^

    /**
     * Token type (one of the TT_* constants).
     *
     * @var integer
     */
    public $type;

    /**
     * Token text.
     *
     * @var string
     */
    public $text;

    /**
     * Token position within query (the constructor argument plus one).
     *
     * @var integer
     */
    public $position;

    /**
     * Creates a token and derives its lexeme type from the category and text.
     *
     * Words equal (case-insensitively) to 'and', 'or', 'not' or 'to' become
     * the corresponding operator lexemes; every other word is a plain
     * TT_WORD. Syntax elements are mapped by their exact text.
     *
     * @param integer $tokenCategory One of the TC_* constants
     * @param string  $tokenText     Text of the token
     * @param integer $position      Token position; stored incremented by one
     * @throws Zend_Search_Lucene_Exception If the syntax element text or the
     *                                      token category is not recognized
     */
    public function __construct($tokenCategory, $tokenText, $position)
    {
        $this->text     = $tokenText;
        $this->position = $position + 1; // Start from 1

        switch ($tokenCategory) {
            case self::TC_WORD:
                // Operator keywords are matched regardless of letter case
                $lowerText = strtolower($tokenText);

                if ($lowerText === 'and') {
                    $this->type = self::TT_AND_LEXEME;
                } elseif ($lowerText === 'or') {
                    $this->type = self::TT_OR_LEXEME;
                } elseif ($lowerText === 'not') {
                    $this->type = self::TT_NOT_LEXEME;
                } elseif ($lowerText === 'to') {
                    $this->type = self::TT_TO_LEXEME;
                } else {
                    $this->type = self::TT_WORD;
                }
                break;

            case self::TC_PHRASE:
                $this->type = self::TT_PHRASE;
                break;

            case self::TC_NUMBER:
                $this->type = self::TT_NUMBER;
                break;

            case self::TC_SYNTAX_ELEMENT:
                switch ($tokenText) {
                    case ':':
                        $this->type = self::TT_FIELD_INDICATOR;
                        break;

                    case '+':
                        $this->type = self::TT_REQUIRED;
                        break;

                    case '-':
                        $this->type = self::TT_PROHIBITED;
                        break;

                    case '~':
                        $this->type = self::TT_FUZZY_PROX_MARK;
                        break;

                    case '^':
                        $this->type = self::TT_BOOSTING_MARK;
                        break;

                    case '[':
                        $this->type = self::TT_RANGE_INCL_START;
                        break;

                    case ']':
                        $this->type = self::TT_RANGE_INCL_END;
                        break;

                    case '{':
                        $this->type = self::TT_RANGE_EXCL_START;
                        break;

                    case '}':
                        $this->type = self::TT_RANGE_EXCL_END;
                        break;

                    case '(':
                        $this->type = self::TT_SUBQUERY_START;
                        break;

                    case ')':
                        $this->type = self::TT_SUBQUERY_END;
                        break;

                    // Symbolic forms of the boolean operators share the
                    // lexeme types of their keyword counterparts
                    case '!':
                        $this->type = self::TT_NOT_LEXEME;
                        break;

                    case '&&':
                        $this->type = self::TT_AND_LEXEME;
                        break;

                    case '||':
                        $this->type = self::TT_OR_LEXEME;
                        break;

                    default:
                        // Exception class is loaded only when it is actually needed
                        require_once 'Zend/Search/Lucene/Exception.php';
                        throw new Zend_Search_Lucene_Exception('Unrecognized query syntax lexeme: \'' . $tokenText . '\'');
                }
                break;

            default:
                // Exception class is loaded only when it is actually needed
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Unrecognized lexeme type: \'' . $tokenCategory . '\'');
        }
    }
}
|