Bbcode.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Markup
  17. * @subpackage Parser
  18. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /**
  23. * @see Zend_Markup_TokenList
  24. */
  25. require_once 'Zend/Markup/TokenList.php';
  26. /**
  27. * @see Zend_Markup_Parser_ParserInterface
  28. */
  29. require_once 'Zend/Markup/Parser/ParserInterface.php';
  30. /**
  31. * @category Zend
  32. * @package Zend_Markup
  33. * @subpackage Parser
  34. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  35. * @license http://framework.zend.com/license/new-bsd New BSD License
  36. */
  class Zend_Markup_Parser_Bbcode implements Zend_Markup_Parser_ParserInterface
  {
      /**
       * Placeholder tag that parse() substitutes for every line break
       * ("\r\n", "\r" or "\n") before the input is tokenized
       */
      const NEWLINE = "[newline\0]";

      // there is a parsing difference between the default tags and single tags:
      // only default tags become the current node and wait for a stopper
      const TYPE_DEFAULT = 'default';
      const TYPE_SINGLE  = 'single';

      /**
       * Body of the regex character class that matches one character of a tag
       * or attribute name: anything except "[", "]", "=" and whitespace
       */
      const NAME_CHARSET = '^\[\]=\s';

      // tokenizer states
      const STATE_SCAN       = 0; // reading text, looking for the start of a tag
      const STATE_SCANATTRS  = 1; // inside a tag, looking for an attribute or "]"
      const STATE_PARSEVALUE = 2; // directly after "=", reading a value

      /**
       * Token tree
       *
       * @var Zend_Markup_TokenList
       */
      protected $_tree;

      /**
       * Current token
       *
       * @var Zend_Markup_Token
       */
      protected $_current;

      /**
       * Source to tokenize
       *
       * @var string
       */
      protected $_value = '';

      /**
       * Length of the value
       *
       * @var int
       */
      protected $_valueLen = 0;

      /**
       * Current pointer (byte offset into the value)
       *
       * @var int
       */
      protected $_pointer = 0;

      /**
       * The buffer, collects plain text until the next complete tag
       *
       * @var string
       */
      protected $_buffer = '';

      /**
       * Temporary tag storage, holds the tag that is currently being tokenized
       * ('tag', 'name' and 'attributes' keys)
       *
       * @var array
       */
      protected $_temp;

      /**
       * Stoppers that we are searching for, as stopper => number of open tags
       * that accept it
       *
       * @var array
       */
      protected $_searchedStoppers = array();

      /**
       * Tag information, indexed by tag name; every entry has a 'type'
       * (one of the TYPE_* constants) and a list of 'stoppers'
       *
       * @var array
       */
      protected $_tags = array(
          'Zend_Markup_Root' => array(
              'type'     => self::TYPE_DEFAULT,
              'stoppers' => array(),
          ),
          '*' => array(
              'type'     => self::TYPE_DEFAULT,
              'stoppers' => array(self::NEWLINE, '[/*]', '[/]'),
          ),
          'hr' => array(
              'type'     => self::TYPE_SINGLE,
              'stoppers' => array(),
          ),
      );

      /**
       * Token array
       *
       * @var array
       */
      protected $_tokens = array();

      /**
       * State
       *
       * @var int
       */
      protected $_state = self::STATE_SCAN;

      /**
       * Prepare the parsing of a bbcode string, the real parsing is done in
       * {@link _tokenize()} and {@link _createTree()}
       *
       * @param  string $value
       * @throws Zend_Markup_Parser_Exception when $value is not a string or is
       *                                      the empty string
       * @return Zend_Markup_TokenList
       */
      public function parse($value)
      {
          if (!is_string($value)) {
              /**
               * @see Zend_Markup_Parser_Exception
               */
              require_once 'Zend/Markup/Parser/Exception.php';
              throw new Zend_Markup_Parser_Exception('Value to parse should be a string.');
          }

          // compare with '' instead of using empty(): empty('0') is true, which
          // would reject the perfectly valid input "0"
          if ($value === '') {
              /**
               * @see Zend_Markup_Parser_Exception
               */
              require_once 'Zend/Markup/Parser/Exception.php';
              throw new Zend_Markup_Parser_Exception('Value to parse cannot be left empty.');
          }

          // line breaks are turned into a tag, so they can act as a stopper
          $this->_value = str_replace(array("\r\n", "\r", "\n"), self::NEWLINE, $value);

          // variable initialization for tokenizer
          $this->_valueLen = strlen($this->_value);
          $this->_pointer  = 0;
          $this->_buffer   = '';
          $this->_temp     = array();
          $this->_state    = self::STATE_SCAN;
          $this->_tokens   = array();

          $this->_tokenize();

          // variable initialization for treebuilder
          $this->_searchedStoppers = array();
          $this->_tree             = new Zend_Markup_TokenList();
          $this->_current          = new Zend_Markup_Token(
              '',
              Zend_Markup_Token::TYPE_NONE,
              'Zend_Markup_Root'
          );

          $this->_tree->addChild($this->_current);

          $this->_createTree();

          return $this->_tree;
      }

      /**
       * Tokenize
       *
       * Splits $this->_value into a flat list of text tokens and tag tokens
       * and stores it in $this->_tokens. Text that looks like the start of a
       * tag but never becomes a complete tag is kept as plain text.
       *
       * @return void
       */
      protected function _tokenize()
      {
          $attribute = '';

          while ($this->_pointer < $this->_valueLen) {
              switch ($this->_state) {
                  case self::STATE_SCAN:
                      $matches = array();
                      $regex   = '#\G(?<text>[^\[]*)(?<open>\[(?<name>[' . self::NAME_CHARSET . ']+)?)?#';
                      if (!preg_match($regex, $this->_value, $matches, 0, $this->_pointer)) {
                          // the pattern can match the empty string, so this only
                          // happens on a PCRE error; take the remaining input as
                          // text instead of looping forever on the same offset
                          $this->_buffer .= substr($this->_value, $this->_pointer);
                          $this->_pointer = $this->_valueLen;
                          break;
                      }

                      $this->_pointer += strlen($matches[0]);

                      // no empty() here: a text run of "0" has to be kept
                      $this->_buffer .= $matches['text'];

                      if (!isset($matches['open'])) {
                          // great, no tag, we are ending the string
                          break;
                      }
                      if (!isset($matches['name'])) {
                          // a "[" that is not followed by a name is just text
                          $this->_buffer .= $matches['open'];
                          break;
                      }

                      $this->_temp = array(
                          'tag'        => '[' . $matches['name'],
                          'name'       => $matches['name'],
                          'attributes' => array()
                      );

                      if ($this->_pointer >= $this->_valueLen) {
                          // damn, no tag
                          $this->_buffer .= $this->_temp['tag'];
                          break 2;
                      }

                      if ($this->_value[$this->_pointer] === '=') {
                          // [name=value]: the value is stored under the tag name
                          $this->_pointer++;

                          $this->_temp['tag'] .= '=';
                          $this->_state        = self::STATE_PARSEVALUE;
                          $attribute           = $this->_temp['name'];
                      } else {
                          $this->_state = self::STATE_SCANATTRS;
                      }
                      break;
                  case self::STATE_SCANATTRS:
                      $matches = array();
                      $regex   = '#\G((?<end>\s*\])|\s+(?<attribute>[' . self::NAME_CHARSET . ']+)(?<eq>=?))#';
                      if (!preg_match($regex, $this->_value, $matches, 0, $this->_pointer)) {
                          // malformed tag: keep what was read so far as plain
                          // text and go on scanning, instead of throwing away
                          // the partial tag and the rest of the input
                          $this->_buffer .= $this->_temp['tag'];
                          $this->_temp    = array();
                          $this->_state   = self::STATE_SCAN;
                          break;
                      }

                      $this->_pointer += strlen($matches[0]);

                      if ($matches['end'] !== '') {
                          // the tag is complete, flush the text in front of it
                          if ($this->_buffer !== '') {
                              $this->_tokens[] = array(
                                  'tag'  => $this->_buffer,
                                  'type' => Zend_Markup_Token::TYPE_NONE
                              );
                              $this->_buffer = '';
                          }
                          $this->_temp['tag']  .= $matches['end'];
                          $this->_temp['type']  = Zend_Markup_Token::TYPE_TAG;

                          $this->_tokens[] = $this->_temp;
                          $this->_temp     = array();

                          $this->_state = self::STATE_SCAN;
                      } else {
                          // attribute name
                          $attribute = $matches['attribute'];

                          $this->_temp['tag'] .= $matches[0];

                          $this->_temp['attributes'][$attribute] = '';

                          if ($matches['eq'] === '') {
                              $this->_state = self::STATE_SCANATTRS;
                          } else {
                              $this->_state = self::STATE_PARSEVALUE;
                          }
                      }
                      break;
                  case self::STATE_PARSEVALUE:
                      $matches = array();
                      // the quoted value is matched lazily up to the first
                      // repetition of the opening quote (back-reference 2), so
                      // a value cannot swallow the following attributes or tags
                      $regex   = '#\G((?<quote>"|\')(?<valuequote>.*?)\\2|(?<value>[^\]\s]+))#s';
                      if (!preg_match($regex, $this->_value, $matches, 0, $this->_pointer)) {
                          // no value after the "=", continue with the tag
                          $this->_state = self::STATE_SCANATTRS;
                          break;
                      }

                      $this->_pointer += strlen($matches[0]);

                      if ($matches['quote'] !== '') {
                          $this->_temp['attributes'][$attribute] = $matches['valuequote'];
                      } else {
                          $this->_temp['attributes'][$attribute] = $matches['value'];
                      }
                      $this->_temp['tag'] .= $matches[0];

                      $this->_state = self::STATE_SCANATTRS;
                      break;
              }
          }

          // the input ended in the middle of a tag, keep the partial tag as text
          if (($this->_state !== self::STATE_SCAN) && isset($this->_temp['tag'])) {
              $this->_buffer .= $this->_temp['tag'];
              $this->_temp    = array();
              $this->_state   = self::STATE_SCAN;
          }

          if ($this->_buffer !== '') {
              $this->_tokens[] = array(
                  'tag'  => $this->_buffer,
                  'type' => Zend_Markup_Token::TYPE_NONE
              );
          }
      }

      /**
       * Parse the token array into a tree
       *
       * Reads $this->_tokens and attaches every token to the tree below
       * $this->_current, which parse() initializes with the root token.
       *
       * @return void
       */
      public function _createTree()
      {
          foreach ($this->_tokens as $token) {
              // first we want to know if this tag is a stopper, or at least a searched one
              if ($this->_isStopper($token['tag'])) {
                  // find the stopper: walk up until we reach the tag it closes,
                  // remembering the tags that are still open in between
                  $oldItems = array();

                  while (!in_array($token['tag'], $this->_tags[$this->_current->getName()]['stoppers'], true)) {
                      $oldItems[]     = clone $this->_current;
                      $this->_current = $this->_current->getParent();
                  }

                  // we found the stopper, so stop the tag
                  $this->_current->setStopper($token['tag']);
                  $this->_removeFromSearchedStoppers($this->_current);
                  $this->_current = $this->_current->getParent();

                  // add the old items again if there are any, outermost first
                  if (!empty($oldItems)) {
                      foreach (array_reverse($oldItems) as $item) {
                          /* @var $item Zend_Markup_Token */
                          $this->_current->addChild($item);
                          $item->setParent($this->_current);
                          $this->_current = $item;
                      }
                  }
              } else {
                  if ($token['type'] == Zend_Markup_Token::TYPE_TAG) {
                      if ($token['tag'] == self::NEWLINE) {
                          // this is a newline tag, add it as a token
                          $this->_current->addChild(new Zend_Markup_Token(
                              "\n",
                              Zend_Markup_Token::TYPE_NONE,
                              '',
                              array(),
                              $this->_current
                          ));
                      } elseif (isset($token['name']) && ($token['name'][0] == '/')) {
                          // this is a stopper nobody is waiting for, add it as a
                          // plain text token
                          $this->_current->addChild(new Zend_Markup_Token(
                              $token['tag'],
                              Zend_Markup_Token::TYPE_NONE,
                              '',
                              array(),
                              $this->_current
                          ));
                      } else {
                          // add the tag
                          $child = new Zend_Markup_Token(
                              $token['tag'],
                              $token['type'],
                              $token['name'],
                              $token['attributes'],
                              $this->_current
                          );
                          $this->_current->addChild($child);

                          // add stoppers for this tag, if its has stoppers
                          if ($this->_getType($token['name']) == self::TYPE_DEFAULT) {
                              $this->_current = $child;

                              $this->_addToSearchedStoppers($this->_current);
                          }
                      }
                  } else {
                      // no tag, just add it as a simple token
                      $this->_current->addChild(new Zend_Markup_Token(
                          $token['tag'],
                          Zend_Markup_Token::TYPE_NONE,
                          '',
                          array(),
                          $this->_current
                      ));
                  }
              }
          }
      }

      /**
       * Check if there is a tag declaration, and if it isnt there, add it
       *
       * An undeclared tag becomes a default tag that is stopped by
       * "[/name]" or "[/]".
       *
       * @param string $name
       *
       * @return void
       */
      protected function _checkTagDeclaration($name)
      {
          if (!isset($this->_tags[$name])) {
              $this->_tags[$name] = array(
                  'type'     => self::TYPE_DEFAULT,
                  'stoppers' => array(
                      '[/' . $name . ']',
                      '[/]'
                  )
              );
          }
      }

      /**
       * Check the tag's type
       *
       * Declares the tag first when it is not known yet.
       *
       * @param  string $name
       * @return string one of the TYPE_* constants
       */
      protected function _getType($name)
      {
          $this->_checkTagDeclaration($name);

          return $this->_tags[$name]['type'];
      }

      /**
       * Check if the tag is a stopper
       *
       * A tag counts as a stopper when at least one open tag is searching
       * for it.
       *
       * @param  string $tag
       * @return bool
       */
      protected function _isStopper($tag)
      {
          $this->_checkTagDeclaration($this->_current->getName());

          return !empty($this->_searchedStoppers[$tag]);
      }

      /**
       * Add to searched stoppers
       *
       * Increments the counter of every stopper of the token's tag.
       *
       * @param  Zend_Markup_Token $token
       * @return void
       */
      protected function _addToSearchedStoppers(Zend_Markup_Token $token)
      {
          $this->_checkTagDeclaration($token->getName());

          foreach ($this->_tags[$token->getName()]['stoppers'] as $stopper) {
              if (!isset($this->_searchedStoppers[$stopper])) {
                  $this->_searchedStoppers[$stopper] = 0;
              }
              ++$this->_searchedStoppers[$stopper];
          }
      }

      /**
       * Remove from searched stoppers
       *
       * Decrements the counter of every stopper of the token's tag.
       *
       * @param  Zend_Markup_Token $token
       * @return void
       */
      protected function _removeFromSearchedStoppers(Zend_Markup_Token $token)
      {
          $this->_checkTagDeclaration($token->getName());

          foreach ($this->_tags[$token->getName()]['stoppers'] as $stopper) {
              --$this->_searchedStoppers[$stopper];
          }
      }
  }