Bbcode.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Markup
  17. * @subpackage Parser
  18. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /**
  23. * @see Zend_Markup_TokenList
  24. */
  25. require_once 'Zend/Markup/TokenList.php';
  26. /**
  27. * @see Zend_Markup_Parser_ParserInterface
  28. */
  29. require_once 'Zend/Markup/Parser/ParserInterface.php';
  30. /**
  31. * @category Zend
  32. * @package Zend_Markup
  33. * @subpackage Parser
  34. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  35. * @license http://framework.zend.com/license/new-bsd New BSD License
  36. */
  37. class Zend_Markup_Parser_Bbcode implements Zend_Markup_Parser_ParserInterface
  38. {
  /**
   * Character that opens a tag.
   */
  const TAG_START = '[';

  /**
   * Character that closes a tag.
   */
  const TAG_END = ']';

  /**
   * Pseudo tag that parse() substitutes for every line break in the input.
   * The embedded NUL byte is part of the marker and must be kept as is.
   */
  const NEWLINE = "[newline\0]";

  // there is a parsing difference between the default tags and single tags:
  // _endTag() descends into a default tag, but only appends a single tag
  const TYPE_DEFAULT = 'default';
  const TYPE_SINGLE = 'single';

  /**
   * Token tree
   *
   * @var Zend_Markup_TokenList
   */
  protected $_tree;

  /**
   * Current token (the token new children are attached to)
   *
   * @var Zend_Markup_Token
   */
  protected $_current;

  /**
   * Source to tokenize, with line breaks already replaced by self::NEWLINE
   *
   * @var string
   */
  protected $_value = '';

  /**
   * Length of the value, as reported by strlen()
   *
   * @var int
   */
  protected $_valueLen = 0;

  /**
   * Current pointer (offset into $_value)
   *
   * @var int
   */
  protected $_pointer = 0;

  /**
   * The buffer: plain text collected until it is flushed into a text token
   *
   * @var string
   */
  protected $_buffer = '';

  /**
   * Scratch storage that parse() resets on every run. It is not read anywhere
   * in this class; it is declared here so that the assignment in parse() does
   * not create a dynamic property.
   *
   * @var array
   */
  protected $_temp = array();

  /**
   * The current tag we are working on (its source text, built up piece by piece)
   *
   * @var string
   */
  protected $_tag = '';

  /**
   * The current tag name
   *
   * @var string
   */
  protected $_name;

  /**
   * Attributes of the tag we are working on
   *
   * @var array
   */
  protected $_attributes = array();

  /**
   * Stoppers that we are searching for: stopper text => number of open tags
   * that registered it
   *
   * @var array
   */
  protected $_searchedStoppers = array();

  /**
   * Tag information: tag name => array('type' => ..., 'stoppers' => array(...)).
   * Names that are missing here get a default entry added on first use.
   *
   * @var array
   */
  protected $_tags = array(
    'Zend_Markup_Root' => array(
      'type' => self::TYPE_DEFAULT,
      'stoppers' => array(),
    ),
    '*' => array(
      'type' => self::TYPE_DEFAULT,
      'stoppers' => array(self::NEWLINE),
    ),
    'hr' => array(
      'type' => self::TYPE_SINGLE,
      'stoppers' => array(),
    ),
  );
  /**
   * Prepare the parsing of a bbcode string, the real parsing is done in {@link _parse()}
   *
   * Every line break ("\r\n", "\r" or "\n") is replaced by the self::NEWLINE
   * pseudo tag, the parser state is reset, and a root token named
   * 'Zend_Markup_Root' is added to a fresh token list before parsing starts.
   *
   * @param  string $value
   * @return Zend_Markup_TokenList
   * @throws Zend_Markup_Parser_Exception when $value is not a string or is the empty string
   */
  public function parse($value)
  {
    if (!is_string($value)) {
      /**
       * @see Zend_Markup_Parser_Exception
       */
      require_once 'Zend/Markup/Parser/Exception.php';
      throw new Zend_Markup_Parser_Exception('Value to parse should be a string.');
    }

    // compare against '' explicitly: empty() would also reject the valid input "0"
    if ('' === $value) {
      /**
       * @see Zend_Markup_Parser_Exception
       */
      require_once 'Zend/Markup/Parser/Exception.php';
      throw new Zend_Markup_Parser_Exception('Value to parse cannot be left empty.');
    }

    // turn every kind of line break into the NEWLINE pseudo tag, so that
    // _endTag() can treat it as a stopper or as plain text
    $this->_value = str_replace(array("\r\n", "\r", "\n"), self::NEWLINE, $value);

    // initialize variables; also drop tag state left over from a previous run
    $this->_tree = new Zend_Markup_TokenList();
    $this->_valueLen = strlen($this->_value);
    $this->_pointer = 0;
    $this->_buffer = '';
    $this->_temp = array();
    $this->_tag = '';
    $this->_attributes = array();
    $this->_searchedStoppers = array();

    $this->_current = new Zend_Markup_Token(
      '',
      Zend_Markup_Token::TYPE_NONE,
      'Zend_Markup_Root'
    );
    $this->_tree->addChild($this->_current);

    // start the parsing process
    $this->_parse();

    return $this->_tree;
  }
  /**
   * Parse a bbcode string
   *
   * Calls {@link _parseTagStart()} until the pointer has reached the end of
   * the value, then flushes any text left in the buffer into a final text
   * token below the current token.
   *
   * @return void
   */
  protected function _parse()
  {
    // just keep looping until the parsing is done
    do {
      $this->_parseTagStart();
    } while ($this->_pointer < $this->_valueLen);

    // compare against '' explicitly: empty() would silently drop a trailing "0"
    if ('' !== $this->_buffer) {
      // no tag start found, add the buffer to the current tag and stop parsing.
      // The argument order mirrors the text token built in _endTag():
      // empty name, no attributes, current token as parent.
      $token = new Zend_Markup_Token(
        $this->_buffer,
        Zend_Markup_Token::TYPE_NONE,
        '',
        array(),
        $this->_current
      );
      $this->_current->addChild($token);
      $this->_buffer = '';
    }
  }
  /**
   * Parse the start of a tag
   *
   * Looks for the next self::TAG_START at or after the pointer. The text in
   * front of it goes to the buffer and {@link _parseTag()} takes over. When
   * there is no further tag start, the rest of the value goes to the buffer
   * and the pointer is moved to the end.
   *
   * @return void
   */
  protected function _parseTagStart()
  {
    $tagOpensAt = strpos($this->_value, self::TAG_START, $this->_pointer);

    if (false !== $tagOpensAt) {
      // add the prepended text to the buffer
      $leadingLen = $tagOpensAt - $this->_pointer;
      if ($leadingLen > 0) {
        $this->_buffer .= substr($this->_value, $this->_pointer, $leadingLen);
      }
      $this->_pointer = $tagOpensAt;

      // we have the start of this tag, now we need its name
      $this->_parseTag();
      return;
    }

    // no more tags: everything that is left is plain text
    if ($this->_pointer < $this->_valueLen) {
      $this->_buffer .= substr($this->_value, $this->_pointer);
      $this->_pointer = $this->_valueLen;
    }
  }
  /**
   * Get the tag information
   *
   * Expects the pointer to be on a self::TAG_START character. Reads the tag
   * name up to the first whitespace, '=' or self::TAG_END and then decides
   * how to continue based on the character that ended the name.
   *
   * @return void
   */
  protected function _parseTag()
  {
    // get the tag's name
    $nameStart = $this->_pointer + 1;
    $nameLen = strcspn($this->_value, " \n\r\t=" . self::TAG_END, $nameStart);

    $this->_name = substr($this->_value, $nameStart, $nameLen);
    $this->_tag = self::TAG_START . $this->_name;
    $this->_pointer = $nameStart + $nameLen;

    if (!isset($this->_value[$this->_pointer])) {
      // the value ended right after the name: this is not a tag
      $this->_buffer .= $this->_tag;
      return;
    }

    $terminator = $this->_value[$this->_pointer];

    if (self::TAG_END === $terminator) {
      // ending the tag
      $this->_tag .= self::TAG_END;
      $this->_endTag();
      return;
    }

    if ('=' === $terminator) {
      // we are dealing with a name-attribute, e.g. [url=...]
      $this->_tag .= '=';
      ++$this->_pointer;

      $value = $this->_parseAttributeValue();
      if (false === $value) {
        // this isn't a tag, just end it right here, right now
        $this->_buffer .= $this->_tag;
        return;
      }

      $this->_attributes[$this->_name] = $value;
    }

    // the tag didn't end, so get the rest of the tag.
    $this->_parseAttributes();
  }
  /**
   * Parse attributes
   *
   * Walks through the rest of the tag: whitespace is copied to the tag text,
   * anything else is handed to {@link _parseAttribute()}, and self::TAG_END
   * finishes the tag. When the value ends before a self::TAG_END shows up,
   * the tag is ended anyway.
   *
   * @return void
   */
  protected function _parseAttributes()
  {
    while ($this->_pointer < $this->_valueLen) {
      $char = $this->_value[$this->_pointer];

      if (self::TAG_END === $char) {
        // end the tag and return
        $this->_tag .= self::TAG_END;
        $this->_endTag();
        return;
      }

      if (ctype_space($char)) {
        // Copy the whole run of whitespace in one go. The length is measured
        // from the current character; the previous code measured from the
        // next one and passed "$len - 1" to substr(), which is -1 for a single
        // space and therefore appended almost all remaining input to the tag.
        // max() guarantees progress for characters that ctype_space() accepts
        // but the mask below does not list.
        $len = max(1, strspn($this->_value, " \t\n\r\x0B\x0C", $this->_pointer));
        $this->_tag .= substr($this->_value, $this->_pointer, $len);
        $this->_pointer += $len;
      } else {
        $this->_parseAttribute();
      }
    }

    // end tags without ']'
    $this->_endTag();
  }
  /**
   * Parse an attribute
   *
   * Reads an attribute name up to the next '=' or self::TAG_END. With an '='
   * the value is read through {@link _parseAttributeValue()} and stored under
   * the trimmed name; note that this stores false when the value could not be
   * read. Without an '=' the text is only appended to the tag text.
   *
   * @return void
   */
  protected function _parseAttribute()
  {
    // first find the =, or a ] when the attribute is empty
    $nameLen = strcspn($this->_value, "=" . self::TAG_END, $this->_pointer);

    $rawName = substr($this->_value, $this->_pointer, $nameLen);
    $this->_pointer += $nameLen;

    $hasValue = isset($this->_value[$this->_pointer])
      && ($this->_value[$this->_pointer] == '=');

    if (!$hasValue) {
      // empty attribute
      $this->_tag .= $rawName;
      return;
    }

    // step over the '=' and read the value
    ++$this->_pointer;
    $this->_tag .= $rawName . '=';

    $attributeValue = $this->_parseAttributeValue();
    $this->_attributes[trim($rawName)] = $attributeValue;
  }
  /**
   * Parse the value from an attribute
   *
   * A value that starts with ' or " runs up to the next occurrence of that
   * same quote character; a value without quotes runs up to the next
   * whitespace or self::TAG_END. The consumed text is appended to the tag
   * text and the pointer is moved behind it.
   *
   * @return string|false the value, or false when a quoted value is never closed
   */
  protected function _parseAttributeValue()
  {
    $firstChar = substr($this->_value, $this->_pointer, 1);

    if (($firstChar != "'") && ($firstChar != '"')) {
      // unquoted: find a tag end or a whitespace
      $unquotedLen = strcspn($this->_value, " \n\r\t" . self::TAG_END, $this->_pointer);
      $value = substr($this->_value, $this->_pointer, $unquotedLen);

      $this->_pointer += $unquotedLen;
      $this->_tag .= $value;

      return $value;
    }

    // quoted: just find the matching delimiter
    $quote = $this->_value[$this->_pointer];
    $innerLen = strcspn($this->_value, $quote, $this->_pointer + 1);
    $closingAt = $this->_pointer + $innerLen + 1;

    if ($closingAt >= $this->_valueLen) {
      // the closing quote is missing: only step over the opening quote
      $this->_pointer++;
      $this->_tag .= $quote;
      return false;
    }

    $value = substr($this->_value, $this->_pointer + 1, $innerLen);

    // continue right behind the closing quote
    $this->_pointer = $closingAt + 1;
    $this->_tag .= $quote . $value . $quote;

    return $value;
  }
  /**
   * End the found tag
   *
   * Decides what the tag collected in $_tag / $_name / $_attributes means:
   *  - an empty name, a newline that stops nothing, or a closing tag that
   *    stops nothing is kept as plain text in the buffer;
   *  - a stopper of the current token closes that token;
   *  - a stopper that an ancestor is waiting for closes that ancestor and
   *    re-opens clones of the tokens in between below the ancestor's parent;
   *  - a single-type tag is added as a child;
   *  - any other tag is added as a child and becomes the current token.
   *
   * The pointer is moved one position forward and the collected attributes
   * are cleared on every path.
   *
   * @return void
   */
  protected function _endTag()
  {
    // work out whether the tag is only plain text
    $literal = null;
    if (strlen($this->_name) == 0) {
      // rule out empty tags (just '[]')
      $literal = $this->_tag;
    } elseif (!$this->_isStopper($this->_tag, true)) {
      // a newline or a closing tag nobody is waiting for
      if ($this->_tag == self::NEWLINE) {
        $literal = "\n";
      } elseif ($this->_name[0] == '/') {
        $literal = $this->_tag;
      }
    }

    if (null !== $literal) {
      $this->_buffer .= $literal;
      // drop attributes collected for this non-tag, otherwise they would be
      // attached to the next real tag
      $this->_attributes = array();
      ++$this->_pointer;
      return;
    }

    // first add the buffer as token and clear the buffer; compare against ''
    // explicitly because empty() would drop the text "0"
    if ('' !== $this->_buffer) {
      $token = new Zend_Markup_Token(
        $this->_buffer,
        Zend_Markup_Token::TYPE_NONE,
        '',
        array(),
        $this->_current
      );
      $this->_current->addChild($token);
      $this->_buffer = '';
    }

    $attributes = $this->_attributes;

    // check if this tag is a stopper
    if ($this->_isStopper($this->_tag)) {
      // we got a stopper, end the current tag and get back to the parent
      $this->_current->setStopper($this->_tag);
      $this->_removeFromSearchedStoppers($this->_current);
      $this->_current = $this->_current->getParent();
    } elseif (!empty($this->_searchedStoppers[$this->_tag])) {
      // the stopper belongs to a token further up the tree: walk up until
      // we reach it, remembering a clone of every token we pass
      $oldItems = array();
      while (!in_array($this->_tag, $this->_tags[$this->_current->getName()]['stoppers'])) {
        $oldItems[] = clone $this->_current;
        $this->_current = $this->_current->getParent();
      }

      // found the token this stopper closes
      $this->_current->setStopper($this->_tag);
      $this->_removeFromSearchedStoppers($this->_current);
      $this->_current = $this->_current->getParent();

      // add those old items again, outermost first, and descend into them
      foreach (array_reverse($oldItems) as $token) {
        /* @var $token Zend_Markup_Token */
        $this->_current->addChild($token);
        $token->setParent($this->_current);
        $this->_current = $token;
      }
    } elseif ($this->_getType($this->_name) == self::TYPE_SINGLE) {
      // a single tag has no content: add it, but stay in the current token
      $token = new Zend_Markup_Token(
        $this->_tag,
        Zend_Markup_Token::TYPE_TAG,
        $this->_name,
        $attributes,
        $this->_current
      );
      $this->_current->addChild($token);
    } else {
      // add the tag and jump into it
      $token = new Zend_Markup_Token(
        $this->_tag,
        Zend_Markup_Token::TYPE_TAG,
        $this->_name,
        $attributes,
        $this->_current
      );
      $this->_current->addChild($token);
      $this->_current = $token;
      $this->_addToSearchedStoppers($token);
    }

    ++$this->_pointer;
    $this->_attributes = array();
  }
  /**
   * Check the tag's type
   *
   * A name without an entry in $_tags is registered first as a default-type
   * tag whose stoppers are "[/name]" and "[/]".
   *
   * @param string $name
   * @return string
   */
  protected function _getType($name)
  {
    if (isset($this->_tags[$name])) {
      return $this->_tags[$name]['type'];
    }

    // unknown tag: give it the default row
    $closingTag = self::TAG_START . '/' . $name . self::TAG_END;
    $genericClosingTag = self::TAG_START . '/' . self::TAG_END;

    $this->_tags[$name] = array(
      'type' => self::TYPE_DEFAULT,
      'stoppers' => array($closingTag, $genericClosingTag)
    );

    return $this->_tags[$name]['type'];
  }
  /**
   * Check if the tag is a stopper
   *
   * A tag is a stopper when it is listed in the stoppers of the current
   * token. With $searched set, it also counts as a stopper when some open
   * tag has registered it in $_searchedStoppers. A current token whose name
   * has no entry in $_tags is registered first with the default stoppers
   * "[/name]" and "[/]".
   *
   * @param string $tag      tag text to test
   * @param bool   $searched also accept stoppers that other open tags are waiting for
   * @return bool
   */
  protected function _isStopper($tag, $searched = false)
  {
    $currentName = $this->_current->getName();

    // first check if the current tag has registered stoppers
    if (!isset($this->_tags[$currentName])) {
      $this->_tags[$currentName] = array(
        'type' => self::TYPE_DEFAULT,
        'stoppers' => array(
          self::TAG_START . '/' . $currentName . self::TAG_END,
          self::TAG_START . '/' . self::TAG_END
        )
      );
    }

    // and now check if it is a stopper of the current token
    if (in_array($tag, $this->_tags[$currentName]['stoppers'])) {
      return true;
    }

    // look up the tag that was passed in; the previous code read $this->_tag
    // here and so ignored the $tag argument for this part of the check
    return $searched && !empty($this->_searchedStoppers[$tag]);
  }
  /**
   * Add to searched stoppers
   *
   * Raises the counter in $_searchedStoppers for every stopper of the given
   * token. A token name without an entry in $_tags is registered first with
   * the default stoppers "[/name]" and "[/]".
   *
   * @param Zend_Markup_Token $token
   * @return void
   */
  protected function _addToSearchedStoppers(Zend_Markup_Token $token)
  {
    $name = $token->getName();

    if (!isset($this->_tags[$name])) {
      $closingTag = self::TAG_START . '/' . $name . self::TAG_END;
      $genericClosingTag = self::TAG_START . '/' . self::TAG_END;

      $this->_tags[$name] = array(
        'type' => self::TYPE_DEFAULT,
        'stoppers' => array($closingTag, $genericClosingTag)
      );
    }

    foreach ($this->_tags[$name]['stoppers'] as $stopper) {
      // start counting at one for a stopper nobody waited for so far
      $this->_searchedStoppers[$stopper] = isset($this->_searchedStoppers[$stopper])
        ? $this->_searchedStoppers[$stopper] + 1
        : 1;
    }
  }
  /**
   * Remove from searched stoppers
   *
   * Lowers the counter in $_searchedStoppers for every stopper of the given
   * token. The token's name is expected to have an entry in $_tags already.
   *
   * @param Zend_Markup_Token $token
   * @return void
   */
  protected function _removeFromSearchedStoppers(Zend_Markup_Token $token)
  {
    $name = $token->getName();
    $stoppers = $this->_tags[$name]['stoppers'];

    foreach ($stoppers as $stopper) {
      // keep the pre-decrement operator: it is not the same as "- 1" when
      // the counter does not exist yet
      --$this->_searchedStoppers[$stopper];
    }
  }
  510. }