Decoder.php 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Json
  17. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. */
  20. /**
  21. * @see Zend_Json
  22. */
  23. require_once 'Zend/Json.php';
  /**
   * Decode JSON encoded string to PHP variable constructs
   *
   * The decoder is a small hand-written tokenizer plus a recursive-descent
   * parser. It is deliberately lenient: truncated containers, unterminated
   * strings and trailing input after the first value are tolerated rather
   * than rejected.
   *
   * @category Zend
   * @package Zend_Json
   * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
   * @license http://framework.zend.com/license/new-bsd New BSD License
   */
  class Zend_Json_Decoder
  {
      /**
       * Parse tokens used to decode the JSON object. These are not
       * for public consumption, they are just used internally to the
       * class.
       */
      const EOF      = 0;
      const DATUM    = 1;
      const LBRACE   = 2;
      const LBRACKET = 3;
      const RBRACE   = 4;
      const RBRACKET = 5;
      const COMMA    = 6;
      const COLON    = 7;

      /**
       * The source being decoded (after \uXXXX escapes have been expanded)
       *
       * @var string
       */
      protected $_source;

      /**
       * Caches the source length (in bytes)
       *
       * @var int
       */
      protected $_sourceLength;

      /**
       * The byte offset within the source being decoded
       *
       * @var int
       */
      protected $_offset;

      /**
       * The current token being considered in the parser cycle
       *
       * @var int
       */
      protected $_token;

      /**
       * The PHP value carried by the current token when it is a DATUM
       * (string, number, boolean or null); null for every other token.
       *
       * @var mixed
       */
      protected $_tokenValue;

      /**
       * Flag indicating how objects should be decoded
       *
       * @var int
       * @access protected
       */
      protected $_decodeType;

      /**
       * Constructor
       *
       * Expands \uXXXX escapes in the source, resets the tokenizer state and
       * reads the first token so that {@link _decodeValue()} can be called
       * immediately.
       *
       * @param string $source String source to decode
       * @param int $decodeType How objects should be decoded -- see
       * {@link Zend_Json::TYPE_ARRAY} and {@link Zend_Json::TYPE_OBJECT} for
       * valid values; anything else falls back to TYPE_ARRAY
       */
      protected function __construct($source, $decodeType)
      {
          // Set defaults
          $this->_source       = self::decodeUnicodeString($source);
          $this->_sourceLength = strlen($this->_source);
          $this->_token        = self::EOF;
          $this->_offset       = 0;

          // Normalize and set $decodeType. The comparison is intentionally
          // loose so that callers passing numeric strings keep working.
          if (!in_array($decodeType, array(Zend_Json::TYPE_ARRAY, Zend_Json::TYPE_OBJECT))) {
              $decodeType = Zend_Json::TYPE_ARRAY;
          }
          $this->_decodeType = $decodeType;

          // Set pointer at first token
          $this->_getNextToken();
      }

      /**
       * Decode a JSON source string
       *
       * Decodes a JSON encoded string. The value returned will be one of the
       * following:
       * - integer
       * - float
       * - string
       * - boolean
       * - null
       * - StdClass
       * - array
       * - array of one or more of the above types
       *
       * By default, decoded objects will be returned as associative arrays; to
       * return a StdClass object instead, pass {@link Zend_Json::TYPE_OBJECT} to
       * the $objectDecodeType parameter.
       *
       * Only the first JSON value in the source is decoded; any input that
       * follows it is not examined.
       *
       * @static
       * @access public
       * @param string $source String to be decoded
       * @param int $objectDecodeType How objects should be decoded; should be
       * either {@link Zend_Json::TYPE_ARRAY} or
       * {@link Zend_Json::TYPE_OBJECT}; defaults to TYPE_ARRAY
       * @return mixed
       * @throws Zend_Json_Exception if $source is null or not a string, or if
       * the tokenizer/parser rejects the input
       */
      public static function decode($source = null, $objectDecodeType = Zend_Json::TYPE_ARRAY)
      {
          if (null === $source) {
              require_once 'Zend/Json/Exception.php';
              throw new Zend_Json_Exception('Must specify JSON encoded source for decoding');
          }
          if (!is_string($source)) {
              require_once 'Zend/Json/Exception.php';
              throw new Zend_Json_Exception('Can only decode JSON encoded strings');
          }

          $decoder = new self($source, $objectDecodeType);
          return $decoder->_decodeValue();
      }

      /**
       * Recursive driving routine: decodes the value that starts at the
       * current token and leaves the tokenizer positioned after it.
       *
       * A token that cannot start a value (e.g. a comma) yields null and is
       * NOT consumed.
       *
       * @return mixed
       */
      protected function _decodeValue()
      {
          switch ($this->_token) {
              case self::DATUM:
                  $result = $this->_tokenValue;
                  $this->_getNextToken();
                  return $result;

              case self::LBRACE:
                  return $this->_decodeObject();

              case self::LBRACKET:
                  return $this->_decodeArray();

              default:
                  return null;
          }
      }

      /**
       * Decodes an object of the form:
       * { "attribute" : value, "attribute2" : value,...}
       *
       * Every member, including a "__className" member that an encoder may
       * have added, is decoded as an ordinary key/value pair; no class is
       * instantiated from it here.
       *
       * Decodes to either an array or StdClass object, based on the value of
       * {@link $_decodeType}. If invalid $_decodeType present, returns as an
       * array.
       *
       * @return array|StdClass
       * @throws Zend_Json_Exception on a missing key, ":" or ","
       */
      protected function _decodeObject()
      {
          $members = array();
          $tok     = $this->_getNextToken(); // Move past the '{'

          while ($tok && $tok != self::RBRACE) {
              if ($tok != self::DATUM || !is_string($this->_tokenValue)) {
                  require_once 'Zend/Json/Exception.php';
                  throw new Zend_Json_Exception('Missing key in object encoding: ' . $this->_source);
              }
              $key = $this->_tokenValue;

              if ($this->_getNextToken() != self::COLON) {
                  require_once 'Zend/Json/Exception.php';
                  throw new Zend_Json_Exception('Missing ":" in object encoding: ' . $this->_source);
              }

              // Step onto the first token of the value, then decode it
              $this->_getNextToken();
              $members[$key] = $this->_decodeValue();

              $tok = $this->_token;
              if ($tok == self::RBRACE) {
                  break;
              }
              if ($tok != self::COMMA) {
                  require_once 'Zend/Json/Exception.php';
                  throw new Zend_Json_Exception('Missing "," in object encoding: ' . $this->_source);
              }
              $tok = $this->_getNextToken();
          }

          if ($this->_decodeType == Zend_Json::TYPE_OBJECT) {
              // Create new StdClass and populate with $members
              $result = new StdClass();
              foreach ($members as $key => $value) {
                  $result->{$key} = $value;
              }
          } else {
              $result = $members;
          }

          // Move past the '}'
          $this->_getNextToken();
          return $result;
      }

      /**
       * Decodes a JSON array format:
       * [element, element2,...,elementN]
       *
       * @return array
       * @throws Zend_Json_Exception on a missing ","
       */
      protected function _decodeArray()
      {
          $result = array();
          $tok    = $this->_getNextToken(); // Move past the '['

          while ($tok && $tok != self::RBRACKET) {
              $result[] = $this->_decodeValue();

              $tok = $this->_token;
              if ($tok == self::RBRACKET || !$tok) {
                  // Closing bracket, or the input simply ended
                  break;
              }
              if ($tok != self::COMMA) {
                  require_once 'Zend/Json/Exception.php';
                  throw new Zend_Json_Exception('Missing "," in array encoding: ' . $this->_source);
              }
              $tok = $this->_getNextToken();
          }

          // Move past the ']'
          $this->_getNextToken();
          return $result;
      }

      /**
       * Advances the offset past any whitespace at the current position
       *
       * The skipped set is tab, backspace, form feed, line feed, carriage
       * return and space.
       *
       * @return void
       */
      protected function _eatWhitespace()
      {
          if ($this->_offset < $this->_sourceLength) {
              $this->_offset += strspn($this->_source, "\t\x08\x0C\n\r ", $this->_offset);
          }
      }

      /**
       * Retrieves the next token from the source stream
       *
       * Updates {@link $_token}, {@link $_tokenValue} and {@link $_offset}.
       *
       * @return int Token constant value specified in class definition
       * @throws Zend_Json_Exception on an unrecognised token, an illegal
       * escape sequence or a malformed number
       */
      protected function _getNextToken()
      {
          $this->_token      = self::EOF;
          $this->_tokenValue = null;
          $this->_eatWhitespace();

          if ($this->_offset >= $this->_sourceLength) {
              return self::EOF;
          }

          $start = $this->_offset;
          $chr   = $this->_source[$start];

          // Single-character structural tokens
          $punctuation = array(
              '{' => self::LBRACE,
              '}' => self::RBRACE,
              '[' => self::LBRACKET,
              ']' => self::RBRACKET,
              ',' => self::COMMA,
              ':' => self::COLON,
          );
          if (isset($punctuation[$chr])) {
              $this->_token  = $punctuation[$chr];
              $this->_offset = $start + 1;
              return $this->_token;
          }

          if ($chr === '"') {
              return $this->_readString($start);
          }

          // Keyword literals, selected by their first character
          $literals = array(
              't' => array('true', true),
              'f' => array('false', false),
              'n' => array('null', null),
          );
          if (isset($literals[$chr])) {
              list($word, $value) = $literals[$chr];
              if (substr($this->_source, $start, strlen($word)) !== $word) {
                  // Reject immediately instead of advancing past a keyword
                  // that is not actually there.
                  require_once 'Zend/Json/Exception.php';
                  throw new Zend_Json_Exception('Illegal Token');
              }
              $this->_token      = self::DATUM;
              $this->_tokenValue = $value;
              $this->_offset     = $start + strlen($word);
              return $this->_token;
          }

          if ($chr == '-' || $chr == '.' || ($chr >= '0' && $chr <= '9')) {
              return $this->_readNumber($start);
          }

          require_once 'Zend/Json/Exception.php';
          throw new Zend_Json_Exception('Illegal Token');
      }

      /**
       * Reads a double-quoted string token and resolves its backslash escapes
       *
       * A string that is not terminated before the end of the source is
       * accepted with whatever content was collected.
       *
       * @param int $start Offset of the opening double quote
       * @return int Always self::DATUM
       * @throws Zend_Json_Exception on an unknown escape sequence
       */
      protected function _readString($start)
      {
          $source  = $this->_source;
          $length  = $this->_sourceLength;
          $escapes = array(
              '"'  => '"',
              '\\' => '\\',
              '/'  => '/',
              'b'  => chr(8),
              'f'  => chr(12),
              'n'  => chr(10),
              'r'  => chr(13),
              't'  => chr(9),
              '\'' => '\'',
          );

          $value = '';
          $i     = $start + 1;
          while ($i < $length) {
              $chr = $source[$i];
              if ($chr === '"') {
                  break;
              }
              if ($chr === '\\') {
                  $i++;
                  if ($i >= $length) {
                      // Dangling backslash at end of input
                      break;
                  }
                  $chr = $source[$i];
                  if (!isset($escapes[$chr])) {
                      require_once 'Zend/Json/Exception.php';
                      throw new Zend_Json_Exception("Illegal escape "
                          . "sequence '" . $chr . "'");
                  }
                  $value .= $escapes[$chr];
              } else {
                  $value .= $chr;
              }
              $i++;
          }

          $this->_token      = self::DATUM;
          $this->_tokenValue = $value;
          $this->_offset     = $i + 1; // Consume the closing quote
          return $this->_token;
      }

      /**
       * Reads a numeric token
       *
       * The value becomes an int when the integer and float interpretations
       * compare equal, otherwise a float.
       *
       * @param int $start Offset of the first character of the number
       * @return int The resulting token constant
       * @throws Zend_Json_Exception on octal-looking or non-numeric input
       */
      protected function _readNumber($start)
      {
          if (preg_match('/-?([0-9])*(\.[0-9]*)?((e|E)((-|\+)?)[0-9]+)?/s',
                  $this->_source, $matches, PREG_OFFSET_CAPTURE, $start)
              && $matches[0][1] == $start
          ) {
              $datum = $matches[0][0];

              if (!is_numeric($datum)) {
                  require_once 'Zend/Json/Exception.php';
                  throw new Zend_Json_Exception("Illegal number format: $datum");
              }
              if (preg_match('/^0\d+$/', $datum)) {
                  require_once 'Zend/Json/Exception.php';
                  throw new Zend_Json_Exception("Octal notation not supported by JSON (value: $datum)");
              }

              $val  = intval($datum);
              $fVal = floatval($datum);
              $this->_tokenValue = ($val == $fVal ? $val : $fVal);

              $this->_token  = self::DATUM;
              $this->_offset = $start + strlen($datum);
          }

          return $this->_token;
      }

      /**
       * Decode Unicode Characters from \u0000 ASCII syntax.
       *
       * Walks the input byte by byte and builds a UTF-8 string:
       * - \uXXXX escapes are replaced by the UTF-8 encoding of the character;
       *   a high surrogate immediately followed by a low surrogate escape is
       *   combined into a single 4-byte sequence
       * - bytes 0x20-0x7F are copied unchanged
       * - a UTF-8 lead byte is copied together with the following bytes of
       *   its sequence (no validation of the continuation bytes is done)
       * - every other byte (control characters below 0x20, stray
       *   continuation bytes, 0xFE/0xFF) is dropped
       *
       * This algorithm was originally developed for the
       * Solar Framework by Paul M. Jones
       *
       * @link http://solarphp.com/
       * @link http://svn.solarphp.com/core/trunk/Solar/Json.php
       * @param string $chrs
       * @return string
       */
      public static function decodeUnicodeString($chrs)
      {
          $utf8   = '';
          $length = strlen($chrs);

          for ($i = 0; $i < $length; $i++) {
              $ord = ord($chrs[$i]);

              // Escaped unicode character (checked first: '\' is ASCII too)
              if ($chrs[$i] === '\\'
                  && preg_match('/^\\\\u([0-9A-F]{4})/i', substr($chrs, $i, 6), $match)
              ) {
                  $unit = hexdec($match[1]);

                  // Surrogate pair: merge both escapes into one code point
                  if ($unit >= 0xD800 && $unit <= 0xDBFF
                      && preg_match('/^\\\\u(D[C-F][0-9A-F]{2})/i', substr($chrs, $i + 6, 6), $low)
                  ) {
                      $codePoint = 0x10000
                                 + (($unit - 0xD800) << 10)
                                 + (hexdec($low[1]) - 0xDC00);
                      $utf8 .= chr(0xF0 | ($codePoint >> 18))
                             . chr(0x80 | (($codePoint >> 12) & 0x3F))
                             . chr(0x80 | (($codePoint >> 6) & 0x3F))
                             . chr(0x80 | ($codePoint & 0x3F));
                      $i += 11;
                      continue;
                  }

                  // Single UTF-16 code unit, high byte first
                  $utf8 .= self::_utf162utf8(chr($unit >> 8) . chr($unit & 0xFF));
                  $i += 5;
                  continue;
              }

              if ($ord >= 0x20 && $ord <= 0x7F) {
                  $utf8 .= $chrs[$i];
                  continue;
              }

              // Sequence length implied by the lead byte, see
              // http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
              if (($ord & 0xE0) == 0xC0) {
                  $size = 2; // U-00000080 - U-000007FF, mask 110XXXXX
              } elseif (($ord & 0xF0) == 0xE0) {
                  $size = 3; // U-00000800 - U-0000FFFF, mask 1110XXXX
              } elseif (($ord & 0xF8) == 0xF0) {
                  $size = 4; // U-00010000 - U-001FFFFF, mask 11110XXX
              } elseif (($ord & 0xFC) == 0xF8) {
                  $size = 5; // U-00200000 - U-03FFFFFF, mask 111110XX
              } elseif (($ord & 0xFE) == 0xFC) {
                  $size = 6; // U-04000000 - U-7FFFFFFF, mask 1111110X
              } else {
                  continue;  // not a byte this routine keeps
              }

              $utf8 .= substr($chrs, $i, $size);
              $i    += $size - 1;
          }

          return $utf8;
      }

      /**
       * Convert a string from one UTF-16 char to one UTF-8 char.
       *
       * Uses mb_convert_encoding when available, otherwise a PHP-only
       * conversion for installations that lack the multibyte string
       * extension. Both paths read the two input bytes as big-endian.
       *
       * This method is from the Solar Framework by Paul M. Jones
       *
       * @link http://solarphp.com
       * @param string $utf16 UTF-16 character (two bytes, high byte first)
       * @return string UTF-8 character
       */
      protected static function _utf162utf8($utf16)
      {
          // Check for mb extension otherwise do by hand.
          if (function_exists('mb_convert_encoding')) {
              // Explicit byte order, matching the manual path below
              return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16BE');
          }

          $bytes = (ord($utf16[0]) << 8) | ord($utf16[1]);

          if ($bytes <= 0x7F) {
              // ASCII range: one byte
              return chr($bytes);
          }

          if ($bytes <= 0x07FF) {
              // return a 2-byte UTF-8 character
              // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
              return chr(0xC0 | (($bytes >> 6) & 0x1F))
                   . chr(0x80 | ($bytes & 0x3F));
          }

          // return a 3-byte UTF-8 character
          // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
          return chr(0xE0 | (($bytes >> 12) & 0x0F))
               . chr(0x80 | (($bytes >> 6) & 0x3F))
               . chr(0x80 | ($bytes & 0x3F));
      }
  }