Memory.php 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Storage
  18. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. */
  21. /** Zend_Search_Lucene_Storage_File */
  22. require_once 'Zend/Search/Lucene/Storage/File.php';
  23. /**
  24. * @category Zend
  25. * @package Zend_Search_Lucene
  26. * @subpackage Storage
  27. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  28. * @license http://framework.zend.com/license/new-bsd New BSD License
  29. */
  class Zend_Search_Lucene_Storage_File_Memory extends Zend_Search_Lucene_Storage_File
  {
      /**
       * Raw bytes of the in-memory "file".
       *
       * @var string
       */
      private $_data;

      /**
       * Current read position, as a byte offset into $_data.
       *
       * @var integer
       */
      private $_position = 0;

      /**
       * Object constructor.
       *
       * @param string $data Initial content of the in-memory file
       */
      public function __construct($data)
      {
          $this->_data = $data;
      }

      /**
       * Reads $length number of bytes at the current position in the
       * file and advances the file pointer.
       *
       * The pointer is advanced by $length even if fewer bytes were available.
       *
       * @param integer $length
       * @return string
       */
      protected function _fread($length = 1)
      {
          $returnValue = substr($this->_data, $this->_position, $length);
          $this->_position += $length;

          return $returnValue;
      }

      /**
       * Sets the file position indicator.
       *
       * The new position, measured in bytes from the beginning of the file,
       * is obtained by adding offset to the position specified by whence,
       * whose values are defined as follows:
       * SEEK_SET - Set position equal to offset bytes.
       * SEEK_CUR - Set position to current location plus offset.
       * SEEK_END - Set position to end-of-file plus offset. (To move to
       * a position before the end-of-file, you need to pass a negative value
       * in offset.)
       *
       * Upon success, returns 0; otherwise (unknown $whence, or a resulting
       * position before the beginning of the file), returns -1 and leaves
       * the position unchanged.
       *
       * @param integer $offset
       * @param integer $whence
       * @return integer
       */
      public function seek($offset, $whence=SEEK_SET)
      {
          switch ($whence) {
              case SEEK_SET:
                  $newPosition = $offset;
                  break;

              case SEEK_CUR:
                  $newPosition = $this->_position + $offset;
                  break;

              case SEEK_END:
                  $newPosition = strlen($this->_data) + $offset;
                  break;

              default:
                  // Unknown origin: report failure instead of silently ignoring it
                  return -1;
          }

          if ($newPosition < 0) {
              // A negative position would make substr() read from the end of the data
              return -1;
          }

          $this->_position = $newPosition;

          return 0;
      }

      /**
       * Get file position.
       *
       * @return integer
       */
      public function tell()
      {
          return $this->_position;
      }

      /**
       * Flush output.
       *
       * Nothing is buffered for a memory file, so this always succeeds.
       *
       * @return boolean
       */
      public function flush()
      {
          // Do nothing
          return true;
      }

      /**
       * Writes $length number of bytes (all, if $length===null) to the end
       * of the file and moves the file pointer to the new end of the file.
       *
       * @param string $data
       * @param integer $length
       */
      protected function _fwrite($data, $length=null)
      {
          // We do not need to check if file position points to the end of "file".
          // Only append operation is supported now

          if ($length !== null) {
              $this->_data .= substr($data, 0, $length);
          } else {
              $this->_data .= $data;
          }

          $this->_position = strlen($this->_data);
      }

      /**
       * Lock file
       *
       * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock)
       *
       * Both arguments are ignored by this implementation; the call always
       * reports success.
       *
       * @param integer $lockType
       * @param boolean $nonBlockinLock
       * @return boolean
       */
      public function lock($lockType, $nonBlockinLock = false)
      {
          // Memory files can't be shared
          // do nothing
          return true;
      }

      /**
       * Unlock file
       */
      public function unlock()
      {
          // Memory files can't be shared
          // do nothing
      }

      /**
       * Reads a byte from the current position in the file
       * and advances the file pointer.
       *
       * @return integer
       */
      public function readByte()
      {
          return ord($this->_data[$this->_position++]);
      }

      /**
       * Writes a byte to the end of the file.
       *
       * @param integer $byte
       * @return integer Always 1
       */
      public function writeByte($byte)
      {
          // We do not need to check if file position points to the end of "file".
          // Only append operation is supported now

          $this->_data .= chr($byte);
          $this->_position = strlen($this->_data);

          return 1;
      }

      /**
       * Read num bytes from the current position in the file
       * and advances the file pointer.
       *
       * @param integer $num
       * @return string
       */
      public function readBytes($num)
      {
          $returnValue = substr($this->_data, $this->_position, $num);
          $this->_position += $num;

          return $returnValue;
      }

      /**
       * Writes num bytes of data (all, if $num===null) to the end
       * of the string.
       *
       * @param string $data
       * @param integer $num
       */
      public function writeBytes($data, $num=null)
      {
          // We do not need to check if file position points to the end of "file".
          // Only append operation is supported now

          if ($num !== null) {
              $this->_data .= substr($data, 0, $num);
          } else {
              $this->_data .= $data;
          }

          $this->_position = strlen($this->_data);
      }

      /**
       * Reads an integer from the current position in the file
       * and advances the file pointer.
       *
       * The four bytes are combined most significant byte first.
       *
       * @return integer
       */
      public function readInt()
      {
          $str = substr($this->_data, $this->_position, 4);
          $this->_position += 4;

          return  ord($str[0]) << 24 |
                  ord($str[1]) << 16 |
                  ord($str[2]) << 8  |
                  ord($str[3]);
      }

      /**
       * Writes an integer to the end of file.
       *
       * The value is written as four bytes, most significant byte first.
       *
       * @param integer $value
       */
      public function writeInt($value)
      {
          // We do not need to check if file position points to the end of "file".
          // Only append operation is supported now

          settype($value, 'integer');
          $this->_data .= chr($value>>24 & 0xFF) .
                          chr($value>>16 & 0xFF) .
                          chr($value>>8  & 0xFF) .
                          chr($value     & 0xFF);

          $this->_position = strlen($this->_data);
      }

      /**
       * Returns a long integer from the current position in the file
       * and advances the file pointer.
       *
       * @return integer
       * @throws Zend_Search_Lucene_Exception
       */
      public function readLong()
      {
          /**
           * Check, that we work in 64-bit mode.
           * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
           */
          if (PHP_INT_SIZE > 4) {
              $str = substr($this->_data, $this->_position, 8);
              $this->_position += 8;

              return  ord($str[0]) << 56 |
                      ord($str[1]) << 48 |
                      ord($str[2]) << 40 |
                      ord($str[3]) << 32 |
                      ord($str[4]) << 24 |
                      ord($str[5]) << 16 |
                      ord($str[6]) << 8  |
                      ord($str[7]);
          } else {
              return $this->readLong32Bit();
          }
      }

      /**
       * Writes long integer to the end of file
       *
       * @param integer $value
       * @throws Zend_Search_Lucene_Exception
       */
      public function writeLong($value)
      {
          // We do not need to check if file position points to the end of "file".
          // Only append operation is supported now

          /**
           * Check, that we work in 64-bit mode.
           * fseek() and ftell() use long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
           */
          if (PHP_INT_SIZE > 4) {
              settype($value, 'integer');
              $this->_data .= chr($value>>56 & 0xFF) .
                              chr($value>>48 & 0xFF) .
                              chr($value>>40 & 0xFF) .
                              chr($value>>32 & 0xFF) .
                              chr($value>>24 & 0xFF) .
                              chr($value>>16 & 0xFF) .
                              chr($value>>8  & 0xFF) .
                              chr($value     & 0xFF);
          } else {
              $this->writeLong32Bit($value);
          }

          $this->_position = strlen($this->_data);
      }

      /**
       * Returns a long integer from the current position in the file,
       * advances the file pointer and return it as float (for 32-bit platforms).
       *
       * The value is read as two consecutive integers: high word, then low word.
       *
       * @return integer|float
       * @throws Zend_Search_Lucene_Exception
       */
      public function readLong32Bit()
      {
          $wordHigh = $this->readInt();
          $wordLow  = $this->readInt();

          if ($wordHigh & (int)0x80000000) {
              // It's a negative value since the highest bit is set
              if ($wordHigh == (int)0xFFFFFFFF  &&  ($wordLow & (int)0x80000000)) {
                  return $wordLow;
              } else {
                  require_once 'Zend/Search/Lucene/Exception.php';
                  throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
              }
          }

          if ($wordLow < 0) {
              // Value is larger than 0x7FFF FFFF. Represent low word as float.
              $wordLow &= 0x7FFFFFFF;
              $wordLow += (float)0x80000000;
          }

          if ($wordHigh == 0) {
              // Return value as integer if possible
              return $wordLow;
          }

          return $wordHigh*(float)0x100000000/* 0x00000001 00000000 */ + $wordLow;
      }

      /**
       * Writes long integer to the end of file (32-bit platforms implementation)
       *
       * The value is written as two consecutive integers: high word, then low word.
       *
       * @param integer|float $value
       * @throws Zend_Search_Lucene_Exception
       */
      public function writeLong32Bit($value)
      {
          if ($value < (int)0x80000000) {
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
          }

          if ($value < 0) {
              $wordHigh = (int)0xFFFFFFFF;
              $wordLow  = (int)$value;
          } else {
              $wordHigh = (int)($value/(float)0x100000000/* 0x00000001 00000000 */);
              $wordLow  = $value - $wordHigh*(float)0x100000000/* 0x00000001 00000000 */;

              if ($wordLow > 0x7FFFFFFF) {
                  // Highest bit of low word is set. Translate it to the corresponding negative integer value
                  $wordLow -= 0x80000000;
                  $wordLow |= 0x80000000;
              }
          }

          $this->writeInt($wordHigh);
          $this->writeInt($wordLow);
      }

      /**
       * Returns a variable-length integer from the current
       * position in the file and advances the file pointer.
       *
       * Each byte contributes its low seven bits, least significant group
       * first; a set high bit means that another byte follows.
       *
       * @return integer
       */
      public function readVInt()
      {
          $nextByte = ord($this->_data[$this->_position++]);
          $val = $nextByte & 0x7F;

          for ($shift=7; ($nextByte & 0x80) != 0; $shift += 7) {
              $nextByte = ord($this->_data[$this->_position++]);
              $val |= ($nextByte & 0x7F) << $shift;
          }

          return $val;
      }

      /**
       * Writes a variable-length integer to the end of file.
       *
       * Seven bits are emitted per byte, least significant group first;
       * every byte except the last has its high bit set.
       *
       * @param integer $value
       */
      public function writeVInt($value)
      {
          // We do not need to check if file position points to the end of "file".
          // Only append operation is supported now

          settype($value, 'integer');
          while ($value > 0x7F) {
              $this->_data .= chr( ($value & 0x7F)|0x80 );
              $value >>= 7;
          }
          $this->_data .= chr($value);

          $this->_position = strlen($this->_data);
      }

      /**
       * Reads a string from the current position in the file
       * and advances the file pointer.
       *
       * The string is stored as a variable-length character count followed by
       * the characters. A null character stored as the two bytes \xC0\x80 is
       * returned as a single \x00 byte.
       *
       * @return string
       */
      public function readString()
      {
          $strlen = $this->readVInt();
          if ($strlen == 0) {
              return '';
          }

          /**
           * This implementation supports only Basic Multilingual Plane
           * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
           * "supplementary characters" (characters whose code points are
           * greater than 0xFFFF)
           * Java 2 represents these characters as a pair of char (16-bit)
           * values, the first from the high-surrogates range (0xD800-0xDBFF),
           * the second from the low-surrogates range (0xDC00-0xDFFF). Then
           * they are encoded as usual UTF-8 characters in six bytes.
           * Standard UTF-8 representation uses four bytes for supplementary
           * characters.
           */

          // $strlen starts as the character count, which is also the minimal
          // byte count. It grows as multi-byte characters are discovered, so
          // that it always equals the number of bytes held in $str_val.
          $str_val = substr($this->_data, $this->_position, $strlen);
          $this->_position += $strlen;

          for ($count = 0; $count < $strlen; $count++) {
              $leadByte = ord($str_val[$count]);

              if (($leadByte & 0xC0) != 0xC0) {
                  // Single-byte character
                  continue;
              }

              $addBytes = 1;
              if ($leadByte & 0x20) {
                  $addBytes++;

                  // Never used. Java2 doesn't encode strings in four bytes
                  if ($leadByte & 0x10) {
                      $addBytes++;
                  }
              }

              // Fetch the continuation bytes that were not covered by the
              // initial one-byte-per-character read
              $str_val .= substr($this->_data, $this->_position, $addBytes);
              $this->_position += $addBytes;
              $strlen          += $addBytes;

              // Check for null character. Java2 encodes null character
              // in two bytes.
              if ($leadByte == 0xC0 && substr($str_val, $count + 1, 1) === "\x80") {
                  // Collapse the two-byte sequence into a real NUL byte.
                  // (Assigning integer 0 to a string offset would store the
                  // character '0', so the string is rebuilt explicitly.)
                  $str_val = substr($str_val, 0, $count)
                           . "\x00"
                           . substr($str_val, $count + 2);

                  // The string became one byte shorter; $count already points
                  // to the (now single-byte) character, so it is not advanced.
                  $strlen--;
              } else {
                  // Skip the continuation bytes of this character
                  $count += $addBytes;
              }
          }

          return $str_val;
      }

      /**
       * Writes a string to the end of file.
       *
       * The string is written as a variable-length character count followed
       * by its bytes, with every \x00 byte replaced by \xC0\x80.
       *
       * @param string $str
       * @throws Zend_Search_Lucene_Exception
       */
      public function writeString($str)
      {
          /**
           * This implementation supports only Basic Multilingual Plane
           * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
           * "supplementary characters" (characters whose code points are
           * greater than 0xFFFF)
           * Java 2 represents these characters as a pair of char (16-bit)
           * values, the first from the high-surrogates range (0xD800-0xDBFF),
           * the second from the low-surrogates range (0xDC00-0xDFFF). Then
           * they are encoded as usual UTF-8 characters in six bytes.
           * Standard UTF-8 representation uses four bytes for supplementary
           * characters.
           */

          // We do not need to check if file position points to the end of "file".
          // Only append operation is supported now

          // convert input to a string before iterating string characters
          settype($str, 'string');

          $chars = $strlen = strlen($str);

          for ($count = 0; $count < $strlen; $count++) {
              /**
               * String is already in Java 2 representation.
               * We should only calculate actual string length and replace
               * \x00 by \xC0\x80
               */
              $leadByte = ord($str[$count]);

              if (($leadByte & 0xC0) == 0xC0) {
                  $addBytes = 1;
                  if ($leadByte & 0x20) {
                      $addBytes++;

                      // Never used. Java2 doesn't encode strings in four bytes
                      // and we don't support non-BMP characters
                      if ($leadByte & 0x10) {
                          $addBytes++;
                      }
                  }
                  // Continuation bytes do not count as characters
                  $chars -= $addBytes;
                  $count += $addBytes;
              }
          }

          if ($chars < 0) {
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
          }

          $this->writeVInt($chars);

          // Each NUL is written as two bytes but still counts as one character,
          // so $chars computed above stays correct. str_replace() returns the
          // string unchanged when it contains no NUL bytes.
          $this->_data .= str_replace("\x00", "\xC0\x80", $str);

          $this->_position = strlen($this->_data);
      }

      /**
       * Reads binary data from the current position in the file
       * and advances the file pointer.
       *
       * The data is stored as a variable-length byte count followed by
       * that many bytes.
       *
       * @return string
       */
      public function readBinary()
      {
          $length = $this->readVInt();
          $returnValue = substr($this->_data, $this->_position, $length);
          $this->_position += $length;

          return $returnValue;
      }
  }