File.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Storage
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
  /**
   * Abstract base class for Zend_Search_Lucene storage files.
   *
   * Subclasses provide the raw primitives (_fread(), _fwrite(), seek(), tell(),
   * flush(), lock(), unlock()). This class layers typed readers and writers on
   * top of them: single bytes, 32-bit integers, 64-bit longs, variable-length
   * integers, length-prefixed strings and length-prefixed binary data.
   *
   * @category   Zend
   * @package    Zend_Search_Lucene
   * @subpackage Storage
   * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
   * @license    http://framework.zend.com/license/new-bsd     New BSD License
   */
  abstract class Zend_Search_Lucene_Storage_File
  {
      /**
       * Reads $length number of bytes at the current position in the
       * file and advances the file pointer.
       *
       * @param integer $length
       * @return string
       */
      abstract protected function _fread($length=1);

      /**
       * Sets the file position indicator and advances the file pointer.
       * The new position, measured in bytes from the beginning of the file,
       * is obtained by adding offset to the position specified by whence,
       * whose values are defined as follows:
       * SEEK_SET - Set position equal to offset bytes.
       * SEEK_CUR - Set position to current location plus offset.
       * SEEK_END - Set position to end-of-file plus offset. (To move to
       * a position before the end-of-file, you need to pass a negative value
       * in offset.)
       * Upon success, returns 0; otherwise, returns -1
       *
       * @param integer $offset
       * @param integer $whence
       * @return integer
       */
      abstract public function seek($offset, $whence=SEEK_SET);

      /**
       * Get file position.
       *
       * @return integer
       */
      abstract public function tell();

      /**
       * Flush output.
       *
       * Returns true on success or false on failure.
       *
       * @return boolean
       */
      abstract public function flush();

      /**
       * Writes $length number of bytes (all, if $length===null) to the end
       * of the file.
       *
       * @param string $data
       * @param integer $length
       */
      abstract protected function _fwrite($data, $length=null);

      /**
       * Lock file
       *
       * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock)
       *
       * @param integer $lockType
       * @param boolean $nonBlockinLock  presumably requests a non-blocking lock
       *                                 attempt; the exact semantics are defined
       *                                 by the implementing subclass
       * @return boolean
       */
      abstract public function lock($lockType, $nonBlockinLock = false);

      /**
       * Unlock file
       */
      abstract public function unlock();

      /**
       * Reads a byte from the current position in the file
       * and advances the file pointer.
       *
       * @return integer  byte value as returned by ord()
       */
      public function readByte()
      {
          return ord($this->_fread(1));
      }

      /**
       * Writes a byte to the end of the file.
       *
       * @param integer $byte
       * @return mixed  whatever the subclass's _fwrite() returns
       */
      public function writeByte($byte)
      {
          return $this->_fwrite(chr($byte), 1);
      }

      /**
       * Read num bytes from the current position in the file
       * and advances the file pointer.
       *
       * @param integer $num
       * @return string
       */
      public function readBytes($num)
      {
          return $this->_fread($num);
      }

      /**
       * Writes num bytes of data (all, if $num===null) to the end
       * of the file.
       *
       * @param string $data
       * @param integer $num
       */
      public function writeBytes($data, $num=null)
      {
          $this->_fwrite($data, $num);
      }

      /**
       * Reads an integer from the current position in the file
       * and advances the file pointer.
       *
       * The four bytes are combined most significant byte first. No explicit
       * sign extension is done, so where PHP integers are wider than 32 bits
       * the result lies in 0..0xFFFFFFFF.
       *
       * @return integer
       */
      public function readInt()
      {
          $str = $this->_fread(4);

          return (ord($str[0]) << 24)
               | (ord($str[1]) << 16)
               | (ord($str[2]) << 8)
               |  ord($str[3]);
      }

      /**
       * Writes an integer to the end of file.
       *
       * The low 32 bits of $value are written most significant byte first.
       *
       * @param integer $value
       */
      public function writeInt($value)
      {
          settype($value, 'integer');

          $this->_fwrite(
              chr(($value >> 24) & 0xFF)
              . chr(($value >> 16) & 0xFF)
              . chr(($value >> 8) & 0xFF)
              . chr($value & 0xFF),
              4
          );
      }

      /**
       * Returns a long integer from the current position in the file
       * and advances the file pointer.
       *
       * @return integer|float
       * @throws Zend_Search_Lucene_Exception
       */
      public function readLong()
      {
          /**
           * Check, that we work in 64-bit mode.
           * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
           */
          if (PHP_INT_SIZE <= 4) {
              return $this->readLong32Bit();
          }

          $str = $this->_fread(8);

          return (ord($str[0]) << 56)
               | (ord($str[1]) << 48)
               | (ord($str[2]) << 40)
               | (ord($str[3]) << 32)
               | (ord($str[4]) << 24)
               | (ord($str[5]) << 16)
               | (ord($str[6]) << 8)
               |  ord($str[7]);
      }

      /**
       * Writes long integer to the end of file
       *
       * @param integer $value
       * @throws Zend_Search_Lucene_Exception
       */
      public function writeLong($value)
      {
          /**
           * Check, that we work in 64-bit mode.
           * fseek() and ftell() use long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
           */
          if (PHP_INT_SIZE <= 4) {
              $this->writeLong32Bit($value);
              return;
          }

          settype($value, 'integer');

          $this->_fwrite(
              chr(($value >> 56) & 0xFF)
              . chr(($value >> 48) & 0xFF)
              . chr(($value >> 40) & 0xFF)
              . chr(($value >> 32) & 0xFF)
              . chr(($value >> 24) & 0xFF)
              . chr(($value >> 16) & 0xFF)
              . chr(($value >> 8) & 0xFF)
              . chr($value & 0xFF),
              8
          );
      }

      /**
       * Returns a long integer from the current position in the file,
       * advances the file pointer and return it as float (for 32-bit platforms).
       *
       * The value is read as two 32-bit words (high word first) via readInt().
       *
       * NOTE(review): the sign tests rely on (int)0x80000000 and (int)0xFFFFFFFF
       * evaluating to negative integers, which is only the case where PHP
       * integers are 32 bits wide - confirm before calling this directly on a
       * 64-bit build.
       *
       * @return integer|float
       * @throws Zend_Search_Lucene_Exception
       */
      public function readLong32Bit()
      {
          $wordHigh = $this->readInt();
          $wordLow  = $this->readInt();

          if ($wordHigh & (int)0x80000000) {
              // It's a negative value since the highest bit is set
              if ($wordHigh == (int)0xFFFFFFFF  &&  ($wordLow & (int)0x80000000)) {
                  return $wordLow;
              } else {
                  require_once 'Zend/Search/Lucene/Exception.php';
                  throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
              }
          }

          if ($wordLow < 0) {
              // Value is larger than 0x7FFF FFFF. Represent low word as float.
              $wordLow &= 0x7FFFFFFF;
              $wordLow += (float)0x80000000;
          }

          if ($wordHigh == 0) {
              // Return value as integer if possible
              return $wordLow;
          }

          return $wordHigh*(float)0x100000000/* 0x00000001 00000000 */ + $wordLow;
      }

      /**
       * Writes long integer to the end of file (32-bit platforms implementation)
       *
       * The value is split into a high and a low 32-bit word, each written
       * with writeInt() (high word first).
       *
       * NOTE(review): the lower-bound check compares against (int)0x80000000,
       * which is presumably -2147483648 on the 32-bit builds this method
       * targets - confirm before calling this directly on a 64-bit build.
       *
       * @param integer|float $value
       * @throws Zend_Search_Lucene_Exception
       */
      public function writeLong32Bit($value)
      {
          if ($value < (int)0x80000000) {
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
          }

          if ($value < 0) {
              $wordHigh = (int)0xFFFFFFFF;
              $wordLow  = (int)$value;
          } else {
              $wordHigh = (int)($value/(float)0x100000000/* 0x00000001 00000000 */);
              $wordLow  = $value - $wordHigh*(float)0x100000000/* 0x00000001 00000000 */;

              if ($wordLow > 0x7FFFFFFF) {
                  // Highest bit of low word is set. Translate it to the corresponding negative integer value
                  $wordLow -= 0x80000000;
                  $wordLow |= 0x80000000;
              }
          }

          $this->writeInt($wordHigh);
          $this->writeInt($wordLow);
      }

      /**
       * Returns a variable-length integer from the current
       * position in the file and advances the file pointer.
       *
       * Each byte contributes its low 7 bits, least significant group first;
       * a set high bit means another byte follows.
       *
       * @return integer
       */
      public function readVInt()
      {
          $nextByte = ord($this->_fread(1));
          $val      = $nextByte & 0x7F;

          for ($shift = 7; ($nextByte & 0x80) != 0; $shift += 7) {
              $nextByte = ord($this->_fread(1));
              $val     |= ($nextByte & 0x7F) << $shift;
          }

          return $val;
      }

      /**
       * Writes a variable-length integer to the end of file.
       *
       * Emits 7 bits per byte, least significant group first, with the high
       * bit set on every byte except the last one.
       *
       * @param integer $value
       */
      public function writeVInt($value)
      {
          settype($value, 'integer');

          while ($value > 0x7F) {
              $this->_fwrite(chr( ($value & 0x7F)|0x80 ));
              $value >>= 7;
          }
          $this->_fwrite(chr($value));
      }

      /**
       * Reads a string from the current position in the file
       * and advances the file pointer.
       *
       * The stored length prefix (a VInt) counts characters, not bytes, so the
       * method first reads one byte per character and then pulls in the extra
       * continuation bytes each time it meets a multi-byte lead byte. The
       * two-byte sequence 0xC0 0x80 is decoded to a single NUL byte.
       *
       * @return string
       */
      public function readString()
      {
          $strlen = $this->readVInt();
          if ($strlen === 0) {
              return '';
          }

          /**
           * This implementation supports only Basic Multilingual Plane
           * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
           * "supplementary characters" (characters whose code points are
           * greater than 0xFFFF)
           * Java 2 represents these characters as a pair of char (16-bit)
           * values, the first from the high-surrogates range (0xD800-0xDBFF),
           * the second from the low-surrogates range (0xDC00-0xDFFF). Then
           * they are encoded as usual UTF-8 characters in six bytes.
           * Standard UTF-8 representation uses four bytes for supplementary
           * characters.
           */
          $str_val = $this->_fread($strlen);

          for ($count = 0; $count < $strlen; $count++) {
              $leadByte = ord($str_val[$count]);

              if (($leadByte & 0xC0) != 0xC0) {
                  // Not a multi-byte lead byte: nothing more to fetch.
                  continue;
              }

              $addBytes = 1;
              if ($leadByte & 0x20) {
                  $addBytes++;

                  // Never used. Java2 doesn't encode strings in four bytes
                  if ($leadByte & 0x10) {
                      $addBytes++;
                  }
              }

              // The length prefix counted this character once; fetch the
              // continuation bytes it did not account for.
              $str_val .= $this->_fread($addBytes);
              $strlen  += $addBytes;

              // Check for null character. Java2 encodes null character
              // in two bytes.
              if ($leadByte === 0xC0 && ord($str_val[$count + 1]) === 0x80) {
                  // Collapse the pair into one real NUL byte. The string is now
                  // one byte shorter, so shrink $strlen as well and do not skip
                  // ahead: the byte after the NUL must still be examined.
                  $str_val = substr($str_val, 0, $count)
                           . "\x00"
                           . substr($str_val, $count + 2);
                  $strlen--;
                  continue;
              }

              // Skip the continuation bytes of this character.
              $count += $addBytes;
          }

          return $str_val;
      }

      /**
       * Writes a string to the end of file.
       *
       * A VInt holding the number of characters is written first, followed by
       * the string bytes. NUL bytes are written as the two-byte sequence
       * 0xC0 0x80.
       *
       * @param string $str
       * @throws Zend_Search_Lucene_Exception
       */
      public function writeString($str)
      {
          /**
           * This implementation supports only Basic Multilingual Plane
           * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
           * "supplementary characters" (characters whose code points are
           * greater than 0xFFFF)
           * Java 2 represents these characters as a pair of char (16-bit)
           * values, the first from the high-surrogates range (0xD800-0xDBFF),
           * the second from the low-surrogates range (0xDC00-0xDFFF). Then
           * they are encoded as usual UTF-8 characters in six bytes.
           * Standard UTF-8 representation uses four bytes for supplementary
           * characters.
           */

          // convert input to a string before iterating string characters
          settype($str, 'string');

          $chars = $strlen = strlen($str);
          $containNullChars = false;

          for ($count = 0; $count < $strlen; $count++) {
              /**
               * String is already in Java 2 representation.
               * We should only calculate actual string length and replace
               * \x00 by \xC0\x80
               */
              $byte = ord($str[$count]);

              if (($byte & 0xC0) == 0xC0) {
                  $addBytes = 1;
                  if ($byte & 0x20) {
                      $addBytes++;

                      // Never used. Java2 doesn't encode strings in four bytes
                      // and we don't support non-BMP characters
                      if ($byte & 0x10) {
                          $addBytes++;
                      }
                  }

                  // Continuation bytes do not count as characters.
                  $chars -= $addBytes;
                  $count += $addBytes;
              } elseif ($byte === 0) {
                  // A NUL can only be seen outside the lead-byte branch above.
                  $containNullChars = true;
              }
          }

          if ($chars < 0) {
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
          }

          $this->writeVInt($chars);
          if ($containNullChars) {
              // str_replace() takes (search, replace, subject).
              $this->_fwrite(str_replace("\x00", "\xC0\x80", $str));
          } else {
              $this->_fwrite($str);
          }
      }

      /**
       * Reads binary data from the current position in the file
       * and advances the file pointer.
       *
       * The data is prefixed by its byte length, stored as a VInt.
       *
       * @return string
       */
      public function readBinary()
      {
          return $this->_fread($this->readVInt());
      }
  }