2
0

SegmentToDelta.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @package Zend_Pdf
  16. * @subpackage Fonts
  17. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. */
  20. /** Zend_Pdf_Cmap */
  21. require_once 'Zend/Pdf/Cmap.php';
  22. /**
  23. * Implements the "segment mapping to delta values" character map (type 4).
  24. *
  25. * This is the Microsoft standard mapping table type for OpenType fonts. It
  26. * provides the ability to cover multiple contiguous ranges of the Unicode
  27. * character set, with the exception of Unicode Surrogates (U+D800 - U+DFFF).
  28. *
  29. * @package Zend_Pdf
  30. * @subpackage Fonts
  31. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  32. * @license http://framework.zend.com/license/new-bsd New BSD License
  33. */
  34. class Zend_Pdf_Cmap_SegmentToDelta extends Zend_Pdf_Cmap
  35. {
  36. /**** Instance Variables ****/
  37. /**
  38. * The number of segments in the table.
  39. * @var integer
  40. */
  41. protected $_segmentCount = 0;
  42. /**
  43. * The size of the binary search range for segments.
  44. * @var integer
  45. */
  46. protected $_searchRange = 0;
  47. /**
  48. * The number of binary search steps required to cover the entire search
  49. * range.
  50. * @var integer
  51. */
  52. protected $_searchIterations = 0;
  53. /**
  54. * Array of ending character codes for each segment.
  55. * @var array
  56. */
  57. protected $_segmentTableEndCodes = array();
  58. /**
  59. * The ending character code for the segment at the end of the low search
  60. * range.
  61. * @var integer
  62. */
  63. protected $_searchRangeEndCode = 0;
  64. /**
  65. * Array of starting character codes for each segment.
  66. * @var array
  67. */
  68. protected $_segmentTableStartCodes = array();
  69. /**
  70. * Array of character code to glyph delta values for each segment.
  71. * @var array
  72. */
  73. protected $_segmentTableIdDeltas = array();
  74. /**
  75. * Array of offsets into the glyph index array for each segment.
  76. * @var array
  77. */
  78. protected $_segmentTableIdRangeOffsets = array();
  79. /**
  80. * Glyph index array. Stores glyph numbers, used with range offset.
  81. * @var array
  82. */
  83. protected $_glyphIndexArray = array();
  84. /**** Public Interface ****/
  85. /* Concrete Class Implementation */
  86. /**
  87. * Returns an array of glyph numbers corresponding to the Unicode characters.
  88. *
  89. * If a particular character doesn't exist in this font, the special 'missing
  90. * character glyph' will be substituted.
  91. *
  92. * See also {@link glyphNumberForCharacter()}.
  93. *
  94. * @param array $characterCodes Array of Unicode character codes (code points).
  95. * @return array Array of glyph numbers.
  96. */
  97. public function glyphNumbersForCharacters($characterCodes)
  98. {
  99. $glyphNumbers = array();
  100. foreach ($characterCodes as $key => $characterCode) {
  101. /* These tables only cover the 16-bit character range.
  102. */
  103. if ($characterCode > 0xffff) {
  104. $glyphNumbers[$key] = Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
  105. continue;
  106. }
  107. /* Determine where to start the binary search. The segments are
  108. * ordered from lowest-to-highest. We are looking for the first
  109. * segment whose end code is greater than or equal to our character
  110. * code.
  111. *
  112. * If the end code at the top of the search range is larger, then
  113. * our target is probably below it.
  114. *
  115. * If it is smaller, our target is probably above it, so move the
  116. * search range to the end of the segment list.
  117. */
  118. if ($this->_searchRangeEndCode >= $characterCode) {
  119. $searchIndex = $this->_searchRange;
  120. } else {
  121. $searchIndex = $this->_segmentCount;
  122. }
  123. /* Now do a binary search to find the first segment whose end code
  124. * is greater or equal to our character code. No matter the number
  125. * of segments (there may be hundreds in a large font), we will only
  126. * need to perform $this->_searchIterations.
  127. */
  128. for ($i = 1; $i <= $this->_searchIterations; $i++) {
  129. if ($this->_segmentTableEndCodes[$searchIndex] >= $characterCode) {
  130. $subtableIndex = $searchIndex;
  131. $searchIndex -= $this->_searchRange >> $i;
  132. } else {
  133. $searchIndex += $this->_searchRange >> $i;
  134. }
  135. }
  136. /* If the segment's start code is greater than our character code,
  137. * that character is not represented in this font. Move on.
  138. */
  139. if ($this->_segmentTableStartCodes[$subtableIndex] > $characterCode) {
  140. $glyphNumbers[$key] = Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
  141. continue;
  142. }
  143. if ($this->_segmentTableIdRangeOffsets[$subtableIndex] == 0) {
  144. /* This segment uses a simple mapping from character code to
  145. * glyph number.
  146. */
  147. $glyphNumbers[$key] = ($characterCode + $this->_segmentTableIdDeltas[$subtableIndex]) % 65536;
  148. } else {
  149. /* This segment relies on the glyph index array to determine the
  150. * glyph number. The calculation below determines the correct
  151. * index into that array. It's a little odd because the range
  152. * offset in the font file is designed to quickly provide an
  153. * address of the index in the raw binary data instead of the
  154. * index itself. Since we've parsed the data into arrays, we
  155. * must process it a bit differently.
  156. */
  157. $glyphIndex = ($characterCode - $this->_segmentTableStartCodes[$subtableIndex] +
  158. $this->_segmentTableIdRangeOffsets[$subtableIndex] - $this->_segmentCount +
  159. $subtableIndex - 1);
  160. $glyphNumbers[$key] = $this->_glyphIndexArray[$glyphIndex];
  161. }
  162. }
  163. return $glyphNumbers;
  164. }
  165. /**
  166. * Returns the glyph number corresponding to the Unicode character.
  167. *
  168. * If a particular character doesn't exist in this font, the special 'missing
  169. * character glyph' will be substituted.
  170. *
  171. * See also {@link glyphNumbersForCharacters()} which is optimized for bulk
  172. * operations.
  173. *
  174. * @param integer $characterCode Unicode character code (code point).
  175. * @return integer Glyph number.
  176. */
  177. public function glyphNumberForCharacter($characterCode)
  178. {
  179. /* This code is pretty much a copy of glyphNumbersForCharacters().
  180. * See that method for inline documentation.
  181. */
  182. if ($characterCode > 0xffff) {
  183. return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
  184. }
  185. if ($this->_searchRangeEndCode >= $characterCode) {
  186. $searchIndex = $this->_searchRange;
  187. } else {
  188. $searchIndex = $this->_segmentCount;
  189. }
  190. for ($i = 1; $i <= $this->_searchIterations; $i++) {
  191. if ($this->_segmentTableEndCodes[$searchIndex] >= $characterCode) {
  192. $subtableIndex = $searchIndex;
  193. $searchIndex -= $this->_searchRange >> $i;
  194. } else {
  195. $searchIndex += $this->_searchRange >> $i;
  196. }
  197. }
  198. if ($this->_segmentTableStartCodes[$subtableIndex] > $characterCode) {
  199. return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
  200. }
  201. if ($this->_segmentTableIdRangeOffsets[$subtableIndex] == 0) {
  202. $glyphNumber = ($characterCode + $this->_segmentTableIdDeltas[$subtableIndex]) % 65536;
  203. } else {
  204. $glyphIndex = ($characterCode - $this->_segmentTableStartCodes[$subtableIndex] +
  205. $this->_segmentTableIdRangeOffsets[$subtableIndex] - $this->_segmentCount +
  206. $subtableIndex - 1);
  207. $glyphNumber = $this->_glyphIndexArray[$glyphIndex];
  208. }
  209. return $glyphNumber;
  210. }
  211. /**
  212. * Returns an array containing the Unicode characters that have entries in
  213. * this character map.
  214. *
  215. * @return array Unicode character codes.
  216. */
  217. public function getCoveredCharacters()
  218. {
  219. $characterCodes = array();
  220. for ($i = 1; $i <= $this->_segmentCount; $i++) {
  221. for ($code = $this->_segmentTableStartCodes[$i]; $code <= $this->_segmentTableEndCodes[$i]; $code++) {
  222. $characterCodes[] = $code;
  223. }
  224. }
  225. return $characterCodes;
  226. }
  227. /**
  228. * Returns an array containing the glyphs numbers that have entries in this character map.
  229. * Keys are Unicode character codes (integers)
  230. *
  231. * This functionality is partially covered by glyphNumbersForCharacters(getCoveredCharacters())
  232. * call, but this method do it in more effective way (prepare complete list instead of searching
  233. * glyph for each character code).
  234. *
  235. * @internal
  236. * @return array Array representing <Unicode character code> => <glyph number> pairs.
  237. */
  238. public function getCoveredCharactersGlyphs()
  239. {
  240. $glyphNumbers = array();
  241. for ($segmentNum = 1; $segmentNum <= $this->_segmentCount; $segmentNum++) {
  242. if ($this->_segmentTableIdRangeOffsets[$segmentNum] == 0) {
  243. $delta = $this->_segmentTableIdDeltas[$segmentNum];
  244. for ($code = $this->_segmentTableStartCodes[$segmentNum];
  245. $code <= $this->_segmentTableEndCodes[$segmentNum];
  246. $code++) {
  247. $glyphNumbers[$code] = ($code + $delta) % 65536;
  248. }
  249. } else {
  250. $code = $this->_segmentTableStartCodes[$segmentNum];
  251. $glyphIndex = $this->_segmentTableIdRangeOffsets[$segmentNum] - ($this->_segmentCount - $segmentNum) - 1;
  252. while ($code <= $this->_segmentTableEndCodes[$segmentNum]) {
  253. $glyphNumbers[$code] = $this->_glyphIndexArray[$glyphIndex];
  254. $code++;
  255. $glyphIndex++;
  256. }
  257. }
  258. }
  259. return $glyphNumbers;
  260. }
  261. /* Object Lifecycle */
  262. /**
  263. * Object constructor
  264. *
  265. * Parses the raw binary table data. Throws an exception if the table is
  266. * malformed.
  267. *
  268. * @param string $cmapData Raw binary cmap table data.
  269. * @throws Zend_Pdf_Exception
  270. */
  271. public function __construct($cmapData)
  272. {
  273. /* Sanity check: The table should be at least 23 bytes in size.
  274. */
  275. $actualLength = strlen($cmapData);
  276. if ($actualLength < 23) {
  277. throw new Zend_Pdf_Exception('Insufficient table data',
  278. Zend_Pdf_Exception::CMAP_TABLE_DATA_TOO_SMALL);
  279. }
  280. /* Sanity check: Make sure this is right data for this table type.
  281. */
  282. $type = $this->_extractUInt2($cmapData, 0);
  283. if ($type != Zend_Pdf_Cmap::TYPE_SEGMENT_TO_DELTA) {
  284. throw new Zend_Pdf_Exception('Wrong cmap table type',
  285. Zend_Pdf_Exception::CMAP_WRONG_TABLE_TYPE);
  286. }
  287. $length = $this->_extractUInt2($cmapData, 2);
  288. if ($length != $actualLength) {
  289. throw new Zend_Pdf_Exception("Table length ($length) does not match actual length ($actualLength)",
  290. Zend_Pdf_Exception::CMAP_WRONG_TABLE_LENGTH);
  291. }
  292. /* Mapping tables should be language-independent. The font may not work
  293. * as expected if they are not. Unfortunately, many font files in the
  294. * wild incorrectly record a language ID in this field, so we can't
  295. * call this a failure.
  296. */
  297. $language = $this->_extractUInt2($cmapData, 4);
  298. if ($language != 0) {
  299. // Record a warning here somehow?
  300. }
  301. /* These two values are stored premultiplied by two which is convienent
  302. * when using the binary data directly, but we're parsing it out to
  303. * native PHP data types, so divide by two.
  304. */
  305. $this->_segmentCount = $this->_extractUInt2($cmapData, 6) >> 1;
  306. $this->_searchRange = $this->_extractUInt2($cmapData, 8) >> 1;
  307. $this->_searchIterations = $this->_extractUInt2($cmapData, 10) + 1;
  308. $offset = 14;
  309. for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
  310. $this->_segmentTableEndCodes[$i] = $this->_extractUInt2($cmapData, $offset);
  311. }
  312. $this->_searchRangeEndCode = $this->_segmentTableEndCodes[$this->_searchRange];
  313. $offset += 2; // reserved bytes
  314. for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
  315. $this->_segmentTableStartCodes[$i] = $this->_extractUInt2($cmapData, $offset);
  316. }
  317. for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
  318. $this->_segmentTableIdDeltas[$i] = $this->_extractInt2($cmapData, $offset); // signed
  319. }
  320. /* The range offset helps determine the index into the glyph index array.
  321. * Like the segment count and search range above, it's stored as a byte
  322. * multiple in the font, so divide by two as we extract the values.
  323. */
  324. for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
  325. $this->_segmentTableIdRangeOffsets[$i] = $this->_extractUInt2($cmapData, $offset) >> 1;
  326. }
  327. /* The size of the glyph index array varies by font and depends on the
  328. * extent of the usage of range offsets versus deltas. Some fonts may
  329. * not have any entries in this array.
  330. */
  331. for (; $offset < $length; $offset += 2) {
  332. $this->_glyphIndexArray[] = $this->_extractUInt2($cmapData, $offset);
  333. }
  334. /* Sanity check: After reading all of the data, we should be at the end
  335. * of the table.
  336. */
  337. if ($offset != $length) {
  338. throw new Zend_Pdf_Exception("Ending offset ($offset) does not match length ($length)",
  339. Zend_Pdf_Exception::CMAP_FINAL_OFFSET_NOT_LENGTH);
  340. }
  341. }
  342. }