// Source: github.com/unidoc/unidoc@v2.2.0+incompatible/pdf/model/fonts/ttfparser.go

/*
 * Copyright (c) 2013 Kurt Jung (Gmail: kurt.w.jung)
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

package fonts

// Utility to parse TTF font files
// Version: 1.0
// Date: 2011-06-18
// Author: Olivier PLATHEY
// Port to Go: Kurt Jung, 2013-07-15

import (
	"encoding/binary"
	"fmt"
	"os"
	"regexp"
	"strings"
)

// TtfType contains metrics of a TrueType font.
//
// Every field is filled in by the table parsers that TtfParse runs; the
// comment on each field names the font table the value is read from.
type TtfType struct {
	// Embeddable is derived from the OS/2 fsType value: it is false when
	// fsType is exactly 2 or has bit 0x200 set, and true otherwise.
	Embeddable bool

	// UnitsPerEm is read from the "head" table.
	UnitsPerEm uint16

	// PostScriptName is the "name" table record with nameID 6, with NUL
	// bytes and the characters ( ) { } < > space / % [ ] removed.
	PostScriptName string

	// Bold reports whether bit 32 of the OS/2 fsSelection value is set.
	Bold bool

	// ItalicAngle is the first 16 bits of the italic angle stored in the
	// "post" table; the two bytes that follow (the decimal part) are skipped.
	ItalicAngle int16

	// IsFixedPitch is true when the 32-bit value that follows the underline
	// metrics in the "post" table is non-zero.
	IsFixedPitch bool

	// TypoAscender is read from the OS/2 table.
	TypoAscender int16

	// TypoDescender is read from the OS/2 table.
	TypoDescender int16

	// UnderlinePosition is read from the "post" table.
	UnderlinePosition int16

	// UnderlineThickness is read from the "post" table.
	UnderlineThickness int16

	// Xmin, Ymin, Xmax and Ymax are the four bounding values read, in that
	// order, from the "head" table.
	Xmin, Ymin, Xmax, Ymax int16

	// CapHeight is read from the OS/2 table when that table's version is at
	// least 2, and is 0 otherwise.
	CapHeight int16

	// Widths holds one width per glyph from the "hmtx" table. Glyphs beyond
	// the metric count announced by "hhea" repeat the last width read.
	Widths []uint16

	// Chars maps a character code to a glyph index, built from the format 4
	// "cmap" subtable registered for platform 3 / encoding 1. Codes that
	// resolve to glyph 0 are left out of the map.
	Chars map[uint16]uint16
}

// ttfParser carries the state shared by the table parsers during one
// TtfParse call.
type ttfParser struct {
	// rec accumulates the metrics; TtfParse returns it on success.
	rec TtfType

	// f is the open font file; every read and seek goes through it.
	f *os.File

	// tables maps a four-character table tag to the table's byte offset in
	// the file, as listed in the table directory.
	tables map[string]uint32

	// numberOfHMetrics is read from the "hhea" table and tells ParseHmtx how
	// many width entries are stored explicitly.
	numberOfHMetrics uint16

	// numGlyphs is read from the "maxp" table and is the length Widths is
	// padded to.
	numGlyphs uint16
}

// TtfParse extracts various metrics from a TrueType font file.
60 func TtfParse(fileStr string) (TtfRec TtfType, err error) { 61 var t ttfParser 62 t.f, err = os.Open(fileStr) 63 if err != nil { 64 return 65 } 66 version, err := t.ReadStr(4) 67 if err != nil { 68 return 69 } 70 if version == "OTTO" { 71 err = fmt.Errorf("fonts based on PostScript outlines are not supported") 72 return 73 } 74 if version != "\x00\x01\x00\x00" { 75 err = fmt.Errorf("unrecognized file format") 76 return 77 } 78 numTables := int(t.ReadUShort()) 79 t.Skip(3 * 2) // searchRange, entrySelector, rangeShift 80 t.tables = make(map[string]uint32) 81 var tag string 82 for j := 0; j < numTables; j++ { 83 tag, err = t.ReadStr(4) 84 if err != nil { 85 return 86 } 87 t.Skip(4) // checkSum 88 offset := t.ReadULong() 89 t.Skip(4) // length 90 t.tables[tag] = offset 91 } 92 err = t.ParseComponents() 93 if err != nil { 94 return 95 } 96 t.f.Close() 97 TtfRec = t.rec 98 return 99 } 100 101 func (t *ttfParser) ParseComponents() (err error) { 102 err = t.ParseHead() 103 if err == nil { 104 err = t.ParseHhea() 105 if err == nil { 106 err = t.ParseMaxp() 107 if err == nil { 108 err = t.ParseHmtx() 109 if err == nil { 110 err = t.ParseCmap() 111 if err == nil { 112 err = t.ParseName() 113 if err == nil { 114 err = t.ParseOS2() 115 if err == nil { 116 err = t.ParsePost() 117 } 118 } 119 } 120 } 121 } 122 } 123 } 124 return 125 } 126 127 func (t *ttfParser) ParseHead() (err error) { 128 err = t.Seek("head") 129 t.Skip(3 * 4) // version, fontRevision, checkSumAdjustment 130 magicNumber := t.ReadULong() 131 if magicNumber != 0x5F0F3CF5 { 132 err = fmt.Errorf("incorrect magic number") 133 return 134 } 135 t.Skip(2) // flags 136 t.rec.UnitsPerEm = t.ReadUShort() 137 t.Skip(2 * 8) // created, modified 138 t.rec.Xmin = t.ReadShort() 139 t.rec.Ymin = t.ReadShort() 140 t.rec.Xmax = t.ReadShort() 141 t.rec.Ymax = t.ReadShort() 142 return 143 } 144 145 func (t *ttfParser) ParseHhea() (err error) { 146 err = t.Seek("hhea") 147 if err == nil { 148 t.Skip(4 + 15*2) 149 
t.numberOfHMetrics = t.ReadUShort() 150 } 151 return 152 } 153 154 func (t *ttfParser) ParseMaxp() (err error) { 155 err = t.Seek("maxp") 156 if err == nil { 157 t.Skip(4) 158 t.numGlyphs = t.ReadUShort() 159 } 160 return 161 } 162 163 func (t *ttfParser) ParseHmtx() (err error) { 164 err = t.Seek("hmtx") 165 if err == nil { 166 t.rec.Widths = make([]uint16, 0, 8) 167 for j := uint16(0); j < t.numberOfHMetrics; j++ { 168 t.rec.Widths = append(t.rec.Widths, t.ReadUShort()) 169 t.Skip(2) // lsb 170 } 171 if t.numberOfHMetrics < t.numGlyphs { 172 lastWidth := t.rec.Widths[t.numberOfHMetrics-1] 173 for j := t.numberOfHMetrics; j < t.numGlyphs; j++ { 174 t.rec.Widths = append(t.rec.Widths, lastWidth) 175 } 176 } 177 } 178 return 179 } 180 181 func (t *ttfParser) ParseCmap() (err error) { 182 var offset int64 183 if err = t.Seek("cmap"); err != nil { 184 return 185 } 186 t.Skip(2) // version 187 numTables := int(t.ReadUShort()) 188 offset31 := int64(0) 189 for j := 0; j < numTables; j++ { 190 platformID := t.ReadUShort() 191 encodingID := t.ReadUShort() 192 offset = int64(t.ReadULong()) 193 if platformID == 3 && encodingID == 1 { 194 offset31 = offset 195 } 196 } 197 if offset31 == 0 { 198 err = fmt.Errorf("no Unicode encoding found") 199 return 200 } 201 startCount := make([]uint16, 0, 8) 202 endCount := make([]uint16, 0, 8) 203 idDelta := make([]int16, 0, 8) 204 idRangeOffset := make([]uint16, 0, 8) 205 t.rec.Chars = make(map[uint16]uint16) 206 t.f.Seek(int64(t.tables["cmap"])+offset31, os.SEEK_SET) 207 format := t.ReadUShort() 208 if format != 4 { 209 err = fmt.Errorf("unexpected subtable format: %d", format) 210 return 211 } 212 t.Skip(2 * 2) // length, language 213 segCount := int(t.ReadUShort() / 2) 214 t.Skip(3 * 2) // searchRange, entrySelector, rangeShift 215 for j := 0; j < segCount; j++ { 216 endCount = append(endCount, t.ReadUShort()) 217 } 218 t.Skip(2) // reservedPad 219 for j := 0; j < segCount; j++ { 220 startCount = append(startCount, t.ReadUShort()) 221 
} 222 for j := 0; j < segCount; j++ { 223 idDelta = append(idDelta, t.ReadShort()) 224 } 225 offset, _ = t.f.Seek(int64(0), os.SEEK_CUR) 226 for j := 0; j < segCount; j++ { 227 idRangeOffset = append(idRangeOffset, t.ReadUShort()) 228 } 229 for j := 0; j < segCount; j++ { 230 c1 := startCount[j] 231 c2 := endCount[j] 232 d := idDelta[j] 233 ro := idRangeOffset[j] 234 if ro > 0 { 235 t.f.Seek(offset+2*int64(j)+int64(ro), os.SEEK_SET) 236 } 237 for c := c1; c <= c2; c++ { 238 if c == 0xFFFF { 239 break 240 } 241 var gid int32 242 if ro > 0 { 243 gid = int32(t.ReadUShort()) 244 if gid > 0 { 245 gid += int32(d) 246 } 247 } else { 248 gid = int32(c) + int32(d) 249 } 250 if gid >= 65536 { 251 gid -= 65536 252 } 253 if gid > 0 { 254 t.rec.Chars[c] = uint16(gid) 255 } 256 } 257 } 258 return 259 } 260 261 func (t *ttfParser) ParseName() (err error) { 262 err = t.Seek("name") 263 if err == nil { 264 tableOffset, _ := t.f.Seek(0, os.SEEK_CUR) 265 t.rec.PostScriptName = "" 266 t.Skip(2) // format 267 count := t.ReadUShort() 268 stringOffset := t.ReadUShort() 269 for j := uint16(0); j < count && t.rec.PostScriptName == ""; j++ { 270 t.Skip(3 * 2) // platformID, encodingID, languageID 271 nameID := t.ReadUShort() 272 length := t.ReadUShort() 273 offset := t.ReadUShort() 274 if nameID == 6 { 275 // PostScript name 276 t.f.Seek(int64(tableOffset)+int64(stringOffset)+int64(offset), os.SEEK_SET) 277 var s string 278 s, err = t.ReadStr(int(length)) 279 if err != nil { 280 return 281 } 282 s = strings.Replace(s, "\x00", "", -1) 283 var re *regexp.Regexp 284 if re, err = regexp.Compile("[(){}<> /%[\\]]"); err != nil { 285 return 286 } 287 t.rec.PostScriptName = re.ReplaceAllString(s, "") 288 } 289 } 290 if t.rec.PostScriptName == "" { 291 err = fmt.Errorf("the name PostScript was not found") 292 } 293 } 294 return 295 } 296 297 func (t *ttfParser) ParseOS2() (err error) { 298 err = t.Seek("OS/2") 299 if err == nil { 300 version := t.ReadUShort() 301 t.Skip(3 * 2) // xAvgCharWidth, 
usWeightClass, usWidthClass 302 fsType := t.ReadUShort() 303 t.rec.Embeddable = (fsType != 2) && (fsType&0x200) == 0 304 t.Skip(11*2 + 10 + 4*4 + 4) 305 fsSelection := t.ReadUShort() 306 t.rec.Bold = (fsSelection & 32) != 0 307 t.Skip(2 * 2) // usFirstCharIndex, usLastCharIndex 308 t.rec.TypoAscender = t.ReadShort() 309 t.rec.TypoDescender = t.ReadShort() 310 if version >= 2 { 311 t.Skip(3*2 + 2*4 + 2) 312 t.rec.CapHeight = t.ReadShort() 313 } else { 314 t.rec.CapHeight = 0 315 } 316 } 317 return 318 } 319 320 func (t *ttfParser) ParsePost() (err error) { 321 err = t.Seek("post") 322 if err == nil { 323 t.Skip(4) // version 324 t.rec.ItalicAngle = t.ReadShort() 325 t.Skip(2) // Skip decimal part 326 t.rec.UnderlinePosition = t.ReadShort() 327 t.rec.UnderlineThickness = t.ReadShort() 328 t.rec.IsFixedPitch = t.ReadULong() != 0 329 } 330 return 331 } 332 333 func (t *ttfParser) Seek(tag string) (err error) { 334 ofs, ok := t.tables[tag] 335 if ok { 336 t.f.Seek(int64(ofs), os.SEEK_SET) 337 } else { 338 err = fmt.Errorf("table not found: %s", tag) 339 } 340 return 341 } 342 343 func (t *ttfParser) Skip(n int) { 344 t.f.Seek(int64(n), os.SEEK_CUR) 345 } 346 347 func (t *ttfParser) ReadStr(length int) (str string, err error) { 348 var n int 349 buf := make([]byte, length) 350 n, err = t.f.Read(buf) 351 if err == nil { 352 if n == length { 353 str = string(buf) 354 } else { 355 err = fmt.Errorf("unable to read %d bytes", length) 356 } 357 } 358 return 359 } 360 361 func (t *ttfParser) ReadUShort() (val uint16) { 362 binary.Read(t.f, binary.BigEndian, &val) 363 return 364 } 365 366 func (t *ttfParser) ReadShort() (val int16) { 367 binary.Read(t.f, binary.BigEndian, &val) 368 return 369 } 370 371 func (t *ttfParser) ReadULong() (val uint32) { 372 binary.Read(t.f, binary.BigEndian, &val) 373 return 374 }