github.com/unidoc/unidoc@v2.2.0+incompatible/pdf/model/fonts/ttfparser.go (about)

     1  /*
     2   * Copyright (c) 2013 Kurt Jung (Gmail: kurt.w.jung)
     3   *
     4   * Permission to use, copy, modify, and distribute this software for any
     5   * purpose with or without fee is hereby granted, provided that the above
     6   * copyright notice and this permission notice appear in all copies.
     7   *
     8   * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
     9   * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
    10   * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
    11   * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    12   * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
    13   * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    14   * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
    15   */
    16  
    17  package fonts
    18  
    19  // Utility to parse TTF font files
    20  // Version:    1.0
    21  // Date:       2011-06-18
    22  // Author:     Olivier PLATHEY
    23  // Port to Go: Kurt Jung, 2013-07-15
    24  
    25  import (
    26  	"encoding/binary"
    27  	"fmt"
    28  	"os"
    29  	"regexp"
    30  	"strings"
    31  )
    32  
    33  // TtfType contains metrics of a TrueType font.
    34  type TtfType struct {
    35  	Embeddable             bool
    36  	UnitsPerEm             uint16
    37  	PostScriptName         string
    38  	Bold                   bool
    39  	ItalicAngle            int16
    40  	IsFixedPitch           bool
    41  	TypoAscender           int16
    42  	TypoDescender          int16
    43  	UnderlinePosition      int16
    44  	UnderlineThickness     int16
    45  	Xmin, Ymin, Xmax, Ymax int16
    46  	CapHeight              int16
    47  	Widths                 []uint16
    48  	Chars                  map[uint16]uint16
    49  }
    50  
    51  type ttfParser struct {
    52  	rec              TtfType
    53  	f                *os.File
    54  	tables           map[string]uint32
    55  	numberOfHMetrics uint16
    56  	numGlyphs        uint16
    57  }
    58  
    59  // TtfParse extracts various metrics from a TrueType font file.
    60  func TtfParse(fileStr string) (TtfRec TtfType, err error) {
    61  	var t ttfParser
    62  	t.f, err = os.Open(fileStr)
    63  	if err != nil {
    64  		return
    65  	}
    66  	version, err := t.ReadStr(4)
    67  	if err != nil {
    68  		return
    69  	}
    70  	if version == "OTTO" {
    71  		err = fmt.Errorf("fonts based on PostScript outlines are not supported")
    72  		return
    73  	}
    74  	if version != "\x00\x01\x00\x00" {
    75  		err = fmt.Errorf("unrecognized file format")
    76  		return
    77  	}
    78  	numTables := int(t.ReadUShort())
    79  	t.Skip(3 * 2) // searchRange, entrySelector, rangeShift
    80  	t.tables = make(map[string]uint32)
    81  	var tag string
    82  	for j := 0; j < numTables; j++ {
    83  		tag, err = t.ReadStr(4)
    84  		if err != nil {
    85  			return
    86  		}
    87  		t.Skip(4) // checkSum
    88  		offset := t.ReadULong()
    89  		t.Skip(4) // length
    90  		t.tables[tag] = offset
    91  	}
    92  	err = t.ParseComponents()
    93  	if err != nil {
    94  		return
    95  	}
    96  	t.f.Close()
    97  	TtfRec = t.rec
    98  	return
    99  }
   100  
   101  func (t *ttfParser) ParseComponents() (err error) {
   102  	err = t.ParseHead()
   103  	if err == nil {
   104  		err = t.ParseHhea()
   105  		if err == nil {
   106  			err = t.ParseMaxp()
   107  			if err == nil {
   108  				err = t.ParseHmtx()
   109  				if err == nil {
   110  					err = t.ParseCmap()
   111  					if err == nil {
   112  						err = t.ParseName()
   113  						if err == nil {
   114  							err = t.ParseOS2()
   115  							if err == nil {
   116  								err = t.ParsePost()
   117  							}
   118  						}
   119  					}
   120  				}
   121  			}
   122  		}
   123  	}
   124  	return
   125  }
   126  
   127  func (t *ttfParser) ParseHead() (err error) {
   128  	err = t.Seek("head")
   129  	t.Skip(3 * 4) // version, fontRevision, checkSumAdjustment
   130  	magicNumber := t.ReadULong()
   131  	if magicNumber != 0x5F0F3CF5 {
   132  		err = fmt.Errorf("incorrect magic number")
   133  		return
   134  	}
   135  	t.Skip(2) // flags
   136  	t.rec.UnitsPerEm = t.ReadUShort()
   137  	t.Skip(2 * 8) // created, modified
   138  	t.rec.Xmin = t.ReadShort()
   139  	t.rec.Ymin = t.ReadShort()
   140  	t.rec.Xmax = t.ReadShort()
   141  	t.rec.Ymax = t.ReadShort()
   142  	return
   143  }
   144  
   145  func (t *ttfParser) ParseHhea() (err error) {
   146  	err = t.Seek("hhea")
   147  	if err == nil {
   148  		t.Skip(4 + 15*2)
   149  		t.numberOfHMetrics = t.ReadUShort()
   150  	}
   151  	return
   152  }
   153  
   154  func (t *ttfParser) ParseMaxp() (err error) {
   155  	err = t.Seek("maxp")
   156  	if err == nil {
   157  		t.Skip(4)
   158  		t.numGlyphs = t.ReadUShort()
   159  	}
   160  	return
   161  }
   162  
   163  func (t *ttfParser) ParseHmtx() (err error) {
   164  	err = t.Seek("hmtx")
   165  	if err == nil {
   166  		t.rec.Widths = make([]uint16, 0, 8)
   167  		for j := uint16(0); j < t.numberOfHMetrics; j++ {
   168  			t.rec.Widths = append(t.rec.Widths, t.ReadUShort())
   169  			t.Skip(2) // lsb
   170  		}
   171  		if t.numberOfHMetrics < t.numGlyphs {
   172  			lastWidth := t.rec.Widths[t.numberOfHMetrics-1]
   173  			for j := t.numberOfHMetrics; j < t.numGlyphs; j++ {
   174  				t.rec.Widths = append(t.rec.Widths, lastWidth)
   175  			}
   176  		}
   177  	}
   178  	return
   179  }
   180  
   181  func (t *ttfParser) ParseCmap() (err error) {
   182  	var offset int64
   183  	if err = t.Seek("cmap"); err != nil {
   184  		return
   185  	}
   186  	t.Skip(2) // version
   187  	numTables := int(t.ReadUShort())
   188  	offset31 := int64(0)
   189  	for j := 0; j < numTables; j++ {
   190  		platformID := t.ReadUShort()
   191  		encodingID := t.ReadUShort()
   192  		offset = int64(t.ReadULong())
   193  		if platformID == 3 && encodingID == 1 {
   194  			offset31 = offset
   195  		}
   196  	}
   197  	if offset31 == 0 {
   198  		err = fmt.Errorf("no Unicode encoding found")
   199  		return
   200  	}
   201  	startCount := make([]uint16, 0, 8)
   202  	endCount := make([]uint16, 0, 8)
   203  	idDelta := make([]int16, 0, 8)
   204  	idRangeOffset := make([]uint16, 0, 8)
   205  	t.rec.Chars = make(map[uint16]uint16)
   206  	t.f.Seek(int64(t.tables["cmap"])+offset31, os.SEEK_SET)
   207  	format := t.ReadUShort()
   208  	if format != 4 {
   209  		err = fmt.Errorf("unexpected subtable format: %d", format)
   210  		return
   211  	}
   212  	t.Skip(2 * 2) // length, language
   213  	segCount := int(t.ReadUShort() / 2)
   214  	t.Skip(3 * 2) // searchRange, entrySelector, rangeShift
   215  	for j := 0; j < segCount; j++ {
   216  		endCount = append(endCount, t.ReadUShort())
   217  	}
   218  	t.Skip(2) // reservedPad
   219  	for j := 0; j < segCount; j++ {
   220  		startCount = append(startCount, t.ReadUShort())
   221  	}
   222  	for j := 0; j < segCount; j++ {
   223  		idDelta = append(idDelta, t.ReadShort())
   224  	}
   225  	offset, _ = t.f.Seek(int64(0), os.SEEK_CUR)
   226  	for j := 0; j < segCount; j++ {
   227  		idRangeOffset = append(idRangeOffset, t.ReadUShort())
   228  	}
   229  	for j := 0; j < segCount; j++ {
   230  		c1 := startCount[j]
   231  		c2 := endCount[j]
   232  		d := idDelta[j]
   233  		ro := idRangeOffset[j]
   234  		if ro > 0 {
   235  			t.f.Seek(offset+2*int64(j)+int64(ro), os.SEEK_SET)
   236  		}
   237  		for c := c1; c <= c2; c++ {
   238  			if c == 0xFFFF {
   239  				break
   240  			}
   241  			var gid int32
   242  			if ro > 0 {
   243  				gid = int32(t.ReadUShort())
   244  				if gid > 0 {
   245  					gid += int32(d)
   246  				}
   247  			} else {
   248  				gid = int32(c) + int32(d)
   249  			}
   250  			if gid >= 65536 {
   251  				gid -= 65536
   252  			}
   253  			if gid > 0 {
   254  				t.rec.Chars[c] = uint16(gid)
   255  			}
   256  		}
   257  	}
   258  	return
   259  }
   260  
   261  func (t *ttfParser) ParseName() (err error) {
   262  	err = t.Seek("name")
   263  	if err == nil {
   264  		tableOffset, _ := t.f.Seek(0, os.SEEK_CUR)
   265  		t.rec.PostScriptName = ""
   266  		t.Skip(2) // format
   267  		count := t.ReadUShort()
   268  		stringOffset := t.ReadUShort()
   269  		for j := uint16(0); j < count && t.rec.PostScriptName == ""; j++ {
   270  			t.Skip(3 * 2) // platformID, encodingID, languageID
   271  			nameID := t.ReadUShort()
   272  			length := t.ReadUShort()
   273  			offset := t.ReadUShort()
   274  			if nameID == 6 {
   275  				// PostScript name
   276  				t.f.Seek(int64(tableOffset)+int64(stringOffset)+int64(offset), os.SEEK_SET)
   277  				var s string
   278  				s, err = t.ReadStr(int(length))
   279  				if err != nil {
   280  					return
   281  				}
   282  				s = strings.Replace(s, "\x00", "", -1)
   283  				var re *regexp.Regexp
   284  				if re, err = regexp.Compile("[(){}<> /%[\\]]"); err != nil {
   285  					return
   286  				}
   287  				t.rec.PostScriptName = re.ReplaceAllString(s, "")
   288  			}
   289  		}
   290  		if t.rec.PostScriptName == "" {
   291  			err = fmt.Errorf("the name PostScript was not found")
   292  		}
   293  	}
   294  	return
   295  }
   296  
   297  func (t *ttfParser) ParseOS2() (err error) {
   298  	err = t.Seek("OS/2")
   299  	if err == nil {
   300  		version := t.ReadUShort()
   301  		t.Skip(3 * 2) // xAvgCharWidth, usWeightClass, usWidthClass
   302  		fsType := t.ReadUShort()
   303  		t.rec.Embeddable = (fsType != 2) && (fsType&0x200) == 0
   304  		t.Skip(11*2 + 10 + 4*4 + 4)
   305  		fsSelection := t.ReadUShort()
   306  		t.rec.Bold = (fsSelection & 32) != 0
   307  		t.Skip(2 * 2) // usFirstCharIndex, usLastCharIndex
   308  		t.rec.TypoAscender = t.ReadShort()
   309  		t.rec.TypoDescender = t.ReadShort()
   310  		if version >= 2 {
   311  			t.Skip(3*2 + 2*4 + 2)
   312  			t.rec.CapHeight = t.ReadShort()
   313  		} else {
   314  			t.rec.CapHeight = 0
   315  		}
   316  	}
   317  	return
   318  }
   319  
   320  func (t *ttfParser) ParsePost() (err error) {
   321  	err = t.Seek("post")
   322  	if err == nil {
   323  		t.Skip(4) // version
   324  		t.rec.ItalicAngle = t.ReadShort()
   325  		t.Skip(2) // Skip decimal part
   326  		t.rec.UnderlinePosition = t.ReadShort()
   327  		t.rec.UnderlineThickness = t.ReadShort()
   328  		t.rec.IsFixedPitch = t.ReadULong() != 0
   329  	}
   330  	return
   331  }
   332  
   333  func (t *ttfParser) Seek(tag string) (err error) {
   334  	ofs, ok := t.tables[tag]
   335  	if ok {
   336  		t.f.Seek(int64(ofs), os.SEEK_SET)
   337  	} else {
   338  		err = fmt.Errorf("table not found: %s", tag)
   339  	}
   340  	return
   341  }
   342  
   343  func (t *ttfParser) Skip(n int) {
   344  	t.f.Seek(int64(n), os.SEEK_CUR)
   345  }
   346  
   347  func (t *ttfParser) ReadStr(length int) (str string, err error) {
   348  	var n int
   349  	buf := make([]byte, length)
   350  	n, err = t.f.Read(buf)
   351  	if err == nil {
   352  		if n == length {
   353  			str = string(buf)
   354  		} else {
   355  			err = fmt.Errorf("unable to read %d bytes", length)
   356  		}
   357  	}
   358  	return
   359  }
   360  
   361  func (t *ttfParser) ReadUShort() (val uint16) {
   362  	binary.Read(t.f, binary.BigEndian, &val)
   363  	return
   364  }
   365  
   366  func (t *ttfParser) ReadShort() (val int16) {
   367  	binary.Read(t.f, binary.BigEndian, &val)
   368  	return
   369  }
   370  
   371  func (t *ttfParser) ReadULong() (val uint32) {
   372  	binary.Read(t.f, binary.BigEndian, &val)
   373  	return
   374  }