github.com/andybalholm/giopdf@v0.0.0-20220317170119-aad9a095ad48/cff/cff.go (about)

     1  // Package cff provides a parser for the CFF font format
     2  // defined at https://www.adobe.com/content/dam/acom/en/devnet/font/pdfs/5176.CFF.pdf.
     3  package cff
     4  
     5  import (
     6  	"errors"
     7  	"io"
     8  	"io/ioutil"
     9  	"strings"
    10  
    11  	"github.com/benoitkugler/textlayout/fonts"
    12  	"github.com/benoitkugler/textlayout/fonts/glyphsnames"
    13  	"github.com/benoitkugler/textlayout/fonts/simpleencodings"
    14  )
    15  
// loader is an empty struct type; nothing in this part of the file uses it.
// NOTE(review): presumably it implements a font-loader interface elsewhere
// in the package — confirm against the rest of the package.
type loader struct{}
    17  
    18  // Load reads standalone .cff font files and may
    19  // return multiple fonts.
    20  func Load(file fonts.Resource) ([]Font, error) {
    21  	return parse(file)
    22  }
    23  
// Font represents a parsed CFF font.
//
// Glyph names are resolved by looking up `charset[gid]` in `userStrings`
// (see GlyphName); when `fdSelect` is set, GlyphName reports no names.
// The embedded fonts.PSInfo carries the PostScript naming fields
// (FontName, FamilyName, FullName, Weight, ItalicAngle) that PoscriptName
// and getStyle read.
type Font struct {
	userStrings userStrings
	fdSelect    fdSelect // only valid for CIDFonts
	charset     []uint16 // indexed by glyph ID
	Encoding    *simpleencodings.Encoding
	FontMatrix  []float32

	cmap fonts.CmapSimple // see synthetizeCmap

	cidFontName string   // used by getStyle as family name when no other name is available
	charstrings [][]byte // indexed by glyph ID
	fontName    []byte   // name from the Name INDEX
	globalSubrs [][]byte
	// array of length 1 for non CIDFonts
	// For CIDFonts, it can be safely indexed by `fdSelect` output
	localSubrs [][][]byte
	priv       []privateDict
	fonts.PSInfo
}
    44  
    45  // Parse parse a .cff font file.
    46  // Although CFF enables multiple font or CIDFont programs to be bundled together in a
    47  // single file, embedded CFF font file in PDF or in TrueType/OpenType fonts
    48  // shall consist of exactly one font or CIDFont. Thus, this function
    49  // returns an error if the file contains more than one font.
    50  // See Loader to read standalone .cff files
    51  func Parse(file fonts.Resource) (*Font, error) {
    52  	fonts, err := parse(file)
    53  	if err != nil {
    54  		return nil, err
    55  	}
    56  	if len(fonts) != 1 {
    57  		return nil, errors.New("only one CFF font is allowed in embedded files")
    58  	}
    59  	return &fonts[0], nil
    60  }
    61  
    62  func parse(file fonts.Resource) ([]Font, error) {
    63  	_, err := file.Seek(0, io.SeekStart) // file might have been used before
    64  	if err != nil {
    65  		return nil, err
    66  	}
    67  	// read 4 bytes to check if its a supported CFF file
    68  	var buf [4]byte
    69  	file.Read(buf[:])
    70  	if buf[0] != 1 || buf[1] != 0 || buf[2] != 4 {
    71  		return nil, errUnsupportedCFFVersion
    72  	}
    73  	file.Seek(0, io.SeekStart)
    74  
    75  	// if this is really needed, we can modify the parser to directly use `file`
    76  	// without reading all in memory
    77  	input, err := ioutil.ReadAll(file)
    78  	if err != nil {
    79  		return nil, err
    80  	}
    81  	p := cffParser{src: input}
    82  	p.skip(4)
    83  	return p.parse()
    84  }
    85  
    86  // Type1 fonts have no natural notion of Unicode code points
    87  // We use a glyph names table to identify the most commonly used runes
    88  func (f *Font) synthetizeCmap() {
    89  	f.cmap = make(map[rune]fonts.GID)
    90  	for gid := range f.charstrings {
    91  		glyphName := f.GlyphName(fonts.GID(gid))
    92  		r, _ := glyphsnames.GlyphToRune(glyphName)
    93  		f.cmap[r] = fonts.GID(gid)
    94  	}
    95  }
    96  
    97  func (f *Font) Cmap() (fonts.Cmap, fonts.CmapEncoding) {
    98  	return f.cmap, fonts.EncUnicode
    99  }
   100  
   101  // GlyphName returns the name of the glyph or an empty string if not found.
   102  func (f *Font) GlyphName(glyph fonts.GID) string {
   103  	if f.fdSelect != nil || int(glyph) >= len(f.charset) {
   104  		return ""
   105  	}
   106  	out, _ := f.userStrings.getString(f.charset[glyph])
   107  	return out
   108  }
   109  
   110  // NumGlyphs returns the number of glyphs in this font.
   111  // It is also the maximum glyph index + 1.
   112  func (f *Font) NumGlyphs() int { return len(f.charstrings) }
   113  
   114  func (f *Font) PostscriptInfo() (fonts.PSInfo, bool) { return f.PSInfo, true }
   115  
   116  func (f *Font) PoscriptName() string { return f.PSInfo.FontName }
   117  
   118  // Strip all subset prefixes of the form `ABCDEF+'.  Usually, there
   119  // is only one, but font names like `APCOOG+JFABTD+FuturaBQ-Bold'
   120  // have been seen in the wild.
   121  func removeSubsetPrefix(name []byte) []byte {
   122  	for keep := true; keep; {
   123  		if len(name) >= 7 && name[6] == '+' {
   124  			for idx := 0; idx < 6; idx++ {
   125  				/* ASCII uppercase letters */
   126  				if !('A' <= name[idx] && name[idx] <= 'Z') {
   127  					keep = false
   128  				}
   129  			}
   130  			if keep {
   131  				name = name[7:]
   132  			}
   133  		} else {
   134  			keep = false
   135  		}
   136  	}
   137  	return name
   138  }
   139  
   140  // remove the style part from the family name (if present).
   141  func removeStyle(familyName, styleName string) string {
   142  	if lF, lS := len(familyName), len(styleName); lF > lS {
   143  		idx := 1
   144  		for ; idx <= len(styleName); idx++ {
   145  			if familyName[lF-idx] != styleName[lS-idx] {
   146  				break
   147  			}
   148  		}
   149  
   150  		if idx > lS {
   151  			// familyName ends with styleName; remove it
   152  			idx = lF - lS - 1
   153  
   154  			// also remove special characters
   155  			// between real family name and style
   156  			for idx > 0 &&
   157  				(familyName[idx] == '-' || familyName[idx] == ' ' ||
   158  					familyName[idx] == '_' || familyName[idx] == '+') {
   159  				idx--
   160  			}
   161  
   162  			if idx > 0 {
   163  				familyName = familyName[:idx+1]
   164  			}
   165  		}
   166  	}
   167  	return familyName
   168  }
   169  
   170  func (f *Font) getStyle() (isItalic, isBold bool, familyName, styleName string) {
   171  	// adapted from freetype/src/cff/cffobjs.c
   172  
   173  	// retrieve font family & style name
   174  	familyName = f.PSInfo.FamilyName
   175  	if familyName == "" {
   176  		familyName = string(removeSubsetPrefix(f.fontName))
   177  	}
   178  	if familyName != "" {
   179  		full := f.PSInfo.FullName
   180  
   181  		// We try to extract the style name from the full name.
   182  		// We need to ignore spaces and dashes during the search.
   183  		for i, j := 0, 0; i < len(full); {
   184  			// skip common characters at the start of both strings
   185  			if full[i] == familyName[j] {
   186  				i++
   187  				j++
   188  				continue
   189  			}
   190  
   191  			// ignore spaces and dashes in full name during comparison
   192  			if full[i] == ' ' || full[i] == '-' {
   193  				i++
   194  				continue
   195  			}
   196  
   197  			// ignore spaces and dashes in family name during comparison
   198  			if familyName[j] == ' ' || familyName[j] == '-' {
   199  				j++
   200  				continue
   201  			}
   202  
   203  			if j == len(familyName) && i < len(full) {
   204  				/* The full name begins with the same characters as the  */
   205  				/* family name, with spaces and dashes removed.  In this */
   206  				/* case, the remaining string in `full' will be used as */
   207  				/* the style name.                                       */
   208  				styleName = full[i:]
   209  
   210  				/* remove the style part from the family name (if present) */
   211  				familyName = removeStyle(familyName, styleName)
   212  			}
   213  			break
   214  		}
   215  	} else {
   216  		// do we have a `/FontName' for a CID-keyed font?
   217  		familyName = f.cidFontName
   218  	}
   219  
   220  	styleName = strings.TrimSpace(styleName)
   221  	if styleName == "" {
   222  		// assume "Regular" style if we don't know better
   223  		styleName = "Regular"
   224  	}
   225  
   226  	isItalic = f.PSInfo.ItalicAngle != 0
   227  	isBold = f.PSInfo.Weight == "Bold" || f.PSInfo.Weight == "Black"
   228  
   229  	// double check
   230  	if !isBold {
   231  		isBold = strings.HasPrefix(styleName, "Bold") || strings.HasPrefix(styleName, "Black")
   232  	}
   233  	return
   234  }
   235  
   236  func (f *Font) LoadSummary() (fonts.FontSummary, error) {
   237  	isItalic, isBold, familyName, styleName := f.getStyle()
   238  	return fonts.FontSummary{
   239  		IsItalic:          isItalic,
   240  		IsBold:            isBold,
   241  		Familly:           familyName,
   242  		Style:             styleName,
   243  		HasScalableGlyphs: true,
   244  		HasBitmapGlyphs:   false,
   245  		HasColorGlyphs:    false,
   246  	}, nil
   247  }