github.com/utopiagio/gio@v0.0.8/text/shaper.go (about)

     1  // SPDX-License-Identifier: Unlicense OR MIT
     2  
     3  package text
     4  
     5  import (
     6  	"bufio"
     7  	"io"
     8  	"strings"
     9  	"unicode/utf8"
    10  
    11  	giofont "github.com/utopiagio/gio/font"
    12  	"github.com/utopiagio/gio/io/system"
    13  	"github.com/utopiagio/gio/op"
    14  	"github.com/utopiagio/gio/op/clip"
    15  	"github.com/go-text/typesetting/font"
    16  	"golang.org/x/image/math/fixed"
    17  )
    18  
// WrapPolicy configures strategies for choosing where to break lines of text for line
// wrapping. The zero value is WrapHeuristically.
type WrapPolicy uint8
    22  
// The available wrapping policies.
const (
	// WrapHeuristically tries to minimize breaking within words (UAX#14 text segments)
	// while also ensuring that text fits within the given MaxWidth. It will only break
	// a line within a word (on a UAX#29 grapheme cluster boundary) when that word cannot
	// fit on a line by itself. Additionally, when the final word of a line is being
	// truncated, this policy will preserve as many symbols of that word as
	// possible before the truncator.
	WrapHeuristically WrapPolicy = iota
	// WrapWords does not permit words (UAX#14 text segments) to be broken across lines.
	// This means that sometimes long words will exceed the MaxWidth they are wrapped with.
	WrapWords
	// WrapGraphemes will maximize the amount of text on each line at the expense of readability,
	// breaking any word across lines on UAX#29 grapheme cluster boundaries to maximize the number of
	// grapheme clusters on each line.
	WrapGraphemes
)
    39  
// Parameters are static text shaping attributes applied to the entire shaped text.
type Parameters struct {
	// Font describes the preferred typeface.
	Font giofont.Font
	// Alignment characterizes the positioning of text within the line. It does not directly
	// impact shaping, but is provided in order to allow efficient offset computation.
	Alignment Alignment
	// PxPerEm is the pixels-per-em to shape the text with.
	PxPerEm fixed.Int26_6
	// MaxLines limits the quantity of shaped lines. Zero means no limit.
	MaxLines int
	// Truncator is a string of text to insert where the shaped text was truncated, which
	// can currently only happen if MaxLines is nonzero and the text on the final line is
	// truncated.
	Truncator string

	// WrapPolicy configures how line breaks will be chosen when wrapping text across lines.
	WrapPolicy WrapPolicy

	// MinWidth and MaxWidth provide the minimum and maximum horizontal space constraints
	// for the shaped text.
	MinWidth, MaxWidth int
	// Locale provides primary direction and language information for the shaped text.
	Locale system.Locale

	// LineHeightScale is a scaling factor applied to the LineHeight of a paragraph. If zero, a default
	// value of 1.2 will be used.
	LineHeightScale float32

	// LineHeight is the distance between the baselines of two lines of text. If zero, the PxPerEm
	// of any given paragraph will set the LineHeight of that paragraph. This value will be
	// scaled by LineHeightScale, so applications desiring a specific fixed value
	// should set LineHeightScale to 1.
	LineHeight fixed.Int26_6

	// forceTruncate controls whether the truncator string is inserted on the final line of
	// text with a MaxLines. It is unexported because this behavior only makes sense for the
	// shaper to control when it iterates paragraphs of text.
	forceTruncate bool
}
    80  
// FontFace is an alias for giofont.FontFace, made available under this
// package's name.
type FontFace = giofont.FontFace
    82  
// Glyph describes a shaped font glyph. Many fields are distances relative
// to the "dot", which is a point on the baseline (the line upon which glyphs
// visually rest) for the line of text containing the glyph.
//
// Glyphs are organized into "glyph clusters," which are sequences that
// may represent an arbitrary number of runes.
//
// Sequences of glyph clusters that share style parameters are grouped into "runs."
//
// "Document coordinates" are pixel values relative to the text's origin at (0,0)
// in the upper-left corner. Displaying each shaped glyph at the document
// coordinates of its dot will correctly visualize the text.
type Glyph struct {
	// ID is a unique, per-shaper identifier for the shape of the glyph.
	// Glyphs from the same shaper will share an ID when they are from
	// the same face and represent the same glyph at the same size.
	ID GlyphID

	// X is the x coordinate of the dot for this glyph in document coordinates.
	X fixed.Int26_6
	// Y is the y coordinate of the dot for this glyph in document coordinates.
	Y int32

	// Advance is the logical width of the glyph. The glyph may be visually
	// wider than this.
	Advance fixed.Int26_6
	// Ascent is the distance from the dot to the logical top of glyphs in
	// this glyph's face. The specific glyph may be shorter than this.
	Ascent fixed.Int26_6
	// Descent is the distance from the dot to the logical bottom of glyphs
	// in this glyph's face. The specific glyph may descend less than this.
	Descent fixed.Int26_6
	// Offset encodes the origin of the drawing coordinate space for this glyph
	// relative to the dot. This value is used when converting glyphs to paths.
	Offset fixed.Point26_6
	// Bounds encodes the visual dimensions of the glyph relative to the dot.
	Bounds fixed.Rectangle26_6
	// Runes is the number of runes represented by the glyph cluster this glyph
	// belongs to. If Flags does not contain FlagClusterBreak, this value will
	// always be zero. The final glyph in the cluster contains the runes count
	// for the entire cluster.
	Runes uint16
	// Flags encode special properties of this glyph.
	Flags Flags
}
   128  
// Flags is a bit set describing special properties of a Glyph. The individual
// bits are the Flag* constants.
type Flags uint16
   130  
// Each flag occupies its own bit, so flags may be combined with bitwise OR and
// tested with bitwise AND.
const (
	// FlagTowardOrigin is set for glyphs in runs that flow
	// towards the origin (RTL).
	FlagTowardOrigin Flags = 1 << iota
	// FlagLineBreak is set for the last glyph in a line.
	FlagLineBreak
	// FlagRunBreak is set for the last glyph in a run. A run is a sequence of
	// glyphs sharing constant style properties (same size, same face, same
	// direction, etc...).
	FlagRunBreak
	// FlagClusterBreak is set for the last glyph in a glyph cluster. A glyph cluster is a
	// sequence of glyphs which are logically a single unit, but require multiple
	// symbols from a font to display.
	FlagClusterBreak
	// FlagParagraphBreak indicates that the glyph cluster does not represent actual
	// font glyphs, but was inserted by the shaper to represent line-breaking
	// whitespace characters. After a glyph with FlagParagraphBreak set, the shaper
	// will always return a glyph with FlagParagraphStart providing the X and Y
	// coordinates of the start of the next line, even if that line has no contents.
	FlagParagraphBreak
	// FlagParagraphStart indicates that the glyph starts a new paragraph.
	FlagParagraphStart
	// FlagTruncator indicates that the glyph is part of a special truncator run that
	// represents the portion of text removed due to truncation. A glyph with both
	// FlagTruncator and FlagClusterBreak will have a Runes field accounting for all
	// runes truncated.
	FlagTruncator
)
   159  
   160  func (f Flags) String() string {
   161  	var b strings.Builder
   162  	if f&FlagParagraphStart != 0 {
   163  		b.WriteString("S")
   164  	} else {
   165  		b.WriteString("_")
   166  	}
   167  	if f&FlagParagraphBreak != 0 {
   168  		b.WriteString("P")
   169  	} else {
   170  		b.WriteString("_")
   171  	}
   172  	if f&FlagTowardOrigin != 0 {
   173  		b.WriteString("T")
   174  	} else {
   175  		b.WriteString("_")
   176  	}
   177  	if f&FlagLineBreak != 0 {
   178  		b.WriteString("L")
   179  	} else {
   180  		b.WriteString("_")
   181  	}
   182  	if f&FlagRunBreak != 0 {
   183  		b.WriteString("R")
   184  	} else {
   185  		b.WriteString("_")
   186  	}
   187  	if f&FlagClusterBreak != 0 {
   188  		b.WriteString("C")
   189  	} else {
   190  		b.WriteString("_")
   191  	}
   192  	if f&FlagTruncator != 0 {
   193  		b.WriteString("…")
   194  	} else {
   195  		b.WriteString("_")
   196  	}
   197  	return b.String()
   198  }
   199  
// GlyphID identifies the shape of a glyph within a shaper. It packs a face
// index, a pixels-per-em size and a font glyph id into a single 64-bit value
// (see newGlyphID and splitGlyphID).
type GlyphID uint64
   201  
// Shaper converts strings of text into glyphs that can be displayed.
type Shaper struct {
	// config holds the settings applied by ShaperOptions before init runs.
	config struct {
		// disableSystemFonts is set by the NoSystemFonts option.
		disableSystemFonts bool
		// collection is the set of faces supplied by the WithCollection option.
		collection []FontFace
	}
	// initialized records whether init has already run.
	initialized bool
	// shaper is the underlying shaping implementation created by init.
	shaper shaperImpl
	// pathCache caches the results of Shape.
	pathCache pathCache
	// bitmapShapeCache caches the results of Bitmaps.
	bitmapShapeCache bitmapShapeCache
	// layoutCache caches the results of layoutParagraph.
	layoutCache layoutCache

	// reader buffers the io.Reader handed to Layout.
	reader *bufio.Reader
	// paragraph is a reusable buffer holding the bytes of the paragraph
	// currently being read from reader.
	paragraph []byte

	// Iterator state.

	// brokeParagraph is set after NextGlyph returns a glyph with
	// FlagParagraphBreak, so that the next returned glyph is marked with
	// FlagParagraphStart.
	brokeParagraph bool
	// pararagraphStart is the synthetic glyph returned after a paragraph break
	// that is also the final glyph of the text.
	pararagraphStart Glyph
	// txt is the document accumulated by the most recent layout operation.
	txt document
	// line, run and glyph index the next glyph to return within txt.
	line  int
	run   int
	glyph int
	// advance is the width of glyphs from the current run that have already been displayed.
	advance fixed.Int26_6
	// done tracks whether iteration is over.
	done bool
	// err is set to io.EOF by NextGlyph once iteration runs past the last line.
	err error
}
   230  
// ShaperOption configures a text shaper. Options are applied by NewShaper
// before the shaper is initialized.
type ShaperOption func(*Shaper)
   233  
   234  // NoSystemFonts can be used to disable system font loading.
   235  func NoSystemFonts() ShaperOption {
   236  	return func(s *Shaper) {
   237  		s.config.disableSystemFonts = true
   238  	}
   239  }
   240  
   241  // WithCollection can be used to provide a collection of pre-loaded fonts to the shaper.
   242  func WithCollection(collection []FontFace) ShaperOption {
   243  	return func(s *Shaper) {
   244  		s.config.collection = collection
   245  	}
   246  }
   247  
   248  // NewShaper constructs a shaper with the provided options.
   249  //
   250  // NewShaper must be called after [app.NewWindow], unless the [NoSystemFonts]
   251  // option is specified. This is an unfortunate restriction caused by some platforms
   252  // such as Android.
   253  func NewShaper(options ...ShaperOption) *Shaper {
   254  	l := &Shaper{}
   255  	for _, opt := range options {
   256  		opt(l)
   257  	}
   258  	l.init()
   259  	return l
   260  }
   261  
   262  func (l *Shaper) init() {
   263  	if l.initialized {
   264  		return
   265  	}
   266  	l.initialized = true
   267  	l.reader = bufio.NewReader(nil)
   268  	l.shaper = *newShaperImpl(!l.config.disableSystemFonts, l.config.collection)
   269  }
   270  
// Layout text from an io.Reader according to a set of options. Results can be retrieved by
// iteratively calling NextGlyph. Calling Layout discards the results of any previous
// layout operation on this shaper.
func (l *Shaper) Layout(params Parameters, txt io.Reader) {
	// Ensure a zero-value Shaper is usable.
	l.init()
	l.layoutText(params, txt, "")
}
   277  
// LayoutString is Layout for strings. Results can be retrieved by iteratively
// calling NextGlyph.
func (l *Shaper) LayoutString(params Parameters, str string) {
	// Ensure a zero-value Shaper is usable.
	l.init()
	l.layoutText(params, nil, str)
}
   283  
   284  func (l *Shaper) reset(align Alignment) {
   285  	l.line, l.run, l.glyph, l.advance = 0, 0, 0, 0
   286  	l.done = false
   287  	l.txt.reset()
   288  	l.txt.alignment = align
   289  }
   290  
// layoutText lays out a large text document by breaking it into paragraphs and laying
// out each of them separately. This allows the shaping results to be cached independently
// by paragraph. Only one of txt and str should be provided.
//
// Paragraphs are delimited by '\n' bytes; the newline is kept as part of the
// paragraph it terminates. When params.MaxLines is positive, the remaining
// line budget is reduced after every paragraph and processing stops once it
// reaches zero, with the number of runes that were never shaped recorded in
// l.txt.unreadRuneCount.
func (l *Shaper) layoutText(params Parameters, txt io.Reader, str string) {
	l.reset(params.Alignment)
	if txt == nil && len(str) == 0 {
		// No input at all: shape a single empty paragraph.
		l.txt.append(l.layoutParagraph(params, "", nil))
		return
	}
	l.reader.Reset(txt)
	truncating := params.MaxLines > 0
	var done bool
	// endByte is the length in bytes of the current paragraph within str. It
	// stays zero when reading from txt.
	var endByte int
	for !done {
		l.paragraph = l.paragraph[:0]
		if txt != nil {
			// Accumulate bytes up to and including the next newline.
			for {
				b, err := l.reader.ReadByte()
				if err != nil {
					// EOF or any other error ends processing here.
					done = true
					break
				}
				l.paragraph = append(l.paragraph, b)
				if b == '\n' {
					break
				}
			}
			if !done {
				// Peek one byte to learn whether the newline just read was
				// the last byte of the input.
				_, re := l.reader.ReadByte()
				done = re != nil
				if !done {
					_ = l.reader.UnreadByte()
				}
			}
		} else {
			idx := strings.IndexByte(str, '\n')
			if idx == -1 {
				// No newline left: the rest of str is the final paragraph.
				done = true
				endByte = len(str)
			} else {
				endByte = idx + 1
				done = endByte == len(str)
			}
		}
		// Shape the paragraph if it has any content, or if nothing has been
		// appended to the document yet.
		if len(str[:endByte]) > 0 || (len(l.paragraph) > 0 || len(l.txt.lines) == 0) {
			// More text follows this paragraph, so if it consumes the last
			// permitted line the truncator must be shown.
			params.forceTruncate = truncating && !done
			lines := l.layoutParagraph(params, str[:endByte], l.paragraph)
			if truncating {
				params.MaxLines -= len(lines.lines)
				if params.MaxLines == 0 {
					done = true
					// We've truncated the text, but we need to account for all of the runes we never
					// decoded in the truncator.
					var unreadRunes int
					if txt == nil {
						unreadRunes = utf8.RuneCountInString(str[endByte:])
					} else {
						// Drain the reader, counting the runes that remain.
						for {
							_, _, e := l.reader.ReadRune()
							if e != nil {
								break
							}
							unreadRunes++
						}
					}
					l.txt.unreadRuneCount = unreadRunes
				}
			}
			l.txt.append(lines)
		}
		if done {
			return
		}
		// Advance past the paragraph that was just processed.
		str = str[endByte:]
	}
}
   368  
   369  // layoutParagraph shapes and wraps a paragraph using the provided parameters.
   370  // It accepts the paragraph data in either string or rune format, preferring the
   371  // string in order to hit the shaper cache more quickly.
   372  func (l *Shaper) layoutParagraph(params Parameters, asStr string, asBytes []byte) document {
   373  	if l == nil {
   374  		return document{}
   375  	}
   376  	if len(asStr) == 0 && len(asBytes) > 0 {
   377  		asStr = string(asBytes)
   378  	}
   379  	// Alignment is not part of the cache key because changing it does not impact shaping.
   380  	lk := layoutKey{
   381  		ppem:            params.PxPerEm,
   382  		maxWidth:        params.MaxWidth,
   383  		minWidth:        params.MinWidth,
   384  		maxLines:        params.MaxLines,
   385  		truncator:       params.Truncator,
   386  		locale:          params.Locale,
   387  		font:            params.Font,
   388  		forceTruncate:   params.forceTruncate,
   389  		wrapPolicy:      params.WrapPolicy,
   390  		str:             asStr,
   391  		lineHeight:      params.LineHeight,
   392  		lineHeightScale: params.LineHeightScale,
   393  	}
   394  	if l, ok := l.layoutCache.Get(lk); ok {
   395  		return l
   396  	}
   397  	lines := l.shaper.LayoutRunes(params, []rune(asStr))
   398  	l.layoutCache.Put(lk, lines)
   399  	return lines
   400  }
   401  
// NextGlyph returns the next glyph from the most recent shaping operation, if
// any. If there are no more glyphs, ok will be false.
//
// The iterator walks the shaped document line by line, run by run and glyph
// by glyph, converting each shaped glyph into a Glyph positioned in document
// coordinates and annotated with the flags describing its role (end of run,
// line or cluster, paragraph boundaries, truncator, direction).
func (l *Shaper) NextGlyph() (_ Glyph, ok bool) {
	l.init()
	if l.done {
		return Glyph{}, false
	}
	for {
		if l.line == len(l.txt.lines) {
			// Past the last line. If the text ended with a paragraph break,
			// emit the pending synthetic paragraph-start glyph exactly once.
			if l.brokeParagraph {
				l.brokeParagraph = false
				return l.pararagraphStart, true
			}
			if l.err == nil {
				l.err = io.EOF
			}
			return Glyph{}, false
		}
		line := l.txt.lines[l.line]
		if l.run == len(line.runs) {
			// Exhausted this line; move to the first run of the next one.
			l.line++
			l.run = 0
			continue
		}
		run := line.runs[l.run]
		// align is the horizontal offset of this line for the document's alignment.
		align := l.txt.alignment.Align(line.direction, line.width, l.txt.alignWidth)
		if l.line == 0 && l.run == 0 && len(run.Glyphs) == 0 {
			// The very first run is empty, which will only happen when the
			// entire text is a shaped empty string. Return a single synthetic
			// glyph to provide ascent/descent information to the caller.
			l.done = true
			return Glyph{
				X:       align,
				Y:       int32(line.yOffset),
				Runes:   0,
				Flags:   FlagLineBreak | FlagClusterBreak | FlagRunBreak,
				Ascent:  line.ascent,
				Descent: line.descent,
			}, true
		}
		if l.glyph == len(run.Glyphs) {
			// Exhausted this run; move to the next one and restart the
			// per-run advance.
			l.run++
			l.glyph = 0
			l.advance = 0
			continue
		}
		glyphIdx := l.glyph
		rtl := run.Direction.Progression() == system.TowardOrigin
		if rtl {
			// If RTL, traverse glyphs backwards to ensure rune order.
			glyphIdx = len(run.Glyphs) - 1 - glyphIdx
		}
		g := run.Glyphs[glyphIdx]
		if rtl {
			// Modify the advance prior to computing runOffset to ensure that the
			// current glyph's width is subtracted in RTL.
			l.advance += g.xAdvance
		}
		// runOffset computes how far into the run the dot should be positioned.
		runOffset := l.advance
		if rtl {
			runOffset = run.Advance - l.advance
		}
		glyph := Glyph{
			ID:      g.id,
			X:       align + run.X + runOffset,
			Y:       int32(line.yOffset),
			Ascent:  line.ascent,
			Descent: line.descent,
			Advance: g.xAdvance,
			Runes:   uint16(g.runeCount),
			Offset: fixed.Point26_6{
				X: g.xOffset,
				Y: g.yOffset,
			},
			Bounds: g.bounds,
		}
		if run.truncator {
			glyph.Flags |= FlagTruncator
		}
		// From here on l.glyph refers to the glyph after the one being returned.
		l.glyph++
		if !rtl {
			// In LTR the advance grows after the glyph has been positioned.
			l.advance += g.xAdvance
		}

		endOfRun := l.glyph == len(run.Glyphs)
		if endOfRun {
			glyph.Flags |= FlagRunBreak
		}
		endOfLine := endOfRun && l.run == len(line.runs)-1
		if endOfLine {
			glyph.Flags |= FlagLineBreak
		}
		endOfText := endOfLine && l.line == len(l.txt.lines)-1
		// nextGlyph is the index within run.Glyphs of the glyph that follows
		// in iteration order. It is only dereferenced when !endOfRun.
		nextGlyph := l.glyph
		if rtl {
			nextGlyph = len(run.Glyphs) - 1 - nextGlyph
		}
		endOfCluster := endOfRun || run.Glyphs[nextGlyph].clusterIndex != g.clusterIndex
		if run.truncator {
			// Only emit a single cluster for the entire truncator sequence.
			endOfCluster = endOfRun
		}
		if endOfCluster {
			glyph.Flags |= FlagClusterBreak
			if run.truncator {
				// The truncator cluster also stands in for the runes that
				// were never shaped.
				glyph.Runes += uint16(l.txt.unreadRuneCount)
			}
		} else {
			// Only the final glyph of a cluster reports the rune count.
			glyph.Runes = 0
		}
		if run.Direction.Progression() == system.TowardOrigin {
			glyph.Flags |= FlagTowardOrigin
		}
		if l.brokeParagraph {
			// The previous glyph ended a paragraph, so this one starts the next.
			glyph.Flags |= FlagParagraphStart
			l.brokeParagraph = false
		}
		if g.glyphCount == 0 {
			// A shaped glyph with a zero glyph count is treated as a
			// paragraph break.
			glyph.Flags |= FlagParagraphBreak
			l.brokeParagraph = true
			if endOfText {
				l.pararagraphStart = Glyph{
					Ascent:  glyph.Ascent,
					Descent: glyph.Descent,
					Flags:   FlagParagraphStart | FlagLineBreak | FlagRunBreak | FlagClusterBreak,
				}
				// If a glyph is both a paragraph break and the final glyph, it's a newline
				// at the end of the text. We must inform widgets like the text editor
				// of a valid cursor position they can use for "after" such a newline,
				// taking text alignment into account.
				l.pararagraphStart.X = l.txt.alignment.Align(line.direction, 0, l.txt.alignWidth)
				l.pararagraphStart.Y = glyph.Y + int32((glyph.Ascent + glyph.Descent).Ceil())
			}
		}
		return glyph, true
	}
}
   540  
// Bit widths of the fields packed into a GlyphID. From most to least
// significant: the face index, the pixels-per-em size, and the font glyph id,
// which takes whatever remains of the 64 bits.
const (
	facebits = 16
	sizebits = 16
	gidbits  = 64 - facebits - sizebits
)
   546  
   547  // newGlyphID encodes a face and a glyph id into a GlyphID.
   548  func newGlyphID(ppem fixed.Int26_6, faceIdx int, gid font.GID) GlyphID {
   549  	if gid&^((1<<gidbits)-1) != 0 {
   550  		panic("glyph id out of bounds")
   551  	}
   552  	if faceIdx&^((1<<facebits)-1) != 0 {
   553  		panic("face index out of bounds")
   554  	}
   555  	if ppem&^((1<<sizebits)-1) != 0 {
   556  		panic("ppem out of bounds")
   557  	}
   558  	// Mask off the upper 16 bits of ppem. This still allows values up to
   559  	// 1023.
   560  	ppem &= ((1 << sizebits) - 1)
   561  	return GlyphID(faceIdx)<<(gidbits+sizebits) | GlyphID(ppem)<<(gidbits) | GlyphID(gid)
   562  }
   563  
   564  // splitGlyphID is the opposite of newGlyphID.
   565  func splitGlyphID(g GlyphID) (fixed.Int26_6, int, font.GID) {
   566  	faceIdx := int(uint64(g) >> (gidbits + sizebits))
   567  	ppem := fixed.Int26_6((g & ((1<<sizebits - 1) << gidbits)) >> gidbits)
   568  	gid := font.GID(g) & (1<<gidbits - 1)
   569  	return ppem, faceIdx, gid
   570  }
   571  
   572  // Shape converts the provided glyphs into a path. The path will enclose the forms
   573  // of all vector glyphs.
   574  // All glyphs are expected to be from a single line of text (their Y offsets are ignored).
   575  func (l *Shaper) Shape(gs []Glyph) clip.PathSpec {
   576  	l.init()
   577  	key := l.pathCache.hashGlyphs(gs)
   578  	shape, ok := l.pathCache.Get(key, gs)
   579  	if ok {
   580  		return shape
   581  	}
   582  	pathOps := new(op.Ops)
   583  	shape = l.shaper.Shape(pathOps, gs)
   584  	l.pathCache.Put(key, gs, shape)
   585  	return shape
   586  }
   587  
   588  // Bitmaps extracts bitmap glyphs from the provided slice and creates an op.CallOp to present
   589  // them. The returned op.CallOp will align correctly with the return value of Shape() for the
   590  // same gs slice.
   591  // All glyphs are expected to be from a single line of text (their Y offsets are ignored).
   592  func (l *Shaper) Bitmaps(gs []Glyph) op.CallOp {
   593  	l.init()
   594  	key := l.bitmapShapeCache.hashGlyphs(gs)
   595  	call, ok := l.bitmapShapeCache.Get(key, gs)
   596  	if ok {
   597  		return call
   598  	}
   599  	callOps := new(op.Ops)
   600  	call = l.shaper.Bitmaps(callOps, gs)
   601  	l.bitmapShapeCache.Put(key, gs, call)
   602  	return call
   603  }