// SPDX-License-Identifier: Unlicense OR MIT
//
// Source: github.com/utopiagio/gio@v0.0.8/text/shaper.go

package text

import (
	"bufio"
	"io"
	"strings"
	"unicode/utf8"

	giofont "github.com/utopiagio/gio/font"
	"github.com/utopiagio/gio/io/system"
	"github.com/utopiagio/gio/op"
	"github.com/utopiagio/gio/op/clip"
	"github.com/go-text/typesetting/font"
	"golang.org/x/image/math/fixed"
)

// WrapPolicy configures strategies for choosing where to break lines of text for line
// wrapping.
type WrapPolicy uint8

const (
	// WrapHeuristically tries to minimize breaking within words (UAX#14 text segments)
	// while also ensuring that text fits within the given MaxWidth. It will only break
	// a line within a word (on a UAX#29 grapheme cluster boundary) when that word cannot
	// fit on a line by itself. Additionally, when the final word of a line is being
	// truncated, this policy will preserve as many symbols of that word as
	// possible before the truncator.
	WrapHeuristically WrapPolicy = iota
	// WrapWords does not permit words (UAX#14 text segments) to be broken across lines.
	// This means that sometimes long words will exceed the MaxWidth they are wrapped with.
	WrapWords
	// WrapGraphemes will maximize the amount of text on each line at the expense of readability,
	// breaking any word across lines on UAX#29 grapheme cluster boundaries to maximize the number of
	// grapheme clusters on each line.
	WrapGraphemes
)

// Parameters are static text shaping attributes applied to the entire shaped text.
type Parameters struct {
	// Font describes the preferred typeface.
	Font giofont.Font
	// Alignment characterizes the positioning of text within the line. It does not directly
	// impact shaping, but is provided in order to allow efficient offset computation.
	Alignment Alignment
	// PxPerEm is the pixels-per-em to shape the text with.
	PxPerEm fixed.Int26_6
	// MaxLines limits the quantity of shaped lines. Zero means no limit.
	MaxLines int
	// Truncator is a string of text to insert where the shaped text was truncated, which
	// can currently only happen if MaxLines is nonzero and the text on the final line is
	// truncated.
	Truncator string

	// WrapPolicy configures how line breaks will be chosen when wrapping text across lines.
	WrapPolicy WrapPolicy

	// MinWidth and MaxWidth provide the minimum and maximum horizontal space constraints
	// for the shaped text.
	MinWidth, MaxWidth int
	// Locale provides primary direction and language information for the shaped text.
	Locale system.Locale

	// LineHeightScale is a scaling factor applied to the LineHeight of a paragraph. If zero, a default
	// value of 1.2 will be used.
	LineHeightScale float32

	// LineHeight is the distance between the baselines of two lines of text. If zero, the PxPerEm
	// of any given paragraph will set the LineHeight of that paragraph. This value will be
	// scaled by LineHeightScale, so applications desiring a specific fixed value
	// should set LineHeightScale to 1.
	LineHeight fixed.Int26_6

	// forceTruncate controls whether the truncator string is inserted on the final line of
	// text with a MaxLines. It is unexported because this behavior only makes sense for the
	// shaper to control when it iterates paragraphs of text.
	forceTruncate bool
}

// FontFace is an alias for giofont.FontFace, re-exported so that callers of this
// package can name the type without importing the font package.
type FontFace = giofont.FontFace

// Glyph describes a shaped font glyph. Many fields are distances relative
// to the "dot", which is a point on the baseline (the line upon which glyphs
// visually rest) for the line of text containing the glyph.
//
// Glyphs are organized into "glyph clusters," which are sequences that
// may represent an arbitrary number of runes.
//
// Sequences of glyph clusters that share style parameters are grouped into "runs."
91 // 92 // "Document coordinates" are pixel values relative to the text's origin at (0,0) 93 // in the upper-left corner" Displaying each shaped glyph at the document 94 // coordinates of its dot will correctly visualize the text. 95 type Glyph struct { 96 // ID is a unique, per-shaper identifier for the shape of the glyph. 97 // Glyphs from the same shaper will share an ID when they are from 98 // the same face and represent the same glyph at the same size. 99 ID GlyphID 100 101 // X is the x coordinate of the dot for this glyph in document coordinates. 102 X fixed.Int26_6 103 // Y is the y coordinate of the dot for this glyph in document coordinates. 104 Y int32 105 106 // Advance is the logical width of the glyph. The glyph may be visually 107 // wider than this. 108 Advance fixed.Int26_6 109 // Ascent is the distance from the dot to the logical top of glyphs in 110 // this glyph's face. The specific glyph may be shorter than this. 111 Ascent fixed.Int26_6 112 // Descent is the distance from the dot to the logical bottom of glyphs 113 // in this glyph's face. The specific glyph may descend less than this. 114 Descent fixed.Int26_6 115 // Offset encodes the origin of the drawing coordinate space for this glyph 116 // relative to the dot. This value is used when converting glyphs to paths. 117 Offset fixed.Point26_6 118 // Bounds encodes the visual dimensions of the glyph relative to the dot. 119 Bounds fixed.Rectangle26_6 120 // Runes is the number of runes represented by the glyph cluster this glyph 121 // belongs to. If Flags does not contain FlagClusterBreak, this value will 122 // always be zero. The final glyph in the cluster contains the runes count 123 // for the entire cluster. 124 Runes uint16 125 // Flags encode special properties of this glyph. 126 Flags Flags 127 } 128 129 type Flags uint16 130 131 const ( 132 // FlagTowardOrigin is set for glyphs in runs that flow 133 // towards the origin (RTL). 
134 FlagTowardOrigin Flags = 1 << iota 135 // FlagLineBreak is set for the last glyph in a line. 136 FlagLineBreak 137 // FlagRunBreak is set for the last glyph in a run. A run is a sequence of 138 // glyphs sharing constant style properties (same size, same face, same 139 // direction, etc...). 140 FlagRunBreak 141 // FlagClusterBreak is set for the last glyph in a glyph cluster. A glyph cluster is a 142 // sequence of glyphs which are logically a single unit, but require multiple 143 // symbols from a font to display. 144 FlagClusterBreak 145 // FlagParagraphBreak indicates that the glyph cluster does not represent actual 146 // font glyphs, but was inserted by the shaper to represent line-breaking 147 // whitespace characters. After a glyph with FlagParagraphBreak set, the shaper 148 // will always return a glyph with FlagParagraphStart providing the X and Y 149 // coordinates of the start of the next line, even if that line has no contents. 150 FlagParagraphBreak 151 // FlagParagraphStart indicates that the glyph starts a new paragraph. 152 FlagParagraphStart 153 // FlagTruncator indicates that the glyph is part of a special truncator run that 154 // represents the portion of text removed due to truncation. A glyph with both 155 // FlagTruncator and FlagClusterBreak will have a Runes field accounting for all 156 // runes truncated. 
157 FlagTruncator 158 ) 159 160 func (f Flags) String() string { 161 var b strings.Builder 162 if f&FlagParagraphStart != 0 { 163 b.WriteString("S") 164 } else { 165 b.WriteString("_") 166 } 167 if f&FlagParagraphBreak != 0 { 168 b.WriteString("P") 169 } else { 170 b.WriteString("_") 171 } 172 if f&FlagTowardOrigin != 0 { 173 b.WriteString("T") 174 } else { 175 b.WriteString("_") 176 } 177 if f&FlagLineBreak != 0 { 178 b.WriteString("L") 179 } else { 180 b.WriteString("_") 181 } 182 if f&FlagRunBreak != 0 { 183 b.WriteString("R") 184 } else { 185 b.WriteString("_") 186 } 187 if f&FlagClusterBreak != 0 { 188 b.WriteString("C") 189 } else { 190 b.WriteString("_") 191 } 192 if f&FlagTruncator != 0 { 193 b.WriteString("…") 194 } else { 195 b.WriteString("_") 196 } 197 return b.String() 198 } 199 200 type GlyphID uint64 201 202 // Shaper converts strings of text into glyphs that can be displayed. 203 type Shaper struct { 204 config struct { 205 disableSystemFonts bool 206 collection []FontFace 207 } 208 initialized bool 209 shaper shaperImpl 210 pathCache pathCache 211 bitmapShapeCache bitmapShapeCache 212 layoutCache layoutCache 213 214 reader *bufio.Reader 215 paragraph []byte 216 217 // Iterator state. 218 brokeParagraph bool 219 pararagraphStart Glyph 220 txt document 221 line int 222 run int 223 glyph int 224 // advance is the width of glyphs from the current run that have already been displayed. 225 advance fixed.Int26_6 226 // done tracks whether iteration is over. 227 done bool 228 err error 229 } 230 231 // ShaperOptions configure text shapers. 232 type ShaperOption func(*Shaper) 233 234 // NoSystemFonts can be used to disable system font loading. 235 func NoSystemFonts() ShaperOption { 236 return func(s *Shaper) { 237 s.config.disableSystemFonts = true 238 } 239 } 240 241 // WithCollection can be used to provide a collection of pre-loaded fonts to the shaper. 
242 func WithCollection(collection []FontFace) ShaperOption { 243 return func(s *Shaper) { 244 s.config.collection = collection 245 } 246 } 247 248 // NewShaper constructs a shaper with the provided options. 249 // 250 // NewShaper must be called after [app.NewWindow], unless the [NoSystemFonts] 251 // option is specified. This is an unfortunate restriction caused by some platforms 252 // such as Android. 253 func NewShaper(options ...ShaperOption) *Shaper { 254 l := &Shaper{} 255 for _, opt := range options { 256 opt(l) 257 } 258 l.init() 259 return l 260 } 261 262 func (l *Shaper) init() { 263 if l.initialized { 264 return 265 } 266 l.initialized = true 267 l.reader = bufio.NewReader(nil) 268 l.shaper = *newShaperImpl(!l.config.disableSystemFonts, l.config.collection) 269 } 270 271 // Layout text from an io.Reader according to a set of options. Results can be retrieved by 272 // iteratively calling NextGlyph. 273 func (l *Shaper) Layout(params Parameters, txt io.Reader) { 274 l.init() 275 l.layoutText(params, txt, "") 276 } 277 278 // LayoutString is Layout for strings. 279 func (l *Shaper) LayoutString(params Parameters, str string) { 280 l.init() 281 l.layoutText(params, nil, str) 282 } 283 284 func (l *Shaper) reset(align Alignment) { 285 l.line, l.run, l.glyph, l.advance = 0, 0, 0, 0 286 l.done = false 287 l.txt.reset() 288 l.txt.alignment = align 289 } 290 291 // layoutText lays out a large text document by breaking it into paragraphs and laying 292 // out each of them separately. This allows the shaping results to be cached independently 293 // by paragraph. Only one of txt and str should be provided. 
294 func (l *Shaper) layoutText(params Parameters, txt io.Reader, str string) { 295 l.reset(params.Alignment) 296 if txt == nil && len(str) == 0 { 297 l.txt.append(l.layoutParagraph(params, "", nil)) 298 return 299 } 300 l.reader.Reset(txt) 301 truncating := params.MaxLines > 0 302 var done bool 303 var endByte int 304 for !done { 305 l.paragraph = l.paragraph[:0] 306 if txt != nil { 307 for { 308 b, err := l.reader.ReadByte() 309 if err != nil { 310 // EOF or any other error ends processing here. 311 done = true 312 break 313 } 314 l.paragraph = append(l.paragraph, b) 315 if b == '\n' { 316 break 317 } 318 } 319 if !done { 320 _, re := l.reader.ReadByte() 321 done = re != nil 322 if !done { 323 _ = l.reader.UnreadByte() 324 } 325 } 326 } else { 327 idx := strings.IndexByte(str, '\n') 328 if idx == -1 { 329 done = true 330 endByte = len(str) 331 } else { 332 endByte = idx + 1 333 done = endByte == len(str) 334 } 335 } 336 if len(str[:endByte]) > 0 || (len(l.paragraph) > 0 || len(l.txt.lines) == 0) { 337 params.forceTruncate = truncating && !done 338 lines := l.layoutParagraph(params, str[:endByte], l.paragraph) 339 if truncating { 340 params.MaxLines -= len(lines.lines) 341 if params.MaxLines == 0 { 342 done = true 343 // We've truncated the text, but we need to account for all of the runes we never 344 // decoded in the truncator. 345 var unreadRunes int 346 if txt == nil { 347 unreadRunes = utf8.RuneCountInString(str[endByte:]) 348 } else { 349 for { 350 _, _, e := l.reader.ReadRune() 351 if e != nil { 352 break 353 } 354 unreadRunes++ 355 } 356 } 357 l.txt.unreadRuneCount = unreadRunes 358 } 359 } 360 l.txt.append(lines) 361 } 362 if done { 363 return 364 } 365 str = str[endByte:] 366 } 367 } 368 369 // layoutParagraph shapes and wraps a paragraph using the provided parameters. 370 // It accepts the paragraph data in either string or rune format, preferring the 371 // string in order to hit the shaper cache more quickly. 
372 func (l *Shaper) layoutParagraph(params Parameters, asStr string, asBytes []byte) document { 373 if l == nil { 374 return document{} 375 } 376 if len(asStr) == 0 && len(asBytes) > 0 { 377 asStr = string(asBytes) 378 } 379 // Alignment is not part of the cache key because changing it does not impact shaping. 380 lk := layoutKey{ 381 ppem: params.PxPerEm, 382 maxWidth: params.MaxWidth, 383 minWidth: params.MinWidth, 384 maxLines: params.MaxLines, 385 truncator: params.Truncator, 386 locale: params.Locale, 387 font: params.Font, 388 forceTruncate: params.forceTruncate, 389 wrapPolicy: params.WrapPolicy, 390 str: asStr, 391 lineHeight: params.LineHeight, 392 lineHeightScale: params.LineHeightScale, 393 } 394 if l, ok := l.layoutCache.Get(lk); ok { 395 return l 396 } 397 lines := l.shaper.LayoutRunes(params, []rune(asStr)) 398 l.layoutCache.Put(lk, lines) 399 return lines 400 } 401 402 // NextGlyph returns the next glyph from the most recent shaping operation, if 403 // any. If there are no more glyphs, ok will be false. 404 func (l *Shaper) NextGlyph() (_ Glyph, ok bool) { 405 l.init() 406 if l.done { 407 return Glyph{}, false 408 } 409 for { 410 if l.line == len(l.txt.lines) { 411 if l.brokeParagraph { 412 l.brokeParagraph = false 413 return l.pararagraphStart, true 414 } 415 if l.err == nil { 416 l.err = io.EOF 417 } 418 return Glyph{}, false 419 } 420 line := l.txt.lines[l.line] 421 if l.run == len(line.runs) { 422 l.line++ 423 l.run = 0 424 continue 425 } 426 run := line.runs[l.run] 427 align := l.txt.alignment.Align(line.direction, line.width, l.txt.alignWidth) 428 if l.line == 0 && l.run == 0 && len(run.Glyphs) == 0 { 429 // The very first run is empty, which will only happen when the 430 // entire text is a shaped empty string. Return a single synthetic 431 // glyph to provide ascent/descent information to the caller. 
432 l.done = true 433 return Glyph{ 434 X: align, 435 Y: int32(line.yOffset), 436 Runes: 0, 437 Flags: FlagLineBreak | FlagClusterBreak | FlagRunBreak, 438 Ascent: line.ascent, 439 Descent: line.descent, 440 }, true 441 } 442 if l.glyph == len(run.Glyphs) { 443 l.run++ 444 l.glyph = 0 445 l.advance = 0 446 continue 447 } 448 glyphIdx := l.glyph 449 rtl := run.Direction.Progression() == system.TowardOrigin 450 if rtl { 451 // If RTL, traverse glyphs backwards to ensure rune order. 452 glyphIdx = len(run.Glyphs) - 1 - glyphIdx 453 } 454 g := run.Glyphs[glyphIdx] 455 if rtl { 456 // Modify the advance prior to computing runOffset to ensure that the 457 // current glyph's width is subtracted in RTL. 458 l.advance += g.xAdvance 459 } 460 // runOffset computes how far into the run the dot should be positioned. 461 runOffset := l.advance 462 if rtl { 463 runOffset = run.Advance - l.advance 464 } 465 glyph := Glyph{ 466 ID: g.id, 467 X: align + run.X + runOffset, 468 Y: int32(line.yOffset), 469 Ascent: line.ascent, 470 Descent: line.descent, 471 Advance: g.xAdvance, 472 Runes: uint16(g.runeCount), 473 Offset: fixed.Point26_6{ 474 X: g.xOffset, 475 Y: g.yOffset, 476 }, 477 Bounds: g.bounds, 478 } 479 if run.truncator { 480 glyph.Flags |= FlagTruncator 481 } 482 l.glyph++ 483 if !rtl { 484 l.advance += g.xAdvance 485 } 486 487 endOfRun := l.glyph == len(run.Glyphs) 488 if endOfRun { 489 glyph.Flags |= FlagRunBreak 490 } 491 endOfLine := endOfRun && l.run == len(line.runs)-1 492 if endOfLine { 493 glyph.Flags |= FlagLineBreak 494 } 495 endOfText := endOfLine && l.line == len(l.txt.lines)-1 496 nextGlyph := l.glyph 497 if rtl { 498 nextGlyph = len(run.Glyphs) - 1 - nextGlyph 499 } 500 endOfCluster := endOfRun || run.Glyphs[nextGlyph].clusterIndex != g.clusterIndex 501 if run.truncator { 502 // Only emit a single cluster for the entire truncator sequence. 
503 endOfCluster = endOfRun 504 } 505 if endOfCluster { 506 glyph.Flags |= FlagClusterBreak 507 if run.truncator { 508 glyph.Runes += uint16(l.txt.unreadRuneCount) 509 } 510 } else { 511 glyph.Runes = 0 512 } 513 if run.Direction.Progression() == system.TowardOrigin { 514 glyph.Flags |= FlagTowardOrigin 515 } 516 if l.brokeParagraph { 517 glyph.Flags |= FlagParagraphStart 518 l.brokeParagraph = false 519 } 520 if g.glyphCount == 0 { 521 glyph.Flags |= FlagParagraphBreak 522 l.brokeParagraph = true 523 if endOfText { 524 l.pararagraphStart = Glyph{ 525 Ascent: glyph.Ascent, 526 Descent: glyph.Descent, 527 Flags: FlagParagraphStart | FlagLineBreak | FlagRunBreak | FlagClusterBreak, 528 } 529 // If a glyph is both a paragraph break and the final glyph, it's a newline 530 // at the end of the text. We must inform widgets like the text editor 531 // of a valid cursor position they can use for "after" such a newline, 532 // taking text alignment into account. 533 l.pararagraphStart.X = l.txt.alignment.Align(line.direction, 0, l.txt.alignWidth) 534 l.pararagraphStart.Y = glyph.Y + int32((glyph.Ascent + glyph.Descent).Ceil()) 535 } 536 } 537 return glyph, true 538 } 539 } 540 541 const ( 542 facebits = 16 543 sizebits = 16 544 gidbits = 64 - facebits - sizebits 545 ) 546 547 // newGlyphID encodes a face and a glyph id into a GlyphID. 548 func newGlyphID(ppem fixed.Int26_6, faceIdx int, gid font.GID) GlyphID { 549 if gid&^((1<<gidbits)-1) != 0 { 550 panic("glyph id out of bounds") 551 } 552 if faceIdx&^((1<<facebits)-1) != 0 { 553 panic("face index out of bounds") 554 } 555 if ppem&^((1<<sizebits)-1) != 0 { 556 panic("ppem out of bounds") 557 } 558 // Mask off the upper 16 bits of ppem. This still allows values up to 559 // 1023. 560 ppem &= ((1 << sizebits) - 1) 561 return GlyphID(faceIdx)<<(gidbits+sizebits) | GlyphID(ppem)<<(gidbits) | GlyphID(gid) 562 } 563 564 // splitGlyphID is the opposite of newGlyphID. 
565 func splitGlyphID(g GlyphID) (fixed.Int26_6, int, font.GID) { 566 faceIdx := int(uint64(g) >> (gidbits + sizebits)) 567 ppem := fixed.Int26_6((g & ((1<<sizebits - 1) << gidbits)) >> gidbits) 568 gid := font.GID(g) & (1<<gidbits - 1) 569 return ppem, faceIdx, gid 570 } 571 572 // Shape converts the provided glyphs into a path. The path will enclose the forms 573 // of all vector glyphs. 574 // All glyphs are expected to be from a single line of text (their Y offsets are ignored). 575 func (l *Shaper) Shape(gs []Glyph) clip.PathSpec { 576 l.init() 577 key := l.pathCache.hashGlyphs(gs) 578 shape, ok := l.pathCache.Get(key, gs) 579 if ok { 580 return shape 581 } 582 pathOps := new(op.Ops) 583 shape = l.shaper.Shape(pathOps, gs) 584 l.pathCache.Put(key, gs, shape) 585 return shape 586 } 587 588 // Bitmaps extracts bitmap glyphs from the provided slice and creates an op.CallOp to present 589 // them. The returned op.CallOp will align correctly with the return value of Shape() for the 590 // same gs slice. 591 // All glyphs are expected to be from a single line of text (their Y offsets are ignored). 592 func (l *Shaper) Bitmaps(gs []Glyph) op.CallOp { 593 l.init() 594 key := l.bitmapShapeCache.hashGlyphs(gs) 595 call, ok := l.bitmapShapeCache.Get(key, gs) 596 if ok { 597 return call 598 } 599 callOps := new(op.Ops) 600 call = l.shaper.Bitmaps(callOps, gs) 601 l.bitmapShapeCache.Put(key, gs, call) 602 return call 603 }