github.com/Seikaijyu/gio@v0.0.1/gpu/compute.go (about) 1 // SPDX-License-Identifier: Unlicense OR MIT 2 3 package gpu 4 5 import ( 6 "bytes" 7 "encoding/binary" 8 "errors" 9 "fmt" 10 "hash/maphash" 11 "image" 12 "image/color" 13 "image/draw" 14 "image/png" 15 "math" 16 "math/bits" 17 "os" 18 "runtime" 19 "sort" 20 "time" 21 "unsafe" 22 23 "gioui.org/cpu" 24 "gioui.org/shader" 25 "gioui.org/shader/gio" 26 "gioui.org/shader/piet" 27 "github.com/Seikaijyu/gio/gpu/internal/driver" 28 "github.com/Seikaijyu/gio/internal/byteslice" 29 "github.com/Seikaijyu/gio/internal/f32" 30 "github.com/Seikaijyu/gio/internal/f32color" 31 "github.com/Seikaijyu/gio/internal/ops" 32 "github.com/Seikaijyu/gio/internal/scene" 33 "github.com/Seikaijyu/gio/layout" 34 "github.com/Seikaijyu/gio/op" 35 ) 36 37 type compute struct { 38 ctx driver.Device 39 40 collector collector 41 enc encoder 42 texOps []textureOp 43 viewport image.Point 44 maxTextureDim int 45 srgb bool 46 atlases []*textureAtlas 47 frameCount uint 48 moves []atlasMove 49 50 programs struct { 51 elements computeProgram 52 tileAlloc computeProgram 53 pathCoarse computeProgram 54 backdrop computeProgram 55 binning computeProgram 56 coarse computeProgram 57 kernel4 computeProgram 58 } 59 buffers struct { 60 config sizedBuffer 61 scene sizedBuffer 62 state sizedBuffer 63 memory sizedBuffer 64 } 65 output struct { 66 blitPipeline driver.Pipeline 67 68 buffer sizedBuffer 69 70 uniforms *copyUniforms 71 uniBuf driver.Buffer 72 73 layerVertices []layerVertex 74 descriptors *piet.Kernel4DescriptorSetLayout 75 76 nullMaterials driver.Texture 77 } 78 // imgAllocs maps imageOpData.handles to allocs. 79 imgAllocs map[interface{}]*atlasAlloc 80 // materials contains the pre-processed materials (transformed images for 81 // now, gradients etc. later) packed in a texture atlas. The atlas is used 82 // as source in kernel4. 83 materials struct { 84 // allocs maps texture ops the their atlases and FillImage offsets. 85 allocs map[textureKey]materialAlloc 86 87 pipeline driver.Pipeline 88 buffer sizedBuffer 89 quads []materialVertex 90 uniforms struct { 91 u *materialUniforms 92 buf driver.Buffer 93 } 94 } 95 timers struct { 96 profile string 97 t *timers 98 compact *timer 99 render *timer 100 blit *timer 101 } 102 103 // CPU fallback fields. 104 useCPU bool 105 dispatcher *dispatcher 106 107 // The following fields hold scratch space to avoid garbage. 108 zeroSlice []byte 109 memHeader *memoryHeader 110 conf *config 111 } 112 113 type materialAlloc struct { 114 alloc *atlasAlloc 115 offset image.Point 116 } 117 118 type layer struct { 119 rect image.Rectangle 120 alloc *atlasAlloc 121 ops []paintOp 122 materials *textureAtlas 123 } 124 125 type allocQuery struct { 126 atlas *textureAtlas 127 size image.Point 128 empty bool 129 format driver.TextureFormat 130 bindings driver.BufferBinding 131 nocompact bool 132 } 133 134 type atlasAlloc struct { 135 atlas *textureAtlas 136 rect image.Rectangle 137 cpu bool 138 dead bool 139 frameCount uint 140 } 141 142 type atlasMove struct { 143 src *textureAtlas 144 dstPos image.Point 145 srcRect image.Rectangle 146 cpu bool 147 } 148 149 type textureAtlas struct { 150 image driver.Texture 151 format driver.TextureFormat 152 bindings driver.BufferBinding 153 hasCPU bool 154 cpuImage cpu.ImageDescriptor 155 size image.Point 156 allocs []*atlasAlloc 157 packer packer 158 realized bool 159 lastFrame uint 160 compact bool 161 } 162 163 type copyUniforms struct { 164 scale [2]float32 165 pos [2]float32 166 uvScale [2]float32 167 _ [8]byte // Pad to 16 bytes. 168 } 169 170 type materialUniforms struct { 171 scale [2]float32 172 pos [2]float32 173 emulatesRGB float32 174 _ [12]byte // Pad to 16 bytes 175 } 176 177 type collector struct { 178 hasher maphash.Hash 179 profile bool 180 reader ops.Reader 181 states []f32.Affine2D 182 clear bool 183 clearColor f32color.RGBA 184 clipStates []clipState 185 order []hashIndex 186 transStack []transEntry 187 prevFrame opsCollector 188 frame opsCollector 189 } 190 191 type transEntry struct { 192 t f32.Affine2D 193 relTrans f32.Affine2D 194 } 195 196 type hashIndex struct { 197 index int 198 hash uint64 199 } 200 201 type opsCollector struct { 202 paths []byte 203 clipCmds []clipCmd 204 ops []paintOp 205 layers []layer 206 } 207 208 type paintOp struct { 209 clipStack []clipCmd 210 offset image.Point 211 state paintKey 212 intersect f32.Rectangle 213 hash uint64 214 layer int 215 texOpIdx int 216 } 217 218 // clipCmd describes a clipping command ready to be used for the compute 219 // pipeline. 220 type clipCmd struct { 221 // union of the bounds of the operations that are clipped. 222 union f32.Rectangle 223 state clipKey 224 path []byte 225 pathKey ops.Key 226 absBounds f32.Rectangle 227 } 228 229 type encoderState struct { 230 relTrans f32.Affine2D 231 clip *clipState 232 233 paintKey 234 } 235 236 // clipKey completely describes a clip operation (along with its path) and is appropriate 237 // for hashing and equality checks. 238 type clipKey struct { 239 bounds f32.Rectangle 240 strokeWidth float32 241 relTrans f32.Affine2D 242 pathHash uint64 243 } 244 245 // paintKey completely defines a paint operation. It is suitable for hashing and 246 // equality checks. 247 type paintKey struct { 248 t f32.Affine2D 249 matType materialType 250 // Current paint.ImageOp 251 image imageOpData 252 // Current paint.ColorOp, if any. 253 color color.NRGBA 254 255 // Current paint.LinearGradientOp. 256 stop1 f32.Point 257 stop2 f32.Point 258 color1 color.NRGBA 259 color2 color.NRGBA 260 } 261 262 type clipState struct { 263 absBounds f32.Rectangle 264 parent *clipState 265 path []byte 266 pathKey ops.Key 267 intersect f32.Rectangle 268 269 clipKey 270 } 271 272 type layerVertex struct { 273 posX, posY float32 274 u, v float32 275 } 276 277 // materialVertex describes a vertex of a quad used to render a transformed 278 // material. 279 type materialVertex struct { 280 posX, posY float32 281 u, v float32 282 } 283 284 // textureKey identifies textureOp. 285 type textureKey struct { 286 handle interface{} 287 transform f32.Affine2D 288 bounds image.Rectangle 289 } 290 291 // textureOp represents an paintOp that requires texture space. 292 type textureOp struct { 293 img imageOpData 294 key textureKey 295 // offset is the integer offset separated from key.transform to increase cache hit rate. 296 off image.Point 297 // matAlloc is the atlas placement for material. 298 matAlloc materialAlloc 299 // imgAlloc is the atlas placement for the source image 300 imgAlloc *atlasAlloc 301 } 302 303 type encoder struct { 304 scene []scene.Command 305 npath int 306 npathseg int 307 ntrans int 308 } 309 310 // sizedBuffer holds a GPU buffer, or its equivalent CPU memory. 311 type sizedBuffer struct { 312 size int 313 buffer driver.Buffer 314 // cpuBuf is initialized when useCPU is true. 315 cpuBuf cpu.BufferDescriptor 316 } 317 318 // computeProgram holds a compute program, or its equivalent CPU implementation. 319 type computeProgram struct { 320 prog driver.Program 321 322 // CPU fields. 323 progInfo *cpu.ProgramInfo 324 descriptors unsafe.Pointer 325 buffers []*cpu.BufferDescriptor 326 } 327 328 // config matches Config in setup.h 329 type config struct { 330 n_elements uint32 // paths 331 n_pathseg uint32 332 width_in_tiles uint32 333 height_in_tiles uint32 334 tile_alloc memAlloc 335 bin_alloc memAlloc 336 ptcl_alloc memAlloc 337 pathseg_alloc memAlloc 338 anno_alloc memAlloc 339 trans_alloc memAlloc 340 } 341 342 // memAlloc matches Alloc in mem.h 343 type memAlloc struct { 344 offset uint32 345 //size uint32 346 } 347 348 // memoryHeader matches the header of Memory in mem.h. 349 type memoryHeader struct { 350 mem_offset uint32 351 mem_error uint32 352 } 353 354 // rect is a oriented rectangle. 355 type rectangle [4]f32.Point 356 357 const ( 358 layersBindings = driver.BufferBindingShaderStorageWrite | driver.BufferBindingTexture 359 materialsBindings = driver.BufferBindingFramebuffer | driver.BufferBindingShaderStorageRead 360 // Materials and layers can share texture storage if their bindings match. 361 combinedBindings = layersBindings | materialsBindings 362 ) 363 364 // GPU structure sizes and constants. 365 const ( 366 tileWidthPx = 32 367 tileHeightPx = 32 368 ptclInitialAlloc = 1024 369 kernel4OutputUnit = 2 370 kernel4AtlasUnit = 3 371 372 pathSize = 12 373 binSize = 8 374 pathsegSize = 52 375 annoSize = 32 376 transSize = 24 377 stateSize = 60 378 stateStride = 4 + 2*stateSize 379 ) 380 381 // mem.h constants. 382 const ( 383 memNoError = 0 // NO_ERROR 384 memMallocFailed = 1 // ERR_MALLOC_FAILED 385 ) 386 387 func newCompute(ctx driver.Device) (*compute, error) { 388 caps := ctx.Caps() 389 maxDim := caps.MaxTextureSize 390 // Large atlas textures cause artifacts due to precision loss in 391 // shaders. 392 if cap := 8192; maxDim > cap { 393 maxDim = cap 394 } 395 // The compute programs can only span 128x64 tiles. Limit to 64 for now, and leave the 396 // complexity of a rectangular limit for later. 397 if computeCap := 4096; maxDim > computeCap { 398 maxDim = computeCap 399 } 400 g := &compute{ 401 ctx: ctx, 402 maxTextureDim: maxDim, 403 srgb: caps.Features.Has(driver.FeatureSRGB), 404 conf: new(config), 405 memHeader: new(memoryHeader), 406 } 407 shaders := []struct { 408 prog *computeProgram 409 src shader.Sources 410 info *cpu.ProgramInfo 411 }{ 412 {&g.programs.elements, piet.Shader_elements_comp, piet.ElementsProgramInfo}, 413 {&g.programs.tileAlloc, piet.Shader_tile_alloc_comp, piet.Tile_allocProgramInfo}, 414 {&g.programs.pathCoarse, piet.Shader_path_coarse_comp, piet.Path_coarseProgramInfo}, 415 {&g.programs.backdrop, piet.Shader_backdrop_comp, piet.BackdropProgramInfo}, 416 {&g.programs.binning, piet.Shader_binning_comp, piet.BinningProgramInfo}, 417 {&g.programs.coarse, piet.Shader_coarse_comp, piet.CoarseProgramInfo}, 418 {&g.programs.kernel4, piet.Shader_kernel4_comp, piet.Kernel4ProgramInfo}, 419 } 420 if !caps.Features.Has(driver.FeatureCompute) { 421 if !cpu.Supported { 422 return nil, errors.New("gpu: missing support for compute programs") 423 } 424 g.useCPU = true 425 } 426 if g.useCPU { 427 g.dispatcher = newDispatcher(runtime.NumCPU()) 428 } else { 429 null, err := ctx.NewTexture(driver.TextureFormatRGBA8, 1, 1, driver.FilterNearest, driver.FilterNearest, driver.BufferBindingShaderStorageRead) 430 if err != nil { 431 g.Release() 432 return nil, err 433 } 434 g.output.nullMaterials = null 435 } 436 437 copyVert, copyFrag, err := newShaders(ctx, gio.Shader_copy_vert, gio.Shader_copy_frag) 438 if err != nil { 439 g.Release() 440 return nil, err 441 } 442 defer copyVert.Release() 443 defer copyFrag.Release() 444 pipe, err := ctx.NewPipeline(driver.PipelineDesc{ 445 VertexShader: copyVert, 446 FragmentShader: copyFrag, 447 VertexLayout: driver.VertexLayout{ 448 Inputs: []driver.InputDesc{ 449 {Type: shader.DataTypeFloat, Size: 2, Offset: 0}, 450 {Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2}, 451 }, 452 Stride: int(unsafe.Sizeof(g.output.layerVertices[0])), 453 }, 454 PixelFormat: driver.TextureFormatOutput, 455 BlendDesc: driver.BlendDesc{ 456 Enable: true, 457 SrcFactor: driver.BlendFactorOne, 458 DstFactor: driver.BlendFactorOneMinusSrcAlpha, 459 }, 460 Topology: driver.TopologyTriangles, 461 }) 462 if err != nil { 463 g.Release() 464 return nil, err 465 } 466 g.output.blitPipeline = pipe 467 g.output.uniforms = new(copyUniforms) 468 469 buf, err := ctx.NewBuffer(driver.BufferBindingUniforms, int(unsafe.Sizeof(*g.output.uniforms))) 470 if err != nil { 471 g.Release() 472 return nil, err 473 } 474 g.output.uniBuf = buf 475 476 materialVert, materialFrag, err := newShaders(ctx, gio.Shader_material_vert, gio.Shader_material_frag) 477 if err != nil { 478 g.Release() 479 return nil, err 480 } 481 defer materialVert.Release() 482 defer materialFrag.Release() 483 pipe, err = ctx.NewPipeline(driver.PipelineDesc{ 484 VertexShader: materialVert, 485 FragmentShader: materialFrag, 486 VertexLayout: driver.VertexLayout{ 487 Inputs: []driver.InputDesc{ 488 {Type: shader.DataTypeFloat, Size: 2, Offset: 0}, 489 {Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2}, 490 }, 491 Stride: int(unsafe.Sizeof(g.materials.quads[0])), 492 }, 493 PixelFormat: driver.TextureFormatRGBA8, 494 Topology: driver.TopologyTriangles, 495 }) 496 if err != nil { 497 g.Release() 498 return nil, err 499 } 500 g.materials.pipeline = pipe 501 g.materials.uniforms.u = new(materialUniforms) 502 503 buf, err = ctx.NewBuffer(driver.BufferBindingUniforms, int(unsafe.Sizeof(*g.materials.uniforms.u))) 504 if err != nil { 505 g.Release() 506 return nil, err 507 } 508 g.materials.uniforms.buf = buf 509 510 for _, shader := range shaders { 511 if !g.useCPU { 512 p, err := ctx.NewComputeProgram(shader.src) 513 if err != nil { 514 g.Release() 515 return nil, err 516 } 517 shader.prog.prog = p 518 } else { 519 shader.prog.progInfo = shader.info 520 } 521 } 522 if g.useCPU { 523 { 524 desc := new(piet.ElementsDescriptorSetLayout) 525 g.programs.elements.descriptors = unsafe.Pointer(desc) 526 g.programs.elements.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1(), desc.Binding2(), desc.Binding3()} 527 } 528 { 529 desc := new(piet.Tile_allocDescriptorSetLayout) 530 g.programs.tileAlloc.descriptors = unsafe.Pointer(desc) 531 g.programs.tileAlloc.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} 532 } 533 { 534 desc := new(piet.Path_coarseDescriptorSetLayout) 535 g.programs.pathCoarse.descriptors = unsafe.Pointer(desc) 536 g.programs.pathCoarse.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} 537 } 538 { 539 desc := new(piet.BackdropDescriptorSetLayout) 540 g.programs.backdrop.descriptors = unsafe.Pointer(desc) 541 g.programs.backdrop.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} 542 } 543 { 544 desc := new(piet.BinningDescriptorSetLayout) 545 g.programs.binning.descriptors = unsafe.Pointer(desc) 546 g.programs.binning.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} 547 } 548 { 549 desc := new(piet.CoarseDescriptorSetLayout) 550 g.programs.coarse.descriptors = unsafe.Pointer(desc) 551 g.programs.coarse.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} 552 } 553 { 554 desc := new(piet.Kernel4DescriptorSetLayout) 555 g.programs.kernel4.descriptors = unsafe.Pointer(desc) 556 g.programs.kernel4.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} 557 g.output.descriptors = desc 558 } 559 } 560 return g, nil 561 } 562 563 func newShaders(ctx driver.Device, vsrc, fsrc shader.Sources) (vert driver.VertexShader, frag driver.FragmentShader, err error) { 564 vert, err = ctx.NewVertexShader(vsrc) 565 if err != nil { 566 return 567 } 568 frag, err = ctx.NewFragmentShader(fsrc) 569 if err != nil { 570 vert.Release() 571 } 572 return 573 } 574 575 func (g *compute) Frame(frameOps *op.Ops, target RenderTarget, viewport image.Point) error { 576 g.frameCount++ 577 g.collect(viewport, frameOps) 578 return g.frame(target) 579 } 580 581 func (g *compute) collect(viewport image.Point, ops *op.Ops) { 582 g.viewport = viewport 583 g.collector.reset() 584 585 g.texOps = g.texOps[:0] 586 g.collector.collect(ops, viewport, &g.texOps) 587 } 588 589 func (g *compute) Clear(col color.NRGBA) { 590 g.collector.clear = true 591 g.collector.clearColor = f32color.LinearFromSRGB(col) 592 } 593 594 func (g *compute) frame(target RenderTarget) error { 595 viewport := g.viewport 596 defFBO := g.ctx.BeginFrame(target, g.collector.clear, viewport) 597 defer g.ctx.EndFrame() 598 599 t := &g.timers 600 if g.collector.profile && t.t == nil && g.ctx.Caps().Features.Has(driver.FeatureTimers) { 601 t.t = newTimers(g.ctx) 602 t.compact = t.t.newTimer() 603 t.render = t.t.newTimer() 604 t.blit = t.t.newTimer() 605 } 606 607 if err := g.uploadImages(); err != nil { 608 return err 609 } 610 if err := g.renderMaterials(); err != nil { 611 return err 612 } 613 g.layer(viewport, g.texOps) 614 t.render.begin() 615 if err := g.renderLayers(viewport); err != nil { 616 return err 617 } 618 t.render.end() 619 d := driver.LoadDesc{ 620 ClearColor: g.collector.clearColor, 621 } 622 if g.collector.clear { 623 g.collector.clear = false 624 d.Action = driver.LoadActionClear 625 } 626 t.blit.begin() 627 g.blitLayers(d, defFBO, viewport) 628 t.blit.end() 629 t.compact.begin() 630 if err := g.compactAllocs(); err != nil { 631 return err 632 } 633 t.compact.end() 634 if g.collector.profile && t.t.ready() { 635 com, ren, blit := t.compact.Elapsed, t.render.Elapsed, t.blit.Elapsed 636 ft := com + ren + blit 637 q := 100 * time.Microsecond 638 ft = ft.Round(q) 639 com, ren, blit = com.Round(q), ren.Round(q), blit.Round(q) 640 t.profile = fmt.Sprintf("ft:%7s com: %7s ren:%7s blit:%7s", ft, com, ren, blit) 641 } 642 return nil 643 } 644 645 func (g *compute) dumpAtlases() { 646 for i, a := range g.atlases { 647 dump := image.NewRGBA(image.Rectangle{Max: a.size}) 648 err := driver.DownloadImage(g.ctx, a.image, dump) 649 if err != nil { 650 panic(err) 651 } 652 nrgba := image.NewNRGBA(dump.Bounds()) 653 draw.Draw(nrgba, image.Rectangle{}, dump, image.Point{}, draw.Src) 654 var buf bytes.Buffer 655 if err := png.Encode(&buf, nrgba); err != nil { 656 panic(err) 657 } 658 if err := os.WriteFile(fmt.Sprintf("dump-%d.png", i), buf.Bytes(), 0600); err != nil { 659 panic(err) 660 } 661 } 662 } 663 664 func (g *compute) Profile() string { 665 return g.timers.profile 666 } 667 668 func (g *compute) compactAllocs() error { 669 const ( 670 maxAllocAge = 3 671 maxAtlasAge = 10 672 ) 673 atlases := g.atlases 674 for _, a := range atlases { 675 if len(a.allocs) > 0 && g.frameCount-a.lastFrame > maxAtlasAge { 676 a.compact = true 677 } 678 } 679 for len(atlases) > 0 { 680 var ( 681 dstAtlas *textureAtlas 682 format driver.TextureFormat 683 bindings driver.BufferBinding 684 ) 685 g.moves = g.moves[:0] 686 addedLayers := false 687 useCPU := false 688 fill: 689 for len(atlases) > 0 { 690 srcAtlas := atlases[0] 691 allocs := srcAtlas.allocs 692 if !srcAtlas.compact { 693 atlases = atlases[1:] 694 continue 695 } 696 if addedLayers && (format != srcAtlas.format || srcAtlas.bindings&bindings != srcAtlas.bindings) { 697 break 698 } 699 format = srcAtlas.format 700 bindings = srcAtlas.bindings 701 for len(srcAtlas.allocs) > 0 { 702 a := srcAtlas.allocs[0] 703 n := len(srcAtlas.allocs) 704 if g.frameCount-a.frameCount > maxAllocAge { 705 a.dead = true 706 srcAtlas.allocs[0] = srcAtlas.allocs[n-1] 707 srcAtlas.allocs = srcAtlas.allocs[:n-1] 708 continue 709 } 710 size := a.rect.Size() 711 alloc, fits := g.atlasAlloc(allocQuery{ 712 atlas: dstAtlas, 713 size: size, 714 format: format, 715 bindings: bindings, 716 nocompact: true, 717 }) 718 if !fits { 719 break fill 720 } 721 dstAtlas = alloc.atlas 722 allocs = append(allocs, a) 723 addedLayers = true 724 useCPU = useCPU || a.cpu 725 dstAtlas.allocs = append(dstAtlas.allocs, a) 726 pos := alloc.rect.Min 727 g.moves = append(g.moves, atlasMove{ 728 src: srcAtlas, dstPos: pos, srcRect: a.rect, cpu: a.cpu, 729 }) 730 a.atlas = dstAtlas 731 a.rect = image.Rectangle{Min: pos, Max: pos.Add(a.rect.Size())} 732 srcAtlas.allocs[0] = srcAtlas.allocs[n-1] 733 srcAtlas.allocs = srcAtlas.allocs[:n-1] 734 } 735 srcAtlas.compact = false 736 srcAtlas.realized = false 737 srcAtlas.packer.clear() 738 srcAtlas.packer.newPage() 739 srcAtlas.packer.maxDims = image.Pt(g.maxTextureDim, g.maxTextureDim) 740 atlases = atlases[1:] 741 } 742 if !addedLayers { 743 break 744 } 745 outputSize := dstAtlas.packer.sizes[0] 746 if err := g.realizeAtlas(dstAtlas, useCPU, outputSize); err != nil { 747 return err 748 } 749 for _, move := range g.moves { 750 if !move.cpu { 751 g.ctx.CopyTexture(dstAtlas.image, move.dstPos, move.src.image, move.srcRect) 752 } else { 753 src := move.src.cpuImage.Data() 754 dst := dstAtlas.cpuImage.Data() 755 sstride := move.src.size.X * 4 756 dstride := dstAtlas.size.X * 4 757 copyImage(dst, dstride, move.dstPos, src, sstride, move.srcRect) 758 } 759 } 760 } 761 for i := len(g.atlases) - 1; i >= 0; i-- { 762 a := g.atlases[i] 763 if len(a.allocs) == 0 && g.frameCount-a.lastFrame > maxAtlasAge { 764 a.Release() 765 n := len(g.atlases) 766 g.atlases[i] = g.atlases[n-1] 767 g.atlases = g.atlases[:n-1] 768 } 769 } 770 return nil 771 } 772 773 func copyImage(dst []byte, dstStride int, dstPos image.Point, src []byte, srcStride int, srcRect image.Rectangle) { 774 sz := srcRect.Size() 775 soff := srcRect.Min.Y*srcStride + srcRect.Min.X*4 776 doff := dstPos.Y*dstStride + dstPos.X*4 777 rowLen := sz.X * 4 778 for y := 0; y < sz.Y; y++ { 779 srow := src[soff : soff+rowLen] 780 drow := dst[doff : doff+rowLen] 781 copy(drow, srow) 782 soff += srcStride 783 doff += dstStride 784 } 785 } 786 787 func (g *compute) renderLayers(viewport image.Point) error { 788 layers := g.collector.frame.layers 789 for len(layers) > 0 { 790 var materials, dst *textureAtlas 791 addedLayers := false 792 g.enc.reset() 793 for len(layers) > 0 { 794 l := &layers[0] 795 if l.alloc != nil { 796 layers = layers[1:] 797 continue 798 } 799 if materials != nil { 800 if l.materials != nil && materials != l.materials { 801 // Only one materials texture per compute pass. 802 break 803 } 804 } else { 805 materials = l.materials 806 } 807 size := l.rect.Size() 808 alloc, fits := g.atlasAlloc(allocQuery{ 809 atlas: dst, 810 empty: true, 811 format: driver.TextureFormatRGBA8, 812 bindings: combinedBindings, 813 // Pad to avoid overlap. 814 size: size.Add(image.Pt(1, 1)), 815 }) 816 if !fits { 817 // Only one output atlas per compute pass. 818 break 819 } 820 dst = alloc.atlas 821 dst.compact = true 822 addedLayers = true 823 l.alloc = &alloc 824 dst.allocs = append(dst.allocs, l.alloc) 825 encodeLayer(*l, alloc.rect.Min, viewport, &g.enc, g.texOps) 826 layers = layers[1:] 827 } 828 if !addedLayers { 829 break 830 } 831 outputSize := dst.packer.sizes[0] 832 tileDims := image.Point{ 833 X: (outputSize.X + tileWidthPx - 1) / tileWidthPx, 834 Y: (outputSize.Y + tileHeightPx - 1) / tileHeightPx, 835 } 836 w, h := tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx 837 if err := g.realizeAtlas(dst, g.useCPU, image.Pt(w, h)); err != nil { 838 return err 839 } 840 if err := g.render(materials, dst.image, dst.cpuImage, tileDims, dst.size.X*4); err != nil { 841 return err 842 } 843 } 844 return nil 845 } 846 847 func (g *compute) blitLayers(d driver.LoadDesc, fbo driver.Texture, viewport image.Point) { 848 layers := g.collector.frame.layers 849 g.output.layerVertices = g.output.layerVertices[:0] 850 for _, l := range layers { 851 placef := layout.FPt(l.alloc.rect.Min) 852 sizef := layout.FPt(l.rect.Size()) 853 r := f32.FRect(l.rect) 854 quad := [4]layerVertex{ 855 {posX: float32(r.Min.X), posY: float32(r.Min.Y), u: placef.X, v: placef.Y}, 856 {posX: float32(r.Max.X), posY: float32(r.Min.Y), u: placef.X + sizef.X, v: placef.Y}, 857 {posX: float32(r.Max.X), posY: float32(r.Max.Y), u: placef.X + sizef.X, v: placef.Y + sizef.Y}, 858 {posX: float32(r.Min.X), posY: float32(r.Max.Y), u: placef.X, v: placef.Y + sizef.Y}, 859 } 860 g.output.layerVertices = append(g.output.layerVertices, quad[0], quad[1], quad[3], quad[3], quad[2], quad[1]) 861 g.ctx.PrepareTexture(l.alloc.atlas.image) 862 } 863 if len(g.output.layerVertices) > 0 { 864 vertexData := byteslice.Slice(g.output.layerVertices) 865 g.output.buffer.ensureCapacity(false, g.ctx, driver.BufferBindingVertices, len(vertexData)) 866 g.output.buffer.buffer.Upload(vertexData) 867 } 868 g.ctx.BeginRenderPass(fbo, d) 869 defer g.ctx.EndRenderPass() 870 if len(layers) == 0 { 871 return 872 } 873 g.ctx.Viewport(0, 0, viewport.X, viewport.Y) 874 g.ctx.BindPipeline(g.output.blitPipeline) 875 g.ctx.BindVertexBuffer(g.output.buffer.buffer, 0) 876 start := 0 877 for len(layers) > 0 { 878 count := 0 879 atlas := layers[0].alloc.atlas 880 for len(layers) > 0 { 881 l := layers[0] 882 if l.alloc.atlas != atlas { 883 break 884 } 885 layers = layers[1:] 886 const verticesPerQuad = 6 887 count += verticesPerQuad 888 } 889 890 // Transform positions to clip space: [-1, -1] - [1, 1], and texture 891 // coordinates to texture space: [0, 0] - [1, 1]. 892 clip := f32.Affine2D{}.Scale(f32.Pt(0, 0), f32.Pt(2/float32(viewport.X), 2/float32(viewport.Y))).Offset(f32.Pt(-1, -1)) 893 sx, _, ox, _, sy, oy := clip.Elems() 894 g.output.uniforms.scale = [2]float32{sx, sy} 895 g.output.uniforms.pos = [2]float32{ox, oy} 896 g.output.uniforms.uvScale = [2]float32{1 / float32(atlas.size.X), 1 / float32(atlas.size.Y)} 897 g.output.uniBuf.Upload(byteslice.Struct(g.output.uniforms)) 898 g.ctx.BindUniforms(g.output.uniBuf) 899 g.ctx.BindTexture(0, atlas.image) 900 g.ctx.DrawArrays(start, count) 901 start += count 902 } 903 } 904 905 func (g *compute) renderMaterials() error { 906 m := &g.materials 907 for k, place := range m.allocs { 908 if place.alloc.dead { 909 delete(m.allocs, k) 910 } 911 } 912 texOps := g.texOps 913 for len(texOps) > 0 { 914 m.quads = m.quads[:0] 915 var ( 916 atlas *textureAtlas 917 imgAtlas *textureAtlas 918 ) 919 // A material is clipped to avoid drawing outside its atlas bounds. 920 // However, imprecision in the clipping may cause a single pixel 921 // overflow. 922 var padding = image.Pt(1, 1) 923 var allocStart int 924 for len(texOps) > 0 { 925 op := &texOps[0] 926 if a, exists := m.allocs[op.key]; exists { 927 g.touchAlloc(a.alloc) 928 op.matAlloc = a 929 texOps = texOps[1:] 930 continue 931 } 932 933 if imgAtlas != nil && op.imgAlloc.atlas != imgAtlas { 934 // Only one image atlas per render pass. 935 break 936 } 937 imgAtlas = op.imgAlloc.atlas 938 quad := g.materialQuad(imgAtlas.size, op.key.transform, op.img, op.imgAlloc.rect.Min) 939 boundsf := quadBounds(quad) 940 bounds := boundsf.Round() 941 bounds = bounds.Intersect(op.key.bounds) 942 943 size := bounds.Size() 944 alloc, fits := g.atlasAlloc(allocQuery{ 945 atlas: atlas, 946 size: size.Add(padding), 947 format: driver.TextureFormatRGBA8, 948 bindings: combinedBindings, 949 }) 950 if !fits { 951 break 952 } 953 if atlas == nil { 954 allocStart = len(alloc.atlas.allocs) 955 } 956 atlas = alloc.atlas 957 alloc.cpu = g.useCPU 958 offsetf := layout.FPt(bounds.Min.Mul(-1)) 959 scale := f32.Pt(float32(size.X), float32(size.Y)) 960 for i := range quad { 961 // Position quad to match place. 962 quad[i].posX += offsetf.X 963 quad[i].posY += offsetf.Y 964 // Scale to match viewport [0, 1]. 965 quad[i].posX /= scale.X 966 quad[i].posY /= scale.Y 967 } 968 // Draw quad as two triangles. 969 m.quads = append(m.quads, quad[0], quad[1], quad[3], quad[3], quad[1], quad[2]) 970 if m.allocs == nil { 971 m.allocs = make(map[textureKey]materialAlloc) 972 } 973 atlasAlloc := materialAlloc{ 974 alloc: &alloc, 975 offset: bounds.Min.Mul(-1), 976 } 977 atlas.allocs = append(atlas.allocs, atlasAlloc.alloc) 978 m.allocs[op.key] = atlasAlloc 979 op.matAlloc = atlasAlloc 980 texOps = texOps[1:] 981 } 982 if len(m.quads) == 0 { 983 break 984 } 985 realized := atlas.realized 986 if err := g.realizeAtlas(atlas, g.useCPU, atlas.packer.sizes[0]); err != nil { 987 return err 988 } 989 // Transform to clip space: [-1, -1] - [1, 1]. 990 *m.uniforms.u = materialUniforms{ 991 scale: [2]float32{2, 2}, 992 pos: [2]float32{-1, -1}, 993 } 994 if !g.srgb { 995 m.uniforms.u.emulatesRGB = 1.0 996 } 997 m.uniforms.buf.Upload(byteslice.Struct(m.uniforms.u)) 998 vertexData := byteslice.Slice(m.quads) 999 n := pow2Ceil(len(vertexData)) 1000 m.buffer.ensureCapacity(false, g.ctx, driver.BufferBindingVertices, n) 1001 m.buffer.buffer.Upload(vertexData) 1002 var d driver.LoadDesc 1003 if !realized { 1004 d.Action = driver.LoadActionClear 1005 } 1006 g.ctx.PrepareTexture(imgAtlas.image) 1007 g.ctx.BeginRenderPass(atlas.image, d) 1008 g.ctx.BindTexture(0, imgAtlas.image) 1009 g.ctx.BindPipeline(m.pipeline) 1010 g.ctx.BindUniforms(m.uniforms.buf) 1011 g.ctx.BindVertexBuffer(m.buffer.buffer, 0) 1012 newAllocs := atlas.allocs[allocStart:] 1013 for i, a := range newAllocs { 1014 sz := a.rect.Size().Sub(padding) 1015 g.ctx.Viewport(a.rect.Min.X, a.rect.Min.Y, sz.X, sz.Y) 1016 g.ctx.DrawArrays(i*6, 6) 1017 } 1018 g.ctx.EndRenderPass() 1019 if !g.useCPU { 1020 continue 1021 } 1022 src := atlas.image 1023 data := atlas.cpuImage.Data() 1024 for _, a := range newAllocs { 1025 stride := atlas.size.X * 4 1026 col := a.rect.Min.X * 4 1027 row := stride * a.rect.Min.Y 1028 off := col + row 1029 src.ReadPixels(a.rect, data[off:], stride) 1030 } 1031 } 1032 return nil 1033 } 1034 1035 func (g *compute) uploadImages() error { 1036 for k, a := range g.imgAllocs { 1037 if a.dead { 1038 delete(g.imgAllocs, k) 1039 } 1040 } 1041 type upload struct { 1042 pos image.Point 1043 img *image.RGBA 1044 } 1045 var uploads []upload 1046 format := driver.TextureFormatSRGBA 1047 if !g.srgb { 1048 format = driver.TextureFormatRGBA8 1049 } 1050 // padding is the number of pixels added to the right and below 1051 // images, to avoid atlas filtering artifacts. 1052 const padding = 1 1053 texOps := g.texOps 1054 for len(texOps) > 0 { 1055 uploads = uploads[:0] 1056 var atlas *textureAtlas 1057 for len(texOps) > 0 { 1058 op := &texOps[0] 1059 if a, exists := g.imgAllocs[op.img.handle]; exists { 1060 g.touchAlloc(a) 1061 op.imgAlloc = a 1062 texOps = texOps[1:] 1063 continue 1064 } 1065 size := op.img.src.Bounds().Size().Add(image.Pt(padding, padding)) 1066 alloc, fits := g.atlasAlloc(allocQuery{ 1067 atlas: atlas, 1068 size: size, 1069 format: format, 1070 bindings: driver.BufferBindingTexture | driver.BufferBindingFramebuffer, 1071 }) 1072 if !fits { 1073 break 1074 } 1075 atlas = alloc.atlas 1076 if g.imgAllocs == nil { 1077 g.imgAllocs = make(map[interface{}]*atlasAlloc) 1078 } 1079 op.imgAlloc = &alloc 1080 atlas.allocs = append(atlas.allocs, op.imgAlloc) 1081 g.imgAllocs[op.img.handle] = op.imgAlloc 1082 uploads = append(uploads, upload{pos: alloc.rect.Min, img: op.img.src}) 1083 texOps = texOps[1:] 1084 } 1085 if len(uploads) == 0 { 1086 break 1087 } 1088 if err := g.realizeAtlas(atlas, false, atlas.packer.sizes[0]); err != nil { 1089 return err 1090 } 1091 for _, u := range uploads { 1092 size := u.img.Bounds().Size() 1093 driver.UploadImage(atlas.image, u.pos, u.img) 1094 rightPadding := image.Pt(padding, size.Y) 1095 atlas.image.Upload(image.Pt(u.pos.X+size.X, u.pos.Y), rightPadding, g.zeros(rightPadding.X*rightPadding.Y*4), 0) 1096 bottomPadding := image.Pt(size.X, padding) 1097 atlas.image.Upload(image.Pt(u.pos.X, u.pos.Y+size.Y), bottomPadding, g.zeros(bottomPadding.X*bottomPadding.Y*4), 0) 1098 } 1099 } 1100 return nil 1101 } 1102 1103 func pow2Ceil(v int) int { 1104 exp := bits.Len(uint(v)) 1105 if bits.OnesCount(uint(v)) == 1 { 1106 exp-- 1107 } 1108 return 1 << exp 1109 } 1110 1111 // materialQuad constructs a quad that represents the transformed image. It returns the quad 1112 // and its bounds. 1113 func (g *compute) materialQuad(imgAtlasSize image.Point, M f32.Affine2D, img imageOpData, uvPos image.Point) [4]materialVertex { 1114 imgSize := layout.FPt(img.src.Bounds().Size()) 1115 sx, hx, ox, hy, sy, oy := M.Elems() 1116 transOff := f32.Pt(ox, oy) 1117 // The 4 corners of the image rectangle transformed by M, excluding its offset, are: 1118 // 1119 // q0: M * (0, 0) q3: M * (w, 0) 1120 // q1: M * (0, h) q2: M * (w, h) 1121 // 1122 // Note that q0 = M*0 = 0, q2 = q1 + q3. 1123 q0 := f32.Pt(0, 0) 1124 q1 := f32.Pt(hx*imgSize.Y, sy*imgSize.Y) 1125 q3 := f32.Pt(sx*imgSize.X, hy*imgSize.X) 1126 q2 := q1.Add(q3) 1127 q0 = q0.Add(transOff) 1128 q1 = q1.Add(transOff) 1129 q2 = q2.Add(transOff) 1130 q3 = q3.Add(transOff) 1131 1132 uvPosf := layout.FPt(uvPos) 1133 atlasScale := f32.Pt(1/float32(imgAtlasSize.X), 1/float32(imgAtlasSize.Y)) 1134 uvBounds := f32.Rectangle{ 1135 Min: uvPosf, 1136 Max: uvPosf.Add(imgSize), 1137 } 1138 uvBounds.Min.X *= atlasScale.X 1139 uvBounds.Min.Y *= atlasScale.Y 1140 uvBounds.Max.X *= atlasScale.X 1141 uvBounds.Max.Y *= atlasScale.Y 1142 quad := [4]materialVertex{ 1143 {posX: q0.X, posY: q0.Y, u: uvBounds.Min.X, v: uvBounds.Min.Y}, 1144 {posX: q1.X, posY: q1.Y, u: uvBounds.Min.X, v: uvBounds.Max.Y}, 1145 {posX: q2.X, posY: q2.Y, u: uvBounds.Max.X, v: uvBounds.Max.Y}, 1146 {posX: q3.X, posY: q3.Y, u: uvBounds.Max.X, v: uvBounds.Min.Y}, 1147 } 1148 return quad 1149 } 1150 1151 func quadBounds(q [4]materialVertex) f32.Rectangle { 1152 q0 := f32.Pt(q[0].posX, q[0].posY) 1153 q1 := f32.Pt(q[1].posX, q[1].posY) 1154 q2 := f32.Pt(q[2].posX, q[2].posY) 1155 q3 := f32.Pt(q[3].posX, q[3].posY) 1156 return f32.Rectangle{ 1157 Min: min(min(q0, q1), min(q2, q3)), 1158 Max: max(max(q0, q1), max(q2, q3)), 1159 } 1160 } 1161 1162 func max(p1, p2 f32.Point) f32.Point { 1163 p := p1 1164 if p2.X > p.X { 1165 p.X = p2.X 1166 } 1167 if p2.Y > p.Y { 1168 p.Y = p2.Y 1169 } 1170 return p 1171 } 1172 1173 func min(p1, p2 f32.Point) f32.Point { 1174 p := p1 1175 if p2.X < p.X { 1176 p.X = p2.X 1177 } 1178 if p2.Y < p.Y { 1179 p.Y = p2.Y 1180 } 1181 return p 1182 } 1183 1184 func (enc *encoder) encodePath(verts []byte, fillMode int) { 1185 for ; len(verts) >= scene.CommandSize+4; verts = verts[scene.CommandSize+4:] { 1186 cmd := ops.DecodeCommand(verts[4:]) 1187 if cmd.Op() == scene.OpGap { 1188 if fillMode != scene.FillModeNonzero { 1189 // Skip gaps in strokes. 1190 continue 1191 } 1192 // Replace them by a straight line in outlines. 1193 cmd = scene.Line(scene.DecodeGap(cmd)) 1194 } 1195 enc.scene = append(enc.scene, cmd) 1196 enc.npathseg++ 1197 } 1198 } 1199 1200 func (g *compute) render(images *textureAtlas, dst driver.Texture, cpuDst cpu.ImageDescriptor, tileDims image.Point, stride int) error { 1201 const ( 1202 // wgSize is the largest and most common workgroup size. 1203 wgSize = 128 1204 // PARTITION_SIZE from elements.comp 1205 partitionSize = 32 * 4 1206 ) 1207 widthInBins := (tileDims.X + 15) / 16 1208 heightInBins := (tileDims.Y + 7) / 8 1209 if widthInBins*heightInBins > wgSize { 1210 return fmt.Errorf("gpu: output too large (%dx%d)", tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx) 1211 } 1212 1213 enc := &g.enc 1214 // Pad scene with zeroes to avoid reading garbage in elements.comp. 1215 scenePadding := partitionSize - len(enc.scene)%partitionSize 1216 enc.scene = append(enc.scene, make([]scene.Command, scenePadding)...) 1217 1218 scene := byteslice.Slice(enc.scene) 1219 if s := len(scene); s > g.buffers.scene.size { 1220 paddedCap := s * 11 / 10 1221 if err := g.buffers.scene.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead, paddedCap); err != nil { 1222 return err 1223 } 1224 } 1225 g.buffers.scene.upload(scene) 1226 1227 // alloc is the number of allocated bytes for static buffers. 1228 var alloc uint32 1229 round := func(v, quantum int) int { 1230 return (v + quantum - 1) &^ (quantum - 1) 1231 } 1232 malloc := func(size int) memAlloc { 1233 size = round(size, 4) 1234 offset := alloc 1235 alloc += uint32(size) 1236 return memAlloc{offset /*, uint32(size)*/} 1237 } 1238 1239 *g.conf = config{ 1240 n_elements: uint32(enc.npath), 1241 n_pathseg: uint32(enc.npathseg), 1242 width_in_tiles: uint32(tileDims.X), 1243 height_in_tiles: uint32(tileDims.Y), 1244 tile_alloc: malloc(enc.npath * pathSize), 1245 bin_alloc: malloc(round(enc.npath, wgSize) * binSize), 1246 ptcl_alloc: malloc(tileDims.X * tileDims.Y * ptclInitialAlloc), 1247 pathseg_alloc: malloc(enc.npathseg * pathsegSize), 1248 anno_alloc: malloc(enc.npath * annoSize), 1249 trans_alloc: malloc(enc.ntrans * transSize), 1250 } 1251 1252 numPartitions := (enc.numElements() + 127) / 128 1253 // clearSize is the atomic partition counter plus flag and 2 states per partition. 1254 clearSize := 4 + numPartitions*stateStride 1255 if clearSize > g.buffers.state.size { 1256 paddedCap := clearSize * 11 / 10 1257 if err := g.buffers.state.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, paddedCap); err != nil { 1258 return err 1259 } 1260 } 1261 1262 confData := byteslice.Struct(g.conf) 1263 g.buffers.config.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead, len(confData)) 1264 g.buffers.config.upload(confData) 1265 1266 minSize := int(unsafe.Sizeof(memoryHeader{})) + int(alloc) 1267 if minSize > g.buffers.memory.size { 1268 // Add space for dynamic GPU allocations. 1269 const sizeBump = 4 * 1024 * 1024 1270 minSize += sizeBump 1271 if err := g.buffers.memory.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, minSize); err != nil { 1272 return err 1273 } 1274 } 1275 1276 for { 1277 *g.memHeader = memoryHeader{ 1278 mem_offset: alloc, 1279 } 1280 g.buffers.memory.upload(byteslice.Struct(g.memHeader)) 1281 g.buffers.state.upload(g.zeros(clearSize)) 1282 1283 if !g.useCPU { 1284 g.ctx.BeginCompute() 1285 g.ctx.BindImageTexture(kernel4OutputUnit, dst) 1286 img := g.output.nullMaterials 1287 if images != nil { 1288 img = images.image 1289 } 1290 g.ctx.BindImageTexture(kernel4AtlasUnit, img) 1291 } else { 1292 *g.output.descriptors.Binding2() = cpuDst 1293 if images != nil { 1294 *g.output.descriptors.Binding3() = images.cpuImage 1295 } 1296 } 1297 1298 g.bindBuffers() 1299 g.memoryBarrier() 1300 g.dispatch(g.programs.elements, numPartitions, 1, 1) 1301 g.memoryBarrier() 1302 g.dispatch(g.programs.tileAlloc, (enc.npath+wgSize-1)/wgSize, 1, 1) 1303 g.memoryBarrier() 1304 g.dispatch(g.programs.pathCoarse, (enc.npathseg+31)/32, 1, 1) 1305 g.memoryBarrier() 1306 g.dispatch(g.programs.backdrop, (enc.npath+wgSize-1)/wgSize, 1, 1) 1307 // No barrier needed between backdrop and binning. 1308 g.dispatch(g.programs.binning, (enc.npath+wgSize-1)/wgSize, 1, 1) 1309 g.memoryBarrier() 1310 g.dispatch(g.programs.coarse, widthInBins, heightInBins, 1) 1311 g.memoryBarrier() 1312 g.dispatch(g.programs.kernel4, tileDims.X, tileDims.Y, 1) 1313 g.memoryBarrier() 1314 if !g.useCPU { 1315 g.ctx.EndCompute() 1316 } else { 1317 g.dispatcher.Sync() 1318 } 1319 1320 if err := g.buffers.memory.download(byteslice.Struct(g.memHeader)); err != nil { 1321 if err == driver.ErrContentLost { 1322 continue 1323 } 1324 return err 1325 } 1326 switch errCode := g.memHeader.mem_error; errCode { 1327 case memNoError: 1328 if g.useCPU { 1329 w, h := tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx 1330 dst.Upload(image.Pt(0, 0), image.Pt(w, h), cpuDst.Data(), stride) 1331 } 1332 return nil 1333 case memMallocFailed: 1334 // Resize memory and try again. 1335 sz := g.buffers.memory.size * 15 / 10 1336 if err := g.buffers.memory.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, sz); err != nil { 1337 return err 1338 } 1339 continue 1340 default: 1341 return fmt.Errorf("compute: shader program failed with error %d", errCode) 1342 } 1343 } 1344 } 1345 1346 func (g *compute) memoryBarrier() { 1347 if g.useCPU { 1348 g.dispatcher.Barrier() 1349 } 1350 } 1351 1352 func (g *compute) dispatch(p computeProgram, x, y, z int) { 1353 if !g.useCPU { 1354 g.ctx.BindProgram(p.prog) 1355 g.ctx.DispatchCompute(x, y, z) 1356 } else { 1357 g.dispatcher.Dispatch(p.progInfo, p.descriptors, x, y, z) 1358 } 1359 } 1360 1361 // zeros returns a byte slice with size bytes of zeros. 1362 func (g *compute) zeros(size int) []byte { 1363 if cap(g.zeroSlice) < size { 1364 g.zeroSlice = append(g.zeroSlice, make([]byte, size)...) 1365 } 1366 return g.zeroSlice[:size] 1367 } 1368 1369 func (g *compute) touchAlloc(a *atlasAlloc) { 1370 if a.dead { 1371 panic("re-use of dead allocation") 1372 } 1373 a.frameCount = g.frameCount 1374 a.atlas.lastFrame = a.frameCount 1375 } 1376 1377 func (g *compute) atlasAlloc(q allocQuery) (atlasAlloc, bool) { 1378 var ( 1379 place placement 1380 fits bool 1381 atlas = q.atlas 1382 ) 1383 if atlas != nil { 1384 place, fits = atlas.packer.tryAdd(q.size) 1385 if !fits { 1386 atlas.compact = true 1387 } 1388 } 1389 if atlas == nil { 1390 // Look for matching atlas to re-use. 1391 for _, a := range g.atlases { 1392 if q.empty && len(a.allocs) > 0 { 1393 continue 1394 } 1395 if q.nocompact && a.compact { 1396 continue 1397 } 1398 if a.format != q.format || a.bindings&q.bindings != q.bindings { 1399 continue 1400 } 1401 place, fits = a.packer.tryAdd(q.size) 1402 if !fits { 1403 a.compact = true 1404 continue 1405 } 1406 atlas = a 1407 break 1408 } 1409 } 1410 if atlas == nil { 1411 atlas = &textureAtlas{ 1412 format: q.format, 1413 bindings: q.bindings, 1414 } 1415 atlas.packer.maxDims = image.Pt(g.maxTextureDim, g.maxTextureDim) 1416 atlas.packer.newPage() 1417 g.atlases = append(g.atlases, atlas) 1418 place, fits = atlas.packer.tryAdd(q.size) 1419 if !fits { 1420 panic(fmt.Errorf("compute: atlas allocation too large (%v)", q.size)) 1421 } 1422 } 1423 if !fits { 1424 return atlasAlloc{}, false 1425 } 1426 atlas.lastFrame = g.frameCount 1427 return atlasAlloc{ 1428 frameCount: g.frameCount, 1429 atlas: atlas, 1430 rect: image.Rectangle{Min: place.Pos, Max: place.Pos.Add(q.size)}, 1431 }, true 1432 } 1433 1434 func (g *compute) realizeAtlas(atlas *textureAtlas, useCPU bool, size image.Point) error { 1435 defer func() { 1436 atlas.packer.maxDims = atlas.size 1437 atlas.realized = true 1438 atlas.ensureCPUImage(useCPU) 1439 }() 1440 if atlas.size.X >= size.X && atlas.size.Y >= size.Y { 1441 return nil 1442 } 1443 if atlas.realized { 1444 panic("resizing a realized atlas") 1445 } 1446 if err := atlas.resize(g.ctx, size); err != nil { 1447 return err 1448 } 1449 return nil 1450 } 1451 1452 func (a *textureAtlas) resize(ctx driver.Device, size image.Point) error { 1453 a.Release() 1454 1455 img, err := ctx.NewTexture(a.format, size.X, size.Y, 1456 driver.FilterNearest, 1457 driver.FilterNearest, 1458 a.bindings) 1459 if err != nil { 1460 return err 1461 } 1462 a.image = img 1463 a.size = size 1464 return nil 1465 } 1466 1467 func (a *textureAtlas) ensureCPUImage(useCPU bool) { 1468 if !useCPU || a.hasCPU { 1469 return 1470 } 1471 a.hasCPU = true 1472 a.cpuImage = cpu.NewImageRGBA(a.size.X, a.size.Y) 1473 } 1474 1475 func (g *compute) Release() { 1476 if g.useCPU { 1477 g.dispatcher.Stop() 1478 } 1479 type resource interface { 1480 Release() 1481 } 1482 res := []resource{ 1483 g.output.nullMaterials, 1484 &g.programs.elements, 1485 &g.programs.tileAlloc, 1486 &g.programs.pathCoarse, 1487 &g.programs.backdrop, 1488 &g.programs.binning, 1489 &g.programs.coarse, 1490 &g.programs.kernel4, 1491 g.output.blitPipeline, 1492 &g.output.buffer, 1493 g.output.uniBuf, 1494 &g.buffers.scene, 1495 &g.buffers.state, 1496 &g.buffers.memory, 1497 &g.buffers.config, 1498 g.materials.pipeline, 1499 &g.materials.buffer, 1500 g.materials.uniforms.buf, 1501 g.timers.t, 1502 } 1503 for _, r := range res { 1504 if r != nil { 1505 r.Release() 1506 } 1507 } 1508 for _, a := range g.atlases { 1509 a.Release() 1510 } 1511 g.ctx.Release() 1512 *g = compute{} 1513 } 1514 1515 func (a *textureAtlas) Release() { 1516 if a.image != nil { 1517 a.image.Release() 1518 a.image = nil 1519 } 1520 a.cpuImage.Free() 1521 a.hasCPU = false 1522 } 1523 1524 func (g *compute) bindBuffers() { 1525 g.bindStorageBuffers(g.programs.elements, g.buffers.memory, g.buffers.config, g.buffers.scene, g.buffers.state) 1526 g.bindStorageBuffers(g.programs.tileAlloc, g.buffers.memory, g.buffers.config) 1527 g.bindStorageBuffers(g.programs.pathCoarse, g.buffers.memory, g.buffers.config) 1528 g.bindStorageBuffers(g.programs.backdrop, g.buffers.memory, g.buffers.config) 1529 g.bindStorageBuffers(g.programs.binning, g.buffers.memory, g.buffers.config) 1530 g.bindStorageBuffers(g.programs.coarse, g.buffers.memory, g.buffers.config) 1531 g.bindStorageBuffers(g.programs.kernel4, g.buffers.memory, g.buffers.config) 1532 } 1533 1534 func (p *computeProgram) Release() { 1535 if p.prog != nil { 1536 p.prog.Release() 1537 } 1538 *p = computeProgram{} 1539 } 1540 1541 func (b *sizedBuffer) Release() { 1542 if b.buffer != nil { 1543 b.buffer.Release() 1544 } 1545 b.cpuBuf.Free() 1546 *b = sizedBuffer{} 1547 } 1548 1549 func (b *sizedBuffer) ensureCapacity(useCPU bool, ctx driver.Device, binding driver.BufferBinding, size int) error { 1550 if b.size >= size { 1551 return nil 1552 } 1553 if b.buffer != nil { 1554 b.Release() 1555 } 1556 b.cpuBuf.Free() 1557 if !useCPU { 1558 buf, err := ctx.NewBuffer(binding, size) 1559 if err != nil { 1560 return err 1561 } 1562 b.buffer = buf 1563 } else { 1564 b.cpuBuf = cpu.NewBuffer(size) 1565 } 1566 b.size = size 1567 return nil 1568 } 1569 1570 func (b *sizedBuffer) download(data []byte) error { 1571 if b.buffer != nil { 1572 return b.buffer.Download(data) 1573 } else { 1574 copy(data, b.cpuBuf.Data()) 1575 return nil 1576 } 1577 } 1578 1579 func (b *sizedBuffer) upload(data []byte) { 1580 if b.buffer != nil { 1581 b.buffer.Upload(data) 1582 } else { 1583 copy(b.cpuBuf.Data(), data) 1584 } 1585 } 1586 1587 func (g *compute) bindStorageBuffers(prog computeProgram, buffers ...sizedBuffer) { 1588 for i, buf := range buffers { 1589 if !g.useCPU { 1590 g.ctx.BindStorageBuffer(i, buf.buffer) 1591 } else { 1592 *prog.buffers[i] = buf.cpuBuf 1593 } 1594 } 1595 } 1596 1597 var bo = binary.LittleEndian 1598 1599 func (e *encoder) reset() { 1600 e.scene = e.scene[:0] 1601 e.npath = 0 1602 e.npathseg = 0 1603 e.ntrans = 0 1604 } 1605 1606 func (e *encoder) numElements() int { 1607 return len(e.scene) 1608 } 1609 1610 func (e *encoder) transform(m f32.Affine2D) { 1611 e.scene = append(e.scene, scene.Transform(m)) 1612 e.ntrans++ 1613 } 1614 1615 func (e *encoder) lineWidth(width float32) { 1616 e.scene = append(e.scene, scene.SetLineWidth(width)) 1617 } 1618 1619 func (e *encoder) fillMode(mode scene.FillMode) { 1620 e.scene = append(e.scene, scene.SetFillMode(mode)) 1621 } 1622 1623 func (e *encoder) beginClip(bbox f32.Rectangle) { 1624 e.scene = append(e.scene, scene.BeginClip(bbox)) 1625 e.npath++ 1626 } 1627 1628 func (e *encoder) endClip(bbox f32.Rectangle) { 1629 e.scene = append(e.scene, scene.EndClip(bbox)) 1630 e.npath++ 1631 } 1632 1633 func (e *encoder) rect(r f32.Rectangle) { 1634 // Rectangle corners, clock-wise. 1635 c0, c1, c2, c3 := r.Min, f32.Pt(r.Min.X, r.Max.Y), r.Max, f32.Pt(r.Max.X, r.Min.Y) 1636 e.line(c0, c1) 1637 e.line(c1, c2) 1638 e.line(c2, c3) 1639 e.line(c3, c0) 1640 } 1641 1642 func (e *encoder) fillColor(col color.RGBA) { 1643 e.scene = append(e.scene, scene.FillColor(col)) 1644 e.npath++ 1645 } 1646 1647 func (e *encoder) fillImage(index int, offset image.Point) { 1648 e.scene = append(e.scene, scene.FillImage(index, offset)) 1649 e.npath++ 1650 } 1651 1652 func (e *encoder) line(start, end f32.Point) { 1653 e.scene = append(e.scene, scene.Line(start, end)) 1654 e.npathseg++ 1655 } 1656 1657 func (c *collector) reset() { 1658 c.prevFrame, c.frame = c.frame, c.prevFrame 1659 c.profile = false 1660 c.clipStates = c.clipStates[:0] 1661 c.transStack = c.transStack[:0] 1662 c.frame.reset() 1663 } 1664 1665 func (c *opsCollector) reset() { 1666 c.paths = c.paths[:0] 1667 c.clipCmds = c.clipCmds[:0] 1668 c.ops = c.ops[:0] 1669 c.layers = c.layers[:0] 1670 } 1671 1672 func (c *collector) addClip(state *encoderState, viewport, bounds f32.Rectangle, path []byte, key ops.Key, hash uint64, strokeWidth float32, push bool) { 1673 // Rectangle clip regions. 1674 if len(path) == 0 && !push { 1675 // If the rectangular clip region contains a previous path it can be discarded. 1676 p := state.clip 1677 t := state.relTrans.Invert() 1678 for p != nil { 1679 // rect is the parent bounds transformed relative to the rectangle. 1680 rect := transformBounds(t, p.bounds) 1681 if rect.In(bounds) { 1682 return 1683 } 1684 t = p.relTrans.Invert().Mul(t) 1685 p = p.parent 1686 } 1687 } 1688 1689 absBounds := transformBounds(state.t, bounds).Bounds() 1690 intersect := absBounds 1691 if state.clip != nil { 1692 intersect = state.clip.intersect.Intersect(intersect) 1693 } 1694 c.clipStates = append(c.clipStates, clipState{ 1695 parent: state.clip, 1696 absBounds: absBounds, 1697 path: path, 1698 pathKey: key, 1699 intersect: intersect, 1700 clipKey: clipKey{ 1701 bounds: bounds, 1702 relTrans: state.relTrans, 1703 strokeWidth: strokeWidth, 1704 pathHash: hash, 1705 }, 1706 }) 1707 state.clip = &c.clipStates[len(c.clipStates)-1] 1708 state.relTrans = f32.Affine2D{} 1709 } 1710 1711 func (c *collector) collect(root *op.Ops, viewport image.Point, texOps *[]textureOp) { 1712 fview := f32.Rectangle{Max: layout.FPt(viewport)} 1713 var intOps *ops.Ops 1714 if root != nil { 1715 intOps = &root.Internal 1716 } 1717 c.reader.Reset(intOps) 1718 var state encoderState 1719 reset := func() { 1720 state = encoderState{ 1721 paintKey: paintKey{ 1722 color: color.NRGBA{A: 0xff}, 1723 }, 1724 } 1725 } 1726 reset() 1727 r := &c.reader 1728 var ( 1729 pathData struct { 1730 data []byte 1731 key ops.Key 1732 hash uint64 1733 } 1734 strWidth float32 1735 ) 1736 c.addClip(&state, fview, fview, nil, ops.Key{}, 0, 0, false) 1737 for encOp, ok := r.Decode(); ok; encOp, ok = r.Decode() { 1738 switch ops.OpType(encOp.Data[0]) { 1739 case ops.TypeProfile: 1740 c.profile = true 1741 case ops.TypeTransform: 1742 dop, push := ops.DecodeTransform(encOp.Data) 1743 if push { 1744 c.transStack = append(c.transStack, transEntry{t: state.t, relTrans: state.relTrans}) 1745 } 1746 state.t = state.t.Mul(dop) 1747 state.relTrans = state.relTrans.Mul(dop) 1748 case ops.TypePopTransform: 1749 n := len(c.transStack) 1750 st := c.transStack[n-1] 1751 c.transStack = c.transStack[:n-1] 1752 state.t = st.t 1753 state.relTrans = st.relTrans 1754 case ops.TypeStroke: 1755 strWidth = decodeStrokeOp(encOp.Data) 1756 case ops.TypePath: 1757 hash := bo.Uint64(encOp.Data[1:]) 1758 encOp, ok = r.Decode() 1759 if !ok { 1760 panic("unexpected end of path operation") 1761 } 1762 pathData.data = encOp.Data[ops.TypeAuxLen:] 1763 pathData.key = encOp.Key 1764 pathData.hash = hash 1765 case ops.TypeClip: 1766 var op ops.ClipOp 1767 op.Decode(encOp.Data) 1768 bounds := f32.FRect(op.Bounds) 1769 c.addClip(&state, fview, bounds, pathData.data, pathData.key, pathData.hash, strWidth, true) 1770 pathData.data = nil 1771 strWidth = 0 1772 case ops.TypePopClip: 1773 state.relTrans = state.clip.relTrans.Mul(state.relTrans) 1774 state.clip = state.clip.parent 1775 case ops.TypeColor: 1776 state.matType = materialColor 1777 state.color = decodeColorOp(encOp.Data) 1778 case ops.TypeLinearGradient: 1779 state.matType = materialLinearGradient 1780 op := decodeLinearGradientOp(encOp.Data) 1781 state.stop1 = op.stop1 1782 state.stop2 = op.stop2 1783 state.color1 = op.color1 1784 state.color2 = op.color2 1785 case ops.TypeImage: 1786 state.matType = materialTexture 1787 state.image = decodeImageOp(encOp.Data, encOp.Refs) 1788 case ops.TypePaint: 1789 paintState := state 1790 if paintState.matType == materialTexture { 1791 // Clip to the bounds of the image, to hide other images in the atlas. 1792 sz := state.image.src.Rect.Size() 1793 bounds := f32.Rectangle{Max: layout.FPt(sz)} 1794 c.addClip(&paintState, fview, bounds, nil, ops.Key{}, 0, 0, false) 1795 } 1796 intersect := paintState.clip.intersect 1797 if intersect.Empty() { 1798 break 1799 } 1800 1801 // If the paint is a uniform opaque color that takes up the whole 1802 // screen, it covers all previous paints and we can discard all 1803 // rendering commands recorded so far. 1804 if paintState.clip == nil && paintState.matType == materialColor && paintState.color.A == 255 { 1805 c.clearColor = f32color.LinearFromSRGB(paintState.color).Opaque() 1806 c.clear = true 1807 c.frame.reset() 1808 break 1809 } 1810 1811 // Flatten clip stack. 1812 p := paintState.clip 1813 startIdx := len(c.frame.clipCmds) 1814 for p != nil { 1815 idx := len(c.frame.paths) 1816 c.frame.paths = append(c.frame.paths, make([]byte, len(p.path))...) 1817 path := c.frame.paths[idx:] 1818 copy(path, p.path) 1819 c.frame.clipCmds = append(c.frame.clipCmds, clipCmd{ 1820 state: p.clipKey, 1821 path: path, 1822 pathKey: p.pathKey, 1823 absBounds: p.absBounds, 1824 }) 1825 p = p.parent 1826 } 1827 clipStack := c.frame.clipCmds[startIdx:] 1828 c.frame.ops = append(c.frame.ops, paintOp{ 1829 clipStack: clipStack, 1830 state: paintState.paintKey, 1831 intersect: intersect, 1832 }) 1833 case ops.TypeSave: 1834 id := ops.DecodeSave(encOp.Data) 1835 c.save(id, state.t) 1836 case ops.TypeLoad: 1837 reset() 1838 id := ops.DecodeLoad(encOp.Data) 1839 state.t = c.states[id] 1840 state.relTrans = state.t 1841 } 1842 } 1843 for i := range c.frame.ops { 1844 op := &c.frame.ops[i] 1845 // For each clip, cull rectangular clip regions that contain its 1846 // (transformed) bounds. addClip already handled the converse case. 1847 // TODO: do better than O(n²) to efficiently deal with deep stacks. 1848 for j := 0; j < len(op.clipStack)-1; j++ { 1849 cl := op.clipStack[j] 1850 p := cl.state 1851 r := transformBounds(p.relTrans, p.bounds) 1852 for k := j + 1; k < len(op.clipStack); k++ { 1853 cl2 := op.clipStack[k] 1854 p2 := cl2.state 1855 if len(cl2.path) == 0 && r.In(cl2.state.bounds) { 1856 op.clipStack = append(op.clipStack[:k], op.clipStack[k+1:]...) 1857 k-- 1858 op.clipStack[k].state.relTrans = p2.relTrans.Mul(op.clipStack[k].state.relTrans) 1859 } 1860 r = transformRect(p2.relTrans, r) 1861 } 1862 } 1863 // Separate the integer offset from the first transform. Two ops that differ 1864 // only in integer offsets may share backing storage. 1865 if len(op.clipStack) > 0 { 1866 c := &op.clipStack[len(op.clipStack)-1] 1867 t := c.state.relTrans 1868 t, off := separateTransform(t) 1869 c.state.relTrans = t 1870 op.offset = off 1871 op.state.t = op.state.t.Offset(layout.FPt(off.Mul(-1))) 1872 } 1873 op.hash = c.hashOp(*op) 1874 op.texOpIdx = -1 1875 switch op.state.matType { 1876 case materialTexture: 1877 op.texOpIdx = len(*texOps) 1878 // Separate integer offset from transformation. TextureOps that have identical transforms 1879 // except for their integer offsets can share a transformed image. 1880 t := op.state.t.Offset(layout.FPt(op.offset)) 1881 t, off := separateTransform(t) 1882 bounds := op.intersect.Round().Sub(off) 1883 *texOps = append(*texOps, textureOp{ 1884 img: op.state.image, 1885 off: off, 1886 key: textureKey{ 1887 bounds: bounds, 1888 transform: t, 1889 handle: op.state.image.handle, 1890 }, 1891 }) 1892 } 1893 } 1894 } 1895 1896 func (c *collector) hashOp(op paintOp) uint64 { 1897 c.hasher.Reset() 1898 for _, cl := range op.clipStack { 1899 k := cl.state 1900 keyBytes := (*[unsafe.Sizeof(k)]byte)(unsafe.Pointer(unsafe.Pointer(&k))) 1901 c.hasher.Write(keyBytes[:]) 1902 } 1903 k := op.state 1904 keyBytes := (*[unsafe.Sizeof(k)]byte)(unsafe.Pointer(unsafe.Pointer(&k))) 1905 c.hasher.Write(keyBytes[:]) 1906 return c.hasher.Sum64() 1907 } 1908 1909 func (g *compute) layer(viewport image.Point, texOps []textureOp) { 1910 // Sort ops from previous frames by hash. 1911 c := &g.collector 1912 prevOps := c.prevFrame.ops 1913 c.order = c.order[:0] 1914 for i, op := range prevOps { 1915 c.order = append(c.order, hashIndex{ 1916 index: i, 1917 hash: op.hash, 1918 }) 1919 } 1920 sort.Slice(c.order, func(i, j int) bool { 1921 return c.order[i].hash < c.order[j].hash 1922 }) 1923 // Split layers with different materials atlas; the compute stage has only 1924 // one materials slot. 1925 splitLayer := func(ops []paintOp, prevLayerIdx int) { 1926 for len(ops) > 0 { 1927 var materials *textureAtlas 1928 idx := 0 1929 for idx < len(ops) { 1930 if i := ops[idx].texOpIdx; i != -1 { 1931 omats := texOps[i].matAlloc.alloc.atlas 1932 if materials != nil && omats != nil && omats != materials { 1933 break 1934 } 1935 materials = omats 1936 } 1937 idx++ 1938 } 1939 l := layer{ops: ops[:idx], materials: materials} 1940 if prevLayerIdx != -1 { 1941 prev := c.prevFrame.layers[prevLayerIdx] 1942 if !prev.alloc.dead && len(prev.ops) == len(l.ops) { 1943 l.alloc = prev.alloc 1944 l.materials = prev.materials 1945 g.touchAlloc(l.alloc) 1946 } 1947 } 1948 for i, op := range l.ops { 1949 l.rect = l.rect.Union(op.intersect.Round()) 1950 l.ops[i].layer = len(c.frame.layers) 1951 } 1952 c.frame.layers = append(c.frame.layers, l) 1953 ops = ops[idx:] 1954 } 1955 } 1956 ops := c.frame.ops 1957 idx := 0 1958 for idx < len(ops) { 1959 op := ops[idx] 1960 // Search for longest matching op sequence. 1961 // start is the earliest index of a match. 1962 start := searchOp(c.order, op.hash) 1963 layerOps, prevLayerIdx := longestLayer(prevOps, c.order[start:], ops[idx:]) 1964 if len(layerOps) == 0 { 1965 idx++ 1966 continue 1967 } 1968 if unmatched := ops[:idx]; len(unmatched) > 0 { 1969 // Flush layer of unmatched ops. 1970 splitLayer(unmatched, -1) 1971 ops = ops[idx:] 1972 idx = 0 1973 } 1974 splitLayer(layerOps, prevLayerIdx) 1975 ops = ops[len(layerOps):] 1976 } 1977 if len(ops) > 0 { 1978 splitLayer(ops, -1) 1979 } 1980 } 1981 1982 func longestLayer(prev []paintOp, order []hashIndex, ops []paintOp) ([]paintOp, int) { 1983 longest := 0 1984 longestIdx := -1 1985 outer: 1986 for len(order) > 0 { 1987 first := order[0] 1988 order = order[1:] 1989 match := prev[first.index:] 1990 // Potential match found. Now find longest matching sequence. 1991 end := 0 1992 layer := match[0].layer 1993 off := match[0].offset.Sub(ops[0].offset) 1994 for end < len(match) && end < len(ops) { 1995 m := match[end] 1996 o := ops[end] 1997 // End layers on previous match. 1998 if m.layer != layer { 1999 break 2000 } 2001 // End layer when the next op doesn't match. 2002 if m.hash != o.hash { 2003 if end == 0 { 2004 // Hashes are sorted so if the first op doesn't match, no 2005 // more matches are possible. 2006 break outer 2007 } 2008 break 2009 } 2010 if !opEqual(off, m, o) { 2011 break 2012 } 2013 end++ 2014 } 2015 if end > longest { 2016 longest = end 2017 longestIdx = layer 2018 2019 } 2020 } 2021 return ops[:longest], longestIdx 2022 } 2023 2024 func searchOp(order []hashIndex, hash uint64) int { 2025 lo, hi := 0, len(order) 2026 for lo < hi { 2027 mid := (lo + hi) / 2 2028 if order[mid].hash < hash { 2029 lo = mid + 1 2030 } else { 2031 hi = mid 2032 } 2033 } 2034 return lo 2035 } 2036 2037 func opEqual(off image.Point, o1 paintOp, o2 paintOp) bool { 2038 if len(o1.clipStack) != len(o2.clipStack) { 2039 return false 2040 } 2041 if o1.state != o2.state { 2042 return false 2043 } 2044 if o1.offset.Sub(o2.offset) != off { 2045 return false 2046 } 2047 for i, cl1 := range o1.clipStack { 2048 cl2 := o2.clipStack[i] 2049 if len(cl1.path) != len(cl2.path) { 2050 return false 2051 } 2052 if cl1.state != cl2.state { 2053 return false 2054 } 2055 if cl1.pathKey != cl2.pathKey && !bytes.Equal(cl1.path, cl2.path) { 2056 return false 2057 } 2058 } 2059 return true 2060 } 2061 2062 func encodeLayer(l layer, pos image.Point, viewport image.Point, enc *encoder, texOps []textureOp) { 2063 off := pos.Sub(l.rect.Min) 2064 offf := layout.FPt(off) 2065 2066 enc.transform(f32.Affine2D{}.Offset(offf)) 2067 for _, op := range l.ops { 2068 encodeOp(viewport, off, enc, texOps, op) 2069 } 2070 enc.transform(f32.Affine2D{}.Offset(offf.Mul(-1))) 2071 } 2072 2073 func encodeOp(viewport image.Point, absOff image.Point, enc *encoder, texOps []textureOp, op paintOp) { 2074 // Fill in clip bounds, which the shaders expect to be the union 2075 // of all affected bounds. 2076 var union f32.Rectangle 2077 for i, cl := range op.clipStack { 2078 union = union.Union(cl.absBounds) 2079 op.clipStack[i].union = union 2080 } 2081 2082 absOfff := layout.FPt(absOff) 2083 fillMode := scene.FillModeNonzero 2084 opOff := layout.FPt(op.offset) 2085 inv := f32.Affine2D{}.Offset(opOff) 2086 enc.transform(inv) 2087 for i := len(op.clipStack) - 1; i >= 0; i-- { 2088 cl := op.clipStack[i] 2089 if w := cl.state.strokeWidth; w > 0 { 2090 enc.fillMode(scene.FillModeStroke) 2091 enc.lineWidth(w) 2092 fillMode = scene.FillModeStroke 2093 } else if fillMode != scene.FillModeNonzero { 2094 enc.fillMode(scene.FillModeNonzero) 2095 fillMode = scene.FillModeNonzero 2096 } 2097 enc.transform(cl.state.relTrans) 2098 inv = inv.Mul(cl.state.relTrans) 2099 if len(cl.path) == 0 { 2100 enc.rect(cl.state.bounds) 2101 } else { 2102 enc.encodePath(cl.path, fillMode) 2103 } 2104 if i != 0 { 2105 enc.beginClip(cl.union.Add(absOfff)) 2106 } 2107 } 2108 if len(op.clipStack) == 0 { 2109 // No clipping; fill the entire view. 2110 enc.rect(f32.Rectangle{Max: layout.FPt(viewport)}) 2111 } 2112 2113 switch op.state.matType { 2114 case materialTexture: 2115 texOp := texOps[op.texOpIdx] 2116 off := texOp.matAlloc.alloc.rect.Min.Add(texOp.matAlloc.offset).Sub(texOp.off).Sub(absOff) 2117 enc.fillImage(0, off) 2118 case materialColor: 2119 enc.fillColor(f32color.NRGBAToRGBA(op.state.color)) 2120 case materialLinearGradient: 2121 // TODO: implement. 2122 enc.fillColor(f32color.NRGBAToRGBA(op.state.color1)) 2123 default: 2124 panic("not implemented") 2125 } 2126 enc.transform(inv.Invert()) 2127 // Pop the clip stack, except the first entry used for fill. 2128 for i := 1; i < len(op.clipStack); i++ { 2129 cl := op.clipStack[i] 2130 enc.endClip(cl.union.Add(absOfff)) 2131 } 2132 if fillMode != scene.FillModeNonzero { 2133 enc.fillMode(scene.FillModeNonzero) 2134 } 2135 } 2136 2137 func (c *collector) save(id int, state f32.Affine2D) { 2138 if extra := id - len(c.states) + 1; extra > 0 { 2139 c.states = append(c.states, make([]f32.Affine2D, extra)...) 2140 } 2141 c.states[id] = state 2142 } 2143 2144 func transformBounds(t f32.Affine2D, bounds f32.Rectangle) rectangle { 2145 return rectangle{ 2146 t.Transform(bounds.Min), t.Transform(f32.Pt(bounds.Max.X, bounds.Min.Y)), 2147 t.Transform(bounds.Max), t.Transform(f32.Pt(bounds.Min.X, bounds.Max.Y)), 2148 } 2149 } 2150 2151 func separateTransform(t f32.Affine2D) (f32.Affine2D, image.Point) { 2152 sx, hx, ox, hy, sy, oy := t.Elems() 2153 intx, fracx := math.Modf(float64(ox)) 2154 inty, fracy := math.Modf(float64(oy)) 2155 t = f32.NewAffine2D(sx, hx, float32(fracx), hy, sy, float32(fracy)) 2156 return t, image.Pt(int(intx), int(inty)) 2157 } 2158 2159 func transformRect(t f32.Affine2D, r rectangle) rectangle { 2160 var tr rectangle 2161 for i, c := range r { 2162 tr[i] = t.Transform(c) 2163 } 2164 return tr 2165 } 2166 2167 func (r rectangle) In(b f32.Rectangle) bool { 2168 for _, c := range r { 2169 inside := b.Min.X <= c.X && c.X <= b.Max.X && 2170 b.Min.Y <= c.Y && c.Y <= b.Max.Y 2171 if !inside { 2172 return false 2173 } 2174 } 2175 return true 2176 } 2177 2178 func (r rectangle) Contains(b f32.Rectangle) bool { 2179 return true 2180 } 2181 2182 func (r rectangle) Bounds() f32.Rectangle { 2183 bounds := f32.Rectangle{ 2184 Min: f32.Pt(math.MaxFloat32, math.MaxFloat32), 2185 Max: f32.Pt(-math.MaxFloat32, -math.MaxFloat32), 2186 } 2187 for _, c := range r { 2188 if c.X < bounds.Min.X { 2189 bounds.Min.X = c.X 2190 } 2191 if c.Y < bounds.Min.Y { 2192 bounds.Min.Y = c.Y 2193 } 2194 if c.X > bounds.Max.X { 2195 bounds.Max.X = c.X 2196 } 2197 if c.Y > bounds.Max.Y { 2198 bounds.Max.Y = c.Y 2199 } 2200 } 2201 return bounds 2202 }