github.com/utopiagio/gio@v0.0.8/gpu/compute.go (about) 1 // SPDX-License-Identifier: Unlicense OR MIT 2 3 package gpu 4 5 import ( 6 "bytes" 7 "encoding/binary" 8 "errors" 9 "fmt" 10 "hash/maphash" 11 "image" 12 "image/color" 13 "image/draw" 14 "image/png" 15 "math" 16 "math/bits" 17 "os" 18 "runtime" 19 "sort" 20 "time" 21 "unsafe" 22 23 "gioui.org/cpu" 24 "github.com/utopiagio/gio/gpu/internal/driver" 25 "github.com/utopiagio/gio/internal/byteslice" 26 "github.com/utopiagio/gio/internal/f32" 27 "github.com/utopiagio/gio/internal/f32color" 28 "github.com/utopiagio/gio/internal/ops" 29 "github.com/utopiagio/gio/internal/scene" 30 "github.com/utopiagio/gio/layout" 31 "github.com/utopiagio/gio/op" 32 "gioui.org/shader" 33 "gioui.org/shader/gio" 34 "gioui.org/shader/piet" 35 ) 36 37 type compute struct { 38 ctx driver.Device 39 40 collector collector 41 enc encoder 42 texOps []textureOp 43 viewport image.Point 44 maxTextureDim int 45 srgb bool 46 atlases []*textureAtlas 47 frameCount uint 48 moves []atlasMove 49 50 programs struct { 51 elements computeProgram 52 tileAlloc computeProgram 53 pathCoarse computeProgram 54 backdrop computeProgram 55 binning computeProgram 56 coarse computeProgram 57 kernel4 computeProgram 58 } 59 buffers struct { 60 config sizedBuffer 61 scene sizedBuffer 62 state sizedBuffer 63 memory sizedBuffer 64 } 65 output struct { 66 blitPipeline driver.Pipeline 67 68 buffer sizedBuffer 69 70 uniforms *copyUniforms 71 uniBuf driver.Buffer 72 73 layerVertices []layerVertex 74 descriptors *piet.Kernel4DescriptorSetLayout 75 76 nullMaterials driver.Texture 77 } 78 // imgAllocs maps imageOpData.handles to allocs. 79 imgAllocs map[interface{}]*atlasAlloc 80 // materials contains the pre-processed materials (transformed images for 81 // now, gradients etc. later) packed in a texture atlas. The atlas is used 82 // as source in kernel4. 83 materials struct { 84 // allocs maps texture ops the their atlases and FillImage offsets. 85 allocs map[textureKey]materialAlloc 86 87 pipeline driver.Pipeline 88 buffer sizedBuffer 89 quads []materialVertex 90 uniforms struct { 91 u *materialUniforms 92 buf driver.Buffer 93 } 94 } 95 timers struct { 96 t *timers 97 compact *timer 98 render *timer 99 blit *timer 100 } 101 102 // CPU fallback fields. 103 useCPU bool 104 dispatcher *dispatcher 105 106 // The following fields hold scratch space to avoid garbage. 107 zeroSlice []byte 108 memHeader *memoryHeader 109 conf *config 110 } 111 112 type materialAlloc struct { 113 alloc *atlasAlloc 114 offset image.Point 115 } 116 117 type layer struct { 118 rect image.Rectangle 119 alloc *atlasAlloc 120 ops []paintOp 121 materials *textureAtlas 122 } 123 124 type allocQuery struct { 125 atlas *textureAtlas 126 size image.Point 127 empty bool 128 format driver.TextureFormat 129 bindings driver.BufferBinding 130 nocompact bool 131 } 132 133 type atlasAlloc struct { 134 atlas *textureAtlas 135 rect image.Rectangle 136 cpu bool 137 dead bool 138 frameCount uint 139 } 140 141 type atlasMove struct { 142 src *textureAtlas 143 dstPos image.Point 144 srcRect image.Rectangle 145 cpu bool 146 } 147 148 type textureAtlas struct { 149 image driver.Texture 150 format driver.TextureFormat 151 bindings driver.BufferBinding 152 hasCPU bool 153 cpuImage cpu.ImageDescriptor 154 size image.Point 155 allocs []*atlasAlloc 156 packer packer 157 realized bool 158 lastFrame uint 159 compact bool 160 } 161 162 type copyUniforms struct { 163 scale [2]float32 164 pos [2]float32 165 uvScale [2]float32 166 _ [8]byte // Pad to 16 bytes. 
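// (scale, pos and uvScale occupy 24 bytes; the padding brings the struct to 32 bytes,
// a multiple of the 16-byte alignment that uniform buffers typically require.)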
167 } 168 169 type materialUniforms struct { 170 scale [2]float32 171 pos [2]float32 172 emulatesRGB float32 173 _ [12]byte // Pad to 16 bytes 174 } 175 176 type collector struct { 177 hasher maphash.Hash 178 reader ops.Reader 179 states []f32.Affine2D 180 clear bool 181 clearColor f32color.RGBA 182 clipStates []clipState 183 order []hashIndex 184 transStack []transEntry 185 prevFrame opsCollector 186 frame opsCollector 187 } 188 189 type transEntry struct { 190 t f32.Affine2D 191 relTrans f32.Affine2D 192 } 193 194 type hashIndex struct { 195 index int 196 hash uint64 197 } 198 199 type opsCollector struct { 200 paths []byte 201 clipCmds []clipCmd 202 ops []paintOp 203 layers []layer 204 } 205 206 type paintOp struct { 207 clipStack []clipCmd 208 offset image.Point 209 state paintKey 210 intersect f32.Rectangle 211 hash uint64 212 layer int 213 texOpIdx int 214 } 215 216 // clipCmd describes a clipping command ready to be used for the compute 217 // pipeline. 218 type clipCmd struct { 219 // union of the bounds of the operations that are clipped. 220 union f32.Rectangle 221 state clipKey 222 path []byte 223 pathKey ops.Key 224 absBounds f32.Rectangle 225 } 226 227 type encoderState struct { 228 relTrans f32.Affine2D 229 clip *clipState 230 231 paintKey 232 } 233 234 // clipKey completely describes a clip operation (along with its path) and is appropriate 235 // for hashing and equality checks. 236 type clipKey struct { 237 bounds f32.Rectangle 238 strokeWidth float32 239 relTrans f32.Affine2D 240 pathHash uint64 241 } 242 243 // paintKey completely defines a paint operation. It is suitable for hashing and 244 // equality checks. 245 type paintKey struct { 246 t f32.Affine2D 247 matType materialType 248 // Current paint.ImageOp 249 image imageOpData 250 // Current paint.ColorOp, if any. 251 color color.NRGBA 252 253 // Current paint.LinearGradientOp. 254 stop1 f32.Point 255 stop2 f32.Point 256 color1 color.NRGBA 257 color2 color.NRGBA 258 } 259 260 type clipState struct { 261 absBounds f32.Rectangle 262 parent *clipState 263 path []byte 264 pathKey ops.Key 265 intersect f32.Rectangle 266 267 clipKey 268 } 269 270 type layerVertex struct { 271 posX, posY float32 272 u, v float32 273 } 274 275 // materialVertex describes a vertex of a quad used to render a transformed 276 // material. 277 type materialVertex struct { 278 posX, posY float32 279 u, v float32 280 } 281 282 // textureKey identifies textureOp. 283 type textureKey struct { 284 handle interface{} 285 transform f32.Affine2D 286 bounds image.Rectangle 287 } 288 289 // textureOp represents an paintOp that requires texture space. 290 type textureOp struct { 291 img imageOpData 292 key textureKey 293 // offset is the integer offset separated from key.transform to increase cache hit rate. 294 off image.Point 295 // matAlloc is the atlas placement for material. 296 matAlloc materialAlloc 297 // imgAlloc is the atlas placement for the source image 298 imgAlloc *atlasAlloc 299 } 300 301 type encoder struct { 302 scene []scene.Command 303 npath int 304 npathseg int 305 ntrans int 306 } 307 308 // sizedBuffer holds a GPU buffer, or its equivalent CPU memory. 309 type sizedBuffer struct { 310 size int 311 buffer driver.Buffer 312 // cpuBuf is initialized when useCPU is true. 313 cpuBuf cpu.BufferDescriptor 314 } 315 316 // computeProgram holds a compute program, or its equivalent CPU implementation. 317 type computeProgram struct { 318 prog driver.Program 319 320 // CPU fields. 
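// When the device lacks compute support, rendering falls back to gioui.org/cpu:
// progInfo describes the CPU version of the kernel, descriptors points at its
// generated descriptor set, and buffers holds the bindings that bindStorageBuffers
// fills with CPU buffer descriptors.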
321 progInfo *cpu.ProgramInfo 322 descriptors unsafe.Pointer 323 buffers []*cpu.BufferDescriptor 324 } 325 326 // config matches Config in setup.h 327 type config struct { 328 n_elements uint32 // paths 329 n_pathseg uint32 330 width_in_tiles uint32 331 height_in_tiles uint32 332 tile_alloc memAlloc 333 bin_alloc memAlloc 334 ptcl_alloc memAlloc 335 pathseg_alloc memAlloc 336 anno_alloc memAlloc 337 trans_alloc memAlloc 338 } 339 340 // memAlloc matches Alloc in mem.h 341 type memAlloc struct { 342 offset uint32 343 //size uint32 344 } 345 346 // memoryHeader matches the header of Memory in mem.h. 347 type memoryHeader struct { 348 mem_offset uint32 349 mem_error uint32 350 } 351 352 // rectangle is an oriented rectangle. 353 type rectangle [4]f32.Point 354 355 const ( 356 layersBindings = driver.BufferBindingShaderStorageWrite | driver.BufferBindingTexture 357 materialsBindings = driver.BufferBindingFramebuffer | driver.BufferBindingShaderStorageRead 358 // Materials and layers can share texture storage if their bindings match. 359 combinedBindings = layersBindings | materialsBindings 360 ) 361 362 // GPU structure sizes and constants. 363 const ( 364 tileWidthPx = 32 365 tileHeightPx = 32 366 ptclInitialAlloc = 1024 367 kernel4OutputUnit = 2 368 kernel4AtlasUnit = 3 369 370 pathSize = 12 371 binSize = 8 372 pathsegSize = 52 373 annoSize = 32 374 transSize = 24 375 stateSize = 60 376 stateStride = 4 + 2*stateSize 377 ) 378 379 // mem.h constants. 380 const ( 381 memNoError = 0 // NO_ERROR 382 memMallocFailed = 1 // ERR_MALLOC_FAILED 383 ) 384 385 func newCompute(ctx driver.Device) (*compute, error) { 386 caps := ctx.Caps() 387 maxDim := caps.MaxTextureSize 388 // Large atlas textures cause artifacts due to precision loss in 389 // shaders. 390 if cap := 8192; maxDim > cap { 391 maxDim = cap 392 } 393 // The compute programs can only span 128x64 tiles. Limit to 64 for now, and leave the 394 // complexity of a rectangular limit for later.
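// A 4096x4096 output corresponds to 128x128 tiles of 32x32 pixels, i.e. 8x16 of the
// 16x8-tile bins used by the binning and coarse stages, which is exactly the
// widthInBins*heightInBins <= 128 limit enforced in render below.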
395 if computeCap := 4096; maxDim > computeCap { 396 maxDim = computeCap 397 } 398 g := &compute{ 399 ctx: ctx, 400 maxTextureDim: maxDim, 401 srgb: caps.Features.Has(driver.FeatureSRGB), 402 conf: new(config), 403 memHeader: new(memoryHeader), 404 } 405 shaders := []struct { 406 prog *computeProgram 407 src shader.Sources 408 info *cpu.ProgramInfo 409 }{ 410 {&g.programs.elements, piet.Shader_elements_comp, piet.ElementsProgramInfo}, 411 {&g.programs.tileAlloc, piet.Shader_tile_alloc_comp, piet.Tile_allocProgramInfo}, 412 {&g.programs.pathCoarse, piet.Shader_path_coarse_comp, piet.Path_coarseProgramInfo}, 413 {&g.programs.backdrop, piet.Shader_backdrop_comp, piet.BackdropProgramInfo}, 414 {&g.programs.binning, piet.Shader_binning_comp, piet.BinningProgramInfo}, 415 {&g.programs.coarse, piet.Shader_coarse_comp, piet.CoarseProgramInfo}, 416 {&g.programs.kernel4, piet.Shader_kernel4_comp, piet.Kernel4ProgramInfo}, 417 } 418 if !caps.Features.Has(driver.FeatureCompute) { 419 if !cpu.Supported { 420 return nil, errors.New("gpu: missing support for compute programs") 421 } 422 g.useCPU = true 423 } 424 if g.useCPU { 425 g.dispatcher = newDispatcher(runtime.NumCPU()) 426 } else { 427 null, err := ctx.NewTexture(driver.TextureFormatRGBA8, 1, 1, driver.FilterNearest, driver.FilterNearest, driver.BufferBindingShaderStorageRead) 428 if err != nil { 429 g.Release() 430 return nil, err 431 } 432 g.output.nullMaterials = null 433 } 434 435 copyVert, copyFrag, err := newShaders(ctx, gio.Shader_copy_vert, gio.Shader_copy_frag) 436 if err != nil { 437 g.Release() 438 return nil, err 439 } 440 defer copyVert.Release() 441 defer copyFrag.Release() 442 pipe, err := ctx.NewPipeline(driver.PipelineDesc{ 443 VertexShader: copyVert, 444 FragmentShader: copyFrag, 445 VertexLayout: driver.VertexLayout{ 446 Inputs: []driver.InputDesc{ 447 {Type: shader.DataTypeFloat, Size: 2, Offset: 0}, 448 {Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2}, 449 }, 450 Stride: int(unsafe.Sizeof(g.output.layerVertices[0])), 451 }, 452 PixelFormat: driver.TextureFormatOutput, 453 BlendDesc: driver.BlendDesc{ 454 Enable: true, 455 SrcFactor: driver.BlendFactorOne, 456 DstFactor: driver.BlendFactorOneMinusSrcAlpha, 457 }, 458 Topology: driver.TopologyTriangles, 459 }) 460 if err != nil { 461 g.Release() 462 return nil, err 463 } 464 g.output.blitPipeline = pipe 465 g.output.uniforms = new(copyUniforms) 466 467 buf, err := ctx.NewBuffer(driver.BufferBindingUniforms, int(unsafe.Sizeof(*g.output.uniforms))) 468 if err != nil { 469 g.Release() 470 return nil, err 471 } 472 g.output.uniBuf = buf 473 474 materialVert, materialFrag, err := newShaders(ctx, gio.Shader_material_vert, gio.Shader_material_frag) 475 if err != nil { 476 g.Release() 477 return nil, err 478 } 479 defer materialVert.Release() 480 defer materialFrag.Release() 481 pipe, err = ctx.NewPipeline(driver.PipelineDesc{ 482 VertexShader: materialVert, 483 FragmentShader: materialFrag, 484 VertexLayout: driver.VertexLayout{ 485 Inputs: []driver.InputDesc{ 486 {Type: shader.DataTypeFloat, Size: 2, Offset: 0}, 487 {Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2}, 488 }, 489 Stride: int(unsafe.Sizeof(g.materials.quads[0])), 490 }, 491 PixelFormat: driver.TextureFormatRGBA8, 492 Topology: driver.TopologyTriangles, 493 }) 494 if err != nil { 495 g.Release() 496 return nil, err 497 } 498 g.materials.pipeline = pipe 499 g.materials.uniforms.u = new(materialUniforms) 500 501 buf, err = ctx.NewBuffer(driver.BufferBindingUniforms, int(unsafe.Sizeof(*g.materials.uniforms.u))) 502 if 
err != nil { 503 g.Release() 504 return nil, err 505 } 506 g.materials.uniforms.buf = buf 507 508 for _, shader := range shaders { 509 if !g.useCPU { 510 p, err := ctx.NewComputeProgram(shader.src) 511 if err != nil { 512 g.Release() 513 return nil, err 514 } 515 shader.prog.prog = p 516 } else { 517 shader.prog.progInfo = shader.info 518 } 519 } 520 if g.useCPU { 521 { 522 desc := new(piet.ElementsDescriptorSetLayout) 523 g.programs.elements.descriptors = unsafe.Pointer(desc) 524 g.programs.elements.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1(), desc.Binding2(), desc.Binding3()} 525 } 526 { 527 desc := new(piet.Tile_allocDescriptorSetLayout) 528 g.programs.tileAlloc.descriptors = unsafe.Pointer(desc) 529 g.programs.tileAlloc.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} 530 } 531 { 532 desc := new(piet.Path_coarseDescriptorSetLayout) 533 g.programs.pathCoarse.descriptors = unsafe.Pointer(desc) 534 g.programs.pathCoarse.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} 535 } 536 { 537 desc := new(piet.BackdropDescriptorSetLayout) 538 g.programs.backdrop.descriptors = unsafe.Pointer(desc) 539 g.programs.backdrop.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} 540 } 541 { 542 desc := new(piet.BinningDescriptorSetLayout) 543 g.programs.binning.descriptors = unsafe.Pointer(desc) 544 g.programs.binning.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} 545 } 546 { 547 desc := new(piet.CoarseDescriptorSetLayout) 548 g.programs.coarse.descriptors = unsafe.Pointer(desc) 549 g.programs.coarse.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} 550 } 551 { 552 desc := new(piet.Kernel4DescriptorSetLayout) 553 g.programs.kernel4.descriptors = unsafe.Pointer(desc) 554 g.programs.kernel4.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} 555 g.output.descriptors = desc 556 } 557 } 558 return g, nil 559 } 560 561 func newShaders(ctx driver.Device, vsrc, fsrc shader.Sources) (vert driver.VertexShader, frag driver.FragmentShader, err error) { 562 vert, err = ctx.NewVertexShader(vsrc) 563 if err != nil { 564 return 565 } 566 frag, err = ctx.NewFragmentShader(fsrc) 567 if err != nil { 568 vert.Release() 569 } 570 return 571 } 572 573 func (g *compute) Frame(frameOps *op.Ops, target RenderTarget, viewport image.Point) error { 574 g.frameCount++ 575 g.collect(viewport, frameOps) 576 return g.frame(target) 577 } 578 579 func (g *compute) collect(viewport image.Point, ops *op.Ops) { 580 g.viewport = viewport 581 g.collector.reset() 582 583 g.texOps = g.texOps[:0] 584 g.collector.collect(ops, viewport, &g.texOps) 585 } 586 587 func (g *compute) Clear(col color.NRGBA) { 588 g.collector.clear = true 589 g.collector.clearColor = f32color.LinearFromSRGB(col) 590 } 591 592 func (g *compute) frame(target RenderTarget) error { 593 viewport := g.viewport 594 defFBO := g.ctx.BeginFrame(target, g.collector.clear, viewport) 595 defer g.ctx.EndFrame() 596 597 t := &g.timers 598 if false && t.t == nil && g.ctx.Caps().Features.Has(driver.FeatureTimers) { 599 t.t = newTimers(g.ctx) 600 t.compact = t.t.newTimer() 601 t.render = t.t.newTimer() 602 t.blit = t.t.newTimer() 603 } 604 605 if err := g.uploadImages(); err != nil { 606 return err 607 } 608 if err := g.renderMaterials(); err != nil { 609 return err 610 } 611 g.layer(viewport, g.texOps) 612 t.render.begin() 613 if err := g.renderLayers(viewport); err != nil { 614 return err 615 } 616 t.render.end() 617 d := driver.LoadDesc{ 618 ClearColor: 
g.collector.clearColor, 619 } 620 if g.collector.clear { 621 g.collector.clear = false 622 d.Action = driver.LoadActionClear 623 } 624 t.blit.begin() 625 g.blitLayers(d, defFBO, viewport) 626 t.blit.end() 627 t.compact.begin() 628 if err := g.compactAllocs(); err != nil { 629 return err 630 } 631 t.compact.end() 632 if false && t.t.ready() { 633 com, ren, blit := t.compact.Elapsed, t.render.Elapsed, t.blit.Elapsed 634 ft := com + ren + blit 635 q := 100 * time.Microsecond 636 ft = ft.Round(q) 637 com, ren, blit = com.Round(q), ren.Round(q), blit.Round(q) 638 // t.profile = fmt.Sprintf("ft:%7s com: %7s ren:%7s blit:%7s", ft, com, ren, blit) 639 } 640 return nil 641 } 642 643 func (g *compute) dumpAtlases() { 644 for i, a := range g.atlases { 645 dump := image.NewRGBA(image.Rectangle{Max: a.size}) 646 err := driver.DownloadImage(g.ctx, a.image, dump) 647 if err != nil { 648 panic(err) 649 } 650 nrgba := image.NewNRGBA(dump.Bounds()) 651 draw.Draw(nrgba, image.Rectangle{}, dump, image.Point{}, draw.Src) 652 var buf bytes.Buffer 653 if err := png.Encode(&buf, nrgba); err != nil { 654 panic(err) 655 } 656 if err := os.WriteFile(fmt.Sprintf("dump-%d.png", i), buf.Bytes(), 0600); err != nil { 657 panic(err) 658 } 659 } 660 } 661 662 func (g *compute) compactAllocs() error { 663 const ( 664 maxAllocAge = 3 665 maxAtlasAge = 10 666 ) 667 atlases := g.atlases 668 for _, a := range atlases { 669 if len(a.allocs) > 0 && g.frameCount-a.lastFrame > maxAtlasAge { 670 a.compact = true 671 } 672 } 673 for len(atlases) > 0 { 674 var ( 675 dstAtlas *textureAtlas 676 format driver.TextureFormat 677 bindings driver.BufferBinding 678 ) 679 g.moves = g.moves[:0] 680 addedLayers := false 681 useCPU := false 682 fill: 683 for len(atlases) > 0 { 684 srcAtlas := atlases[0] 685 allocs := srcAtlas.allocs 686 if !srcAtlas.compact { 687 atlases = atlases[1:] 688 continue 689 } 690 if addedLayers && (format != srcAtlas.format || srcAtlas.bindings&bindings != srcAtlas.bindings) { 691 break 692 } 693 format = srcAtlas.format 694 bindings = srcAtlas.bindings 695 for len(srcAtlas.allocs) > 0 { 696 a := srcAtlas.allocs[0] 697 n := len(srcAtlas.allocs) 698 if g.frameCount-a.frameCount > maxAllocAge { 699 a.dead = true 700 srcAtlas.allocs[0] = srcAtlas.allocs[n-1] 701 srcAtlas.allocs = srcAtlas.allocs[:n-1] 702 continue 703 } 704 size := a.rect.Size() 705 alloc, fits := g.atlasAlloc(allocQuery{ 706 atlas: dstAtlas, 707 size: size, 708 format: format, 709 bindings: bindings, 710 nocompact: true, 711 }) 712 if !fits { 713 break fill 714 } 715 dstAtlas = alloc.atlas 716 allocs = append(allocs, a) 717 addedLayers = true 718 useCPU = useCPU || a.cpu 719 dstAtlas.allocs = append(dstAtlas.allocs, a) 720 pos := alloc.rect.Min 721 g.moves = append(g.moves, atlasMove{ 722 src: srcAtlas, dstPos: pos, srcRect: a.rect, cpu: a.cpu, 723 }) 724 a.atlas = dstAtlas 725 a.rect = image.Rectangle{Min: pos, Max: pos.Add(a.rect.Size())} 726 srcAtlas.allocs[0] = srcAtlas.allocs[n-1] 727 srcAtlas.allocs = srcAtlas.allocs[:n-1] 728 } 729 srcAtlas.compact = false 730 srcAtlas.realized = false 731 srcAtlas.packer.clear() 732 srcAtlas.packer.newPage() 733 srcAtlas.packer.maxDims = image.Pt(g.maxTextureDim, g.maxTextureDim) 734 atlases = atlases[1:] 735 } 736 if !addedLayers { 737 break 738 } 739 outputSize := dstAtlas.packer.sizes[0] 740 if err := g.realizeAtlas(dstAtlas, useCPU, outputSize); err != nil { 741 return err 742 } 743 for _, move := range g.moves { 744 if !move.cpu { 745 g.ctx.CopyTexture(dstAtlas.image, move.dstPos, move.src.image, 
move.srcRect) 746 } else { 747 src := move.src.cpuImage.Data() 748 dst := dstAtlas.cpuImage.Data() 749 sstride := move.src.size.X * 4 750 dstride := dstAtlas.size.X * 4 751 copyImage(dst, dstride, move.dstPos, src, sstride, move.srcRect) 752 } 753 } 754 } 755 for i := len(g.atlases) - 1; i >= 0; i-- { 756 a := g.atlases[i] 757 if len(a.allocs) == 0 && g.frameCount-a.lastFrame > maxAtlasAge { 758 a.Release() 759 n := len(g.atlases) 760 g.atlases[i] = g.atlases[n-1] 761 g.atlases = g.atlases[:n-1] 762 } 763 } 764 return nil 765 } 766 767 func copyImage(dst []byte, dstStride int, dstPos image.Point, src []byte, srcStride int, srcRect image.Rectangle) { 768 sz := srcRect.Size() 769 soff := srcRect.Min.Y*srcStride + srcRect.Min.X*4 770 doff := dstPos.Y*dstStride + dstPos.X*4 771 rowLen := sz.X * 4 772 for y := 0; y < sz.Y; y++ { 773 srow := src[soff : soff+rowLen] 774 drow := dst[doff : doff+rowLen] 775 copy(drow, srow) 776 soff += srcStride 777 doff += dstStride 778 } 779 } 780 781 func (g *compute) renderLayers(viewport image.Point) error { 782 layers := g.collector.frame.layers 783 for len(layers) > 0 { 784 var materials, dst *textureAtlas 785 addedLayers := false 786 g.enc.reset() 787 for len(layers) > 0 { 788 l := &layers[0] 789 if l.alloc != nil { 790 layers = layers[1:] 791 continue 792 } 793 if materials != nil { 794 if l.materials != nil && materials != l.materials { 795 // Only one materials texture per compute pass. 796 break 797 } 798 } else { 799 materials = l.materials 800 } 801 size := l.rect.Size() 802 alloc, fits := g.atlasAlloc(allocQuery{ 803 atlas: dst, 804 empty: true, 805 format: driver.TextureFormatRGBA8, 806 bindings: combinedBindings, 807 // Pad to avoid overlap. 808 size: size.Add(image.Pt(1, 1)), 809 }) 810 if !fits { 811 // Only one output atlas per compute pass. 
812 break 813 } 814 dst = alloc.atlas 815 dst.compact = true 816 addedLayers = true 817 l.alloc = &alloc 818 dst.allocs = append(dst.allocs, l.alloc) 819 encodeLayer(*l, alloc.rect.Min, viewport, &g.enc, g.texOps) 820 layers = layers[1:] 821 } 822 if !addedLayers { 823 break 824 } 825 outputSize := dst.packer.sizes[0] 826 tileDims := image.Point{ 827 X: (outputSize.X + tileWidthPx - 1) / tileWidthPx, 828 Y: (outputSize.Y + tileHeightPx - 1) / tileHeightPx, 829 } 830 w, h := tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx 831 if err := g.realizeAtlas(dst, g.useCPU, image.Pt(w, h)); err != nil { 832 return err 833 } 834 if err := g.render(materials, dst.image, dst.cpuImage, tileDims, dst.size.X*4); err != nil { 835 return err 836 } 837 } 838 return nil 839 } 840 841 func (g *compute) blitLayers(d driver.LoadDesc, fbo driver.Texture, viewport image.Point) { 842 layers := g.collector.frame.layers 843 g.output.layerVertices = g.output.layerVertices[:0] 844 for _, l := range layers { 845 placef := layout.FPt(l.alloc.rect.Min) 846 sizef := layout.FPt(l.rect.Size()) 847 r := f32.FRect(l.rect) 848 quad := [4]layerVertex{ 849 {posX: float32(r.Min.X), posY: float32(r.Min.Y), u: placef.X, v: placef.Y}, 850 {posX: float32(r.Max.X), posY: float32(r.Min.Y), u: placef.X + sizef.X, v: placef.Y}, 851 {posX: float32(r.Max.X), posY: float32(r.Max.Y), u: placef.X + sizef.X, v: placef.Y + sizef.Y}, 852 {posX: float32(r.Min.X), posY: float32(r.Max.Y), u: placef.X, v: placef.Y + sizef.Y}, 853 } 854 g.output.layerVertices = append(g.output.layerVertices, quad[0], quad[1], quad[3], quad[3], quad[2], quad[1]) 855 g.ctx.PrepareTexture(l.alloc.atlas.image) 856 } 857 if len(g.output.layerVertices) > 0 { 858 vertexData := byteslice.Slice(g.output.layerVertices) 859 g.output.buffer.ensureCapacity(false, g.ctx, driver.BufferBindingVertices, len(vertexData)) 860 g.output.buffer.buffer.Upload(vertexData) 861 } 862 g.ctx.BeginRenderPass(fbo, d) 863 defer g.ctx.EndRenderPass() 864 if len(layers) == 0 { 865 return 866 } 867 g.ctx.Viewport(0, 0, viewport.X, viewport.Y) 868 g.ctx.BindPipeline(g.output.blitPipeline) 869 g.ctx.BindVertexBuffer(g.output.buffer.buffer, 0) 870 start := 0 871 for len(layers) > 0 { 872 count := 0 873 atlas := layers[0].alloc.atlas 874 for len(layers) > 0 { 875 l := layers[0] 876 if l.alloc.atlas != atlas { 877 break 878 } 879 layers = layers[1:] 880 const verticesPerQuad = 6 881 count += verticesPerQuad 882 } 883 884 // Transform positions to clip space: [-1, -1] - [1, 1], and texture 885 // coordinates to texture space: [0, 0] - [1, 1]. 886 clip := f32.Affine2D{}.Scale(f32.Pt(0, 0), f32.Pt(2/float32(viewport.X), 2/float32(viewport.Y))).Offset(f32.Pt(-1, -1)) 887 sx, _, ox, _, sy, oy := clip.Elems() 888 g.output.uniforms.scale = [2]float32{sx, sy} 889 g.output.uniforms.pos = [2]float32{ox, oy} 890 g.output.uniforms.uvScale = [2]float32{1 / float32(atlas.size.X), 1 / float32(atlas.size.Y)} 891 g.output.uniBuf.Upload(byteslice.Struct(g.output.uniforms)) 892 g.ctx.BindUniforms(g.output.uniBuf) 893 g.ctx.BindTexture(0, atlas.image) 894 g.ctx.DrawArrays(start, count) 895 start += count 896 } 897 } 898 899 func (g *compute) renderMaterials() error { 900 m := &g.materials 901 for k, place := range m.allocs { 902 if place.alloc.dead { 903 delete(m.allocs, k) 904 } 905 } 906 texOps := g.texOps 907 for len(texOps) > 0 { 908 m.quads = m.quads[:0] 909 var ( 910 atlas *textureAtlas 911 imgAtlas *textureAtlas 912 ) 913 // A material is clipped to avoid drawing outside its atlas bounds. 
914 // However, imprecision in the clipping may cause a single pixel 915 // overflow. 916 var padding = image.Pt(1, 1) 917 var allocStart int 918 for len(texOps) > 0 { 919 op := &texOps[0] 920 if a, exists := m.allocs[op.key]; exists { 921 g.touchAlloc(a.alloc) 922 op.matAlloc = a 923 texOps = texOps[1:] 924 continue 925 } 926 927 if imgAtlas != nil && op.imgAlloc.atlas != imgAtlas { 928 // Only one image atlas per render pass. 929 break 930 } 931 imgAtlas = op.imgAlloc.atlas 932 quad := g.materialQuad(imgAtlas.size, op.key.transform, op.img, op.imgAlloc.rect.Min) 933 boundsf := quadBounds(quad) 934 bounds := boundsf.Round() 935 bounds = bounds.Intersect(op.key.bounds) 936 937 size := bounds.Size() 938 alloc, fits := g.atlasAlloc(allocQuery{ 939 atlas: atlas, 940 size: size.Add(padding), 941 format: driver.TextureFormatRGBA8, 942 bindings: combinedBindings, 943 }) 944 if !fits { 945 break 946 } 947 if atlas == nil { 948 allocStart = len(alloc.atlas.allocs) 949 } 950 atlas = alloc.atlas 951 alloc.cpu = g.useCPU 952 offsetf := layout.FPt(bounds.Min.Mul(-1)) 953 scale := f32.Pt(float32(size.X), float32(size.Y)) 954 for i := range quad { 955 // Position quad to match place. 956 quad[i].posX += offsetf.X 957 quad[i].posY += offsetf.Y 958 // Scale to match viewport [0, 1]. 959 quad[i].posX /= scale.X 960 quad[i].posY /= scale.Y 961 } 962 // Draw quad as two triangles. 963 m.quads = append(m.quads, quad[0], quad[1], quad[3], quad[3], quad[1], quad[2]) 964 if m.allocs == nil { 965 m.allocs = make(map[textureKey]materialAlloc) 966 } 967 atlasAlloc := materialAlloc{ 968 alloc: &alloc, 969 offset: bounds.Min.Mul(-1), 970 } 971 atlas.allocs = append(atlas.allocs, atlasAlloc.alloc) 972 m.allocs[op.key] = atlasAlloc 973 op.matAlloc = atlasAlloc 974 texOps = texOps[1:] 975 } 976 if len(m.quads) == 0 { 977 break 978 } 979 realized := atlas.realized 980 if err := g.realizeAtlas(atlas, g.useCPU, atlas.packer.sizes[0]); err != nil { 981 return err 982 } 983 // Transform to clip space: [-1, -1] - [1, 1]. 
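// The quad positions were normalized to [0, 1] above, so scaling by 2 and
// offsetting by -1 maps them to clip space: 0 -> -1 and 1 -> 1.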
984 *m.uniforms.u = materialUniforms{ 985 scale: [2]float32{2, 2}, 986 pos: [2]float32{-1, -1}, 987 } 988 if !g.srgb { 989 m.uniforms.u.emulatesRGB = 1.0 990 } 991 m.uniforms.buf.Upload(byteslice.Struct(m.uniforms.u)) 992 vertexData := byteslice.Slice(m.quads) 993 n := pow2Ceil(len(vertexData)) 994 m.buffer.ensureCapacity(false, g.ctx, driver.BufferBindingVertices, n) 995 m.buffer.buffer.Upload(vertexData) 996 var d driver.LoadDesc 997 if !realized { 998 d.Action = driver.LoadActionClear 999 } 1000 g.ctx.PrepareTexture(imgAtlas.image) 1001 g.ctx.BeginRenderPass(atlas.image, d) 1002 g.ctx.BindTexture(0, imgAtlas.image) 1003 g.ctx.BindPipeline(m.pipeline) 1004 g.ctx.BindUniforms(m.uniforms.buf) 1005 g.ctx.BindVertexBuffer(m.buffer.buffer, 0) 1006 newAllocs := atlas.allocs[allocStart:] 1007 for i, a := range newAllocs { 1008 sz := a.rect.Size().Sub(padding) 1009 g.ctx.Viewport(a.rect.Min.X, a.rect.Min.Y, sz.X, sz.Y) 1010 g.ctx.DrawArrays(i*6, 6) 1011 } 1012 g.ctx.EndRenderPass() 1013 if !g.useCPU { 1014 continue 1015 } 1016 src := atlas.image 1017 data := atlas.cpuImage.Data() 1018 for _, a := range newAllocs { 1019 stride := atlas.size.X * 4 1020 col := a.rect.Min.X * 4 1021 row := stride * a.rect.Min.Y 1022 off := col + row 1023 src.ReadPixels(a.rect, data[off:], stride) 1024 } 1025 } 1026 return nil 1027 } 1028 1029 func (g *compute) uploadImages() error { 1030 for k, a := range g.imgAllocs { 1031 if a.dead { 1032 delete(g.imgAllocs, k) 1033 } 1034 } 1035 type upload struct { 1036 pos image.Point 1037 img *image.RGBA 1038 } 1039 var uploads []upload 1040 format := driver.TextureFormatSRGBA 1041 if !g.srgb { 1042 format = driver.TextureFormatRGBA8 1043 } 1044 // padding is the number of pixels added to the right and below 1045 // images, to avoid atlas filtering artifacts. 
1046 const padding = 1 1047 texOps := g.texOps 1048 for len(texOps) > 0 { 1049 uploads = uploads[:0] 1050 var atlas *textureAtlas 1051 for len(texOps) > 0 { 1052 op := &texOps[0] 1053 if a, exists := g.imgAllocs[op.img.handle]; exists { 1054 g.touchAlloc(a) 1055 op.imgAlloc = a 1056 texOps = texOps[1:] 1057 continue 1058 } 1059 size := op.img.src.Bounds().Size().Add(image.Pt(padding, padding)) 1060 alloc, fits := g.atlasAlloc(allocQuery{ 1061 atlas: atlas, 1062 size: size, 1063 format: format, 1064 bindings: driver.BufferBindingTexture | driver.BufferBindingFramebuffer, 1065 }) 1066 if !fits { 1067 break 1068 } 1069 atlas = alloc.atlas 1070 if g.imgAllocs == nil { 1071 g.imgAllocs = make(map[interface{}]*atlasAlloc) 1072 } 1073 op.imgAlloc = &alloc 1074 atlas.allocs = append(atlas.allocs, op.imgAlloc) 1075 g.imgAllocs[op.img.handle] = op.imgAlloc 1076 uploads = append(uploads, upload{pos: alloc.rect.Min, img: op.img.src}) 1077 texOps = texOps[1:] 1078 } 1079 if len(uploads) == 0 { 1080 break 1081 } 1082 if err := g.realizeAtlas(atlas, false, atlas.packer.sizes[0]); err != nil { 1083 return err 1084 } 1085 for _, u := range uploads { 1086 size := u.img.Bounds().Size() 1087 driver.UploadImage(atlas.image, u.pos, u.img) 1088 rightPadding := image.Pt(padding, size.Y) 1089 atlas.image.Upload(image.Pt(u.pos.X+size.X, u.pos.Y), rightPadding, g.zeros(rightPadding.X*rightPadding.Y*4), 0) 1090 bottomPadding := image.Pt(size.X, padding) 1091 atlas.image.Upload(image.Pt(u.pos.X, u.pos.Y+size.Y), bottomPadding, g.zeros(bottomPadding.X*bottomPadding.Y*4), 0) 1092 } 1093 } 1094 return nil 1095 } 1096 1097 func pow2Ceil(v int) int { 1098 exp := bits.Len(uint(v)) 1099 if bits.OnesCount(uint(v)) == 1 { 1100 exp-- 1101 } 1102 return 1 << exp 1103 } 1104 1105 // materialQuad constructs a quad of vertices that represents the transformed image, 1106 // with texture coordinates into the image atlas. 1107 func (g *compute) materialQuad(imgAtlasSize image.Point, M f32.Affine2D, img imageOpData, uvPos image.Point) [4]materialVertex { 1108 imgSize := layout.FPt(img.src.Bounds().Size()) 1109 sx, hx, ox, hy, sy, oy := M.Elems() 1110 transOff := f32.Pt(ox, oy) 1111 // The 4 corners of the image rectangle transformed by M, excluding its offset, are: 1112 // 1113 // q0: M * (0, 0) q3: M * (w, 0) 1114 // q1: M * (0, h) q2: M * (w, h) 1115 // 1116 // Note that q0 = M*0 = 0, q2 = q1 + q3.
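// With M.Elems() returning (sx, hx, ox, hy, sy, oy), the linear part gives
// M*(0, h) = (hx*h, sy*h) and M*(w, 0) = (sx*w, hy*w), which are q1 and q3 below;
// the translation (ox, oy) is added back afterwards.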
1117 q0 := f32.Pt(0, 0) 1118 q1 := f32.Pt(hx*imgSize.Y, sy*imgSize.Y) 1119 q3 := f32.Pt(sx*imgSize.X, hy*imgSize.X) 1120 q2 := q1.Add(q3) 1121 q0 = q0.Add(transOff) 1122 q1 = q1.Add(transOff) 1123 q2 = q2.Add(transOff) 1124 q3 = q3.Add(transOff) 1125 1126 uvPosf := layout.FPt(uvPos) 1127 atlasScale := f32.Pt(1/float32(imgAtlasSize.X), 1/float32(imgAtlasSize.Y)) 1128 uvBounds := f32.Rectangle{ 1129 Min: uvPosf, 1130 Max: uvPosf.Add(imgSize), 1131 } 1132 uvBounds.Min.X *= atlasScale.X 1133 uvBounds.Min.Y *= atlasScale.Y 1134 uvBounds.Max.X *= atlasScale.X 1135 uvBounds.Max.Y *= atlasScale.Y 1136 quad := [4]materialVertex{ 1137 {posX: q0.X, posY: q0.Y, u: uvBounds.Min.X, v: uvBounds.Min.Y}, 1138 {posX: q1.X, posY: q1.Y, u: uvBounds.Min.X, v: uvBounds.Max.Y}, 1139 {posX: q2.X, posY: q2.Y, u: uvBounds.Max.X, v: uvBounds.Max.Y}, 1140 {posX: q3.X, posY: q3.Y, u: uvBounds.Max.X, v: uvBounds.Min.Y}, 1141 } 1142 return quad 1143 } 1144 1145 func quadBounds(q [4]materialVertex) f32.Rectangle { 1146 q0 := f32.Pt(q[0].posX, q[0].posY) 1147 q1 := f32.Pt(q[1].posX, q[1].posY) 1148 q2 := f32.Pt(q[2].posX, q[2].posY) 1149 q3 := f32.Pt(q[3].posX, q[3].posY) 1150 return f32.Rectangle{ 1151 Min: min(min(q0, q1), min(q2, q3)), 1152 Max: max(max(q0, q1), max(q2, q3)), 1153 } 1154 } 1155 1156 func max(p1, p2 f32.Point) f32.Point { 1157 p := p1 1158 if p2.X > p.X { 1159 p.X = p2.X 1160 } 1161 if p2.Y > p.Y { 1162 p.Y = p2.Y 1163 } 1164 return p 1165 } 1166 1167 func min(p1, p2 f32.Point) f32.Point { 1168 p := p1 1169 if p2.X < p.X { 1170 p.X = p2.X 1171 } 1172 if p2.Y < p.Y { 1173 p.Y = p2.Y 1174 } 1175 return p 1176 } 1177 1178 func (enc *encoder) encodePath(verts []byte, fillMode int) { 1179 for ; len(verts) >= scene.CommandSize+4; verts = verts[scene.CommandSize+4:] { 1180 cmd := ops.DecodeCommand(verts[4:]) 1181 if cmd.Op() == scene.OpGap { 1182 if fillMode != scene.FillModeNonzero { 1183 // Skip gaps in strokes. 1184 continue 1185 } 1186 // Replace them by a straight line in outlines. 1187 cmd = scene.Line(scene.DecodeGap(cmd)) 1188 } 1189 enc.scene = append(enc.scene, cmd) 1190 enc.npathseg++ 1191 } 1192 } 1193 1194 func (g *compute) render(images *textureAtlas, dst driver.Texture, cpuDst cpu.ImageDescriptor, tileDims image.Point, stride int) error { 1195 const ( 1196 // wgSize is the largest and most common workgroup size. 1197 wgSize = 128 1198 // PARTITION_SIZE from elements.comp 1199 partitionSize = 32 * 4 1200 ) 1201 widthInBins := (tileDims.X + 15) / 16 1202 heightInBins := (tileDims.Y + 7) / 8 1203 if widthInBins*heightInBins > wgSize { 1204 return fmt.Errorf("gpu: output too large (%dx%d)", tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx) 1205 } 1206 1207 enc := &g.enc 1208 // Pad scene with zeroes to avoid reading garbage in elements.comp. 1209 scenePadding := partitionSize - len(enc.scene)%partitionSize 1210 enc.scene = append(enc.scene, make([]scene.Command, scenePadding)...) 1211 1212 scene := byteslice.Slice(enc.scene) 1213 if s := len(scene); s > g.buffers.scene.size { 1214 paddedCap := s * 11 / 10 1215 if err := g.buffers.scene.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead, paddedCap); err != nil { 1216 return err 1217 } 1218 } 1219 g.buffers.scene.upload(scene) 1220 1221 // alloc is the number of allocated bytes for static buffers. 
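// malloc below is a simple bump allocator: it rounds every size up to a 4-byte
// boundary and hands out consecutive offsets. The final value of alloc becomes
// memoryHeader.mem_offset, so dynamic allocations made by the shaders start
// right after the static region.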
1222 var alloc uint32 1223 round := func(v, quantum int) int { 1224 return (v + quantum - 1) &^ (quantum - 1) 1225 } 1226 malloc := func(size int) memAlloc { 1227 size = round(size, 4) 1228 offset := alloc 1229 alloc += uint32(size) 1230 return memAlloc{offset /*, uint32(size)*/} 1231 } 1232 1233 *g.conf = config{ 1234 n_elements: uint32(enc.npath), 1235 n_pathseg: uint32(enc.npathseg), 1236 width_in_tiles: uint32(tileDims.X), 1237 height_in_tiles: uint32(tileDims.Y), 1238 tile_alloc: malloc(enc.npath * pathSize), 1239 bin_alloc: malloc(round(enc.npath, wgSize) * binSize), 1240 ptcl_alloc: malloc(tileDims.X * tileDims.Y * ptclInitialAlloc), 1241 pathseg_alloc: malloc(enc.npathseg * pathsegSize), 1242 anno_alloc: malloc(enc.npath * annoSize), 1243 trans_alloc: malloc(enc.ntrans * transSize), 1244 } 1245 1246 numPartitions := (enc.numElements() + 127) / 128 1247 // clearSize is the atomic partition counter plus flag and 2 states per partition. 1248 clearSize := 4 + numPartitions*stateStride 1249 if clearSize > g.buffers.state.size { 1250 paddedCap := clearSize * 11 / 10 1251 if err := g.buffers.state.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, paddedCap); err != nil { 1252 return err 1253 } 1254 } 1255 1256 confData := byteslice.Struct(g.conf) 1257 g.buffers.config.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead, len(confData)) 1258 g.buffers.config.upload(confData) 1259 1260 minSize := int(unsafe.Sizeof(memoryHeader{})) + int(alloc) 1261 if minSize > g.buffers.memory.size { 1262 // Add space for dynamic GPU allocations. 1263 const sizeBump = 4 * 1024 * 1024 1264 minSize += sizeBump 1265 if err := g.buffers.memory.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, minSize); err != nil { 1266 return err 1267 } 1268 } 1269 1270 for { 1271 *g.memHeader = memoryHeader{ 1272 mem_offset: alloc, 1273 } 1274 g.buffers.memory.upload(byteslice.Struct(g.memHeader)) 1275 g.buffers.state.upload(g.zeros(clearSize)) 1276 1277 if !g.useCPU { 1278 g.ctx.BeginCompute() 1279 g.ctx.BindImageTexture(kernel4OutputUnit, dst) 1280 img := g.output.nullMaterials 1281 if images != nil { 1282 img = images.image 1283 } 1284 g.ctx.BindImageTexture(kernel4AtlasUnit, img) 1285 } else { 1286 *g.output.descriptors.Binding2() = cpuDst 1287 if images != nil { 1288 *g.output.descriptors.Binding3() = images.cpuImage 1289 } 1290 } 1291 1292 g.bindBuffers() 1293 g.memoryBarrier() 1294 g.dispatch(g.programs.elements, numPartitions, 1, 1) 1295 g.memoryBarrier() 1296 g.dispatch(g.programs.tileAlloc, (enc.npath+wgSize-1)/wgSize, 1, 1) 1297 g.memoryBarrier() 1298 g.dispatch(g.programs.pathCoarse, (enc.npathseg+31)/32, 1, 1) 1299 g.memoryBarrier() 1300 g.dispatch(g.programs.backdrop, (enc.npath+wgSize-1)/wgSize, 1, 1) 1301 // No barrier needed between backdrop and binning. 
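// (Both passes only read results from earlier stages and write to separate
// allocations, so their dispatches may safely overlap.)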
1302 g.dispatch(g.programs.binning, (enc.npath+wgSize-1)/wgSize, 1, 1) 1303 g.memoryBarrier() 1304 g.dispatch(g.programs.coarse, widthInBins, heightInBins, 1) 1305 g.memoryBarrier() 1306 g.dispatch(g.programs.kernel4, tileDims.X, tileDims.Y, 1) 1307 g.memoryBarrier() 1308 if !g.useCPU { 1309 g.ctx.EndCompute() 1310 } else { 1311 g.dispatcher.Sync() 1312 } 1313 1314 if err := g.buffers.memory.download(byteslice.Struct(g.memHeader)); err != nil { 1315 if err == driver.ErrContentLost { 1316 continue 1317 } 1318 return err 1319 } 1320 switch errCode := g.memHeader.mem_error; errCode { 1321 case memNoError: 1322 if g.useCPU { 1323 w, h := tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx 1324 dst.Upload(image.Pt(0, 0), image.Pt(w, h), cpuDst.Data(), stride) 1325 } 1326 return nil 1327 case memMallocFailed: 1328 // Resize memory and try again. 1329 sz := g.buffers.memory.size * 15 / 10 1330 if err := g.buffers.memory.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, sz); err != nil { 1331 return err 1332 } 1333 continue 1334 default: 1335 return fmt.Errorf("compute: shader program failed with error %d", errCode) 1336 } 1337 } 1338 } 1339 1340 func (g *compute) memoryBarrier() { 1341 if g.useCPU { 1342 g.dispatcher.Barrier() 1343 } 1344 } 1345 1346 func (g *compute) dispatch(p computeProgram, x, y, z int) { 1347 if !g.useCPU { 1348 g.ctx.BindProgram(p.prog) 1349 g.ctx.DispatchCompute(x, y, z) 1350 } else { 1351 g.dispatcher.Dispatch(p.progInfo, p.descriptors, x, y, z) 1352 } 1353 } 1354 1355 // zeros returns a byte slice with size bytes of zeros. 1356 func (g *compute) zeros(size int) []byte { 1357 if cap(g.zeroSlice) < size { 1358 g.zeroSlice = append(g.zeroSlice, make([]byte, size)...) 1359 } 1360 return g.zeroSlice[:size] 1361 } 1362 1363 func (g *compute) touchAlloc(a *atlasAlloc) { 1364 if a.dead { 1365 panic("re-use of dead allocation") 1366 } 1367 a.frameCount = g.frameCount 1368 a.atlas.lastFrame = a.frameCount 1369 } 1370 1371 func (g *compute) atlasAlloc(q allocQuery) (atlasAlloc, bool) { 1372 var ( 1373 place placement 1374 fits bool 1375 atlas = q.atlas 1376 ) 1377 if atlas != nil { 1378 place, fits = atlas.packer.tryAdd(q.size) 1379 if !fits { 1380 atlas.compact = true 1381 } 1382 } 1383 if atlas == nil { 1384 // Look for matching atlas to re-use. 
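// A candidate must have the same format, offer at least the requested bindings,
// be unused if an empty atlas was requested, and not be pending compaction when
// nocompact is set.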
1385 for _, a := range g.atlases { 1386 if q.empty && len(a.allocs) > 0 { 1387 continue 1388 } 1389 if q.nocompact && a.compact { 1390 continue 1391 } 1392 if a.format != q.format || a.bindings&q.bindings != q.bindings { 1393 continue 1394 } 1395 place, fits = a.packer.tryAdd(q.size) 1396 if !fits { 1397 a.compact = true 1398 continue 1399 } 1400 atlas = a 1401 break 1402 } 1403 } 1404 if atlas == nil { 1405 atlas = &textureAtlas{ 1406 format: q.format, 1407 bindings: q.bindings, 1408 } 1409 atlas.packer.maxDims = image.Pt(g.maxTextureDim, g.maxTextureDim) 1410 atlas.packer.newPage() 1411 g.atlases = append(g.atlases, atlas) 1412 place, fits = atlas.packer.tryAdd(q.size) 1413 if !fits { 1414 panic(fmt.Errorf("compute: atlas allocation too large (%v)", q.size)) 1415 } 1416 } 1417 if !fits { 1418 return atlasAlloc{}, false 1419 } 1420 atlas.lastFrame = g.frameCount 1421 return atlasAlloc{ 1422 frameCount: g.frameCount, 1423 atlas: atlas, 1424 rect: image.Rectangle{Min: place.Pos, Max: place.Pos.Add(q.size)}, 1425 }, true 1426 } 1427 1428 func (g *compute) realizeAtlas(atlas *textureAtlas, useCPU bool, size image.Point) error { 1429 defer func() { 1430 atlas.packer.maxDims = atlas.size 1431 atlas.realized = true 1432 atlas.ensureCPUImage(useCPU) 1433 }() 1434 if atlas.size.X >= size.X && atlas.size.Y >= size.Y { 1435 return nil 1436 } 1437 if atlas.realized { 1438 panic("resizing a realized atlas") 1439 } 1440 if err := atlas.resize(g.ctx, size); err != nil { 1441 return err 1442 } 1443 return nil 1444 } 1445 1446 func (a *textureAtlas) resize(ctx driver.Device, size image.Point) error { 1447 a.Release() 1448 1449 img, err := ctx.NewTexture(a.format, size.X, size.Y, 1450 driver.FilterNearest, 1451 driver.FilterNearest, 1452 a.bindings) 1453 if err != nil { 1454 return err 1455 } 1456 a.image = img 1457 a.size = size 1458 return nil 1459 } 1460 1461 func (a *textureAtlas) ensureCPUImage(useCPU bool) { 1462 if !useCPU || a.hasCPU { 1463 return 1464 } 1465 a.hasCPU = true 1466 a.cpuImage = cpu.NewImageRGBA(a.size.X, a.size.Y) 1467 } 1468 1469 func (g *compute) Release() { 1470 if g.useCPU { 1471 g.dispatcher.Stop() 1472 } 1473 type resource interface { 1474 Release() 1475 } 1476 res := []resource{ 1477 g.output.nullMaterials, 1478 &g.programs.elements, 1479 &g.programs.tileAlloc, 1480 &g.programs.pathCoarse, 1481 &g.programs.backdrop, 1482 &g.programs.binning, 1483 &g.programs.coarse, 1484 &g.programs.kernel4, 1485 g.output.blitPipeline, 1486 &g.output.buffer, 1487 g.output.uniBuf, 1488 &g.buffers.scene, 1489 &g.buffers.state, 1490 &g.buffers.memory, 1491 &g.buffers.config, 1492 g.materials.pipeline, 1493 &g.materials.buffer, 1494 g.materials.uniforms.buf, 1495 g.timers.t, 1496 } 1497 for _, r := range res { 1498 if r != nil { 1499 r.Release() 1500 } 1501 } 1502 for _, a := range g.atlases { 1503 a.Release() 1504 } 1505 g.ctx.Release() 1506 *g = compute{} 1507 } 1508 1509 func (a *textureAtlas) Release() { 1510 if a.image != nil { 1511 a.image.Release() 1512 a.image = nil 1513 } 1514 a.cpuImage.Free() 1515 a.hasCPU = false 1516 } 1517 1518 func (g *compute) bindBuffers() { 1519 g.bindStorageBuffers(g.programs.elements, g.buffers.memory, g.buffers.config, g.buffers.scene, g.buffers.state) 1520 g.bindStorageBuffers(g.programs.tileAlloc, g.buffers.memory, g.buffers.config) 1521 g.bindStorageBuffers(g.programs.pathCoarse, g.buffers.memory, g.buffers.config) 1522 g.bindStorageBuffers(g.programs.backdrop, g.buffers.memory, g.buffers.config) 1523 g.bindStorageBuffers(g.programs.binning, 
g.buffers.memory, g.buffers.config) 1524 g.bindStorageBuffers(g.programs.coarse, g.buffers.memory, g.buffers.config) 1525 g.bindStorageBuffers(g.programs.kernel4, g.buffers.memory, g.buffers.config) 1526 } 1527 1528 func (p *computeProgram) Release() { 1529 if p.prog != nil { 1530 p.prog.Release() 1531 } 1532 *p = computeProgram{} 1533 } 1534 1535 func (b *sizedBuffer) Release() { 1536 if b.buffer != nil { 1537 b.buffer.Release() 1538 } 1539 b.cpuBuf.Free() 1540 *b = sizedBuffer{} 1541 } 1542 1543 func (b *sizedBuffer) ensureCapacity(useCPU bool, ctx driver.Device, binding driver.BufferBinding, size int) error { 1544 if b.size >= size { 1545 return nil 1546 } 1547 if b.buffer != nil { 1548 b.Release() 1549 } 1550 b.cpuBuf.Free() 1551 if !useCPU { 1552 buf, err := ctx.NewBuffer(binding, size) 1553 if err != nil { 1554 return err 1555 } 1556 b.buffer = buf 1557 } else { 1558 b.cpuBuf = cpu.NewBuffer(size) 1559 } 1560 b.size = size 1561 return nil 1562 } 1563 1564 func (b *sizedBuffer) download(data []byte) error { 1565 if b.buffer != nil { 1566 return b.buffer.Download(data) 1567 } else { 1568 copy(data, b.cpuBuf.Data()) 1569 return nil 1570 } 1571 } 1572 1573 func (b *sizedBuffer) upload(data []byte) { 1574 if b.buffer != nil { 1575 b.buffer.Upload(data) 1576 } else { 1577 copy(b.cpuBuf.Data(), data) 1578 } 1579 } 1580 1581 func (g *compute) bindStorageBuffers(prog computeProgram, buffers ...sizedBuffer) { 1582 for i, buf := range buffers { 1583 if !g.useCPU { 1584 g.ctx.BindStorageBuffer(i, buf.buffer) 1585 } else { 1586 *prog.buffers[i] = buf.cpuBuf 1587 } 1588 } 1589 } 1590 1591 var bo = binary.LittleEndian 1592 1593 func (e *encoder) reset() { 1594 e.scene = e.scene[:0] 1595 e.npath = 0 1596 e.npathseg = 0 1597 e.ntrans = 0 1598 } 1599 1600 func (e *encoder) numElements() int { 1601 return len(e.scene) 1602 } 1603 1604 func (e *encoder) transform(m f32.Affine2D) { 1605 e.scene = append(e.scene, scene.Transform(m)) 1606 e.ntrans++ 1607 } 1608 1609 func (e *encoder) lineWidth(width float32) { 1610 e.scene = append(e.scene, scene.SetLineWidth(width)) 1611 } 1612 1613 func (e *encoder) fillMode(mode scene.FillMode) { 1614 e.scene = append(e.scene, scene.SetFillMode(mode)) 1615 } 1616 1617 func (e *encoder) beginClip(bbox f32.Rectangle) { 1618 e.scene = append(e.scene, scene.BeginClip(bbox)) 1619 e.npath++ 1620 } 1621 1622 func (e *encoder) endClip(bbox f32.Rectangle) { 1623 e.scene = append(e.scene, scene.EndClip(bbox)) 1624 e.npath++ 1625 } 1626 1627 func (e *encoder) rect(r f32.Rectangle) { 1628 // Rectangle corners, clock-wise. 
1629 c0, c1, c2, c3 := r.Min, f32.Pt(r.Min.X, r.Max.Y), r.Max, f32.Pt(r.Max.X, r.Min.Y) 1630 e.line(c0, c1) 1631 e.line(c1, c2) 1632 e.line(c2, c3) 1633 e.line(c3, c0) 1634 } 1635 1636 func (e *encoder) fillColor(col color.RGBA) { 1637 e.scene = append(e.scene, scene.FillColor(col)) 1638 e.npath++ 1639 } 1640 1641 func (e *encoder) fillImage(index int, offset image.Point) { 1642 e.scene = append(e.scene, scene.FillImage(index, offset)) 1643 e.npath++ 1644 } 1645 1646 func (e *encoder) line(start, end f32.Point) { 1647 e.scene = append(e.scene, scene.Line(start, end)) 1648 e.npathseg++ 1649 } 1650 1651 func (c *collector) reset() { 1652 c.prevFrame, c.frame = c.frame, c.prevFrame 1653 c.clipStates = c.clipStates[:0] 1654 c.transStack = c.transStack[:0] 1655 c.frame.reset() 1656 } 1657 1658 func (c *opsCollector) reset() { 1659 c.paths = c.paths[:0] 1660 c.clipCmds = c.clipCmds[:0] 1661 c.ops = c.ops[:0] 1662 c.layers = c.layers[:0] 1663 } 1664 1665 func (c *collector) addClip(state *encoderState, viewport, bounds f32.Rectangle, path []byte, key ops.Key, hash uint64, strokeWidth float32, push bool) { 1666 // Rectangle clip regions. 1667 if len(path) == 0 && !push { 1668 // If the rectangular clip region contains a previous path it can be discarded. 1669 p := state.clip 1670 t := state.relTrans.Invert() 1671 for p != nil { 1672 // rect is the parent bounds transformed relative to the rectangle. 1673 rect := transformBounds(t, p.bounds) 1674 if rect.In(bounds) { 1675 return 1676 } 1677 t = p.relTrans.Invert().Mul(t) 1678 p = p.parent 1679 } 1680 } 1681 1682 absBounds := transformBounds(state.t, bounds).Bounds() 1683 intersect := absBounds 1684 if state.clip != nil { 1685 intersect = state.clip.intersect.Intersect(intersect) 1686 } 1687 c.clipStates = append(c.clipStates, clipState{ 1688 parent: state.clip, 1689 absBounds: absBounds, 1690 path: path, 1691 pathKey: key, 1692 intersect: intersect, 1693 clipKey: clipKey{ 1694 bounds: bounds, 1695 relTrans: state.relTrans, 1696 strokeWidth: strokeWidth, 1697 pathHash: hash, 1698 }, 1699 }) 1700 state.clip = &c.clipStates[len(c.clipStates)-1] 1701 state.relTrans = f32.Affine2D{} 1702 } 1703 1704 func (c *collector) collect(root *op.Ops, viewport image.Point, texOps *[]textureOp) { 1705 fview := f32.Rectangle{Max: layout.FPt(viewport)} 1706 var intOps *ops.Ops 1707 if root != nil { 1708 intOps = &root.Internal 1709 } 1710 c.reader.Reset(intOps) 1711 var state encoderState 1712 reset := func() { 1713 state = encoderState{ 1714 paintKey: paintKey{ 1715 color: color.NRGBA{A: 0xff}, 1716 }, 1717 } 1718 } 1719 reset() 1720 r := &c.reader 1721 var ( 1722 pathData struct { 1723 data []byte 1724 key ops.Key 1725 hash uint64 1726 } 1727 strWidth float32 1728 ) 1729 c.addClip(&state, fview, fview, nil, ops.Key{}, 0, 0, false) 1730 for encOp, ok := r.Decode(); ok; encOp, ok = r.Decode() { 1731 switch ops.OpType(encOp.Data[0]) { 1732 case ops.TypeTransform: 1733 dop, push := ops.DecodeTransform(encOp.Data) 1734 if push { 1735 c.transStack = append(c.transStack, transEntry{t: state.t, relTrans: state.relTrans}) 1736 } 1737 state.t = state.t.Mul(dop) 1738 state.relTrans = state.relTrans.Mul(dop) 1739 case ops.TypePopTransform: 1740 n := len(c.transStack) 1741 st := c.transStack[n-1] 1742 c.transStack = c.transStack[:n-1] 1743 state.t = st.t 1744 state.relTrans = st.relTrans 1745 case ops.TypeStroke: 1746 strWidth = decodeStrokeOp(encOp.Data) 1747 case ops.TypePath: 1748 hash := bo.Uint64(encOp.Data[1:]) 1749 encOp, ok = r.Decode() 1750 if !ok { 1751 
panic("unexpected end of path operation") 1752 } 1753 pathData.data = encOp.Data[ops.TypeAuxLen:] 1754 pathData.key = encOp.Key 1755 pathData.hash = hash 1756 case ops.TypeClip: 1757 var op ops.ClipOp 1758 op.Decode(encOp.Data) 1759 bounds := f32.FRect(op.Bounds) 1760 c.addClip(&state, fview, bounds, pathData.data, pathData.key, pathData.hash, strWidth, true) 1761 pathData.data = nil 1762 strWidth = 0 1763 case ops.TypePopClip: 1764 state.relTrans = state.clip.relTrans.Mul(state.relTrans) 1765 state.clip = state.clip.parent 1766 case ops.TypeColor: 1767 state.matType = materialColor 1768 state.color = decodeColorOp(encOp.Data) 1769 case ops.TypeLinearGradient: 1770 state.matType = materialLinearGradient 1771 op := decodeLinearGradientOp(encOp.Data) 1772 state.stop1 = op.stop1 1773 state.stop2 = op.stop2 1774 state.color1 = op.color1 1775 state.color2 = op.color2 1776 case ops.TypeImage: 1777 state.matType = materialTexture 1778 state.image = decodeImageOp(encOp.Data, encOp.Refs) 1779 case ops.TypePaint: 1780 paintState := state 1781 if paintState.matType == materialTexture { 1782 // Clip to the bounds of the image, to hide other images in the atlas. 1783 sz := state.image.src.Rect.Size() 1784 bounds := f32.Rectangle{Max: layout.FPt(sz)} 1785 c.addClip(&paintState, fview, bounds, nil, ops.Key{}, 0, 0, false) 1786 } 1787 intersect := paintState.clip.intersect 1788 if intersect.Empty() { 1789 break 1790 } 1791 1792 // If the paint is a uniform opaque color that takes up the whole 1793 // screen, it covers all previous paints and we can discard all 1794 // rendering commands recorded so far. 1795 if paintState.clip == nil && paintState.matType == materialColor && paintState.color.A == 255 { 1796 c.clearColor = f32color.LinearFromSRGB(paintState.color).Opaque() 1797 c.clear = true 1798 c.frame.reset() 1799 break 1800 } 1801 1802 // Flatten clip stack. 1803 p := paintState.clip 1804 startIdx := len(c.frame.clipCmds) 1805 for p != nil { 1806 idx := len(c.frame.paths) 1807 c.frame.paths = append(c.frame.paths, make([]byte, len(p.path))...) 1808 path := c.frame.paths[idx:] 1809 copy(path, p.path) 1810 c.frame.clipCmds = append(c.frame.clipCmds, clipCmd{ 1811 state: p.clipKey, 1812 path: path, 1813 pathKey: p.pathKey, 1814 absBounds: p.absBounds, 1815 }) 1816 p = p.parent 1817 } 1818 clipStack := c.frame.clipCmds[startIdx:] 1819 c.frame.ops = append(c.frame.ops, paintOp{ 1820 clipStack: clipStack, 1821 state: paintState.paintKey, 1822 intersect: intersect, 1823 }) 1824 case ops.TypeSave: 1825 id := ops.DecodeSave(encOp.Data) 1826 c.save(id, state.t) 1827 case ops.TypeLoad: 1828 reset() 1829 id := ops.DecodeLoad(encOp.Data) 1830 state.t = c.states[id] 1831 state.relTrans = state.t 1832 } 1833 } 1834 for i := range c.frame.ops { 1835 op := &c.frame.ops[i] 1836 // For each clip, cull rectangular clip regions that contain its 1837 // (transformed) bounds. addClip already handled the converse case. 1838 // TODO: do better than O(n²) to efficiently deal with deep stacks. 1839 for j := 0; j < len(op.clipStack)-1; j++ { 1840 cl := op.clipStack[j] 1841 p := cl.state 1842 r := transformBounds(p.relTrans, p.bounds) 1843 for k := j + 1; k < len(op.clipStack); k++ { 1844 cl2 := op.clipStack[k] 1845 p2 := cl2.state 1846 if len(cl2.path) == 0 && r.In(cl2.state.bounds) { 1847 op.clipStack = append(op.clipStack[:k], op.clipStack[k+1:]...) 
1848 k-- 1849 op.clipStack[k].state.relTrans = p2.relTrans.Mul(op.clipStack[k].state.relTrans) 1850 } 1851 r = transformRect(p2.relTrans, r) 1852 } 1853 } 1854 // Separate the integer offset from the first transform. Two ops that differ 1855 // only in integer offsets may share backing storage. 1856 if len(op.clipStack) > 0 { 1857 c := &op.clipStack[len(op.clipStack)-1] 1858 t := c.state.relTrans 1859 t, off := separateTransform(t) 1860 c.state.relTrans = t 1861 op.offset = off 1862 op.state.t = op.state.t.Offset(layout.FPt(off.Mul(-1))) 1863 } 1864 op.hash = c.hashOp(*op) 1865 op.texOpIdx = -1 1866 switch op.state.matType { 1867 case materialTexture: 1868 op.texOpIdx = len(*texOps) 1869 // Separate integer offset from transformation. TextureOps that have identical transforms 1870 // except for their integer offsets can share a transformed image. 1871 t := op.state.t.Offset(layout.FPt(op.offset)) 1872 t, off := separateTransform(t) 1873 bounds := op.intersect.Round().Sub(off) 1874 *texOps = append(*texOps, textureOp{ 1875 img: op.state.image, 1876 off: off, 1877 key: textureKey{ 1878 bounds: bounds, 1879 transform: t, 1880 handle: op.state.image.handle, 1881 }, 1882 }) 1883 } 1884 } 1885 } 1886 1887 func (c *collector) hashOp(op paintOp) uint64 { 1888 c.hasher.Reset() 1889 for _, cl := range op.clipStack { 1890 k := cl.state 1891 keyBytes := (*[unsafe.Sizeof(k)]byte)(unsafe.Pointer(unsafe.Pointer(&k))) 1892 c.hasher.Write(keyBytes[:]) 1893 } 1894 k := op.state 1895 keyBytes := (*[unsafe.Sizeof(k)]byte)(unsafe.Pointer(unsafe.Pointer(&k))) 1896 c.hasher.Write(keyBytes[:]) 1897 return c.hasher.Sum64() 1898 } 1899 1900 func (g *compute) layer(viewport image.Point, texOps []textureOp) { 1901 // Sort ops from previous frames by hash. 1902 c := &g.collector 1903 prevOps := c.prevFrame.ops 1904 c.order = c.order[:0] 1905 for i, op := range prevOps { 1906 c.order = append(c.order, hashIndex{ 1907 index: i, 1908 hash: op.hash, 1909 }) 1910 } 1911 sort.Slice(c.order, func(i, j int) bool { 1912 return c.order[i].hash < c.order[j].hash 1913 }) 1914 // Split layers with different materials atlas; the compute stage has only 1915 // one materials slot. 1916 splitLayer := func(ops []paintOp, prevLayerIdx int) { 1917 for len(ops) > 0 { 1918 var materials *textureAtlas 1919 idx := 0 1920 for idx < len(ops) { 1921 if i := ops[idx].texOpIdx; i != -1 { 1922 omats := texOps[i].matAlloc.alloc.atlas 1923 if materials != nil && omats != nil && omats != materials { 1924 break 1925 } 1926 materials = omats 1927 } 1928 idx++ 1929 } 1930 l := layer{ops: ops[:idx], materials: materials} 1931 if prevLayerIdx != -1 { 1932 prev := c.prevFrame.layers[prevLayerIdx] 1933 if !prev.alloc.dead && len(prev.ops) == len(l.ops) { 1934 l.alloc = prev.alloc 1935 l.materials = prev.materials 1936 g.touchAlloc(l.alloc) 1937 } 1938 } 1939 for i, op := range l.ops { 1940 l.rect = l.rect.Union(op.intersect.Round()) 1941 l.ops[i].layer = len(c.frame.layers) 1942 } 1943 c.frame.layers = append(c.frame.layers, l) 1944 ops = ops[idx:] 1945 } 1946 } 1947 ops := c.frame.ops 1948 idx := 0 1949 for idx < len(ops) { 1950 op := ops[idx] 1951 // Search for longest matching op sequence. 1952 // start is the earliest index of a match. 1953 start := searchOp(c.order, op.hash) 1954 layerOps, prevLayerIdx := longestLayer(prevOps, c.order[start:], ops[idx:]) 1955 if len(layerOps) == 0 { 1956 idx++ 1957 continue 1958 } 1959 if unmatched := ops[:idx]; len(unmatched) > 0 { 1960 // Flush layer of unmatched ops. 
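// The ops preceding the match get their own layer with no previous-frame
// counterpart (prevLayerIdx -1), and the slice is re-based so indexing restarts
// at the matched run.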
1961 splitLayer(unmatched, -1) 1962 ops = ops[idx:] 1963 idx = 0 1964 } 1965 splitLayer(layerOps, prevLayerIdx) 1966 ops = ops[len(layerOps):] 1967 } 1968 if len(ops) > 0 { 1969 splitLayer(ops, -1) 1970 } 1971 } 1972 1973 func longestLayer(prev []paintOp, order []hashIndex, ops []paintOp) ([]paintOp, int) { 1974 longest := 0 1975 longestIdx := -1 1976 outer: 1977 for len(order) > 0 { 1978 first := order[0] 1979 order = order[1:] 1980 match := prev[first.index:] 1981 // Potential match found. Now find longest matching sequence. 1982 end := 0 1983 layer := match[0].layer 1984 off := match[0].offset.Sub(ops[0].offset) 1985 for end < len(match) && end < len(ops) { 1986 m := match[end] 1987 o := ops[end] 1988 // End layers on previous match. 1989 if m.layer != layer { 1990 break 1991 } 1992 // End layer when the next op doesn't match. 1993 if m.hash != o.hash { 1994 if end == 0 { 1995 // Hashes are sorted so if the first op doesn't match, no 1996 // more matches are possible. 1997 break outer 1998 } 1999 break 2000 } 2001 if !opEqual(off, m, o) { 2002 break 2003 } 2004 end++ 2005 } 2006 if end > longest { 2007 longest = end 2008 longestIdx = layer 2009 2010 } 2011 } 2012 return ops[:longest], longestIdx 2013 } 2014 2015 func searchOp(order []hashIndex, hash uint64) int { 2016 lo, hi := 0, len(order) 2017 for lo < hi { 2018 mid := (lo + hi) / 2 2019 if order[mid].hash < hash { 2020 lo = mid + 1 2021 } else { 2022 hi = mid 2023 } 2024 } 2025 return lo 2026 } 2027 2028 func opEqual(off image.Point, o1 paintOp, o2 paintOp) bool { 2029 if len(o1.clipStack) != len(o2.clipStack) { 2030 return false 2031 } 2032 if o1.state != o2.state { 2033 return false 2034 } 2035 if o1.offset.Sub(o2.offset) != off { 2036 return false 2037 } 2038 for i, cl1 := range o1.clipStack { 2039 cl2 := o2.clipStack[i] 2040 if len(cl1.path) != len(cl2.path) { 2041 return false 2042 } 2043 if cl1.state != cl2.state { 2044 return false 2045 } 2046 if cl1.pathKey != cl2.pathKey && !bytes.Equal(cl1.path, cl2.path) { 2047 return false 2048 } 2049 } 2050 return true 2051 } 2052 2053 func encodeLayer(l layer, pos image.Point, viewport image.Point, enc *encoder, texOps []textureOp) { 2054 off := pos.Sub(l.rect.Min) 2055 offf := layout.FPt(off) 2056 2057 enc.transform(f32.Affine2D{}.Offset(offf)) 2058 for _, op := range l.ops { 2059 encodeOp(viewport, off, enc, texOps, op) 2060 } 2061 enc.transform(f32.Affine2D{}.Offset(offf.Mul(-1))) 2062 } 2063 2064 func encodeOp(viewport image.Point, absOff image.Point, enc *encoder, texOps []textureOp, op paintOp) { 2065 // Fill in clip bounds, which the shaders expect to be the union 2066 // of all affected bounds. 
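// clipStack[0] is the innermost clip (see the flattening loop in collect), so
// after this pass entry i's union covers that clip and every clip nested inside it.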
2067 var union f32.Rectangle 2068 for i, cl := range op.clipStack { 2069 union = union.Union(cl.absBounds) 2070 op.clipStack[i].union = union 2071 } 2072 2073 absOfff := layout.FPt(absOff) 2074 fillMode := scene.FillModeNonzero 2075 opOff := layout.FPt(op.offset) 2076 inv := f32.Affine2D{}.Offset(opOff) 2077 enc.transform(inv) 2078 for i := len(op.clipStack) - 1; i >= 0; i-- { 2079 cl := op.clipStack[i] 2080 if w := cl.state.strokeWidth; w > 0 { 2081 enc.fillMode(scene.FillModeStroke) 2082 enc.lineWidth(w) 2083 fillMode = scene.FillModeStroke 2084 } else if fillMode != scene.FillModeNonzero { 2085 enc.fillMode(scene.FillModeNonzero) 2086 fillMode = scene.FillModeNonzero 2087 } 2088 enc.transform(cl.state.relTrans) 2089 inv = inv.Mul(cl.state.relTrans) 2090 if len(cl.path) == 0 { 2091 enc.rect(cl.state.bounds) 2092 } else { 2093 enc.encodePath(cl.path, fillMode) 2094 } 2095 if i != 0 { 2096 enc.beginClip(cl.union.Add(absOfff)) 2097 } 2098 } 2099 if len(op.clipStack) == 0 { 2100 // No clipping; fill the entire view. 2101 enc.rect(f32.Rectangle{Max: layout.FPt(viewport)}) 2102 } 2103 2104 switch op.state.matType { 2105 case materialTexture: 2106 texOp := texOps[op.texOpIdx] 2107 off := texOp.matAlloc.alloc.rect.Min.Add(texOp.matAlloc.offset).Sub(texOp.off).Sub(absOff) 2108 enc.fillImage(0, off) 2109 case materialColor: 2110 enc.fillColor(f32color.NRGBAToRGBA(op.state.color)) 2111 case materialLinearGradient: 2112 // TODO: implement. 2113 enc.fillColor(f32color.NRGBAToRGBA(op.state.color1)) 2114 default: 2115 panic("not implemented") 2116 } 2117 enc.transform(inv.Invert()) 2118 // Pop the clip stack, except the first entry used for fill. 2119 for i := 1; i < len(op.clipStack); i++ { 2120 cl := op.clipStack[i] 2121 enc.endClip(cl.union.Add(absOfff)) 2122 } 2123 if fillMode != scene.FillModeNonzero { 2124 enc.fillMode(scene.FillModeNonzero) 2125 } 2126 } 2127 2128 func (c *collector) save(id int, state f32.Affine2D) { 2129 if extra := id - len(c.states) + 1; extra > 0 { 2130 c.states = append(c.states, make([]f32.Affine2D, extra)...) 
2131 } 2132 c.states[id] = state 2133 } 2134 2135 func transformBounds(t f32.Affine2D, bounds f32.Rectangle) rectangle { 2136 return rectangle{ 2137 t.Transform(bounds.Min), t.Transform(f32.Pt(bounds.Max.X, bounds.Min.Y)), 2138 t.Transform(bounds.Max), t.Transform(f32.Pt(bounds.Min.X, bounds.Max.Y)), 2139 } 2140 } 2141 2142 func separateTransform(t f32.Affine2D) (f32.Affine2D, image.Point) { 2143 sx, hx, ox, hy, sy, oy := t.Elems() 2144 intx, fracx := math.Modf(float64(ox)) 2145 inty, fracy := math.Modf(float64(oy)) 2146 t = f32.NewAffine2D(sx, hx, float32(fracx), hy, sy, float32(fracy)) 2147 return t, image.Pt(int(intx), int(inty)) 2148 } 2149 2150 func transformRect(t f32.Affine2D, r rectangle) rectangle { 2151 var tr rectangle 2152 for i, c := range r { 2153 tr[i] = t.Transform(c) 2154 } 2155 return tr 2156 } 2157 2158 func (r rectangle) In(b f32.Rectangle) bool { 2159 for _, c := range r { 2160 inside := b.Min.X <= c.X && c.X <= b.Max.X && 2161 b.Min.Y <= c.Y && c.Y <= b.Max.Y 2162 if !inside { 2163 return false 2164 } 2165 } 2166 return true 2167 } 2168 2169 func (r rectangle) Contains(b f32.Rectangle) bool { 2170 return true 2171 } 2172 2173 func (r rectangle) Bounds() f32.Rectangle { 2174 bounds := f32.Rectangle{ 2175 Min: f32.Pt(math.MaxFloat32, math.MaxFloat32), 2176 Max: f32.Pt(-math.MaxFloat32, -math.MaxFloat32), 2177 } 2178 for _, c := range r { 2179 if c.X < bounds.Min.X { 2180 bounds.Min.X = c.X 2181 } 2182 if c.Y < bounds.Min.Y { 2183 bounds.Min.Y = c.Y 2184 } 2185 if c.X > bounds.Max.X { 2186 bounds.Max.X = c.X 2187 } 2188 if c.Y > bounds.Max.Y { 2189 bounds.Max.Y = c.Y 2190 } 2191 } 2192 return bounds 2193 }
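// exampleComputeHelpers is a small illustrative sketch of the pure helpers above
// (pow2Ceil and separateTransform); it only documents their behaviour and is never
// called by the renderer.
func exampleComputeHelpers() {
	// pow2Ceil rounds up to the next power of two: 5 -> 8, 8 -> 8.
	_ = pow2Ceil(5) // 8
	_ = pow2Ceil(8) // 8

	// separateTransform splits the integer part of a transform's offset from the
	// fractional remainder: an offset of (10.25, -3.5) becomes the integer point
	// (10, -3) plus a residual offset of (0.25, -0.5).
	t := f32.Affine2D{}.Offset(f32.Pt(10.25, -3.5))
	frac, intOff := separateTransform(t)
	_, _ = frac, intOff
}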