github.com/cybriq/giocore@v0.0.7-0.20210703034601-cfb9cb5f3900/gpu/compute.go (about) 1 // SPDX-License-Identifier: Unlicense OR MIT 2 3 package gpu 4 5 import ( 6 "encoding/binary" 7 "errors" 8 "fmt" 9 "image" 10 "image/color" 11 "math/bits" 12 "time" 13 "unsafe" 14 15 "github.com/cybriq/giocore/f32" 16 "github.com/cybriq/giocore/gpu/internal/driver" 17 "github.com/cybriq/giocore/internal/byteslice" 18 "github.com/cybriq/giocore/internal/f32color" 19 "github.com/cybriq/giocore/internal/ops" 20 "github.com/cybriq/giocore/internal/scene" 21 "github.com/cybriq/giocore/op" 22 layout "github.com/cybriq/giocore/utils" 23 ) 24 25 type compute struct { 26 ctx driver.Device 27 enc encoder 28 29 drawOps drawOps 30 texOps []textureOp 31 cache *resourceCache 32 maxTextureDim int 33 34 programs struct { 35 elements driver.Program 36 tileAlloc driver.Program 37 pathCoarse driver.Program 38 backdrop driver.Program 39 binning driver.Program 40 coarse driver.Program 41 kernel4 driver.Program 42 } 43 buffers struct { 44 config driver.Buffer 45 scene sizedBuffer 46 state sizedBuffer 47 memory sizedBuffer 48 } 49 output struct { 50 size image.Point 51 // image is the output texture. Note that it is in RGBA format, 52 // but contains data in sRGB. See blitOutput for more detail. 53 image driver.Texture 54 blitProg driver.Program 55 } 56 // images contains ImageOp images packed into a texture atlas. 57 images struct { 58 packer packer 59 // positions maps imageOpData.handles to positions inside tex. 60 positions map[interface{}]image.Point 61 tex driver.Texture 62 } 63 // materials contains the pre-processed materials (transformed images for 64 // now, gradients etc. later) packed in a texture atlas. The atlas is used 65 // as source in kernel4. 66 materials struct { 67 // offsets maps texture ops to the offsets to put in their FillImage commands. 68 offsets map[textureKey]image.Point 69 70 prog driver.Program 71 layout driver.InputLayout 72 73 packer packer 74 75 tex driver.Texture 76 fbo driver.Framebuffer 77 quads []materialVertex 78 79 buffer sizedBuffer 80 81 uniforms *materialUniforms 82 uniBuf driver.Buffer 83 } 84 timers struct { 85 profile string 86 t *timers 87 materials *timer 88 elements *timer 89 tileAlloc *timer 90 pathCoarse *timer 91 backdropBinning *timer 92 coarse *timer 93 kernel4 *timer 94 blit *timer 95 } 96 97 // The following fields hold scratch space to avoid garbage. 98 zeroSlice []byte 99 memHeader *memoryHeader 100 conf *config 101 } 102 103 type materialUniforms struct { 104 scale [2]float32 105 pos [2]float32 106 } 107 108 // materialVertex describes a vertex of a quad used to render a transformed 109 // material. 110 type materialVertex struct { 111 posX, posY float32 112 u, v float32 113 } 114 115 // textureKey identifies textureOp. 116 type textureKey struct { 117 handle interface{} 118 transform f32.Affine2D 119 } 120 121 // textureOp represents an imageOp that requires texture space. 122 type textureOp struct { 123 // sceneIdx is the index in the scene that contains the fill image command 124 // that corresponds to the operation. 125 sceneIdx int 126 key textureKey 127 img imageOpData 128 129 // pos is the position of the untransformed image in the images texture. 130 pos image.Point 131 } 132 133 type encoder struct { 134 scene []scene.Command 135 npath int 136 npathseg int 137 ntrans int 138 } 139 140 type encodeState struct { 141 trans f32.Affine2D 142 clip f32.Rectangle 143 } 144 145 type sizedBuffer struct { 146 size int 147 buffer driver.Buffer 148 } 149 150 // config matches Config in setup.h 151 type config struct { 152 n_elements uint32 // paths 153 n_pathseg uint32 154 width_in_tiles uint32 155 height_in_tiles uint32 156 tile_alloc memAlloc 157 bin_alloc memAlloc 158 ptcl_alloc memAlloc 159 pathseg_alloc memAlloc 160 anno_alloc memAlloc 161 trans_alloc memAlloc 162 } 163 164 // memAlloc matches Alloc in mem.h 165 type memAlloc struct { 166 offset uint32 167 //size uint32 168 } 169 170 // memoryHeader matches the header of Memory in mem.h. 171 type memoryHeader struct { 172 mem_offset uint32 173 mem_error uint32 174 } 175 176 // GPU structure sizes and constants. 177 const ( 178 tileWidthPx = 32 179 tileHeightPx = 32 180 ptclInitialAlloc = 1024 181 kernel4OutputUnit = 2 182 kernel4AtlasUnit = 3 183 184 pathSize = 12 185 binSize = 8 186 pathsegSize = 52 187 annoSize = 32 188 transSize = 24 189 stateSize = 60 190 stateStride = 4 + 2*stateSize 191 ) 192 193 // mem.h constants. 194 const ( 195 memNoError = 0 // NO_ERROR 196 memMallocFailed = 1 // ERR_MALLOC_FAILED 197 ) 198 199 func newCompute(ctx driver.Device) (*compute, error) { 200 maxDim := ctx.Caps().MaxTextureSize 201 // Large atlas textures cause artifacts due to precision loss in 202 // shaders. 203 if cap := 8192; maxDim > cap { 204 maxDim = cap 205 } 206 g := &compute{ 207 ctx: ctx, 208 cache: newResourceCache(), 209 maxTextureDim: maxDim, 210 conf: new(config), 211 memHeader: new(memoryHeader), 212 } 213 214 blitProg, err := ctx.NewProgram(shader_copy_vert, shader_copy_frag) 215 if err != nil { 216 g.Release() 217 return nil, err 218 } 219 g.output.blitProg = blitProg 220 221 materialProg, err := ctx.NewProgram(shader_material_vert, shader_material_frag) 222 if err != nil { 223 g.Release() 224 return nil, err 225 } 226 g.materials.prog = materialProg 227 progLayout, err := ctx.NewInputLayout(shader_material_vert, []driver.InputDesc{ 228 {Type: driver.DataTypeFloat, Size: 2, Offset: 0}, 229 {Type: driver.DataTypeFloat, Size: 2, Offset: 4 * 2}, 230 }) 231 if err != nil { 232 g.Release() 233 return nil, err 234 } 235 g.materials.layout = progLayout 236 g.materials.uniforms = new(materialUniforms) 237 238 buf, err := ctx.NewBuffer(driver.BufferBindingUniforms, int(unsafe.Sizeof(*g.materials.uniforms))) 239 if err != nil { 240 g.Release() 241 return nil, err 242 } 243 g.materials.uniBuf = buf 244 g.materials.prog.SetVertexUniforms(buf) 245 246 g.drawOps.pathCache = newOpCache() 247 g.drawOps.compute = true 248 249 buf, err = ctx.NewBuffer(driver.BufferBindingShaderStorage, int(unsafe.Sizeof(config{}))) 250 if err != nil { 251 g.Release() 252 return nil, err 253 } 254 g.buffers.config = buf 255 256 shaders := []struct { 257 prog *driver.Program 258 src driver.ShaderSources 259 }{ 260 {&g.programs.elements, shader_elements_comp}, 261 {&g.programs.tileAlloc, shader_tile_alloc_comp}, 262 {&g.programs.pathCoarse, shader_path_coarse_comp}, 263 {&g.programs.backdrop, shader_backdrop_comp}, 264 {&g.programs.binning, shader_binning_comp}, 265 {&g.programs.coarse, shader_coarse_comp}, 266 {&g.programs.kernel4, shader_kernel4_comp}, 267 } 268 for _, shader := range shaders { 269 p, err := ctx.NewComputeProgram(shader.src) 270 if err != nil { 271 g.Release() 272 return nil, err 273 } 274 *shader.prog = p 275 } 276 return g, nil 277 } 278 279 func (g *compute) Collect(viewport image.Point, ops *op.Ops) { 280 g.drawOps.reset(g.cache, viewport) 281 g.drawOps.collect(g.ctx, g.cache, ops, viewport) 282 for _, img := range g.drawOps.allImageOps { 283 expandPathOp(img.path, img.clip) 284 } 285 g.encode(viewport) 286 } 287 288 func (g *compute) Clear(col color.NRGBA) { 289 g.drawOps.clear = true 290 g.drawOps.clearColor = f32color.LinearFromSRGB(col) 291 } 292 293 func (g *compute) Frame() error { 294 viewport := g.drawOps.viewport 295 tileDims := image.Point{ 296 X: (viewport.X + tileWidthPx - 1) / tileWidthPx, 297 Y: (viewport.Y + tileHeightPx - 1) / tileHeightPx, 298 } 299 300 defFBO := g.ctx.BeginFrame(g.drawOps.clear, viewport) 301 defer g.ctx.EndFrame() 302 303 if g.drawOps.profile && g.timers.t == nil && g.ctx.Caps().Features.Has(driver.FeatureTimers) { 304 t := &g.timers 305 t.t = newTimers(g.ctx) 306 t.materials = g.timers.t.newTimer() 307 t.elements = g.timers.t.newTimer() 308 t.tileAlloc = g.timers.t.newTimer() 309 t.pathCoarse = g.timers.t.newTimer() 310 t.backdropBinning = g.timers.t.newTimer() 311 t.coarse = g.timers.t.newTimer() 312 t.kernel4 = g.timers.t.newTimer() 313 t.blit = g.timers.t.newTimer() 314 } 315 316 mat := g.timers.materials 317 mat.begin() 318 if err := g.uploadImages(); err != nil { 319 return err 320 } 321 if err := g.renderMaterials(); err != nil { 322 return err 323 } 324 mat.end() 325 if err := g.render(tileDims); err != nil { 326 return err 327 } 328 g.ctx.BindFramebuffer(defFBO) 329 g.blitOutput(viewport) 330 g.cache.frame() 331 g.drawOps.pathCache.frame() 332 t := &g.timers 333 if g.drawOps.profile && t.t.ready() { 334 mat := t.materials.Elapsed 335 et, tat, pct, bbt := t.elements.Elapsed, t.tileAlloc.Elapsed, t.pathCoarse.Elapsed, t.backdropBinning.Elapsed 336 ct, k4t := t.coarse.Elapsed, t.kernel4.Elapsed 337 blit := t.blit.Elapsed 338 ft := mat + et + tat + pct + bbt + ct + k4t + blit 339 q := 100 * time.Microsecond 340 ft = ft.Round(q) 341 mat = mat.Round(q) 342 et, tat, pct, bbt = et.Round(q), tat.Round(q), pct.Round(q), bbt.Round(q) 343 ct, k4t = ct.Round(q), k4t.Round(q) 344 blit = blit.Round(q) 345 t.profile = fmt.Sprintf("ft:%7s mat: %7s et:%7s tat:%7s pct:%7s bbt:%7s ct:%7s k4t:%7s blit:%7s", ft, mat, et, tat, pct, bbt, ct, k4t, blit) 346 } 347 g.drawOps.clear = false 348 return nil 349 } 350 351 func (g *compute) Profile() string { 352 return g.timers.profile 353 } 354 355 // blitOutput copies the compute render output to the output FBO. We need to 356 // copy because compute shaders can only write to textures, not FBOs. Compute 357 // shader can only write to RGBA textures, but since we actually render in sRGB 358 // format we can't use glBlitFramebuffer, because it does sRGB conversion. 359 func (g *compute) blitOutput(viewport image.Point) { 360 t := g.timers.blit 361 t.begin() 362 if !g.drawOps.clear { 363 g.ctx.BlendFunc(driver.BlendFactorOne, driver.BlendFactorOneMinusSrcAlpha) 364 g.ctx.SetBlend(true) 365 defer g.ctx.SetBlend(false) 366 } 367 g.ctx.Viewport(0, 0, viewport.X, viewport.Y) 368 g.ctx.BindTexture(0, g.output.image) 369 g.ctx.BindProgram(g.output.blitProg) 370 g.ctx.DrawArrays(driver.DrawModeTriangleStrip, 0, 4) 371 t.end() 372 } 373 374 func (g *compute) encode(viewport image.Point) { 375 g.texOps = g.texOps[:0] 376 g.enc.reset() 377 378 // Flip Y-axis. 379 flipY := f32.Affine2D{}.Scale(f32.Pt(0, 0), f32.Pt(1, -1)).Offset(f32.Pt(0, float32(viewport.Y))) 380 g.enc.transform(flipY) 381 if g.drawOps.clear { 382 g.enc.rect(f32.Rectangle{Max: layout.FPt(viewport)}) 383 g.enc.fillColor(f32color.NRGBAToRGBA(g.drawOps.clearColor.SRGB())) 384 } 385 g.encodeOps(flipY, viewport, g.drawOps.allImageOps) 386 } 387 388 func (g *compute) renderMaterials() error { 389 m := &g.materials 390 m.quads = m.quads[:0] 391 resize := false 392 reclaimed := false 393 restart: 394 for { 395 for _, op := range g.texOps { 396 if off, exists := m.offsets[op.key]; exists { 397 g.enc.setFillImageOffset(op.sceneIdx, off) 398 continue 399 } 400 quad, bounds := g.materialQuad(op.key.transform, op.img, op.pos) 401 402 // A material is clipped to avoid drawing outside its bounds inside the atlas. However, 403 // imprecision in the clipping may cause a single pixel overflow. Be safe. 404 size := bounds.Size().Add(image.Pt(1, 1)) 405 place, fits := m.packer.tryAdd(size) 406 if !fits { 407 m.offsets = nil 408 m.quads = m.quads[:0] 409 m.packer.clear() 410 if !reclaimed { 411 // Some images may no longer be in use, try again 412 // after clearing existing maps. 413 reclaimed = true 414 } else { 415 m.packer.maxDim += 256 416 resize = true 417 if m.packer.maxDim > g.maxTextureDim { 418 return errors.New("compute: no space left in material atlas") 419 } 420 } 421 m.packer.newPage() 422 continue restart 423 } 424 // Position quad to match place. 425 offset := place.Pos.Sub(bounds.Min) 426 offsetf := layout.FPt(offset) 427 for i := range quad { 428 quad[i].posX += offsetf.X 429 quad[i].posY += offsetf.Y 430 } 431 // Draw quad as two triangles. 432 m.quads = append(m.quads, quad[0], quad[1], quad[3], quad[3], quad[1], quad[2]) 433 if m.offsets == nil { 434 m.offsets = make(map[textureKey]image.Point) 435 } 436 m.offsets[op.key] = offset 437 g.enc.setFillImageOffset(op.sceneIdx, offset) 438 } 439 break 440 } 441 if len(m.quads) == 0 { 442 return nil 443 } 444 texSize := m.packer.maxDim 445 if resize { 446 if m.fbo != nil { 447 m.fbo.Release() 448 m.fbo = nil 449 } 450 if m.tex != nil { 451 m.tex.Release() 452 m.tex = nil 453 } 454 handle, err := g.ctx.NewTexture(driver.TextureFormatRGBA8, texSize, texSize, 455 driver.FilterNearest, driver.FilterNearest, 456 driver.BufferBindingShaderStorage|driver.BufferBindingFramebuffer) 457 if err != nil { 458 return fmt.Errorf("compute: failed to create material atlas: %v", err) 459 } 460 m.tex = handle 461 fbo, err := g.ctx.NewFramebuffer(handle, 0) 462 if err != nil { 463 return fmt.Errorf("compute: failed to create material framebuffer: %v", err) 464 } 465 m.fbo = fbo 466 } 467 // Transform to clip space: [-1, -1] - [1, 1]. 468 g.materials.uniforms.scale = [2]float32{2 / float32(texSize), 2 / float32(texSize)} 469 g.materials.uniforms.pos = [2]float32{-1, -1} 470 g.materials.uniBuf.Upload(byteslice.Struct(g.materials.uniforms)) 471 vertexData := byteslice.Slice(m.quads) 472 n := pow2Ceil(len(vertexData)) 473 m.buffer.ensureCapacity(g.ctx, driver.BufferBindingVertices, n) 474 m.buffer.buffer.Upload(vertexData) 475 g.ctx.BindTexture(0, g.images.tex) 476 g.ctx.BindFramebuffer(m.fbo) 477 g.ctx.Viewport(0, 0, texSize, texSize) 478 if reclaimed { 479 g.ctx.Clear(0, 0, 0, 0) 480 } 481 g.ctx.BindProgram(m.prog) 482 g.ctx.BindVertexBuffer(m.buffer.buffer, int(unsafe.Sizeof(m.quads[0])), 0) 483 g.ctx.BindInputLayout(m.layout) 484 g.ctx.DrawArrays(driver.DrawModeTriangles, 0, len(m.quads)) 485 return nil 486 } 487 488 func (g *compute) uploadImages() error { 489 // padding is the number of pixels added to the right and below 490 // images, to avoid atlas filtering artifacts. 491 const padding = 1 492 493 a := &g.images 494 var uploads map[interface{}]*image.RGBA 495 resize := false 496 reclaimed := false 497 restart: 498 for { 499 for i, op := range g.texOps { 500 if pos, exists := a.positions[op.img.handle]; exists { 501 g.texOps[i].pos = pos 502 continue 503 } 504 size := op.img.src.Bounds().Size().Add(image.Pt(padding, padding)) 505 place, fits := a.packer.tryAdd(size) 506 if !fits { 507 a.positions = nil 508 uploads = nil 509 a.packer.clear() 510 if !reclaimed { 511 // Some images may no longer be in use, try again 512 // after clearing existing maps. 513 reclaimed = true 514 } else { 515 a.packer.maxDim += 256 516 resize = true 517 if a.packer.maxDim > g.maxTextureDim { 518 return errors.New("compute: no space left in image atlas") 519 } 520 } 521 a.packer.newPage() 522 continue restart 523 } 524 if a.positions == nil { 525 a.positions = make(map[interface{}]image.Point) 526 } 527 a.positions[op.img.handle] = place.Pos 528 g.texOps[i].pos = place.Pos 529 if uploads == nil { 530 uploads = make(map[interface{}]*image.RGBA) 531 } 532 uploads[op.img.handle] = op.img.src 533 } 534 break 535 } 536 if len(uploads) == 0 { 537 return nil 538 } 539 if resize { 540 if a.tex != nil { 541 a.tex.Release() 542 a.tex = nil 543 } 544 sz := a.packer.maxDim 545 handle, err := g.ctx.NewTexture(driver.TextureFormatSRGB, sz, sz, driver.FilterLinear, driver.FilterLinear, driver.BufferBindingTexture) 546 if err != nil { 547 return fmt.Errorf("compute: failed to create image atlas: %v", err) 548 } 549 a.tex = handle 550 } 551 for h, img := range uploads { 552 pos, ok := a.positions[h] 553 if !ok { 554 panic("compute: internal error: image not placed") 555 } 556 size := img.Bounds().Size() 557 driver.UploadImage(a.tex, pos, img) 558 rightPadding := image.Pt(padding, size.Y) 559 a.tex.Upload(image.Pt(pos.X+size.X, pos.Y), rightPadding, g.zeros(rightPadding.X*rightPadding.Y*4)) 560 bottomPadding := image.Pt(size.X, padding) 561 a.tex.Upload(image.Pt(pos.X, pos.Y+size.Y), bottomPadding, g.zeros(bottomPadding.X*bottomPadding.Y*4)) 562 } 563 return nil 564 } 565 566 func pow2Ceil(v int) int { 567 exp := bits.Len(uint(v)) 568 if bits.OnesCount(uint(v)) == 1 { 569 exp-- 570 } 571 return 1 << exp 572 } 573 574 // materialQuad constructs a quad that represents the transformed image. It returns the quad 575 // and its bounds. 576 func (g *compute) materialQuad(M f32.Affine2D, img imageOpData, uvPos image.Point) ([4]materialVertex, image.Rectangle) { 577 imgSize := layout.FPt(img.src.Bounds().Size()) 578 sx, hx, ox, hy, sy, oy := M.Elems() 579 transOff := f32.Pt(ox, oy) 580 // The 4 corners of the image rectangle transformed by M, excluding its offset, are: 581 // 582 // q0: M * (0, 0) q3: M * (w, 0) 583 // q1: M * (0, h) q2: M * (w, h) 584 // 585 // Note that q0 = M*0 = 0, q2 = q1 + q3. 586 q0 := f32.Pt(0, 0) 587 q1 := f32.Pt(hx*imgSize.Y, sy*imgSize.Y) 588 q3 := f32.Pt(sx*imgSize.X, hy*imgSize.X) 589 q2 := q1.Add(q3) 590 q0 = q0.Add(transOff) 591 q1 = q1.Add(transOff) 592 q2 = q2.Add(transOff) 593 q3 = q3.Add(transOff) 594 595 boundsf := f32.Rectangle{ 596 Min: min(min(q0, q1), min(q2, q3)), 597 Max: max(max(q0, q1), max(q2, q3)), 598 } 599 600 bounds := boundRectF(boundsf) 601 uvPosf := layout.FPt(uvPos) 602 atlasScale := 1 / float32(g.images.packer.maxDim) 603 uvBounds := f32.Rectangle{ 604 Min: uvPosf.Mul(atlasScale), 605 Max: uvPosf.Add(imgSize).Mul(atlasScale), 606 } 607 quad := [4]materialVertex{ 608 {posX: q0.X, posY: q0.Y, u: uvBounds.Min.X, v: uvBounds.Min.Y}, 609 {posX: q1.X, posY: q1.Y, u: uvBounds.Min.X, v: uvBounds.Max.Y}, 610 {posX: q2.X, posY: q2.Y, u: uvBounds.Max.X, v: uvBounds.Max.Y}, 611 {posX: q3.X, posY: q3.Y, u: uvBounds.Max.X, v: uvBounds.Min.Y}, 612 } 613 return quad, bounds 614 } 615 616 func max(p1, p2 f32.Point) f32.Point { 617 p := p1 618 if p2.X > p.X { 619 p.X = p2.X 620 } 621 if p2.Y > p.Y { 622 p.Y = p2.Y 623 } 624 return p 625 } 626 627 func min(p1, p2 f32.Point) f32.Point { 628 p := p1 629 if p2.X < p.X { 630 p.X = p2.X 631 } 632 if p2.Y < p.Y { 633 p.Y = p2.Y 634 } 635 return p 636 } 637 638 func (g *compute) encodeOps(trans f32.Affine2D, viewport image.Point, ops []imageOp) { 639 for _, op := range ops { 640 bounds := layout.FRect(op.clip) 641 // clip is the union of all drawing affected by the clipping 642 // operation. TODO: tighten. 643 clip := f32.Rect(0, 0, float32(viewport.X), float32(viewport.Y)) 644 nclips := g.encodeClipStack(clip, bounds, op.path, false) 645 m := op.material 646 switch m.material { 647 case materialTexture: 648 t := trans.Mul(m.trans) 649 g.texOps = append(g.texOps, textureOp{ 650 sceneIdx: len(g.enc.scene), 651 img: m.data, 652 key: textureKey{ 653 transform: t, 654 handle: m.data.handle, 655 }, 656 }) 657 // Add fill command, its offset is resolved and filled in renderMaterials. 658 g.enc.fillImage(0) 659 case materialColor: 660 g.enc.fillColor(f32color.NRGBAToRGBA(op.material.color.SRGB())) 661 case materialLinearGradient: 662 // TODO: implement. 663 g.enc.fillColor(f32color.NRGBAToRGBA(op.material.color1.SRGB())) 664 default: 665 panic("not implemented") 666 } 667 if op.path != nil && op.path.path { 668 g.enc.fillMode(scene.FillModeNonzero) 669 g.enc.transform(op.path.trans.Invert()) 670 } 671 // Pop the clip stack. 672 for i := 0; i < nclips; i++ { 673 g.enc.endClip(clip) 674 } 675 } 676 } 677 678 // encodeClips encodes a stack of clip paths and return the stack depth. 679 func (g *compute) encodeClipStack(clip, bounds f32.Rectangle, p *pathOp, begin bool) int { 680 nclips := 0 681 if p != nil && p.parent != nil { 682 nclips += g.encodeClipStack(clip, bounds, p.parent, true) 683 nclips += 1 684 } 685 isStroke := p.stroke.Width > 0 686 if p != nil && p.path { 687 if isStroke { 688 g.enc.fillMode(scene.FillModeStroke) 689 g.enc.lineWidth(p.stroke.Width) 690 } 691 pathData, _ := g.drawOps.pathCache.get(p.pathKey) 692 g.enc.transform(p.trans) 693 g.enc.append(pathData.computePath) 694 } else { 695 g.enc.rect(bounds) 696 } 697 if begin { 698 g.enc.beginClip(clip) 699 if isStroke { 700 g.enc.fillMode(scene.FillModeNonzero) 701 } 702 if p != nil && p.path { 703 g.enc.transform(p.trans.Invert()) 704 } 705 } 706 return nclips 707 } 708 709 func encodePath(verts []byte) encoder { 710 var enc encoder 711 for len(verts) >= scene.CommandSize+4 { 712 cmd := ops.DecodeCommand(verts[4:]) 713 enc.scene = append(enc.scene, cmd) 714 enc.npathseg++ 715 verts = verts[scene.CommandSize+4:] 716 } 717 return enc 718 } 719 720 func (g *compute) render(tileDims image.Point) error { 721 const ( 722 // wgSize is the largest and most common workgroup size. 723 wgSize = 128 724 // PARTITION_SIZE from elements.comp 725 partitionSize = 32 * 4 726 ) 727 widthInBins := (tileDims.X + 15) / 16 728 heightInBins := (tileDims.Y + 7) / 8 729 if widthInBins*heightInBins > wgSize { 730 return fmt.Errorf("gpu: output too large (%dx%d)", tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx) 731 } 732 733 // Pad scene with zeroes to avoid reading garbage in elements.comp. 734 scenePadding := partitionSize - len(g.enc.scene)%partitionSize 735 g.enc.scene = append(g.enc.scene, make([]scene.Command, scenePadding)...) 736 737 realloced := false 738 scene := byteslice.Slice(g.enc.scene) 739 if s := len(scene); s > g.buffers.scene.size { 740 realloced = true 741 paddedCap := s * 11 / 10 742 if err := g.buffers.scene.ensureCapacity(g.ctx, driver.BufferBindingShaderStorage, paddedCap); err != nil { 743 return err 744 } 745 } 746 g.buffers.scene.buffer.Upload(scene) 747 748 w, h := tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx 749 if g.output.size.X != w || g.output.size.Y != h { 750 if err := g.resizeOutput(image.Pt(w, h)); err != nil { 751 return err 752 } 753 } 754 g.ctx.BindImageTexture(kernel4OutputUnit, g.output.image, driver.AccessWrite, driver.TextureFormatRGBA8) 755 if t := g.materials.tex; t != nil { 756 g.ctx.BindImageTexture(kernel4AtlasUnit, t, driver.AccessRead, driver.TextureFormatRGBA8) 757 } 758 759 // alloc is the number of allocated bytes for static buffers. 760 var alloc uint32 761 round := func(v, quantum int) int { 762 return (v + quantum - 1) &^ (quantum - 1) 763 } 764 malloc := func(size int) memAlloc { 765 size = round(size, 4) 766 offset := alloc 767 alloc += uint32(size) 768 return memAlloc{offset /*, uint32(size)*/} 769 } 770 771 *g.conf = config{ 772 n_elements: uint32(g.enc.npath), 773 n_pathseg: uint32(g.enc.npathseg), 774 width_in_tiles: uint32(tileDims.X), 775 height_in_tiles: uint32(tileDims.Y), 776 tile_alloc: malloc(g.enc.npath * pathSize), 777 bin_alloc: malloc(round(g.enc.npath, wgSize) * binSize), 778 ptcl_alloc: malloc(tileDims.X * tileDims.Y * ptclInitialAlloc), 779 pathseg_alloc: malloc(g.enc.npathseg * pathsegSize), 780 anno_alloc: malloc(g.enc.npath * annoSize), 781 trans_alloc: malloc(g.enc.ntrans * transSize), 782 } 783 784 numPartitions := (g.enc.numElements() + 127) / 128 785 // clearSize is the atomic partition counter plus flag and 2 states per partition. 786 clearSize := 4 + numPartitions*stateStride 787 if clearSize > g.buffers.state.size { 788 realloced = true 789 paddedCap := clearSize * 11 / 10 790 if err := g.buffers.state.ensureCapacity(g.ctx, driver.BufferBindingShaderStorage, paddedCap); err != nil { 791 return err 792 } 793 } 794 795 g.buffers.config.Upload(byteslice.Struct(g.conf)) 796 797 minSize := int(unsafe.Sizeof(memoryHeader{})) + int(alloc) 798 if minSize > g.buffers.memory.size { 799 realloced = true 800 // Add space for dynamic GPU allocations. 801 const sizeBump = 4 * 1024 * 1024 802 minSize += sizeBump 803 if err := g.buffers.memory.ensureCapacity(g.ctx, driver.BufferBindingShaderStorage, minSize); err != nil { 804 return err 805 } 806 } 807 for { 808 *g.memHeader = memoryHeader{ 809 mem_offset: alloc, 810 } 811 g.buffers.memory.buffer.Upload(byteslice.Struct(g.memHeader)) 812 g.buffers.state.buffer.Upload(g.zeros(clearSize)) 813 814 if realloced { 815 realloced = false 816 g.bindBuffers() 817 } 818 t := &g.timers 819 g.ctx.MemoryBarrier() 820 t.elements.begin() 821 g.ctx.BindProgram(g.programs.elements) 822 g.ctx.DispatchCompute(numPartitions, 1, 1) 823 g.ctx.MemoryBarrier() 824 t.elements.end() 825 t.tileAlloc.begin() 826 g.ctx.BindProgram(g.programs.tileAlloc) 827 g.ctx.DispatchCompute((g.enc.npath+wgSize-1)/wgSize, 1, 1) 828 g.ctx.MemoryBarrier() 829 t.tileAlloc.end() 830 t.pathCoarse.begin() 831 g.ctx.BindProgram(g.programs.pathCoarse) 832 g.ctx.DispatchCompute((g.enc.npathseg+31)/32, 1, 1) 833 g.ctx.MemoryBarrier() 834 t.pathCoarse.end() 835 t.backdropBinning.begin() 836 g.ctx.BindProgram(g.programs.backdrop) 837 g.ctx.DispatchCompute((g.enc.npath+wgSize-1)/wgSize, 1, 1) 838 // No barrier needed between backdrop and binning. 839 g.ctx.BindProgram(g.programs.binning) 840 g.ctx.DispatchCompute((g.enc.npath+wgSize-1)/wgSize, 1, 1) 841 g.ctx.MemoryBarrier() 842 t.backdropBinning.end() 843 t.coarse.begin() 844 g.ctx.BindProgram(g.programs.coarse) 845 g.ctx.DispatchCompute(widthInBins, heightInBins, 1) 846 g.ctx.MemoryBarrier() 847 t.coarse.end() 848 t.kernel4.begin() 849 g.ctx.BindProgram(g.programs.kernel4) 850 g.ctx.DispatchCompute(tileDims.X, tileDims.Y, 1) 851 g.ctx.MemoryBarrier() 852 t.kernel4.end() 853 854 if err := g.buffers.memory.buffer.Download(byteslice.Struct(g.memHeader)); err != nil { 855 if err == driver.ErrContentLost { 856 continue 857 } 858 return err 859 } 860 switch errCode := g.memHeader.mem_error; errCode { 861 case memNoError: 862 return nil 863 case memMallocFailed: 864 // Resize memory and try again. 865 realloced = true 866 sz := g.buffers.memory.size * 15 / 10 867 if err := g.buffers.memory.ensureCapacity(g.ctx, driver.BufferBindingShaderStorage, sz); err != nil { 868 return err 869 } 870 continue 871 default: 872 return fmt.Errorf("compute: shader program failed with error %d", errCode) 873 } 874 } 875 } 876 877 // zeros returns a byte slice with size bytes of zeros. 878 func (g *compute) zeros(size int) []byte { 879 if cap(g.zeroSlice) < size { 880 g.zeroSlice = append(g.zeroSlice, make([]byte, size)...) 881 } 882 return g.zeroSlice[:size] 883 } 884 885 func (g *compute) resizeOutput(size image.Point) error { 886 if g.output.image != nil { 887 g.output.image.Release() 888 g.output.image = nil 889 } 890 img, err := g.ctx.NewTexture(driver.TextureFormatRGBA8, size.X, size.Y, 891 driver.FilterNearest, 892 driver.FilterNearest, 893 driver.BufferBindingShaderStorage|driver.BufferBindingTexture) 894 if err != nil { 895 return err 896 } 897 g.output.image = img 898 g.output.size = size 899 return nil 900 } 901 902 func (g *compute) Release() { 903 if g.drawOps.pathCache != nil { 904 g.drawOps.pathCache.release() 905 } 906 if g.cache != nil { 907 g.cache.release() 908 } 909 progs := []driver.Program{ 910 g.programs.elements, 911 g.programs.tileAlloc, 912 g.programs.pathCoarse, 913 g.programs.backdrop, 914 g.programs.binning, 915 g.programs.coarse, 916 g.programs.kernel4, 917 } 918 if p := g.output.blitProg; p != nil { 919 p.Release() 920 } 921 for _, p := range progs { 922 if p != nil { 923 p.Release() 924 } 925 } 926 g.buffers.scene.release() 927 g.buffers.state.release() 928 g.buffers.memory.release() 929 if b := g.buffers.config; b != nil { 930 b.Release() 931 } 932 if g.output.image != nil { 933 g.output.image.Release() 934 } 935 if g.images.tex != nil { 936 g.images.tex.Release() 937 } 938 if g.materials.layout != nil { 939 g.materials.layout.Release() 940 } 941 if g.materials.prog != nil { 942 g.materials.prog.Release() 943 } 944 if g.materials.fbo != nil { 945 g.materials.fbo.Release() 946 } 947 if g.materials.tex != nil { 948 g.materials.tex.Release() 949 } 950 g.materials.buffer.release() 951 if b := g.materials.uniBuf; b != nil { 952 b.Release() 953 } 954 if g.timers.t != nil { 955 g.timers.t.release() 956 } 957 958 *g = compute{} 959 } 960 961 func (g *compute) bindBuffers() { 962 bindStorageBuffers(g.programs.elements, g.buffers.memory.buffer, g.buffers.config, g.buffers.scene.buffer, g.buffers.state.buffer) 963 bindStorageBuffers(g.programs.tileAlloc, g.buffers.memory.buffer, g.buffers.config) 964 bindStorageBuffers(g.programs.pathCoarse, g.buffers.memory.buffer, g.buffers.config) 965 bindStorageBuffers(g.programs.backdrop, g.buffers.memory.buffer, g.buffers.config) 966 bindStorageBuffers(g.programs.binning, g.buffers.memory.buffer, g.buffers.config) 967 bindStorageBuffers(g.programs.coarse, g.buffers.memory.buffer, g.buffers.config) 968 bindStorageBuffers(g.programs.kernel4, g.buffers.memory.buffer, g.buffers.config) 969 } 970 971 func (b *sizedBuffer) release() { 972 if b.buffer == nil { 973 return 974 } 975 b.buffer.Release() 976 *b = sizedBuffer{} 977 } 978 979 func (b *sizedBuffer) ensureCapacity(ctx driver.Device, binding driver.BufferBinding, size int) error { 980 if b.size >= size { 981 return nil 982 } 983 if b.buffer != nil { 984 b.release() 985 } 986 buf, err := ctx.NewBuffer(binding, size) 987 if err != nil { 988 return err 989 } 990 b.buffer = buf 991 b.size = size 992 return nil 993 } 994 995 func bindStorageBuffers(prog driver.Program, buffers ...driver.Buffer) { 996 for i, buf := range buffers { 997 prog.SetStorageBuffer(i, buf) 998 } 999 } 1000 1001 var bo = binary.LittleEndian 1002 1003 func (e *encoder) reset() { 1004 e.scene = e.scene[:0] 1005 e.npath = 0 1006 e.npathseg = 0 1007 e.ntrans = 0 1008 } 1009 1010 func (e *encoder) numElements() int { 1011 return len(e.scene) 1012 } 1013 1014 func (e *encoder) append(e2 encoder) { 1015 e.scene = append(e.scene, e2.scene...) 1016 e.npath += e2.npath 1017 e.npathseg += e2.npathseg 1018 e.ntrans += e2.ntrans 1019 } 1020 1021 func (e *encoder) transform(m f32.Affine2D) { 1022 e.scene = append(e.scene, scene.Transform(m)) 1023 e.ntrans++ 1024 } 1025 1026 func (e *encoder) lineWidth(width float32) { 1027 e.scene = append(e.scene, scene.SetLineWidth(width)) 1028 } 1029 1030 func (e *encoder) fillMode(mode scene.FillMode) { 1031 e.scene = append(e.scene, scene.SetFillMode(mode)) 1032 } 1033 1034 func (e *encoder) beginClip(bbox f32.Rectangle) { 1035 e.scene = append(e.scene, scene.BeginClip(bbox)) 1036 e.npath++ 1037 } 1038 1039 func (e *encoder) endClip(bbox f32.Rectangle) { 1040 e.scene = append(e.scene, scene.EndClip(bbox)) 1041 e.npath++ 1042 } 1043 1044 func (e *encoder) rect(r f32.Rectangle) { 1045 // Rectangle corners, clock-wise. 1046 c0, c1, c2, c3 := r.Min, f32.Pt(r.Min.X, r.Max.Y), r.Max, f32.Pt(r.Max.X, r.Min.Y) 1047 e.line(c0, c1) 1048 e.line(c1, c2) 1049 e.line(c2, c3) 1050 e.line(c3, c0) 1051 } 1052 1053 func (e *encoder) fillColor(col color.RGBA) { 1054 e.scene = append(e.scene, scene.FillColor(col)) 1055 e.npath++ 1056 } 1057 1058 func (e *encoder) setFillImageOffset(index int, offset image.Point) { 1059 x := int16(offset.X) 1060 y := int16(offset.Y) 1061 e.scene[index][2] = uint32(uint16(x)) | uint32(uint16(y))<<16 1062 } 1063 1064 func (e *encoder) fillImage(index int) { 1065 e.scene = append(e.scene, scene.FillImage(index)) 1066 e.npath++ 1067 } 1068 1069 func (e *encoder) line(start, end f32.Point) { 1070 e.scene = append(e.scene, scene.Line(start, end)) 1071 e.npathseg++ 1072 } 1073 1074 func (e *encoder) quad(start, ctrl, end f32.Point) { 1075 e.scene = append(e.scene, scene.Quad(start, ctrl, end)) 1076 e.npathseg++ 1077 }