github.com/cybriq/giocore@v0.0.7-0.20210703034601-cfb9cb5f3900/gpu/compute.go (about)

     1  // SPDX-License-Identifier: Unlicense OR MIT
     2  
     3  package gpu
     4  
     5  import (
     6  	"encoding/binary"
     7  	"errors"
     8  	"fmt"
     9  	"image"
    10  	"image/color"
    11  	"math/bits"
    12  	"time"
    13  	"unsafe"
    14  
    15  	"github.com/cybriq/giocore/f32"
    16  	"github.com/cybriq/giocore/gpu/internal/driver"
    17  	"github.com/cybriq/giocore/internal/byteslice"
    18  	"github.com/cybriq/giocore/internal/f32color"
    19  	"github.com/cybriq/giocore/internal/ops"
    20  	"github.com/cybriq/giocore/internal/scene"
    21  	"github.com/cybriq/giocore/op"
    22  	layout "github.com/cybriq/giocore/utils"
    23  )
    24  
// compute is the compute shader based renderer. It owns the driver device,
// the scene encoder, the compute programs and their storage buffers, the
// output texture, and the image and material atlases used to render a frame.
type compute struct {
	// ctx is the device all GPU resources are created on; enc holds the
	// scene commands encoded by Collect.
	ctx driver.Device
	enc encoder

	// drawOps collects the operations of the current frame. texOps lists the
	// image operations that need atlas space. maxTextureDim is the upper
	// limit for the atlas dimensions.
	drawOps       drawOps
	texOps        []textureOp
	cache         *resourceCache
	maxTextureDim int

	// programs holds the compute programs, dispatched by render.
	programs struct {
		elements   driver.Program
		tileAlloc  driver.Program
		pathCoarse driver.Program
		backdrop   driver.Program
		binning    driver.Program
		coarse     driver.Program
		kernel4    driver.Program
	}
	// buffers holds the storage buffers shared with the compute programs.
	buffers struct {
		config driver.Buffer
		scene  sizedBuffer
		state  sizedBuffer
		memory sizedBuffer
	}
	// output is the texture the compute programs render into, and the
	// program used to copy it to the output framebuffer.
	output struct {
		size image.Point
		// image is the output texture. Note that it is in RGBA format,
		// but contains data in sRGB. See blitOutput for more detail.
		image    driver.Texture
		blitProg driver.Program
	}
	// images contains ImageOp images packed into a texture atlas.
	images struct {
		packer packer
		// positions maps imageOpData.handles to positions inside tex.
		positions map[interface{}]image.Point
		tex       driver.Texture
	}
	// materials contains the pre-processed materials (transformed images for
	// now, gradients etc. later) packed in a texture atlas. The atlas is used
	// as source in kernel4.
	materials struct {
		// offsets maps texture ops to the offsets to put in their FillImage commands.
		offsets map[textureKey]image.Point

		prog   driver.Program
		layout driver.InputLayout

		packer packer

		tex   driver.Texture
		fbo   driver.Framebuffer
		quads []materialVertex

		buffer sizedBuffer

		uniforms *materialUniforms
		uniBuf   driver.Buffer
	}
	// timers holds the per-stage GPU timers and the most recent formatted
	// profile string. The timers are only created when profiling is enabled
	// and the device supports them (see Frame).
	timers struct {
		profile         string
		t               *timers
		materials       *timer
		elements        *timer
		tileAlloc       *timer
		pathCoarse      *timer
		backdropBinning *timer
		coarse          *timer
		kernel4         *timer
		blit            *timer
	}

	// The following fields hold scratch space to avoid garbage.
	zeroSlice []byte
	memHeader *memoryHeader
	conf      *config
}
   102  
// materialUniforms is the uniform data uploaded to the material program's
// vertex uniform buffer. renderMaterials fills scale and pos so that atlas
// pixel coordinates are transformed to clip space.
type materialUniforms struct {
	scale [2]float32
	pos   [2]float32
}
   107  
// materialVertex describes a vertex of a quad used to render a transformed
// material. posX and posY are the vertex position in the material atlas;
// u and v are the texture coordinates into the image atlas.
type materialVertex struct {
	posX, posY float32
	u, v       float32
}
   114  
// textureKey identifies textureOp. It pairs the image handle with the
// transform applied to it, and is used as the key of the material offsets
// map.
type textureKey struct {
	handle    interface{}
	transform f32.Affine2D
}
   120  
// textureOp represents an imageOp that requires texture space.
type textureOp struct {
	// sceneIdx is the index in the scene that contains the fill image command
	// that corresponds to the operation.
	sceneIdx int
	// key identifies the transformed image in the material atlas; img is the
	// image data of the operation.
	key textureKey
	img imageOpData

	// pos is the position of the untransformed image in the images texture.
	pos image.Point
}
   132  
// encoder accumulates the scene commands for a frame. npath, npathseg and
// ntrans are counters that render uses to size the GPU allocations for
// paths, path segments and transforms.
type encoder struct {
	scene    []scene.Command
	npath    int
	npathseg int
	ntrans   int
}
   139  
// encodeState pairs a transform with a clip rectangle.
// NOTE(review): not referenced in the visible part of this file; confirm
// whether it is still in use.
type encodeState struct {
	trans f32.Affine2D
	clip  f32.Rectangle
}
   144  
// sizedBuffer is a driver buffer together with its recorded size. render
// compares the size against byte counts to decide whether the buffer must
// grow.
type sizedBuffer struct {
	size   int
	buffer driver.Buffer
}
   149  
// config matches Config in setup.h. It is uploaded verbatim to the config
// storage buffer by render, so the field order and types must stay in sync
// with the shader-side definition.
type config struct {
	n_elements      uint32 // paths
	n_pathseg       uint32
	width_in_tiles  uint32
	height_in_tiles uint32
	tile_alloc      memAlloc
	bin_alloc       memAlloc
	ptcl_alloc      memAlloc
	pathseg_alloc   memAlloc
	anno_alloc      memAlloc
	trans_alloc     memAlloc
}
   163  
// memAlloc matches Alloc in mem.h. Only the offset of the allocation is
// recorded; the size field is commented out here.
type memAlloc struct {
	offset uint32
	//size   uint32
}
   169  
// memoryHeader matches the header of Memory in mem.h. render uploads it with
// mem_offset set to the number of statically allocated bytes and reads it
// back after the dispatches to inspect mem_error.
type memoryHeader struct {
	mem_offset uint32
	mem_error  uint32
}
   175  
// GPU structure sizes and constants.
const (
	// Tile dimensions in pixels, the per-tile initial allocation used for
	// ptcl_alloc, and the image units the kernel4 output and atlas textures
	// are bound to.
	tileWidthPx       = 32
	tileHeightPx      = 32
	ptclInitialAlloc  = 1024
	kernel4OutputUnit = 2
	kernel4AtlasUnit  = 3

	// Per-element sizes used by render to compute the static allocations,
	// and the per-partition stride of the state buffer.
	pathSize    = 12
	binSize     = 8
	pathsegSize = 52
	annoSize    = 32
	transSize   = 24
	stateSize   = 60
	stateStride = 4 + 2*stateSize
)
   192  
// mem.h constants. These are the values of memoryHeader.mem_error that
// render distinguishes after running the compute programs.
const (
	memNoError      = 0 // NO_ERROR
	memMallocFailed = 1 // ERR_MALLOC_FAILED
)
   198  
   199  func newCompute(ctx driver.Device) (*compute, error) {
   200  	maxDim := ctx.Caps().MaxTextureSize
   201  	// Large atlas textures cause artifacts due to precision loss in
   202  	// shaders.
   203  	if cap := 8192; maxDim > cap {
   204  		maxDim = cap
   205  	}
   206  	g := &compute{
   207  		ctx:           ctx,
   208  		cache:         newResourceCache(),
   209  		maxTextureDim: maxDim,
   210  		conf:          new(config),
   211  		memHeader:     new(memoryHeader),
   212  	}
   213  
   214  	blitProg, err := ctx.NewProgram(shader_copy_vert, shader_copy_frag)
   215  	if err != nil {
   216  		g.Release()
   217  		return nil, err
   218  	}
   219  	g.output.blitProg = blitProg
   220  
   221  	materialProg, err := ctx.NewProgram(shader_material_vert, shader_material_frag)
   222  	if err != nil {
   223  		g.Release()
   224  		return nil, err
   225  	}
   226  	g.materials.prog = materialProg
   227  	progLayout, err := ctx.NewInputLayout(shader_material_vert, []driver.InputDesc{
   228  		{Type: driver.DataTypeFloat, Size: 2, Offset: 0},
   229  		{Type: driver.DataTypeFloat, Size: 2, Offset: 4 * 2},
   230  	})
   231  	if err != nil {
   232  		g.Release()
   233  		return nil, err
   234  	}
   235  	g.materials.layout = progLayout
   236  	g.materials.uniforms = new(materialUniforms)
   237  
   238  	buf, err := ctx.NewBuffer(driver.BufferBindingUniforms, int(unsafe.Sizeof(*g.materials.uniforms)))
   239  	if err != nil {
   240  		g.Release()
   241  		return nil, err
   242  	}
   243  	g.materials.uniBuf = buf
   244  	g.materials.prog.SetVertexUniforms(buf)
   245  
   246  	g.drawOps.pathCache = newOpCache()
   247  	g.drawOps.compute = true
   248  
   249  	buf, err = ctx.NewBuffer(driver.BufferBindingShaderStorage, int(unsafe.Sizeof(config{})))
   250  	if err != nil {
   251  		g.Release()
   252  		return nil, err
   253  	}
   254  	g.buffers.config = buf
   255  
   256  	shaders := []struct {
   257  		prog *driver.Program
   258  		src  driver.ShaderSources
   259  	}{
   260  		{&g.programs.elements, shader_elements_comp},
   261  		{&g.programs.tileAlloc, shader_tile_alloc_comp},
   262  		{&g.programs.pathCoarse, shader_path_coarse_comp},
   263  		{&g.programs.backdrop, shader_backdrop_comp},
   264  		{&g.programs.binning, shader_binning_comp},
   265  		{&g.programs.coarse, shader_coarse_comp},
   266  		{&g.programs.kernel4, shader_kernel4_comp},
   267  	}
   268  	for _, shader := range shaders {
   269  		p, err := ctx.NewComputeProgram(shader.src)
   270  		if err != nil {
   271  			g.Release()
   272  			return nil, err
   273  		}
   274  		*shader.prog = p
   275  	}
   276  	return g, nil
   277  }
   278  
   279  func (g *compute) Collect(viewport image.Point, ops *op.Ops) {
   280  	g.drawOps.reset(g.cache, viewport)
   281  	g.drawOps.collect(g.ctx, g.cache, ops, viewport)
   282  	for _, img := range g.drawOps.allImageOps {
   283  		expandPathOp(img.path, img.clip)
   284  	}
   285  	g.encode(viewport)
   286  }
   287  
   288  func (g *compute) Clear(col color.NRGBA) {
   289  	g.drawOps.clear = true
   290  	g.drawOps.clearColor = f32color.LinearFromSRGB(col)
   291  }
   292  
   293  func (g *compute) Frame() error {
   294  	viewport := g.drawOps.viewport
   295  	tileDims := image.Point{
   296  		X: (viewport.X + tileWidthPx - 1) / tileWidthPx,
   297  		Y: (viewport.Y + tileHeightPx - 1) / tileHeightPx,
   298  	}
   299  
   300  	defFBO := g.ctx.BeginFrame(g.drawOps.clear, viewport)
   301  	defer g.ctx.EndFrame()
   302  
   303  	if g.drawOps.profile && g.timers.t == nil && g.ctx.Caps().Features.Has(driver.FeatureTimers) {
   304  		t := &g.timers
   305  		t.t = newTimers(g.ctx)
   306  		t.materials = g.timers.t.newTimer()
   307  		t.elements = g.timers.t.newTimer()
   308  		t.tileAlloc = g.timers.t.newTimer()
   309  		t.pathCoarse = g.timers.t.newTimer()
   310  		t.backdropBinning = g.timers.t.newTimer()
   311  		t.coarse = g.timers.t.newTimer()
   312  		t.kernel4 = g.timers.t.newTimer()
   313  		t.blit = g.timers.t.newTimer()
   314  	}
   315  
   316  	mat := g.timers.materials
   317  	mat.begin()
   318  	if err := g.uploadImages(); err != nil {
   319  		return err
   320  	}
   321  	if err := g.renderMaterials(); err != nil {
   322  		return err
   323  	}
   324  	mat.end()
   325  	if err := g.render(tileDims); err != nil {
   326  		return err
   327  	}
   328  	g.ctx.BindFramebuffer(defFBO)
   329  	g.blitOutput(viewport)
   330  	g.cache.frame()
   331  	g.drawOps.pathCache.frame()
   332  	t := &g.timers
   333  	if g.drawOps.profile && t.t.ready() {
   334  		mat := t.materials.Elapsed
   335  		et, tat, pct, bbt := t.elements.Elapsed, t.tileAlloc.Elapsed, t.pathCoarse.Elapsed, t.backdropBinning.Elapsed
   336  		ct, k4t := t.coarse.Elapsed, t.kernel4.Elapsed
   337  		blit := t.blit.Elapsed
   338  		ft := mat + et + tat + pct + bbt + ct + k4t + blit
   339  		q := 100 * time.Microsecond
   340  		ft = ft.Round(q)
   341  		mat = mat.Round(q)
   342  		et, tat, pct, bbt = et.Round(q), tat.Round(q), pct.Round(q), bbt.Round(q)
   343  		ct, k4t = ct.Round(q), k4t.Round(q)
   344  		blit = blit.Round(q)
   345  		t.profile = fmt.Sprintf("ft:%7s mat: %7s et:%7s tat:%7s pct:%7s bbt:%7s ct:%7s k4t:%7s blit:%7s", ft, mat, et, tat, pct, bbt, ct, k4t, blit)
   346  	}
   347  	g.drawOps.clear = false
   348  	return nil
   349  }
   350  
   351  func (g *compute) Profile() string {
   352  	return g.timers.profile
   353  }
   354  
   355  // blitOutput copies the compute render output to the output FBO. We need to
   356  // copy because compute shaders can only write to textures, not FBOs. Compute
   357  // shader can only write to RGBA textures, but since we actually render in sRGB
   358  // format we can't use glBlitFramebuffer, because it does sRGB conversion.
   359  func (g *compute) blitOutput(viewport image.Point) {
   360  	t := g.timers.blit
   361  	t.begin()
   362  	if !g.drawOps.clear {
   363  		g.ctx.BlendFunc(driver.BlendFactorOne, driver.BlendFactorOneMinusSrcAlpha)
   364  		g.ctx.SetBlend(true)
   365  		defer g.ctx.SetBlend(false)
   366  	}
   367  	g.ctx.Viewport(0, 0, viewport.X, viewport.Y)
   368  	g.ctx.BindTexture(0, g.output.image)
   369  	g.ctx.BindProgram(g.output.blitProg)
   370  	g.ctx.DrawArrays(driver.DrawModeTriangleStrip, 0, 4)
   371  	t.end()
   372  }
   373  
   374  func (g *compute) encode(viewport image.Point) {
   375  	g.texOps = g.texOps[:0]
   376  	g.enc.reset()
   377  
   378  	// Flip Y-axis.
   379  	flipY := f32.Affine2D{}.Scale(f32.Pt(0, 0), f32.Pt(1, -1)).Offset(f32.Pt(0, float32(viewport.Y)))
   380  	g.enc.transform(flipY)
   381  	if g.drawOps.clear {
   382  		g.enc.rect(f32.Rectangle{Max: layout.FPt(viewport)})
   383  		g.enc.fillColor(f32color.NRGBAToRGBA(g.drawOps.clearColor.SRGB()))
   384  	}
   385  	g.encodeOps(flipY, viewport, g.drawOps.allImageOps)
   386  }
   387  
   388  func (g *compute) renderMaterials() error {
   389  	m := &g.materials
   390  	m.quads = m.quads[:0]
   391  	resize := false
   392  	reclaimed := false
   393  restart:
   394  	for {
   395  		for _, op := range g.texOps {
   396  			if off, exists := m.offsets[op.key]; exists {
   397  				g.enc.setFillImageOffset(op.sceneIdx, off)
   398  				continue
   399  			}
   400  			quad, bounds := g.materialQuad(op.key.transform, op.img, op.pos)
   401  
   402  			// A material is clipped to avoid drawing outside its bounds inside the atlas. However,
   403  			// imprecision in the clipping may cause a single pixel overflow. Be safe.
   404  			size := bounds.Size().Add(image.Pt(1, 1))
   405  			place, fits := m.packer.tryAdd(size)
   406  			if !fits {
   407  				m.offsets = nil
   408  				m.quads = m.quads[:0]
   409  				m.packer.clear()
   410  				if !reclaimed {
   411  					// Some images may no longer be in use, try again
   412  					// after clearing existing maps.
   413  					reclaimed = true
   414  				} else {
   415  					m.packer.maxDim += 256
   416  					resize = true
   417  					if m.packer.maxDim > g.maxTextureDim {
   418  						return errors.New("compute: no space left in material atlas")
   419  					}
   420  				}
   421  				m.packer.newPage()
   422  				continue restart
   423  			}
   424  			// Position quad to match place.
   425  			offset := place.Pos.Sub(bounds.Min)
   426  			offsetf := layout.FPt(offset)
   427  			for i := range quad {
   428  				quad[i].posX += offsetf.X
   429  				quad[i].posY += offsetf.Y
   430  			}
   431  			// Draw quad as two triangles.
   432  			m.quads = append(m.quads, quad[0], quad[1], quad[3], quad[3], quad[1], quad[2])
   433  			if m.offsets == nil {
   434  				m.offsets = make(map[textureKey]image.Point)
   435  			}
   436  			m.offsets[op.key] = offset
   437  			g.enc.setFillImageOffset(op.sceneIdx, offset)
   438  		}
   439  		break
   440  	}
   441  	if len(m.quads) == 0 {
   442  		return nil
   443  	}
   444  	texSize := m.packer.maxDim
   445  	if resize {
   446  		if m.fbo != nil {
   447  			m.fbo.Release()
   448  			m.fbo = nil
   449  		}
   450  		if m.tex != nil {
   451  			m.tex.Release()
   452  			m.tex = nil
   453  		}
   454  		handle, err := g.ctx.NewTexture(driver.TextureFormatRGBA8, texSize, texSize,
   455  			driver.FilterNearest, driver.FilterNearest,
   456  			driver.BufferBindingShaderStorage|driver.BufferBindingFramebuffer)
   457  		if err != nil {
   458  			return fmt.Errorf("compute: failed to create material atlas: %v", err)
   459  		}
   460  		m.tex = handle
   461  		fbo, err := g.ctx.NewFramebuffer(handle, 0)
   462  		if err != nil {
   463  			return fmt.Errorf("compute: failed to create material framebuffer: %v", err)
   464  		}
   465  		m.fbo = fbo
   466  	}
   467  	// Transform to clip space: [-1, -1] - [1, 1].
   468  	g.materials.uniforms.scale = [2]float32{2 / float32(texSize), 2 / float32(texSize)}
   469  	g.materials.uniforms.pos = [2]float32{-1, -1}
   470  	g.materials.uniBuf.Upload(byteslice.Struct(g.materials.uniforms))
   471  	vertexData := byteslice.Slice(m.quads)
   472  	n := pow2Ceil(len(vertexData))
   473  	m.buffer.ensureCapacity(g.ctx, driver.BufferBindingVertices, n)
   474  	m.buffer.buffer.Upload(vertexData)
   475  	g.ctx.BindTexture(0, g.images.tex)
   476  	g.ctx.BindFramebuffer(m.fbo)
   477  	g.ctx.Viewport(0, 0, texSize, texSize)
   478  	if reclaimed {
   479  		g.ctx.Clear(0, 0, 0, 0)
   480  	}
   481  	g.ctx.BindProgram(m.prog)
   482  	g.ctx.BindVertexBuffer(m.buffer.buffer, int(unsafe.Sizeof(m.quads[0])), 0)
   483  	g.ctx.BindInputLayout(m.layout)
   484  	g.ctx.DrawArrays(driver.DrawModeTriangles, 0, len(m.quads))
   485  	return nil
   486  }
   487  
   488  func (g *compute) uploadImages() error {
   489  	// padding is the number of pixels added to the right and below
   490  	// images, to avoid atlas filtering artifacts.
   491  	const padding = 1
   492  
   493  	a := &g.images
   494  	var uploads map[interface{}]*image.RGBA
   495  	resize := false
   496  	reclaimed := false
   497  restart:
   498  	for {
   499  		for i, op := range g.texOps {
   500  			if pos, exists := a.positions[op.img.handle]; exists {
   501  				g.texOps[i].pos = pos
   502  				continue
   503  			}
   504  			size := op.img.src.Bounds().Size().Add(image.Pt(padding, padding))
   505  			place, fits := a.packer.tryAdd(size)
   506  			if !fits {
   507  				a.positions = nil
   508  				uploads = nil
   509  				a.packer.clear()
   510  				if !reclaimed {
   511  					// Some images may no longer be in use, try again
   512  					// after clearing existing maps.
   513  					reclaimed = true
   514  				} else {
   515  					a.packer.maxDim += 256
   516  					resize = true
   517  					if a.packer.maxDim > g.maxTextureDim {
   518  						return errors.New("compute: no space left in image atlas")
   519  					}
   520  				}
   521  				a.packer.newPage()
   522  				continue restart
   523  			}
   524  			if a.positions == nil {
   525  				a.positions = make(map[interface{}]image.Point)
   526  			}
   527  			a.positions[op.img.handle] = place.Pos
   528  			g.texOps[i].pos = place.Pos
   529  			if uploads == nil {
   530  				uploads = make(map[interface{}]*image.RGBA)
   531  			}
   532  			uploads[op.img.handle] = op.img.src
   533  		}
   534  		break
   535  	}
   536  	if len(uploads) == 0 {
   537  		return nil
   538  	}
   539  	if resize {
   540  		if a.tex != nil {
   541  			a.tex.Release()
   542  			a.tex = nil
   543  		}
   544  		sz := a.packer.maxDim
   545  		handle, err := g.ctx.NewTexture(driver.TextureFormatSRGB, sz, sz, driver.FilterLinear, driver.FilterLinear, driver.BufferBindingTexture)
   546  		if err != nil {
   547  			return fmt.Errorf("compute: failed to create image atlas: %v", err)
   548  		}
   549  		a.tex = handle
   550  	}
   551  	for h, img := range uploads {
   552  		pos, ok := a.positions[h]
   553  		if !ok {
   554  			panic("compute: internal error: image not placed")
   555  		}
   556  		size := img.Bounds().Size()
   557  		driver.UploadImage(a.tex, pos, img)
   558  		rightPadding := image.Pt(padding, size.Y)
   559  		a.tex.Upload(image.Pt(pos.X+size.X, pos.Y), rightPadding, g.zeros(rightPadding.X*rightPadding.Y*4))
   560  		bottomPadding := image.Pt(size.X, padding)
   561  		a.tex.Upload(image.Pt(pos.X, pos.Y+size.Y), bottomPadding, g.zeros(bottomPadding.X*bottomPadding.Y*4))
   562  	}
   563  	return nil
   564  }
   565  
   566  func pow2Ceil(v int) int {
   567  	exp := bits.Len(uint(v))
   568  	if bits.OnesCount(uint(v)) == 1 {
   569  		exp--
   570  	}
   571  	return 1 << exp
   572  }
   573  
   574  // materialQuad constructs a quad that represents the transformed image. It returns the quad
   575  // and its bounds.
   576  func (g *compute) materialQuad(M f32.Affine2D, img imageOpData, uvPos image.Point) ([4]materialVertex, image.Rectangle) {
   577  	imgSize := layout.FPt(img.src.Bounds().Size())
   578  	sx, hx, ox, hy, sy, oy := M.Elems()
   579  	transOff := f32.Pt(ox, oy)
   580  	// The 4 corners of the image rectangle transformed by M, excluding its offset, are:
   581  	//
   582  	// q0: M * (0, 0)   q3: M * (w, 0)
   583  	// q1: M * (0, h)   q2: M * (w, h)
   584  	//
   585  	// Note that q0 = M*0 = 0, q2 = q1 + q3.
   586  	q0 := f32.Pt(0, 0)
   587  	q1 := f32.Pt(hx*imgSize.Y, sy*imgSize.Y)
   588  	q3 := f32.Pt(sx*imgSize.X, hy*imgSize.X)
   589  	q2 := q1.Add(q3)
   590  	q0 = q0.Add(transOff)
   591  	q1 = q1.Add(transOff)
   592  	q2 = q2.Add(transOff)
   593  	q3 = q3.Add(transOff)
   594  
   595  	boundsf := f32.Rectangle{
   596  		Min: min(min(q0, q1), min(q2, q3)),
   597  		Max: max(max(q0, q1), max(q2, q3)),
   598  	}
   599  
   600  	bounds := boundRectF(boundsf)
   601  	uvPosf := layout.FPt(uvPos)
   602  	atlasScale := 1 / float32(g.images.packer.maxDim)
   603  	uvBounds := f32.Rectangle{
   604  		Min: uvPosf.Mul(atlasScale),
   605  		Max: uvPosf.Add(imgSize).Mul(atlasScale),
   606  	}
   607  	quad := [4]materialVertex{
   608  		{posX: q0.X, posY: q0.Y, u: uvBounds.Min.X, v: uvBounds.Min.Y},
   609  		{posX: q1.X, posY: q1.Y, u: uvBounds.Min.X, v: uvBounds.Max.Y},
   610  		{posX: q2.X, posY: q2.Y, u: uvBounds.Max.X, v: uvBounds.Max.Y},
   611  		{posX: q3.X, posY: q3.Y, u: uvBounds.Max.X, v: uvBounds.Min.Y},
   612  	}
   613  	return quad, bounds
   614  }
   615  
   616  func max(p1, p2 f32.Point) f32.Point {
   617  	p := p1
   618  	if p2.X > p.X {
   619  		p.X = p2.X
   620  	}
   621  	if p2.Y > p.Y {
   622  		p.Y = p2.Y
   623  	}
   624  	return p
   625  }
   626  
   627  func min(p1, p2 f32.Point) f32.Point {
   628  	p := p1
   629  	if p2.X < p.X {
   630  		p.X = p2.X
   631  	}
   632  	if p2.Y < p.Y {
   633  		p.Y = p2.Y
   634  	}
   635  	return p
   636  }
   637  
   638  func (g *compute) encodeOps(trans f32.Affine2D, viewport image.Point, ops []imageOp) {
   639  	for _, op := range ops {
   640  		bounds := layout.FRect(op.clip)
   641  		// clip is the union of all drawing affected by the clipping
   642  		// operation. TODO: tighten.
   643  		clip := f32.Rect(0, 0, float32(viewport.X), float32(viewport.Y))
   644  		nclips := g.encodeClipStack(clip, bounds, op.path, false)
   645  		m := op.material
   646  		switch m.material {
   647  		case materialTexture:
   648  			t := trans.Mul(m.trans)
   649  			g.texOps = append(g.texOps, textureOp{
   650  				sceneIdx: len(g.enc.scene),
   651  				img:      m.data,
   652  				key: textureKey{
   653  					transform: t,
   654  					handle:    m.data.handle,
   655  				},
   656  			})
   657  			// Add fill command, its offset is resolved and filled in renderMaterials.
   658  			g.enc.fillImage(0)
   659  		case materialColor:
   660  			g.enc.fillColor(f32color.NRGBAToRGBA(op.material.color.SRGB()))
   661  		case materialLinearGradient:
   662  			// TODO: implement.
   663  			g.enc.fillColor(f32color.NRGBAToRGBA(op.material.color1.SRGB()))
   664  		default:
   665  			panic("not implemented")
   666  		}
   667  		if op.path != nil && op.path.path {
   668  			g.enc.fillMode(scene.FillModeNonzero)
   669  			g.enc.transform(op.path.trans.Invert())
   670  		}
   671  		// Pop the clip stack.
   672  		for i := 0; i < nclips; i++ {
   673  			g.enc.endClip(clip)
   674  		}
   675  	}
   676  }
   677  
   678  // encodeClips encodes a stack of clip paths and return the stack depth.
   679  func (g *compute) encodeClipStack(clip, bounds f32.Rectangle, p *pathOp, begin bool) int {
   680  	nclips := 0
   681  	if p != nil && p.parent != nil {
   682  		nclips += g.encodeClipStack(clip, bounds, p.parent, true)
   683  		nclips += 1
   684  	}
   685  	isStroke := p.stroke.Width > 0
   686  	if p != nil && p.path {
   687  		if isStroke {
   688  			g.enc.fillMode(scene.FillModeStroke)
   689  			g.enc.lineWidth(p.stroke.Width)
   690  		}
   691  		pathData, _ := g.drawOps.pathCache.get(p.pathKey)
   692  		g.enc.transform(p.trans)
   693  		g.enc.append(pathData.computePath)
   694  	} else {
   695  		g.enc.rect(bounds)
   696  	}
   697  	if begin {
   698  		g.enc.beginClip(clip)
   699  		if isStroke {
   700  			g.enc.fillMode(scene.FillModeNonzero)
   701  		}
   702  		if p != nil && p.path {
   703  			g.enc.transform(p.trans.Invert())
   704  		}
   705  	}
   706  	return nclips
   707  }
   708  
   709  func encodePath(verts []byte) encoder {
   710  	var enc encoder
   711  	for len(verts) >= scene.CommandSize+4 {
   712  		cmd := ops.DecodeCommand(verts[4:])
   713  		enc.scene = append(enc.scene, cmd)
   714  		enc.npathseg++
   715  		verts = verts[scene.CommandSize+4:]
   716  	}
   717  	return enc
   718  }
   719  
// render runs the compute pipeline for the encoded scene. It uploads the
// padded scene and the configuration, sizes the state and memory buffers,
// and dispatches the compute programs in order: elements, tile alloc, path
// coarse, backdrop, binning, coarse and kernel4. Afterwards the memory header
// is read back: if the shaders report a failed allocation, the memory buffer
// is grown and the whole pass is run again.
//
// tileDims is the output size in tiles. An error is returned when the output
// is too large, when a buffer or the output texture cannot be (re)allocated,
// or when the shaders report an unknown error code.
func (g *compute) render(tileDims image.Point) error {
	const (
		// wgSize is the largest and most common workgroup size.
		wgSize = 128
		// PARTITION_SIZE from elements.comp
		partitionSize = 32 * 4
	)
	widthInBins := (tileDims.X + 15) / 16
	heightInBins := (tileDims.Y + 7) / 8
	if widthInBins*heightInBins > wgSize {
		return fmt.Errorf("gpu: output too large (%dx%d)", tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx)
	}

	// Pad scene with zeroes to avoid reading garbage in elements.comp.
	scenePadding := partitionSize - len(g.enc.scene)%partitionSize
	g.enc.scene = append(g.enc.scene, make([]scene.Command, scenePadding)...)

	// realloced tracks whether any buffer was replaced, in which case the
	// buffers must be bound again before dispatching.
	realloced := false
	scene := byteslice.Slice(g.enc.scene)
	if s := len(scene); s > g.buffers.scene.size {
		realloced = true
		// Allocate 10% extra to reduce the number of reallocations.
		paddedCap := s * 11 / 10
		if err := g.buffers.scene.ensureCapacity(g.ctx, driver.BufferBindingShaderStorage, paddedCap); err != nil {
			return err
		}
	}
	g.buffers.scene.buffer.Upload(scene)

	// The output texture covers whole tiles.
	w, h := tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx
	if g.output.size.X != w || g.output.size.Y != h {
		if err := g.resizeOutput(image.Pt(w, h)); err != nil {
			return err
		}
	}
	g.ctx.BindImageTexture(kernel4OutputUnit, g.output.image, driver.AccessWrite, driver.TextureFormatRGBA8)
	if t := g.materials.tex; t != nil {
		g.ctx.BindImageTexture(kernel4AtlasUnit, t, driver.AccessRead, driver.TextureFormatRGBA8)
	}

	// alloc is the number of allocated bytes for static buffers.
	var alloc uint32
	// round rounds v up to a multiple of quantum, which must be a power of
	// two.
	round := func(v, quantum int) int {
		return (v + quantum - 1) &^ (quantum - 1)
	}
	// malloc reserves size bytes, rounded up to a multiple of 4, from the
	// static part of the memory buffer.
	malloc := func(size int) memAlloc {
		size = round(size, 4)
		offset := alloc
		alloc += uint32(size)
		return memAlloc{offset /*, uint32(size)*/}
	}

	*g.conf = config{
		n_elements:      uint32(g.enc.npath),
		n_pathseg:       uint32(g.enc.npathseg),
		width_in_tiles:  uint32(tileDims.X),
		height_in_tiles: uint32(tileDims.Y),
		tile_alloc:      malloc(g.enc.npath * pathSize),
		bin_alloc:       malloc(round(g.enc.npath, wgSize) * binSize),
		ptcl_alloc:      malloc(tileDims.X * tileDims.Y * ptclInitialAlloc),
		pathseg_alloc:   malloc(g.enc.npathseg * pathsegSize),
		anno_alloc:      malloc(g.enc.npath * annoSize),
		trans_alloc:     malloc(g.enc.ntrans * transSize),
	}

	numPartitions := (g.enc.numElements() + 127) / 128
	// clearSize is the atomic partition counter plus flag and 2 states per partition.
	clearSize := 4 + numPartitions*stateStride
	if clearSize > g.buffers.state.size {
		realloced = true
		paddedCap := clearSize * 11 / 10
		if err := g.buffers.state.ensureCapacity(g.ctx, driver.BufferBindingShaderStorage, paddedCap); err != nil {
			return err
		}
	}

	g.buffers.config.Upload(byteslice.Struct(g.conf))

	minSize := int(unsafe.Sizeof(memoryHeader{})) + int(alloc)
	if minSize > g.buffers.memory.size {
		realloced = true
		// Add space for dynamic GPU allocations.
		const sizeBump = 4 * 1024 * 1024
		minSize += sizeBump
		if err := g.buffers.memory.ensureCapacity(g.ctx, driver.BufferBindingShaderStorage, minSize); err != nil {
			return err
		}
	}
	// Each iteration is one complete attempt at running the pipeline.
	for {
		// Reset the memory header and the state buffer for this attempt.
		*g.memHeader = memoryHeader{
			mem_offset: alloc,
		}
		g.buffers.memory.buffer.Upload(byteslice.Struct(g.memHeader))
		g.buffers.state.buffer.Upload(g.zeros(clearSize))

		if realloced {
			realloced = false
			g.bindBuffers()
		}
		t := &g.timers
		g.ctx.MemoryBarrier()
		t.elements.begin()
		g.ctx.BindProgram(g.programs.elements)
		g.ctx.DispatchCompute(numPartitions, 1, 1)
		g.ctx.MemoryBarrier()
		t.elements.end()
		t.tileAlloc.begin()
		g.ctx.BindProgram(g.programs.tileAlloc)
		g.ctx.DispatchCompute((g.enc.npath+wgSize-1)/wgSize, 1, 1)
		g.ctx.MemoryBarrier()
		t.tileAlloc.end()
		t.pathCoarse.begin()
		g.ctx.BindProgram(g.programs.pathCoarse)
		g.ctx.DispatchCompute((g.enc.npathseg+31)/32, 1, 1)
		g.ctx.MemoryBarrier()
		t.pathCoarse.end()
		t.backdropBinning.begin()
		g.ctx.BindProgram(g.programs.backdrop)
		g.ctx.DispatchCompute((g.enc.npath+wgSize-1)/wgSize, 1, 1)
		// No barrier needed between backdrop and binning.
		g.ctx.BindProgram(g.programs.binning)
		g.ctx.DispatchCompute((g.enc.npath+wgSize-1)/wgSize, 1, 1)
		g.ctx.MemoryBarrier()
		t.backdropBinning.end()
		t.coarse.begin()
		g.ctx.BindProgram(g.programs.coarse)
		g.ctx.DispatchCompute(widthInBins, heightInBins, 1)
		g.ctx.MemoryBarrier()
		t.coarse.end()
		t.kernel4.begin()
		g.ctx.BindProgram(g.programs.kernel4)
		g.ctx.DispatchCompute(tileDims.X, tileDims.Y, 1)
		g.ctx.MemoryBarrier()
		t.kernel4.end()

		// Read back the header to learn whether the shaders succeeded.
		if err := g.buffers.memory.buffer.Download(byteslice.Struct(g.memHeader)); err != nil {
			if err == driver.ErrContentLost {
				// The buffer content was lost; run the pass again.
				continue
			}
			return err
		}
		switch errCode := g.memHeader.mem_error; errCode {
		case memNoError:
			return nil
		case memMallocFailed:
			// Resize memory and try again.
			realloced = true
			sz := g.buffers.memory.size * 15 / 10
			if err := g.buffers.memory.ensureCapacity(g.ctx, driver.BufferBindingShaderStorage, sz); err != nil {
				return err
			}
			continue
		default:
			return fmt.Errorf("compute: shader program failed with error %d", errCode)
		}
	}
}
   876  
   877  // zeros returns a byte slice with size bytes of zeros.
   878  func (g *compute) zeros(size int) []byte {
   879  	if cap(g.zeroSlice) < size {
   880  		g.zeroSlice = append(g.zeroSlice, make([]byte, size)...)
   881  	}
   882  	return g.zeroSlice[:size]
   883  }
   884  
   885  func (g *compute) resizeOutput(size image.Point) error {
   886  	if g.output.image != nil {
   887  		g.output.image.Release()
   888  		g.output.image = nil
   889  	}
   890  	img, err := g.ctx.NewTexture(driver.TextureFormatRGBA8, size.X, size.Y,
   891  		driver.FilterNearest,
   892  		driver.FilterNearest,
   893  		driver.BufferBindingShaderStorage|driver.BufferBindingTexture)
   894  	if err != nil {
   895  		return err
   896  	}
   897  	g.output.image = img
   898  	g.output.size = size
   899  	return nil
   900  }
   901  
   902  func (g *compute) Release() {
   903  	if g.drawOps.pathCache != nil {
   904  		g.drawOps.pathCache.release()
   905  	}
   906  	if g.cache != nil {
   907  		g.cache.release()
   908  	}
   909  	progs := []driver.Program{
   910  		g.programs.elements,
   911  		g.programs.tileAlloc,
   912  		g.programs.pathCoarse,
   913  		g.programs.backdrop,
   914  		g.programs.binning,
   915  		g.programs.coarse,
   916  		g.programs.kernel4,
   917  	}
   918  	if p := g.output.blitProg; p != nil {
   919  		p.Release()
   920  	}
   921  	for _, p := range progs {
   922  		if p != nil {
   923  			p.Release()
   924  		}
   925  	}
   926  	g.buffers.scene.release()
   927  	g.buffers.state.release()
   928  	g.buffers.memory.release()
   929  	if b := g.buffers.config; b != nil {
   930  		b.Release()
   931  	}
   932  	if g.output.image != nil {
   933  		g.output.image.Release()
   934  	}
   935  	if g.images.tex != nil {
   936  		g.images.tex.Release()
   937  	}
   938  	if g.materials.layout != nil {
   939  		g.materials.layout.Release()
   940  	}
   941  	if g.materials.prog != nil {
   942  		g.materials.prog.Release()
   943  	}
   944  	if g.materials.fbo != nil {
   945  		g.materials.fbo.Release()
   946  	}
   947  	if g.materials.tex != nil {
   948  		g.materials.tex.Release()
   949  	}
   950  	g.materials.buffer.release()
   951  	if b := g.materials.uniBuf; b != nil {
   952  		b.Release()
   953  	}
   954  	if g.timers.t != nil {
   955  		g.timers.t.release()
   956  	}
   957  
   958  	*g = compute{}
   959  }
   960  
   961  func (g *compute) bindBuffers() {
   962  	bindStorageBuffers(g.programs.elements, g.buffers.memory.buffer, g.buffers.config, g.buffers.scene.buffer, g.buffers.state.buffer)
   963  	bindStorageBuffers(g.programs.tileAlloc, g.buffers.memory.buffer, g.buffers.config)
   964  	bindStorageBuffers(g.programs.pathCoarse, g.buffers.memory.buffer, g.buffers.config)
   965  	bindStorageBuffers(g.programs.backdrop, g.buffers.memory.buffer, g.buffers.config)
   966  	bindStorageBuffers(g.programs.binning, g.buffers.memory.buffer, g.buffers.config)
   967  	bindStorageBuffers(g.programs.coarse, g.buffers.memory.buffer, g.buffers.config)
   968  	bindStorageBuffers(g.programs.kernel4, g.buffers.memory.buffer, g.buffers.config)
   969  }
   970  
   971  func (b *sizedBuffer) release() {
   972  	if b.buffer == nil {
   973  		return
   974  	}
   975  	b.buffer.Release()
   976  	*b = sizedBuffer{}
   977  }
   978  
   979  func (b *sizedBuffer) ensureCapacity(ctx driver.Device, binding driver.BufferBinding, size int) error {
   980  	if b.size >= size {
   981  		return nil
   982  	}
   983  	if b.buffer != nil {
   984  		b.release()
   985  	}
   986  	buf, err := ctx.NewBuffer(binding, size)
   987  	if err != nil {
   988  		return err
   989  	}
   990  	b.buffer = buf
   991  	b.size = size
   992  	return nil
   993  }
   994  
   995  func bindStorageBuffers(prog driver.Program, buffers ...driver.Buffer) {
   996  	for i, buf := range buffers {
   997  		prog.SetStorageBuffer(i, buf)
   998  	}
   999  }
  1000  
// bo is a short alias for binary.LittleEndian.
var bo = binary.LittleEndian
  1002  
  1003  func (e *encoder) reset() {
  1004  	e.scene = e.scene[:0]
  1005  	e.npath = 0
  1006  	e.npathseg = 0
  1007  	e.ntrans = 0
  1008  }
  1009  
  1010  func (e *encoder) numElements() int {
  1011  	return len(e.scene)
  1012  }
  1013  
  1014  func (e *encoder) append(e2 encoder) {
  1015  	e.scene = append(e.scene, e2.scene...)
  1016  	e.npath += e2.npath
  1017  	e.npathseg += e2.npathseg
  1018  	e.ntrans += e2.ntrans
  1019  }
  1020  
  1021  func (e *encoder) transform(m f32.Affine2D) {
  1022  	e.scene = append(e.scene, scene.Transform(m))
  1023  	e.ntrans++
  1024  }
  1025  
  1026  func (e *encoder) lineWidth(width float32) {
  1027  	e.scene = append(e.scene, scene.SetLineWidth(width))
  1028  }
  1029  
  1030  func (e *encoder) fillMode(mode scene.FillMode) {
  1031  	e.scene = append(e.scene, scene.SetFillMode(mode))
  1032  }
  1033  
  1034  func (e *encoder) beginClip(bbox f32.Rectangle) {
  1035  	e.scene = append(e.scene, scene.BeginClip(bbox))
  1036  	e.npath++
  1037  }
  1038  
  1039  func (e *encoder) endClip(bbox f32.Rectangle) {
  1040  	e.scene = append(e.scene, scene.EndClip(bbox))
  1041  	e.npath++
  1042  }
  1043  
  1044  func (e *encoder) rect(r f32.Rectangle) {
  1045  	// Rectangle corners, clock-wise.
  1046  	c0, c1, c2, c3 := r.Min, f32.Pt(r.Min.X, r.Max.Y), r.Max, f32.Pt(r.Max.X, r.Min.Y)
  1047  	e.line(c0, c1)
  1048  	e.line(c1, c2)
  1049  	e.line(c2, c3)
  1050  	e.line(c3, c0)
  1051  }
  1052  
  1053  func (e *encoder) fillColor(col color.RGBA) {
  1054  	e.scene = append(e.scene, scene.FillColor(col))
  1055  	e.npath++
  1056  }
  1057  
  1058  func (e *encoder) setFillImageOffset(index int, offset image.Point) {
  1059  	x := int16(offset.X)
  1060  	y := int16(offset.Y)
  1061  	e.scene[index][2] = uint32(uint16(x)) | uint32(uint16(y))<<16
  1062  }
  1063  
  1064  func (e *encoder) fillImage(index int) {
  1065  	e.scene = append(e.scene, scene.FillImage(index))
  1066  	e.npath++
  1067  }
  1068  
  1069  func (e *encoder) line(start, end f32.Point) {
  1070  	e.scene = append(e.scene, scene.Line(start, end))
  1071  	e.npathseg++
  1072  }
  1073  
  1074  func (e *encoder) quad(start, ctrl, end f32.Point) {
  1075  	e.scene = append(e.scene, scene.Quad(start, ctrl, end))
  1076  	e.npathseg++
  1077  }