github.com/Seikaijyu/gio@v0.0.1/gpu/compute.go (about)

     1  // SPDX-License-Identifier: Unlicense OR MIT
     2  
     3  package gpu
     4  
     5  import (
     6  	"bytes"
     7  	"encoding/binary"
     8  	"errors"
     9  	"fmt"
    10  	"hash/maphash"
    11  	"image"
    12  	"image/color"
    13  	"image/draw"
    14  	"image/png"
    15  	"math"
    16  	"math/bits"
    17  	"os"
    18  	"runtime"
    19  	"sort"
    20  	"time"
    21  	"unsafe"
    22  
    23  	"gioui.org/cpu"
    24  	"gioui.org/shader"
    25  	"gioui.org/shader/gio"
    26  	"gioui.org/shader/piet"
    27  	"github.com/Seikaijyu/gio/gpu/internal/driver"
    28  	"github.com/Seikaijyu/gio/internal/byteslice"
    29  	"github.com/Seikaijyu/gio/internal/f32"
    30  	"github.com/Seikaijyu/gio/internal/f32color"
    31  	"github.com/Seikaijyu/gio/internal/ops"
    32  	"github.com/Seikaijyu/gio/internal/scene"
    33  	"github.com/Seikaijyu/gio/layout"
    34  	"github.com/Seikaijyu/gio/op"
    35  )
    36  
// compute is the renderer state for the compute-shader based pipeline. It
// owns the driver device handle, the per-frame operation collector and scene
// encoder, the texture atlases, the compute programs with their buffers, and
// the pipelines used to blit layers to the render target and to pre-render
// materials.
//
// frameCount is incremented once per Frame call and is compared against the
// frame stamps of atlases and allocations in compactAllocs. maxTextureDim
// and srgb are derived from the device capabilities in newCompute. moves is
// scratch space reused by compactAllocs.
type compute struct {
	// ctx is the device used for every resource and rendering call.
	ctx driver.Device

	collector     collector
	enc           encoder
	texOps        []textureOp
	viewport      image.Point
	maxTextureDim int
	srgb          bool
	atlases       []*textureAtlas
	frameCount    uint
	moves         []atlasMove

	// programs holds one compute program per stage; newCompute fills in
	// either the GPU program or the CPU program info and descriptors.
	programs struct {
		elements   computeProgram
		tileAlloc  computeProgram
		pathCoarse computeProgram
		backdrop   computeProgram
		binning    computeProgram
		coarse     computeProgram
		kernel4    computeProgram
	}
	buffers struct {
		config sizedBuffer
		scene  sizedBuffer
		state  sizedBuffer
		memory sizedBuffer
	}
	// output holds the resources used by blitLayers to draw rendered layers
	// to the render target.
	output struct {
		blitPipeline driver.Pipeline

		buffer sizedBuffer

		uniforms *copyUniforms
		uniBuf   driver.Buffer

		layerVertices []layerVertex
		descriptors   *piet.Kernel4DescriptorSetLayout

		// nullMaterials is a 1x1 texture created by newCompute when the
		// compute programs run on the GPU.
		nullMaterials driver.Texture
	}
	// imgAllocs maps imageOpData.handles to allocs.
	imgAllocs map[interface{}]*atlasAlloc
	// materials contains the pre-processed materials (transformed images for
	// now, gradients etc. later) packed in a texture atlas. The atlas is used
	// as source in kernel4.
	materials struct {
		// allocs maps texture ops to their atlases and FillImage offsets.
		allocs map[textureKey]materialAlloc

		pipeline driver.Pipeline
		buffer   sizedBuffer
		quads    []materialVertex
		uniforms struct {
			u   *materialUniforms
			buf driver.Buffer
		}
	}
	// timers holds the optional GPU timers and the last formatted profile
	// string returned by Profile.
	timers struct {
		profile string
		t       *timers
		compact *timer
		render  *timer
		blit    *timer
	}

	// CPU fallback fields.
	useCPU     bool
	dispatcher *dispatcher

	// The following fields hold scratch space to avoid garbage.
	zeroSlice []byte
	memHeader *memoryHeader
	conf      *config
}
   112  
// materialAlloc locates a pre-rendered material inside a texture atlas.
// renderMaterials stores the negated minimum corner of the material's
// clipped bounds in offset.
type materialAlloc struct {
	alloc  *atlasAlloc
	offset image.Point
}
   117  
// layer is a group of paint operations that renderLayers renders into an
// atlas allocation and blitLayers then draws to the render target.
//
// rect is the layer's rectangle on the target. alloc is nil until the layer
// has been given atlas space. materials is the materials atlas the layer
// samples from, or nil if it needs none.
type layer struct {
	rect      image.Rectangle
	alloc     *atlasAlloc
	ops       []paintOp
	materials *textureAtlas
}
   124  
// allocQuery describes a request for atlas space passed to
// compute.atlasAlloc. Callers pass in atlas the atlas returned by earlier
// allocations of the same pass (nil for the first request), the requested
// size in pixels, and the texture format and bindings they need.
//
// NOTE(review): the exact effect of empty and nocompact is decided by
// atlasAlloc, which is not visible in this section — confirm there.
type allocQuery struct {
	atlas     *textureAtlas
	size      image.Point
	empty     bool
	format    driver.TextureFormat
	bindings  driver.BufferBinding
	nocompact bool
}
   133  
// atlasAlloc is a rectangle of space reserved in a textureAtlas.
//
// compactAllocs may move an allocation to another atlas, updating atlas and
// rect, and marks it dead once frameCount lags the renderer's frame counter
// by more than maxAllocAge frames. cpu selects a CPU image copy instead of a
// texture copy when the allocation is moved.
type atlasAlloc struct {
	atlas      *textureAtlas
	rect       image.Rectangle
	cpu        bool
	dead       bool
	frameCount uint
}
   141  
// atlasMove is a pending pixel copy recorded by compactAllocs: the rectangle
// srcRect of the src atlas is copied to dstPos in the destination atlas. When
// cpu is set the copy is performed on the atlases' CPU images with copyImage,
// otherwise with the device's CopyTexture.
type atlasMove struct {
	src     *textureAtlas
	dstPos  image.Point
	srcRect image.Rectangle
	cpu     bool
}
   148  
// textureAtlas is a texture, or its CPU image equivalent, whose space is
// shared by the allocations listed in allocs and laid out by packer.
//
// compactAllocs merges the allocations of atlases that have compact set into
// other atlases, and releases atlases that hold no allocations and whose
// lastFrame is more than maxAtlasAge frames old.
//
// NOTE(review): realized presumably records whether image/cpuImage exist for
// the current size; confirm in realizeAtlas.
type textureAtlas struct {
	image     driver.Texture
	format    driver.TextureFormat
	bindings  driver.BufferBinding
	hasCPU    bool
	cpuImage  cpu.ImageDescriptor
	size      image.Point
	allocs    []*atlasAlloc
	packer    packer
	realized  bool
	lastFrame uint
	compact   bool
}
   162  
// copyUniforms is the uniform data uploaded by blitLayers for the layer blit
// pipeline. scale and pos transform layer positions to clip space; uvScale
// transforms texel coordinates to the [0, 1] texture range.
type copyUniforms struct {
	scale   [2]float32
	pos     [2]float32
	uvScale [2]float32
	_       [8]byte // Pad to a multiple of 16 bytes.
}
   169  
// materialUniforms is the uniform data uploaded by renderMaterials for the
// material pipeline. scale and pos transform quad positions to clip space;
// emulatesRGB is set to 1 when the device lacks sRGB support.
type materialUniforms struct {
	scale       [2]float32
	pos         [2]float32
	emulatesRGB float32
	_           [12]byte // Pad to a multiple of 16 bytes.
}
   176  
// collector gathers the operations of a frame for the compute renderer.
//
// clear and clearColor are set by compute.Clear and consumed by
// compute.frame, which resets clear once the clear has been scheduled.
// profile enables the creation and reporting of timers in compute.frame.
//
// NOTE(review): prevFrame and frame presumably hold the previous and current
// frame's collected operations; the remaining fields are scratch state used
// by collector methods that are not part of this section.
type collector struct {
	hasher     maphash.Hash
	profile    bool
	reader     ops.Reader
	states     []f32.Affine2D
	clear      bool
	clearColor f32color.RGBA
	clipStates []clipState
	order      []hashIndex
	transStack []transEntry
	prevFrame  opsCollector
	frame      opsCollector
}
   190  
// transEntry is an element of collector.transStack. It pairs a transform
// with a relative transform.
// NOTE(review): how t and relTrans relate is defined by the collector code
// outside this section.
type transEntry struct {
	t        f32.Affine2D
	relTrans f32.Affine2D
}
   195  
// hashIndex pairs an index with a 64-bit hash. It is the element type of
// collector.order.
type hashIndex struct {
	index int
	hash  uint64
}
   200  
// opsCollector holds the path data, clip commands, paint operations and
// layers collected for one frame. The layers are consumed by renderLayers
// and blitLayers.
type opsCollector struct {
	paths    []byte
	clipCmds []clipCmd
	ops      []paintOp
	layers   []layer
}
   207  
// paintOp is a single collected paint operation: its paint state, the stack
// of clip commands that apply to it, and bookkeeping used to assign it to a
// layer.
// NOTE(review): texOpIdx presumably indexes compute.texOps and layer indexes
// the frame's layers; confirm against the collector code.
type paintOp struct {
	clipStack []clipCmd
	offset    image.Point
	state     paintKey
	intersect f32.Rectangle
	hash      uint64
	layer     int
	texOpIdx  int
}
   217  
// clipCmd describes a clipping command ready to be used for the compute
// pipeline. It carries the clip's hashable key together with its encoded
// path and bounds.
type clipCmd struct {
	// union of the bounds of the operations that are clipped.
	union     f32.Rectangle
	state     clipKey
	path      []byte
	pathKey   ops.Key
	absBounds f32.Rectangle
}
   228  
// encoderState bundles a paint state (the embedded paintKey) with a relative
// transform and a pointer to a clip state.
type encoderState struct {
	relTrans f32.Affine2D
	clip     *clipState

	paintKey
}
   235  
// clipKey completely describes a clip operation (along with its path) and is
// appropriate for hashing and equality checks. The path itself is
// represented by pathHash rather than by its bytes.
type clipKey struct {
	bounds      f32.Rectangle
	strokeWidth float32
	relTrans    f32.Affine2D
	pathHash    uint64
}
   244  
// paintKey completely defines a paint operation. It is suitable for hashing
// and equality checks. matType selects the kind of material; the image,
// color and gradient fields carry the data for the respective kinds.
type paintKey struct {
	t       f32.Affine2D
	matType materialType
	// Current paint.ImageOp
	image imageOpData
	// Current paint.ColorOp, if any.
	color color.NRGBA

	// Current paint.LinearGradientOp.
	stop1  f32.Point
	stop2  f32.Point
	color1 color.NRGBA
	color2 color.NRGBA
}
   261  
// clipState is a node in a chain of clip states linked through parent. It
// embeds the clipKey identifying the clip and additionally carries the
// clip's path data and bounds.
type clipState struct {
	absBounds f32.Rectangle
	parent    *clipState
	path      []byte
	pathKey   ops.Key
	intersect f32.Rectangle

	clipKey
}
   271  
// layerVertex describes a vertex of a quad used to blit a layer: a position
// and a texture coordinate. Its layout must match the vertex layout of the
// blit pipeline declared in newCompute (two floats at offset 0, two floats at
// offset 8).
type layerVertex struct {
	posX, posY float32
	u, v       float32
}
   276  
// materialVertex describes a vertex of a quad used to render a transformed
// material: a position and a texture coordinate. Its layout must match the
// vertex layout of the material pipeline declared in newCompute (two floats
// at offset 0, two floats at offset 8).
type materialVertex struct {
	posX, posY float32
	u, v       float32
}
   283  
// textureKey identifies a textureOp. It is the key of the materials.allocs
// cache, so two texture ops with equal keys share one pre-rendered material.
type textureKey struct {
	handle    interface{}
	transform f32.Affine2D
	bounds    image.Rectangle
}
   290  
// textureOp represents a paintOp that requires texture space.
type textureOp struct {
	img imageOpData
	key textureKey
	// off is the integer offset separated from key.transform to increase cache hit rate.
	off image.Point
	// matAlloc is the atlas placement for the material.
	matAlloc materialAlloc
	// imgAlloc is the atlas placement for the source image.
	imgAlloc *atlasAlloc
}
   302  
// encoder accumulates the scene commands for a compute pass. It is reset at
// the start of every pass in renderLayers and filled by encodeLayer.
// NOTE(review): npath, npathseg and ntrans presumably count the encoded
// paths, path segments and transforms; confirm in the encoder methods.
type encoder struct {
	scene    []scene.Command
	npath    int
	npathseg int
	ntrans   int
}
   309  
// sizedBuffer holds a GPU buffer, or its equivalent CPU memory, together
// with its size. Callers grow it through ensureCapacity before uploading.
type sizedBuffer struct {
	size   int
	buffer driver.Buffer
	// cpuBuf is initialized when useCPU is true.
	cpuBuf cpu.BufferDescriptor
}
   317  
// computeProgram holds a compute program, or its equivalent CPU
// implementation. newCompute sets prog when the device supports compute and
// the CPU fields otherwise.
type computeProgram struct {
	prog driver.Program

	// CPU fields.
	progInfo    *cpu.ProgramInfo
	descriptors unsafe.Pointer
	buffers     []*cpu.BufferDescriptor
}
   327  
// config matches Config in setup.h. The field names deliberately mirror the
// shader-side names instead of following Go naming conventions; keep the
// field order and types in sync with the header.
type config struct {
	n_elements      uint32 // paths
	n_pathseg       uint32
	width_in_tiles  uint32
	height_in_tiles uint32
	tile_alloc      memAlloc
	bin_alloc       memAlloc
	ptcl_alloc      memAlloc
	pathseg_alloc   memAlloc
	anno_alloc      memAlloc
	trans_alloc     memAlloc
}
   341  
// memAlloc matches Alloc in mem.h. Only the offset is represented; the size
// member is commented out here.
type memAlloc struct {
	offset uint32
	//size   uint32
}
   347  
// memoryHeader matches the header of Memory in mem.h. mem_error holds one of
// the mem.h error constants (memNoError, memMallocFailed).
type memoryHeader struct {
	mem_offset uint32
	mem_error  uint32
}
   353  
// rectangle is an oriented rectangle, stored as four points.
type rectangle [4]f32.Point
   356  
// Buffer bindings requested for atlas textures. layersBindings are the
// bindings for layer output and materialsBindings those for pre-rendered
// materials.
const (
	layersBindings    = driver.BufferBindingShaderStorageWrite | driver.BufferBindingTexture
	materialsBindings = driver.BufferBindingFramebuffer | driver.BufferBindingShaderStorageRead
	// Materials and layers can share texture storage if their bindings match.
	combinedBindings = layersBindings | materialsBindings
)
   363  
// GPU structure sizes and constants.
//
// tileWidthPx and tileHeightPx are the dimensions of a tile in pixels;
// renderLayers rounds output sizes up to whole tiles.
// NOTE(review): the *Size constants are presumably byte sizes of the
// corresponding shader structures — confirm against the shader headers.
const (
	tileWidthPx       = 32
	tileHeightPx      = 32
	ptclInitialAlloc  = 1024
	kernel4OutputUnit = 2
	kernel4AtlasUnit  = 3

	pathSize    = 12
	binSize     = 8
	pathsegSize = 52
	annoSize    = 32
	transSize   = 24
	stateSize   = 60
	stateStride = 4 + 2*stateSize
)
   380  
// mem.h constants. These are the values stored in memoryHeader.mem_error.
const (
	memNoError      = 0 // NO_ERROR
	memMallocFailed = 1 // ERR_MALLOC_FAILED
)
   386  
   387  func newCompute(ctx driver.Device) (*compute, error) {
   388  	caps := ctx.Caps()
   389  	maxDim := caps.MaxTextureSize
   390  	// Large atlas textures cause artifacts due to precision loss in
   391  	// shaders.
   392  	if cap := 8192; maxDim > cap {
   393  		maxDim = cap
   394  	}
   395  	// The compute programs can only span 128x64 tiles. Limit to 64 for now, and leave the
   396  	// complexity of a rectangular limit for later.
   397  	if computeCap := 4096; maxDim > computeCap {
   398  		maxDim = computeCap
   399  	}
   400  	g := &compute{
   401  		ctx:           ctx,
   402  		maxTextureDim: maxDim,
   403  		srgb:          caps.Features.Has(driver.FeatureSRGB),
   404  		conf:          new(config),
   405  		memHeader:     new(memoryHeader),
   406  	}
   407  	shaders := []struct {
   408  		prog *computeProgram
   409  		src  shader.Sources
   410  		info *cpu.ProgramInfo
   411  	}{
   412  		{&g.programs.elements, piet.Shader_elements_comp, piet.ElementsProgramInfo},
   413  		{&g.programs.tileAlloc, piet.Shader_tile_alloc_comp, piet.Tile_allocProgramInfo},
   414  		{&g.programs.pathCoarse, piet.Shader_path_coarse_comp, piet.Path_coarseProgramInfo},
   415  		{&g.programs.backdrop, piet.Shader_backdrop_comp, piet.BackdropProgramInfo},
   416  		{&g.programs.binning, piet.Shader_binning_comp, piet.BinningProgramInfo},
   417  		{&g.programs.coarse, piet.Shader_coarse_comp, piet.CoarseProgramInfo},
   418  		{&g.programs.kernel4, piet.Shader_kernel4_comp, piet.Kernel4ProgramInfo},
   419  	}
   420  	if !caps.Features.Has(driver.FeatureCompute) {
   421  		if !cpu.Supported {
   422  			return nil, errors.New("gpu: missing support for compute programs")
   423  		}
   424  		g.useCPU = true
   425  	}
   426  	if g.useCPU {
   427  		g.dispatcher = newDispatcher(runtime.NumCPU())
   428  	} else {
   429  		null, err := ctx.NewTexture(driver.TextureFormatRGBA8, 1, 1, driver.FilterNearest, driver.FilterNearest, driver.BufferBindingShaderStorageRead)
   430  		if err != nil {
   431  			g.Release()
   432  			return nil, err
   433  		}
   434  		g.output.nullMaterials = null
   435  	}
   436  
   437  	copyVert, copyFrag, err := newShaders(ctx, gio.Shader_copy_vert, gio.Shader_copy_frag)
   438  	if err != nil {
   439  		g.Release()
   440  		return nil, err
   441  	}
   442  	defer copyVert.Release()
   443  	defer copyFrag.Release()
   444  	pipe, err := ctx.NewPipeline(driver.PipelineDesc{
   445  		VertexShader:   copyVert,
   446  		FragmentShader: copyFrag,
   447  		VertexLayout: driver.VertexLayout{
   448  			Inputs: []driver.InputDesc{
   449  				{Type: shader.DataTypeFloat, Size: 2, Offset: 0},
   450  				{Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2},
   451  			},
   452  			Stride: int(unsafe.Sizeof(g.output.layerVertices[0])),
   453  		},
   454  		PixelFormat: driver.TextureFormatOutput,
   455  		BlendDesc: driver.BlendDesc{
   456  			Enable:    true,
   457  			SrcFactor: driver.BlendFactorOne,
   458  			DstFactor: driver.BlendFactorOneMinusSrcAlpha,
   459  		},
   460  		Topology: driver.TopologyTriangles,
   461  	})
   462  	if err != nil {
   463  		g.Release()
   464  		return nil, err
   465  	}
   466  	g.output.blitPipeline = pipe
   467  	g.output.uniforms = new(copyUniforms)
   468  
   469  	buf, err := ctx.NewBuffer(driver.BufferBindingUniforms, int(unsafe.Sizeof(*g.output.uniforms)))
   470  	if err != nil {
   471  		g.Release()
   472  		return nil, err
   473  	}
   474  	g.output.uniBuf = buf
   475  
   476  	materialVert, materialFrag, err := newShaders(ctx, gio.Shader_material_vert, gio.Shader_material_frag)
   477  	if err != nil {
   478  		g.Release()
   479  		return nil, err
   480  	}
   481  	defer materialVert.Release()
   482  	defer materialFrag.Release()
   483  	pipe, err = ctx.NewPipeline(driver.PipelineDesc{
   484  		VertexShader:   materialVert,
   485  		FragmentShader: materialFrag,
   486  		VertexLayout: driver.VertexLayout{
   487  			Inputs: []driver.InputDesc{
   488  				{Type: shader.DataTypeFloat, Size: 2, Offset: 0},
   489  				{Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2},
   490  			},
   491  			Stride: int(unsafe.Sizeof(g.materials.quads[0])),
   492  		},
   493  		PixelFormat: driver.TextureFormatRGBA8,
   494  		Topology:    driver.TopologyTriangles,
   495  	})
   496  	if err != nil {
   497  		g.Release()
   498  		return nil, err
   499  	}
   500  	g.materials.pipeline = pipe
   501  	g.materials.uniforms.u = new(materialUniforms)
   502  
   503  	buf, err = ctx.NewBuffer(driver.BufferBindingUniforms, int(unsafe.Sizeof(*g.materials.uniforms.u)))
   504  	if err != nil {
   505  		g.Release()
   506  		return nil, err
   507  	}
   508  	g.materials.uniforms.buf = buf
   509  
   510  	for _, shader := range shaders {
   511  		if !g.useCPU {
   512  			p, err := ctx.NewComputeProgram(shader.src)
   513  			if err != nil {
   514  				g.Release()
   515  				return nil, err
   516  			}
   517  			shader.prog.prog = p
   518  		} else {
   519  			shader.prog.progInfo = shader.info
   520  		}
   521  	}
   522  	if g.useCPU {
   523  		{
   524  			desc := new(piet.ElementsDescriptorSetLayout)
   525  			g.programs.elements.descriptors = unsafe.Pointer(desc)
   526  			g.programs.elements.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1(), desc.Binding2(), desc.Binding3()}
   527  		}
   528  		{
   529  			desc := new(piet.Tile_allocDescriptorSetLayout)
   530  			g.programs.tileAlloc.descriptors = unsafe.Pointer(desc)
   531  			g.programs.tileAlloc.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()}
   532  		}
   533  		{
   534  			desc := new(piet.Path_coarseDescriptorSetLayout)
   535  			g.programs.pathCoarse.descriptors = unsafe.Pointer(desc)
   536  			g.programs.pathCoarse.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()}
   537  		}
   538  		{
   539  			desc := new(piet.BackdropDescriptorSetLayout)
   540  			g.programs.backdrop.descriptors = unsafe.Pointer(desc)
   541  			g.programs.backdrop.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()}
   542  		}
   543  		{
   544  			desc := new(piet.BinningDescriptorSetLayout)
   545  			g.programs.binning.descriptors = unsafe.Pointer(desc)
   546  			g.programs.binning.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()}
   547  		}
   548  		{
   549  			desc := new(piet.CoarseDescriptorSetLayout)
   550  			g.programs.coarse.descriptors = unsafe.Pointer(desc)
   551  			g.programs.coarse.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()}
   552  		}
   553  		{
   554  			desc := new(piet.Kernel4DescriptorSetLayout)
   555  			g.programs.kernel4.descriptors = unsafe.Pointer(desc)
   556  			g.programs.kernel4.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()}
   557  			g.output.descriptors = desc
   558  		}
   559  	}
   560  	return g, nil
   561  }
   562  
   563  func newShaders(ctx driver.Device, vsrc, fsrc shader.Sources) (vert driver.VertexShader, frag driver.FragmentShader, err error) {
   564  	vert, err = ctx.NewVertexShader(vsrc)
   565  	if err != nil {
   566  		return
   567  	}
   568  	frag, err = ctx.NewFragmentShader(fsrc)
   569  	if err != nil {
   570  		vert.Release()
   571  	}
   572  	return
   573  }
   574  
   575  func (g *compute) Frame(frameOps *op.Ops, target RenderTarget, viewport image.Point) error {
   576  	g.frameCount++
   577  	g.collect(viewport, frameOps)
   578  	return g.frame(target)
   579  }
   580  
   581  func (g *compute) collect(viewport image.Point, ops *op.Ops) {
   582  	g.viewport = viewport
   583  	g.collector.reset()
   584  
   585  	g.texOps = g.texOps[:0]
   586  	g.collector.collect(ops, viewport, &g.texOps)
   587  }
   588  
   589  func (g *compute) Clear(col color.NRGBA) {
   590  	g.collector.clear = true
   591  	g.collector.clearColor = f32color.LinearFromSRGB(col)
   592  }
   593  
   594  func (g *compute) frame(target RenderTarget) error {
   595  	viewport := g.viewport
   596  	defFBO := g.ctx.BeginFrame(target, g.collector.clear, viewport)
   597  	defer g.ctx.EndFrame()
   598  
   599  	t := &g.timers
   600  	if g.collector.profile && t.t == nil && g.ctx.Caps().Features.Has(driver.FeatureTimers) {
   601  		t.t = newTimers(g.ctx)
   602  		t.compact = t.t.newTimer()
   603  		t.render = t.t.newTimer()
   604  		t.blit = t.t.newTimer()
   605  	}
   606  
   607  	if err := g.uploadImages(); err != nil {
   608  		return err
   609  	}
   610  	if err := g.renderMaterials(); err != nil {
   611  		return err
   612  	}
   613  	g.layer(viewport, g.texOps)
   614  	t.render.begin()
   615  	if err := g.renderLayers(viewport); err != nil {
   616  		return err
   617  	}
   618  	t.render.end()
   619  	d := driver.LoadDesc{
   620  		ClearColor: g.collector.clearColor,
   621  	}
   622  	if g.collector.clear {
   623  		g.collector.clear = false
   624  		d.Action = driver.LoadActionClear
   625  	}
   626  	t.blit.begin()
   627  	g.blitLayers(d, defFBO, viewport)
   628  	t.blit.end()
   629  	t.compact.begin()
   630  	if err := g.compactAllocs(); err != nil {
   631  		return err
   632  	}
   633  	t.compact.end()
   634  	if g.collector.profile && t.t.ready() {
   635  		com, ren, blit := t.compact.Elapsed, t.render.Elapsed, t.blit.Elapsed
   636  		ft := com + ren + blit
   637  		q := 100 * time.Microsecond
   638  		ft = ft.Round(q)
   639  		com, ren, blit = com.Round(q), ren.Round(q), blit.Round(q)
   640  		t.profile = fmt.Sprintf("ft:%7s com: %7s ren:%7s blit:%7s", ft, com, ren, blit)
   641  	}
   642  	return nil
   643  }
   644  
   645  func (g *compute) dumpAtlases() {
   646  	for i, a := range g.atlases {
   647  		dump := image.NewRGBA(image.Rectangle{Max: a.size})
   648  		err := driver.DownloadImage(g.ctx, a.image, dump)
   649  		if err != nil {
   650  			panic(err)
   651  		}
   652  		nrgba := image.NewNRGBA(dump.Bounds())
   653  		draw.Draw(nrgba, image.Rectangle{}, dump, image.Point{}, draw.Src)
   654  		var buf bytes.Buffer
   655  		if err := png.Encode(&buf, nrgba); err != nil {
   656  			panic(err)
   657  		}
   658  		if err := os.WriteFile(fmt.Sprintf("dump-%d.png", i), buf.Bytes(), 0600); err != nil {
   659  			panic(err)
   660  		}
   661  	}
   662  }
   663  
   664  func (g *compute) Profile() string {
   665  	return g.timers.profile
   666  }
   667  
   668  func (g *compute) compactAllocs() error {
   669  	const (
   670  		maxAllocAge = 3
   671  		maxAtlasAge = 10
   672  	)
   673  	atlases := g.atlases
   674  	for _, a := range atlases {
   675  		if len(a.allocs) > 0 && g.frameCount-a.lastFrame > maxAtlasAge {
   676  			a.compact = true
   677  		}
   678  	}
   679  	for len(atlases) > 0 {
   680  		var (
   681  			dstAtlas *textureAtlas
   682  			format   driver.TextureFormat
   683  			bindings driver.BufferBinding
   684  		)
   685  		g.moves = g.moves[:0]
   686  		addedLayers := false
   687  		useCPU := false
   688  	fill:
   689  		for len(atlases) > 0 {
   690  			srcAtlas := atlases[0]
   691  			allocs := srcAtlas.allocs
   692  			if !srcAtlas.compact {
   693  				atlases = atlases[1:]
   694  				continue
   695  			}
   696  			if addedLayers && (format != srcAtlas.format || srcAtlas.bindings&bindings != srcAtlas.bindings) {
   697  				break
   698  			}
   699  			format = srcAtlas.format
   700  			bindings = srcAtlas.bindings
   701  			for len(srcAtlas.allocs) > 0 {
   702  				a := srcAtlas.allocs[0]
   703  				n := len(srcAtlas.allocs)
   704  				if g.frameCount-a.frameCount > maxAllocAge {
   705  					a.dead = true
   706  					srcAtlas.allocs[0] = srcAtlas.allocs[n-1]
   707  					srcAtlas.allocs = srcAtlas.allocs[:n-1]
   708  					continue
   709  				}
   710  				size := a.rect.Size()
   711  				alloc, fits := g.atlasAlloc(allocQuery{
   712  					atlas:     dstAtlas,
   713  					size:      size,
   714  					format:    format,
   715  					bindings:  bindings,
   716  					nocompact: true,
   717  				})
   718  				if !fits {
   719  					break fill
   720  				}
   721  				dstAtlas = alloc.atlas
   722  				allocs = append(allocs, a)
   723  				addedLayers = true
   724  				useCPU = useCPU || a.cpu
   725  				dstAtlas.allocs = append(dstAtlas.allocs, a)
   726  				pos := alloc.rect.Min
   727  				g.moves = append(g.moves, atlasMove{
   728  					src: srcAtlas, dstPos: pos, srcRect: a.rect, cpu: a.cpu,
   729  				})
   730  				a.atlas = dstAtlas
   731  				a.rect = image.Rectangle{Min: pos, Max: pos.Add(a.rect.Size())}
   732  				srcAtlas.allocs[0] = srcAtlas.allocs[n-1]
   733  				srcAtlas.allocs = srcAtlas.allocs[:n-1]
   734  			}
   735  			srcAtlas.compact = false
   736  			srcAtlas.realized = false
   737  			srcAtlas.packer.clear()
   738  			srcAtlas.packer.newPage()
   739  			srcAtlas.packer.maxDims = image.Pt(g.maxTextureDim, g.maxTextureDim)
   740  			atlases = atlases[1:]
   741  		}
   742  		if !addedLayers {
   743  			break
   744  		}
   745  		outputSize := dstAtlas.packer.sizes[0]
   746  		if err := g.realizeAtlas(dstAtlas, useCPU, outputSize); err != nil {
   747  			return err
   748  		}
   749  		for _, move := range g.moves {
   750  			if !move.cpu {
   751  				g.ctx.CopyTexture(dstAtlas.image, move.dstPos, move.src.image, move.srcRect)
   752  			} else {
   753  				src := move.src.cpuImage.Data()
   754  				dst := dstAtlas.cpuImage.Data()
   755  				sstride := move.src.size.X * 4
   756  				dstride := dstAtlas.size.X * 4
   757  				copyImage(dst, dstride, move.dstPos, src, sstride, move.srcRect)
   758  			}
   759  		}
   760  	}
   761  	for i := len(g.atlases) - 1; i >= 0; i-- {
   762  		a := g.atlases[i]
   763  		if len(a.allocs) == 0 && g.frameCount-a.lastFrame > maxAtlasAge {
   764  			a.Release()
   765  			n := len(g.atlases)
   766  			g.atlases[i] = g.atlases[n-1]
   767  			g.atlases = g.atlases[:n-1]
   768  		}
   769  	}
   770  	return nil
   771  }
   772  
   773  func copyImage(dst []byte, dstStride int, dstPos image.Point, src []byte, srcStride int, srcRect image.Rectangle) {
   774  	sz := srcRect.Size()
   775  	soff := srcRect.Min.Y*srcStride + srcRect.Min.X*4
   776  	doff := dstPos.Y*dstStride + dstPos.X*4
   777  	rowLen := sz.X * 4
   778  	for y := 0; y < sz.Y; y++ {
   779  		srow := src[soff : soff+rowLen]
   780  		drow := dst[doff : doff+rowLen]
   781  		copy(drow, srow)
   782  		soff += srcStride
   783  		doff += dstStride
   784  	}
   785  }
   786  
   787  func (g *compute) renderLayers(viewport image.Point) error {
   788  	layers := g.collector.frame.layers
   789  	for len(layers) > 0 {
   790  		var materials, dst *textureAtlas
   791  		addedLayers := false
   792  		g.enc.reset()
   793  		for len(layers) > 0 {
   794  			l := &layers[0]
   795  			if l.alloc != nil {
   796  				layers = layers[1:]
   797  				continue
   798  			}
   799  			if materials != nil {
   800  				if l.materials != nil && materials != l.materials {
   801  					// Only one materials texture per compute pass.
   802  					break
   803  				}
   804  			} else {
   805  				materials = l.materials
   806  			}
   807  			size := l.rect.Size()
   808  			alloc, fits := g.atlasAlloc(allocQuery{
   809  				atlas:    dst,
   810  				empty:    true,
   811  				format:   driver.TextureFormatRGBA8,
   812  				bindings: combinedBindings,
   813  				// Pad to avoid overlap.
   814  				size: size.Add(image.Pt(1, 1)),
   815  			})
   816  			if !fits {
   817  				// Only one output atlas per compute pass.
   818  				break
   819  			}
   820  			dst = alloc.atlas
   821  			dst.compact = true
   822  			addedLayers = true
   823  			l.alloc = &alloc
   824  			dst.allocs = append(dst.allocs, l.alloc)
   825  			encodeLayer(*l, alloc.rect.Min, viewport, &g.enc, g.texOps)
   826  			layers = layers[1:]
   827  		}
   828  		if !addedLayers {
   829  			break
   830  		}
   831  		outputSize := dst.packer.sizes[0]
   832  		tileDims := image.Point{
   833  			X: (outputSize.X + tileWidthPx - 1) / tileWidthPx,
   834  			Y: (outputSize.Y + tileHeightPx - 1) / tileHeightPx,
   835  		}
   836  		w, h := tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx
   837  		if err := g.realizeAtlas(dst, g.useCPU, image.Pt(w, h)); err != nil {
   838  			return err
   839  		}
   840  		if err := g.render(materials, dst.image, dst.cpuImage, tileDims, dst.size.X*4); err != nil {
   841  			return err
   842  		}
   843  	}
   844  	return nil
   845  }
   846  
   847  func (g *compute) blitLayers(d driver.LoadDesc, fbo driver.Texture, viewport image.Point) {
   848  	layers := g.collector.frame.layers
   849  	g.output.layerVertices = g.output.layerVertices[:0]
   850  	for _, l := range layers {
   851  		placef := layout.FPt(l.alloc.rect.Min)
   852  		sizef := layout.FPt(l.rect.Size())
   853  		r := f32.FRect(l.rect)
   854  		quad := [4]layerVertex{
   855  			{posX: float32(r.Min.X), posY: float32(r.Min.Y), u: placef.X, v: placef.Y},
   856  			{posX: float32(r.Max.X), posY: float32(r.Min.Y), u: placef.X + sizef.X, v: placef.Y},
   857  			{posX: float32(r.Max.X), posY: float32(r.Max.Y), u: placef.X + sizef.X, v: placef.Y + sizef.Y},
   858  			{posX: float32(r.Min.X), posY: float32(r.Max.Y), u: placef.X, v: placef.Y + sizef.Y},
   859  		}
   860  		g.output.layerVertices = append(g.output.layerVertices, quad[0], quad[1], quad[3], quad[3], quad[2], quad[1])
   861  		g.ctx.PrepareTexture(l.alloc.atlas.image)
   862  	}
   863  	if len(g.output.layerVertices) > 0 {
   864  		vertexData := byteslice.Slice(g.output.layerVertices)
   865  		g.output.buffer.ensureCapacity(false, g.ctx, driver.BufferBindingVertices, len(vertexData))
   866  		g.output.buffer.buffer.Upload(vertexData)
   867  	}
   868  	g.ctx.BeginRenderPass(fbo, d)
   869  	defer g.ctx.EndRenderPass()
   870  	if len(layers) == 0 {
   871  		return
   872  	}
   873  	g.ctx.Viewport(0, 0, viewport.X, viewport.Y)
   874  	g.ctx.BindPipeline(g.output.blitPipeline)
   875  	g.ctx.BindVertexBuffer(g.output.buffer.buffer, 0)
   876  	start := 0
   877  	for len(layers) > 0 {
   878  		count := 0
   879  		atlas := layers[0].alloc.atlas
   880  		for len(layers) > 0 {
   881  			l := layers[0]
   882  			if l.alloc.atlas != atlas {
   883  				break
   884  			}
   885  			layers = layers[1:]
   886  			const verticesPerQuad = 6
   887  			count += verticesPerQuad
   888  		}
   889  
   890  		// Transform positions to clip space: [-1, -1] - [1, 1], and texture
   891  		// coordinates to texture space: [0, 0] - [1, 1].
   892  		clip := f32.Affine2D{}.Scale(f32.Pt(0, 0), f32.Pt(2/float32(viewport.X), 2/float32(viewport.Y))).Offset(f32.Pt(-1, -1))
   893  		sx, _, ox, _, sy, oy := clip.Elems()
   894  		g.output.uniforms.scale = [2]float32{sx, sy}
   895  		g.output.uniforms.pos = [2]float32{ox, oy}
   896  		g.output.uniforms.uvScale = [2]float32{1 / float32(atlas.size.X), 1 / float32(atlas.size.Y)}
   897  		g.output.uniBuf.Upload(byteslice.Struct(g.output.uniforms))
   898  		g.ctx.BindUniforms(g.output.uniBuf)
   899  		g.ctx.BindTexture(0, atlas.image)
   900  		g.ctx.DrawArrays(start, count)
   901  		start += count
   902  	}
   903  }
   904  
   905  func (g *compute) renderMaterials() error {
   906  	m := &g.materials
   907  	for k, place := range m.allocs {
   908  		if place.alloc.dead {
   909  			delete(m.allocs, k)
   910  		}
   911  	}
   912  	texOps := g.texOps
   913  	for len(texOps) > 0 {
   914  		m.quads = m.quads[:0]
   915  		var (
   916  			atlas    *textureAtlas
   917  			imgAtlas *textureAtlas
   918  		)
   919  		// A material is clipped to avoid drawing outside its atlas bounds.
   920  		// However, imprecision in the clipping may cause a single pixel
   921  		// overflow.
   922  		var padding = image.Pt(1, 1)
   923  		var allocStart int
   924  		for len(texOps) > 0 {
   925  			op := &texOps[0]
   926  			if a, exists := m.allocs[op.key]; exists {
   927  				g.touchAlloc(a.alloc)
   928  				op.matAlloc = a
   929  				texOps = texOps[1:]
   930  				continue
   931  			}
   932  
   933  			if imgAtlas != nil && op.imgAlloc.atlas != imgAtlas {
   934  				// Only one image atlas per render pass.
   935  				break
   936  			}
   937  			imgAtlas = op.imgAlloc.atlas
   938  			quad := g.materialQuad(imgAtlas.size, op.key.transform, op.img, op.imgAlloc.rect.Min)
   939  			boundsf := quadBounds(quad)
   940  			bounds := boundsf.Round()
   941  			bounds = bounds.Intersect(op.key.bounds)
   942  
   943  			size := bounds.Size()
   944  			alloc, fits := g.atlasAlloc(allocQuery{
   945  				atlas:    atlas,
   946  				size:     size.Add(padding),
   947  				format:   driver.TextureFormatRGBA8,
   948  				bindings: combinedBindings,
   949  			})
   950  			if !fits {
   951  				break
   952  			}
   953  			if atlas == nil {
   954  				allocStart = len(alloc.atlas.allocs)
   955  			}
   956  			atlas = alloc.atlas
   957  			alloc.cpu = g.useCPU
   958  			offsetf := layout.FPt(bounds.Min.Mul(-1))
   959  			scale := f32.Pt(float32(size.X), float32(size.Y))
   960  			for i := range quad {
   961  				// Position quad to match place.
   962  				quad[i].posX += offsetf.X
   963  				quad[i].posY += offsetf.Y
   964  				// Scale to match viewport [0, 1].
   965  				quad[i].posX /= scale.X
   966  				quad[i].posY /= scale.Y
   967  			}
   968  			// Draw quad as two triangles.
   969  			m.quads = append(m.quads, quad[0], quad[1], quad[3], quad[3], quad[1], quad[2])
   970  			if m.allocs == nil {
   971  				m.allocs = make(map[textureKey]materialAlloc)
   972  			}
   973  			atlasAlloc := materialAlloc{
   974  				alloc:  &alloc,
   975  				offset: bounds.Min.Mul(-1),
   976  			}
   977  			atlas.allocs = append(atlas.allocs, atlasAlloc.alloc)
   978  			m.allocs[op.key] = atlasAlloc
   979  			op.matAlloc = atlasAlloc
   980  			texOps = texOps[1:]
   981  		}
   982  		if len(m.quads) == 0 {
   983  			break
   984  		}
   985  		realized := atlas.realized
   986  		if err := g.realizeAtlas(atlas, g.useCPU, atlas.packer.sizes[0]); err != nil {
   987  			return err
   988  		}
   989  		// Transform to clip space: [-1, -1] - [1, 1].
   990  		*m.uniforms.u = materialUniforms{
   991  			scale: [2]float32{2, 2},
   992  			pos:   [2]float32{-1, -1},
   993  		}
   994  		if !g.srgb {
   995  			m.uniforms.u.emulatesRGB = 1.0
   996  		}
   997  		m.uniforms.buf.Upload(byteslice.Struct(m.uniforms.u))
   998  		vertexData := byteslice.Slice(m.quads)
   999  		n := pow2Ceil(len(vertexData))
  1000  		m.buffer.ensureCapacity(false, g.ctx, driver.BufferBindingVertices, n)
  1001  		m.buffer.buffer.Upload(vertexData)
  1002  		var d driver.LoadDesc
  1003  		if !realized {
  1004  			d.Action = driver.LoadActionClear
  1005  		}
  1006  		g.ctx.PrepareTexture(imgAtlas.image)
  1007  		g.ctx.BeginRenderPass(atlas.image, d)
  1008  		g.ctx.BindTexture(0, imgAtlas.image)
  1009  		g.ctx.BindPipeline(m.pipeline)
  1010  		g.ctx.BindUniforms(m.uniforms.buf)
  1011  		g.ctx.BindVertexBuffer(m.buffer.buffer, 0)
  1012  		newAllocs := atlas.allocs[allocStart:]
  1013  		for i, a := range newAllocs {
  1014  			sz := a.rect.Size().Sub(padding)
  1015  			g.ctx.Viewport(a.rect.Min.X, a.rect.Min.Y, sz.X, sz.Y)
  1016  			g.ctx.DrawArrays(i*6, 6)
  1017  		}
  1018  		g.ctx.EndRenderPass()
  1019  		if !g.useCPU {
  1020  			continue
  1021  		}
  1022  		src := atlas.image
  1023  		data := atlas.cpuImage.Data()
  1024  		for _, a := range newAllocs {
  1025  			stride := atlas.size.X * 4
  1026  			col := a.rect.Min.X * 4
  1027  			row := stride * a.rect.Min.Y
  1028  			off := col + row
  1029  			src.ReadPixels(a.rect, data[off:], stride)
  1030  		}
  1031  	}
  1032  	return nil
  1033  }
  1034  
  1035  func (g *compute) uploadImages() error {
  1036  	for k, a := range g.imgAllocs {
  1037  		if a.dead {
  1038  			delete(g.imgAllocs, k)
  1039  		}
  1040  	}
  1041  	type upload struct {
  1042  		pos image.Point
  1043  		img *image.RGBA
  1044  	}
  1045  	var uploads []upload
  1046  	format := driver.TextureFormatSRGBA
  1047  	if !g.srgb {
  1048  		format = driver.TextureFormatRGBA8
  1049  	}
  1050  	// padding is the number of pixels added to the right and below
  1051  	// images, to avoid atlas filtering artifacts.
  1052  	const padding = 1
  1053  	texOps := g.texOps
  1054  	for len(texOps) > 0 {
  1055  		uploads = uploads[:0]
  1056  		var atlas *textureAtlas
  1057  		for len(texOps) > 0 {
  1058  			op := &texOps[0]
  1059  			if a, exists := g.imgAllocs[op.img.handle]; exists {
  1060  				g.touchAlloc(a)
  1061  				op.imgAlloc = a
  1062  				texOps = texOps[1:]
  1063  				continue
  1064  			}
  1065  			size := op.img.src.Bounds().Size().Add(image.Pt(padding, padding))
  1066  			alloc, fits := g.atlasAlloc(allocQuery{
  1067  				atlas:    atlas,
  1068  				size:     size,
  1069  				format:   format,
  1070  				bindings: driver.BufferBindingTexture | driver.BufferBindingFramebuffer,
  1071  			})
  1072  			if !fits {
  1073  				break
  1074  			}
  1075  			atlas = alloc.atlas
  1076  			if g.imgAllocs == nil {
  1077  				g.imgAllocs = make(map[interface{}]*atlasAlloc)
  1078  			}
  1079  			op.imgAlloc = &alloc
  1080  			atlas.allocs = append(atlas.allocs, op.imgAlloc)
  1081  			g.imgAllocs[op.img.handle] = op.imgAlloc
  1082  			uploads = append(uploads, upload{pos: alloc.rect.Min, img: op.img.src})
  1083  			texOps = texOps[1:]
  1084  		}
  1085  		if len(uploads) == 0 {
  1086  			break
  1087  		}
  1088  		if err := g.realizeAtlas(atlas, false, atlas.packer.sizes[0]); err != nil {
  1089  			return err
  1090  		}
  1091  		for _, u := range uploads {
  1092  			size := u.img.Bounds().Size()
  1093  			driver.UploadImage(atlas.image, u.pos, u.img)
  1094  			rightPadding := image.Pt(padding, size.Y)
  1095  			atlas.image.Upload(image.Pt(u.pos.X+size.X, u.pos.Y), rightPadding, g.zeros(rightPadding.X*rightPadding.Y*4), 0)
  1096  			bottomPadding := image.Pt(size.X, padding)
  1097  			atlas.image.Upload(image.Pt(u.pos.X, u.pos.Y+size.Y), bottomPadding, g.zeros(bottomPadding.X*bottomPadding.Y*4), 0)
  1098  		}
  1099  	}
  1100  	return nil
  1101  }
  1102  
  1103  func pow2Ceil(v int) int {
  1104  	exp := bits.Len(uint(v))
  1105  	if bits.OnesCount(uint(v)) == 1 {
  1106  		exp--
  1107  	}
  1108  	return 1 << exp
  1109  }
  1110  
  1111  // materialQuad constructs a quad that represents the transformed image. It returns the quad
  1112  // and its bounds.
  1113  func (g *compute) materialQuad(imgAtlasSize image.Point, M f32.Affine2D, img imageOpData, uvPos image.Point) [4]materialVertex {
  1114  	imgSize := layout.FPt(img.src.Bounds().Size())
  1115  	sx, hx, ox, hy, sy, oy := M.Elems()
  1116  	transOff := f32.Pt(ox, oy)
  1117  	// The 4 corners of the image rectangle transformed by M, excluding its offset, are:
  1118  	//
  1119  	// q0: M * (0, 0)   q3: M * (w, 0)
  1120  	// q1: M * (0, h)   q2: M * (w, h)
  1121  	//
  1122  	// Note that q0 = M*0 = 0, q2 = q1 + q3.
  1123  	q0 := f32.Pt(0, 0)
  1124  	q1 := f32.Pt(hx*imgSize.Y, sy*imgSize.Y)
  1125  	q3 := f32.Pt(sx*imgSize.X, hy*imgSize.X)
  1126  	q2 := q1.Add(q3)
  1127  	q0 = q0.Add(transOff)
  1128  	q1 = q1.Add(transOff)
  1129  	q2 = q2.Add(transOff)
  1130  	q3 = q3.Add(transOff)
  1131  
  1132  	uvPosf := layout.FPt(uvPos)
  1133  	atlasScale := f32.Pt(1/float32(imgAtlasSize.X), 1/float32(imgAtlasSize.Y))
  1134  	uvBounds := f32.Rectangle{
  1135  		Min: uvPosf,
  1136  		Max: uvPosf.Add(imgSize),
  1137  	}
  1138  	uvBounds.Min.X *= atlasScale.X
  1139  	uvBounds.Min.Y *= atlasScale.Y
  1140  	uvBounds.Max.X *= atlasScale.X
  1141  	uvBounds.Max.Y *= atlasScale.Y
  1142  	quad := [4]materialVertex{
  1143  		{posX: q0.X, posY: q0.Y, u: uvBounds.Min.X, v: uvBounds.Min.Y},
  1144  		{posX: q1.X, posY: q1.Y, u: uvBounds.Min.X, v: uvBounds.Max.Y},
  1145  		{posX: q2.X, posY: q2.Y, u: uvBounds.Max.X, v: uvBounds.Max.Y},
  1146  		{posX: q3.X, posY: q3.Y, u: uvBounds.Max.X, v: uvBounds.Min.Y},
  1147  	}
  1148  	return quad
  1149  }
  1150  
  1151  func quadBounds(q [4]materialVertex) f32.Rectangle {
  1152  	q0 := f32.Pt(q[0].posX, q[0].posY)
  1153  	q1 := f32.Pt(q[1].posX, q[1].posY)
  1154  	q2 := f32.Pt(q[2].posX, q[2].posY)
  1155  	q3 := f32.Pt(q[3].posX, q[3].posY)
  1156  	return f32.Rectangle{
  1157  		Min: min(min(q0, q1), min(q2, q3)),
  1158  		Max: max(max(q0, q1), max(q2, q3)),
  1159  	}
  1160  }
  1161  
  1162  func max(p1, p2 f32.Point) f32.Point {
  1163  	p := p1
  1164  	if p2.X > p.X {
  1165  		p.X = p2.X
  1166  	}
  1167  	if p2.Y > p.Y {
  1168  		p.Y = p2.Y
  1169  	}
  1170  	return p
  1171  }
  1172  
  1173  func min(p1, p2 f32.Point) f32.Point {
  1174  	p := p1
  1175  	if p2.X < p.X {
  1176  		p.X = p2.X
  1177  	}
  1178  	if p2.Y < p.Y {
  1179  		p.Y = p2.Y
  1180  	}
  1181  	return p
  1182  }
  1183  
  1184  func (enc *encoder) encodePath(verts []byte, fillMode int) {
  1185  	for ; len(verts) >= scene.CommandSize+4; verts = verts[scene.CommandSize+4:] {
  1186  		cmd := ops.DecodeCommand(verts[4:])
  1187  		if cmd.Op() == scene.OpGap {
  1188  			if fillMode != scene.FillModeNonzero {
  1189  				// Skip gaps in strokes.
  1190  				continue
  1191  			}
  1192  			// Replace them by a straight line in outlines.
  1193  			cmd = scene.Line(scene.DecodeGap(cmd))
  1194  		}
  1195  		enc.scene = append(enc.scene, cmd)
  1196  		enc.npathseg++
  1197  	}
  1198  }
  1199  
  1200  func (g *compute) render(images *textureAtlas, dst driver.Texture, cpuDst cpu.ImageDescriptor, tileDims image.Point, stride int) error {
  1201  	const (
  1202  		// wgSize is the largest and most common workgroup size.
  1203  		wgSize = 128
  1204  		// PARTITION_SIZE from elements.comp
  1205  		partitionSize = 32 * 4
  1206  	)
  1207  	widthInBins := (tileDims.X + 15) / 16
  1208  	heightInBins := (tileDims.Y + 7) / 8
  1209  	if widthInBins*heightInBins > wgSize {
  1210  		return fmt.Errorf("gpu: output too large (%dx%d)", tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx)
  1211  	}
  1212  
  1213  	enc := &g.enc
  1214  	// Pad scene with zeroes to avoid reading garbage in elements.comp.
  1215  	scenePadding := partitionSize - len(enc.scene)%partitionSize
  1216  	enc.scene = append(enc.scene, make([]scene.Command, scenePadding)...)
  1217  
  1218  	scene := byteslice.Slice(enc.scene)
  1219  	if s := len(scene); s > g.buffers.scene.size {
  1220  		paddedCap := s * 11 / 10
  1221  		if err := g.buffers.scene.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead, paddedCap); err != nil {
  1222  			return err
  1223  		}
  1224  	}
  1225  	g.buffers.scene.upload(scene)
  1226  
  1227  	// alloc is the number of allocated bytes for static buffers.
  1228  	var alloc uint32
  1229  	round := func(v, quantum int) int {
  1230  		return (v + quantum - 1) &^ (quantum - 1)
  1231  	}
  1232  	malloc := func(size int) memAlloc {
  1233  		size = round(size, 4)
  1234  		offset := alloc
  1235  		alloc += uint32(size)
  1236  		return memAlloc{offset /*, uint32(size)*/}
  1237  	}
  1238  
  1239  	*g.conf = config{
  1240  		n_elements:      uint32(enc.npath),
  1241  		n_pathseg:       uint32(enc.npathseg),
  1242  		width_in_tiles:  uint32(tileDims.X),
  1243  		height_in_tiles: uint32(tileDims.Y),
  1244  		tile_alloc:      malloc(enc.npath * pathSize),
  1245  		bin_alloc:       malloc(round(enc.npath, wgSize) * binSize),
  1246  		ptcl_alloc:      malloc(tileDims.X * tileDims.Y * ptclInitialAlloc),
  1247  		pathseg_alloc:   malloc(enc.npathseg * pathsegSize),
  1248  		anno_alloc:      malloc(enc.npath * annoSize),
  1249  		trans_alloc:     malloc(enc.ntrans * transSize),
  1250  	}
  1251  
  1252  	numPartitions := (enc.numElements() + 127) / 128
  1253  	// clearSize is the atomic partition counter plus flag and 2 states per partition.
  1254  	clearSize := 4 + numPartitions*stateStride
  1255  	if clearSize > g.buffers.state.size {
  1256  		paddedCap := clearSize * 11 / 10
  1257  		if err := g.buffers.state.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, paddedCap); err != nil {
  1258  			return err
  1259  		}
  1260  	}
  1261  
  1262  	confData := byteslice.Struct(g.conf)
  1263  	g.buffers.config.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead, len(confData))
  1264  	g.buffers.config.upload(confData)
  1265  
  1266  	minSize := int(unsafe.Sizeof(memoryHeader{})) + int(alloc)
  1267  	if minSize > g.buffers.memory.size {
  1268  		// Add space for dynamic GPU allocations.
  1269  		const sizeBump = 4 * 1024 * 1024
  1270  		minSize += sizeBump
  1271  		if err := g.buffers.memory.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, minSize); err != nil {
  1272  			return err
  1273  		}
  1274  	}
  1275  
  1276  	for {
  1277  		*g.memHeader = memoryHeader{
  1278  			mem_offset: alloc,
  1279  		}
  1280  		g.buffers.memory.upload(byteslice.Struct(g.memHeader))
  1281  		g.buffers.state.upload(g.zeros(clearSize))
  1282  
  1283  		if !g.useCPU {
  1284  			g.ctx.BeginCompute()
  1285  			g.ctx.BindImageTexture(kernel4OutputUnit, dst)
  1286  			img := g.output.nullMaterials
  1287  			if images != nil {
  1288  				img = images.image
  1289  			}
  1290  			g.ctx.BindImageTexture(kernel4AtlasUnit, img)
  1291  		} else {
  1292  			*g.output.descriptors.Binding2() = cpuDst
  1293  			if images != nil {
  1294  				*g.output.descriptors.Binding3() = images.cpuImage
  1295  			}
  1296  		}
  1297  
  1298  		g.bindBuffers()
  1299  		g.memoryBarrier()
  1300  		g.dispatch(g.programs.elements, numPartitions, 1, 1)
  1301  		g.memoryBarrier()
  1302  		g.dispatch(g.programs.tileAlloc, (enc.npath+wgSize-1)/wgSize, 1, 1)
  1303  		g.memoryBarrier()
  1304  		g.dispatch(g.programs.pathCoarse, (enc.npathseg+31)/32, 1, 1)
  1305  		g.memoryBarrier()
  1306  		g.dispatch(g.programs.backdrop, (enc.npath+wgSize-1)/wgSize, 1, 1)
  1307  		// No barrier needed between backdrop and binning.
  1308  		g.dispatch(g.programs.binning, (enc.npath+wgSize-1)/wgSize, 1, 1)
  1309  		g.memoryBarrier()
  1310  		g.dispatch(g.programs.coarse, widthInBins, heightInBins, 1)
  1311  		g.memoryBarrier()
  1312  		g.dispatch(g.programs.kernel4, tileDims.X, tileDims.Y, 1)
  1313  		g.memoryBarrier()
  1314  		if !g.useCPU {
  1315  			g.ctx.EndCompute()
  1316  		} else {
  1317  			g.dispatcher.Sync()
  1318  		}
  1319  
  1320  		if err := g.buffers.memory.download(byteslice.Struct(g.memHeader)); err != nil {
  1321  			if err == driver.ErrContentLost {
  1322  				continue
  1323  			}
  1324  			return err
  1325  		}
  1326  		switch errCode := g.memHeader.mem_error; errCode {
  1327  		case memNoError:
  1328  			if g.useCPU {
  1329  				w, h := tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx
  1330  				dst.Upload(image.Pt(0, 0), image.Pt(w, h), cpuDst.Data(), stride)
  1331  			}
  1332  			return nil
  1333  		case memMallocFailed:
  1334  			// Resize memory and try again.
  1335  			sz := g.buffers.memory.size * 15 / 10
  1336  			if err := g.buffers.memory.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, sz); err != nil {
  1337  				return err
  1338  			}
  1339  			continue
  1340  		default:
  1341  			return fmt.Errorf("compute: shader program failed with error %d", errCode)
  1342  		}
  1343  	}
  1344  }
  1345  
  1346  func (g *compute) memoryBarrier() {
  1347  	if g.useCPU {
  1348  		g.dispatcher.Barrier()
  1349  	}
  1350  }
  1351  
  1352  func (g *compute) dispatch(p computeProgram, x, y, z int) {
  1353  	if !g.useCPU {
  1354  		g.ctx.BindProgram(p.prog)
  1355  		g.ctx.DispatchCompute(x, y, z)
  1356  	} else {
  1357  		g.dispatcher.Dispatch(p.progInfo, p.descriptors, x, y, z)
  1358  	}
  1359  }
  1360  
  1361  // zeros returns a byte slice with size bytes of zeros.
  1362  func (g *compute) zeros(size int) []byte {
  1363  	if cap(g.zeroSlice) < size {
  1364  		g.zeroSlice = append(g.zeroSlice, make([]byte, size)...)
  1365  	}
  1366  	return g.zeroSlice[:size]
  1367  }
  1368  
  1369  func (g *compute) touchAlloc(a *atlasAlloc) {
  1370  	if a.dead {
  1371  		panic("re-use of dead allocation")
  1372  	}
  1373  	a.frameCount = g.frameCount
  1374  	a.atlas.lastFrame = a.frameCount
  1375  }
  1376  
  1377  func (g *compute) atlasAlloc(q allocQuery) (atlasAlloc, bool) {
  1378  	var (
  1379  		place placement
  1380  		fits  bool
  1381  		atlas = q.atlas
  1382  	)
  1383  	if atlas != nil {
  1384  		place, fits = atlas.packer.tryAdd(q.size)
  1385  		if !fits {
  1386  			atlas.compact = true
  1387  		}
  1388  	}
  1389  	if atlas == nil {
  1390  		// Look for matching atlas to re-use.
  1391  		for _, a := range g.atlases {
  1392  			if q.empty && len(a.allocs) > 0 {
  1393  				continue
  1394  			}
  1395  			if q.nocompact && a.compact {
  1396  				continue
  1397  			}
  1398  			if a.format != q.format || a.bindings&q.bindings != q.bindings {
  1399  				continue
  1400  			}
  1401  			place, fits = a.packer.tryAdd(q.size)
  1402  			if !fits {
  1403  				a.compact = true
  1404  				continue
  1405  			}
  1406  			atlas = a
  1407  			break
  1408  		}
  1409  	}
  1410  	if atlas == nil {
  1411  		atlas = &textureAtlas{
  1412  			format:   q.format,
  1413  			bindings: q.bindings,
  1414  		}
  1415  		atlas.packer.maxDims = image.Pt(g.maxTextureDim, g.maxTextureDim)
  1416  		atlas.packer.newPage()
  1417  		g.atlases = append(g.atlases, atlas)
  1418  		place, fits = atlas.packer.tryAdd(q.size)
  1419  		if !fits {
  1420  			panic(fmt.Errorf("compute: atlas allocation too large (%v)", q.size))
  1421  		}
  1422  	}
  1423  	if !fits {
  1424  		return atlasAlloc{}, false
  1425  	}
  1426  	atlas.lastFrame = g.frameCount
  1427  	return atlasAlloc{
  1428  		frameCount: g.frameCount,
  1429  		atlas:      atlas,
  1430  		rect:       image.Rectangle{Min: place.Pos, Max: place.Pos.Add(q.size)},
  1431  	}, true
  1432  }
  1433  
  1434  func (g *compute) realizeAtlas(atlas *textureAtlas, useCPU bool, size image.Point) error {
  1435  	defer func() {
  1436  		atlas.packer.maxDims = atlas.size
  1437  		atlas.realized = true
  1438  		atlas.ensureCPUImage(useCPU)
  1439  	}()
  1440  	if atlas.size.X >= size.X && atlas.size.Y >= size.Y {
  1441  		return nil
  1442  	}
  1443  	if atlas.realized {
  1444  		panic("resizing a realized atlas")
  1445  	}
  1446  	if err := atlas.resize(g.ctx, size); err != nil {
  1447  		return err
  1448  	}
  1449  	return nil
  1450  }
  1451  
  1452  func (a *textureAtlas) resize(ctx driver.Device, size image.Point) error {
  1453  	a.Release()
  1454  
  1455  	img, err := ctx.NewTexture(a.format, size.X, size.Y,
  1456  		driver.FilterNearest,
  1457  		driver.FilterNearest,
  1458  		a.bindings)
  1459  	if err != nil {
  1460  		return err
  1461  	}
  1462  	a.image = img
  1463  	a.size = size
  1464  	return nil
  1465  }
  1466  
  1467  func (a *textureAtlas) ensureCPUImage(useCPU bool) {
  1468  	if !useCPU || a.hasCPU {
  1469  		return
  1470  	}
  1471  	a.hasCPU = true
  1472  	a.cpuImage = cpu.NewImageRGBA(a.size.X, a.size.Y)
  1473  }
  1474  
  1475  func (g *compute) Release() {
  1476  	if g.useCPU {
  1477  		g.dispatcher.Stop()
  1478  	}
  1479  	type resource interface {
  1480  		Release()
  1481  	}
  1482  	res := []resource{
  1483  		g.output.nullMaterials,
  1484  		&g.programs.elements,
  1485  		&g.programs.tileAlloc,
  1486  		&g.programs.pathCoarse,
  1487  		&g.programs.backdrop,
  1488  		&g.programs.binning,
  1489  		&g.programs.coarse,
  1490  		&g.programs.kernel4,
  1491  		g.output.blitPipeline,
  1492  		&g.output.buffer,
  1493  		g.output.uniBuf,
  1494  		&g.buffers.scene,
  1495  		&g.buffers.state,
  1496  		&g.buffers.memory,
  1497  		&g.buffers.config,
  1498  		g.materials.pipeline,
  1499  		&g.materials.buffer,
  1500  		g.materials.uniforms.buf,
  1501  		g.timers.t,
  1502  	}
  1503  	for _, r := range res {
  1504  		if r != nil {
  1505  			r.Release()
  1506  		}
  1507  	}
  1508  	for _, a := range g.atlases {
  1509  		a.Release()
  1510  	}
  1511  	g.ctx.Release()
  1512  	*g = compute{}
  1513  }
  1514  
  1515  func (a *textureAtlas) Release() {
  1516  	if a.image != nil {
  1517  		a.image.Release()
  1518  		a.image = nil
  1519  	}
  1520  	a.cpuImage.Free()
  1521  	a.hasCPU = false
  1522  }
  1523  
  1524  func (g *compute) bindBuffers() {
  1525  	g.bindStorageBuffers(g.programs.elements, g.buffers.memory, g.buffers.config, g.buffers.scene, g.buffers.state)
  1526  	g.bindStorageBuffers(g.programs.tileAlloc, g.buffers.memory, g.buffers.config)
  1527  	g.bindStorageBuffers(g.programs.pathCoarse, g.buffers.memory, g.buffers.config)
  1528  	g.bindStorageBuffers(g.programs.backdrop, g.buffers.memory, g.buffers.config)
  1529  	g.bindStorageBuffers(g.programs.binning, g.buffers.memory, g.buffers.config)
  1530  	g.bindStorageBuffers(g.programs.coarse, g.buffers.memory, g.buffers.config)
  1531  	g.bindStorageBuffers(g.programs.kernel4, g.buffers.memory, g.buffers.config)
  1532  }
  1533  
  1534  func (p *computeProgram) Release() {
  1535  	if p.prog != nil {
  1536  		p.prog.Release()
  1537  	}
  1538  	*p = computeProgram{}
  1539  }
  1540  
  1541  func (b *sizedBuffer) Release() {
  1542  	if b.buffer != nil {
  1543  		b.buffer.Release()
  1544  	}
  1545  	b.cpuBuf.Free()
  1546  	*b = sizedBuffer{}
  1547  }
  1548  
  1549  func (b *sizedBuffer) ensureCapacity(useCPU bool, ctx driver.Device, binding driver.BufferBinding, size int) error {
  1550  	if b.size >= size {
  1551  		return nil
  1552  	}
  1553  	if b.buffer != nil {
  1554  		b.Release()
  1555  	}
  1556  	b.cpuBuf.Free()
  1557  	if !useCPU {
  1558  		buf, err := ctx.NewBuffer(binding, size)
  1559  		if err != nil {
  1560  			return err
  1561  		}
  1562  		b.buffer = buf
  1563  	} else {
  1564  		b.cpuBuf = cpu.NewBuffer(size)
  1565  	}
  1566  	b.size = size
  1567  	return nil
  1568  }
  1569  
  1570  func (b *sizedBuffer) download(data []byte) error {
  1571  	if b.buffer != nil {
  1572  		return b.buffer.Download(data)
  1573  	} else {
  1574  		copy(data, b.cpuBuf.Data())
  1575  		return nil
  1576  	}
  1577  }
  1578  
  1579  func (b *sizedBuffer) upload(data []byte) {
  1580  	if b.buffer != nil {
  1581  		b.buffer.Upload(data)
  1582  	} else {
  1583  		copy(b.cpuBuf.Data(), data)
  1584  	}
  1585  }
  1586  
  1587  func (g *compute) bindStorageBuffers(prog computeProgram, buffers ...sizedBuffer) {
  1588  	for i, buf := range buffers {
  1589  		if !g.useCPU {
  1590  			g.ctx.BindStorageBuffer(i, buf.buffer)
  1591  		} else {
  1592  			*prog.buffers[i] = buf.cpuBuf
  1593  		}
  1594  	}
  1595  }
  1596  
// bo is the byte order used to decode multi-byte values from encoded
// operation data.
var bo = binary.LittleEndian
  1598  
  1599  func (e *encoder) reset() {
  1600  	e.scene = e.scene[:0]
  1601  	e.npath = 0
  1602  	e.npathseg = 0
  1603  	e.ntrans = 0
  1604  }
  1605  
  1606  func (e *encoder) numElements() int {
  1607  	return len(e.scene)
  1608  }
  1609  
  1610  func (e *encoder) transform(m f32.Affine2D) {
  1611  	e.scene = append(e.scene, scene.Transform(m))
  1612  	e.ntrans++
  1613  }
  1614  
  1615  func (e *encoder) lineWidth(width float32) {
  1616  	e.scene = append(e.scene, scene.SetLineWidth(width))
  1617  }
  1618  
  1619  func (e *encoder) fillMode(mode scene.FillMode) {
  1620  	e.scene = append(e.scene, scene.SetFillMode(mode))
  1621  }
  1622  
  1623  func (e *encoder) beginClip(bbox f32.Rectangle) {
  1624  	e.scene = append(e.scene, scene.BeginClip(bbox))
  1625  	e.npath++
  1626  }
  1627  
  1628  func (e *encoder) endClip(bbox f32.Rectangle) {
  1629  	e.scene = append(e.scene, scene.EndClip(bbox))
  1630  	e.npath++
  1631  }
  1632  
  1633  func (e *encoder) rect(r f32.Rectangle) {
  1634  	// Rectangle corners, clock-wise.
  1635  	c0, c1, c2, c3 := r.Min, f32.Pt(r.Min.X, r.Max.Y), r.Max, f32.Pt(r.Max.X, r.Min.Y)
  1636  	e.line(c0, c1)
  1637  	e.line(c1, c2)
  1638  	e.line(c2, c3)
  1639  	e.line(c3, c0)
  1640  }
  1641  
  1642  func (e *encoder) fillColor(col color.RGBA) {
  1643  	e.scene = append(e.scene, scene.FillColor(col))
  1644  	e.npath++
  1645  }
  1646  
  1647  func (e *encoder) fillImage(index int, offset image.Point) {
  1648  	e.scene = append(e.scene, scene.FillImage(index, offset))
  1649  	e.npath++
  1650  }
  1651  
  1652  func (e *encoder) line(start, end f32.Point) {
  1653  	e.scene = append(e.scene, scene.Line(start, end))
  1654  	e.npathseg++
  1655  }
  1656  
  1657  func (c *collector) reset() {
  1658  	c.prevFrame, c.frame = c.frame, c.prevFrame
  1659  	c.profile = false
  1660  	c.clipStates = c.clipStates[:0]
  1661  	c.transStack = c.transStack[:0]
  1662  	c.frame.reset()
  1663  }
  1664  
  1665  func (c *opsCollector) reset() {
  1666  	c.paths = c.paths[:0]
  1667  	c.clipCmds = c.clipCmds[:0]
  1668  	c.ops = c.ops[:0]
  1669  	c.layers = c.layers[:0]
  1670  }
  1671  
  1672  func (c *collector) addClip(state *encoderState, viewport, bounds f32.Rectangle, path []byte, key ops.Key, hash uint64, strokeWidth float32, push bool) {
  1673  	// Rectangle clip regions.
  1674  	if len(path) == 0 && !push {
  1675  		// If the rectangular clip region contains a previous path it can be discarded.
  1676  		p := state.clip
  1677  		t := state.relTrans.Invert()
  1678  		for p != nil {
  1679  			// rect is the parent bounds transformed relative to the rectangle.
  1680  			rect := transformBounds(t, p.bounds)
  1681  			if rect.In(bounds) {
  1682  				return
  1683  			}
  1684  			t = p.relTrans.Invert().Mul(t)
  1685  			p = p.parent
  1686  		}
  1687  	}
  1688  
  1689  	absBounds := transformBounds(state.t, bounds).Bounds()
  1690  	intersect := absBounds
  1691  	if state.clip != nil {
  1692  		intersect = state.clip.intersect.Intersect(intersect)
  1693  	}
  1694  	c.clipStates = append(c.clipStates, clipState{
  1695  		parent:    state.clip,
  1696  		absBounds: absBounds,
  1697  		path:      path,
  1698  		pathKey:   key,
  1699  		intersect: intersect,
  1700  		clipKey: clipKey{
  1701  			bounds:      bounds,
  1702  			relTrans:    state.relTrans,
  1703  			strokeWidth: strokeWidth,
  1704  			pathHash:    hash,
  1705  		},
  1706  	})
  1707  	state.clip = &c.clipStates[len(c.clipStates)-1]
  1708  	state.relTrans = f32.Affine2D{}
  1709  }
  1710  
// collect decodes the operations in root and records one paintOp in c.frame
// for every paint operation, together with a flattened copy of the clip stack
// in effect. For paints that use an image, a textureOp is also appended to
// *texOps and its index is stored in the paintOp.
//
// viewport is the size of the output; it is installed as the outermost clip.
// A nil root is decoded as an empty list of operations.
func (c *collector) collect(root *op.Ops, viewport image.Point, texOps *[]textureOp) {
	fview := f32.Rectangle{Max: layout.FPt(viewport)}
	var intOps *ops.Ops
	if root != nil {
		intOps = &root.Internal
	}
	c.reader.Reset(intOps)
	var state encoderState
	// reset restores the drawing state to its default: opaque black color and
	// no transform or clip.
	reset := func() {
		state = encoderState{
			paintKey: paintKey{
				color: color.NRGBA{A: 0xff},
			},
		}
	}
	reset()
	r := &c.reader
	var (
		// pathData holds the most recently decoded path until a clip
		// operation consumes it.
		pathData struct {
			data []byte
			key  ops.Key
			hash uint64
		}
		// strWidth is the pending stroke width, consumed by the next clip.
		strWidth float32
	)
	// The viewport is the root of the clip stack.
	c.addClip(&state, fview, fview, nil, ops.Key{}, 0, 0, false)
	for encOp, ok := r.Decode(); ok; encOp, ok = r.Decode() {
		switch ops.OpType(encOp.Data[0]) {
		case ops.TypeProfile:
			c.profile = true
		case ops.TypeTransform:
			dop, push := ops.DecodeTransform(encOp.Data)
			if push {
				// Save the current transforms so TypePopTransform can restore them.
				c.transStack = append(c.transStack, transEntry{t: state.t, relTrans: state.relTrans})
			}
			state.t = state.t.Mul(dop)
			state.relTrans = state.relTrans.Mul(dop)
		case ops.TypePopTransform:
			n := len(c.transStack)
			st := c.transStack[n-1]
			c.transStack = c.transStack[:n-1]
			state.t = st.t
			state.relTrans = st.relTrans
		case ops.TypeStroke:
			strWidth = decodeStrokeOp(encOp.Data)
		case ops.TypePath:
			hash := bo.Uint64(encOp.Data[1:])
			// The path data is carried by the operation that follows.
			encOp, ok = r.Decode()
			if !ok {
				panic("unexpected end of path operation")
			}
			pathData.data = encOp.Data[ops.TypeAuxLen:]
			pathData.key = encOp.Key
			pathData.hash = hash
		case ops.TypeClip:
			var op ops.ClipOp
			op.Decode(encOp.Data)
			bounds := f32.FRect(op.Bounds)
			c.addClip(&state, fview, bounds, pathData.data, pathData.key, pathData.hash, strWidth, true)
			// The pending path and stroke width apply to this clip only.
			pathData.data = nil
			strWidth = 0
		case ops.TypePopClip:
			// Fold the popped clip's relative transform back into the state.
			state.relTrans = state.clip.relTrans.Mul(state.relTrans)
			state.clip = state.clip.parent
		case ops.TypeColor:
			state.matType = materialColor
			state.color = decodeColorOp(encOp.Data)
		case ops.TypeLinearGradient:
			state.matType = materialLinearGradient
			op := decodeLinearGradientOp(encOp.Data)
			state.stop1 = op.stop1
			state.stop2 = op.stop2
			state.color1 = op.color1
			state.color2 = op.color2
		case ops.TypeImage:
			state.matType = materialTexture
			state.image = decodeImageOp(encOp.Data, encOp.Refs)
		case ops.TypePaint:
			// Work on a copy so the image clip added below does not leak into
			// the state used by later operations.
			paintState := state
			if paintState.matType == materialTexture {
				// Clip to the bounds of the image, to hide other images in the atlas.
				sz := state.image.src.Rect.Size()
				bounds := f32.Rectangle{Max: layout.FPt(sz)}
				c.addClip(&paintState, fview, bounds, nil, ops.Key{}, 0, 0, false)
			}
			intersect := paintState.clip.intersect
			if intersect.Empty() {
				// Nothing is visible. Note that break leaves the switch, not
				// the decode loop.
				break
			}

			// If the paint is a uniform opaque color that takes up the whole
			// screen, it covers all previous paints and we can discard all
			// rendering commands recorded so far.
			if paintState.clip == nil && paintState.matType == materialColor && paintState.color.A == 255 {
				c.clearColor = f32color.LinearFromSRGB(paintState.color).Opaque()
				c.clear = true
				c.frame.reset()
				break
			}

			// Flatten clip stack.
			p := paintState.clip
			startIdx := len(c.frame.clipCmds)
			for p != nil {
				// Copy the path into frame-owned storage.
				idx := len(c.frame.paths)
				c.frame.paths = append(c.frame.paths, make([]byte, len(p.path))...)
				path := c.frame.paths[idx:]
				copy(path, p.path)
				c.frame.clipCmds = append(c.frame.clipCmds, clipCmd{
					state:     p.clipKey,
					path:      path,
					pathKey:   p.pathKey,
					absBounds: p.absBounds,
				})
				p = p.parent
			}
			// clipStack is ordered from the innermost clip to the outermost.
			clipStack := c.frame.clipCmds[startIdx:]
			c.frame.ops = append(c.frame.ops, paintOp{
				clipStack: clipStack,
				state:     paintState.paintKey,
				intersect: intersect,
			})
		case ops.TypeSave:
			id := ops.DecodeSave(encOp.Data)
			c.save(id, state.t)
		case ops.TypeLoad:
			// Loading starts from the default state with the saved transform.
			reset()
			id := ops.DecodeLoad(encOp.Data)
			state.t = c.states[id]
			state.relTrans = state.t
		}
	}
	// Post-process the collected paint ops.
	for i := range c.frame.ops {
		op := &c.frame.ops[i]
		// For each clip, cull rectangular clip regions that contain its
		// (transformed) bounds. addClip already handled the converse case.
		// TODO: do better than O(n²) to efficiently deal with deep stacks.
		for j := 0; j < len(op.clipStack)-1; j++ {
			cl := op.clipStack[j]
			p := cl.state
			r := transformBounds(p.relTrans, p.bounds)
			for k := j + 1; k < len(op.clipStack); k++ {
				cl2 := op.clipStack[k]
				p2 := cl2.state
				if len(cl2.path) == 0 && r.In(cl2.state.bounds) {
					// Remove entry k and fold its transform into the entry
					// that precedes it.
					op.clipStack = append(op.clipStack[:k], op.clipStack[k+1:]...)
					k--
					op.clipStack[k].state.relTrans = p2.relTrans.Mul(op.clipStack[k].state.relTrans)
				}
				r = transformRect(p2.relTrans, r)
			}
		}
		// Separate the integer offset from the first transform. Two ops that differ
		// only in integer offsets may share backing storage.
		if len(op.clipStack) > 0 {
			c := &op.clipStack[len(op.clipStack)-1]
			t := c.state.relTrans
			t, off := separateTransform(t)
			c.state.relTrans = t
			op.offset = off
			op.state.t = op.state.t.Offset(layout.FPt(off.Mul(-1)))
		}
		op.hash = c.hashOp(*op)
		// -1 marks ops without a texture.
		op.texOpIdx = -1
		switch op.state.matType {
		case materialTexture:
			op.texOpIdx = len(*texOps)
			// Separate integer offset from transformation. TextureOps that have identical transforms
			// except for their integer offsets can share a transformed image.
			t := op.state.t.Offset(layout.FPt(op.offset))
			t, off := separateTransform(t)
			bounds := op.intersect.Round().Sub(off)
			*texOps = append(*texOps, textureOp{
				img: op.state.image,
				off: off,
				key: textureKey{
					bounds:    bounds,
					transform: t,
					handle:    op.state.image.handle,
				},
			})
		}
	}
}
  1895  
  1896  func (c *collector) hashOp(op paintOp) uint64 {
  1897  	c.hasher.Reset()
  1898  	for _, cl := range op.clipStack {
  1899  		k := cl.state
  1900  		keyBytes := (*[unsafe.Sizeof(k)]byte)(unsafe.Pointer(unsafe.Pointer(&k)))
  1901  		c.hasher.Write(keyBytes[:])
  1902  	}
  1903  	k := op.state
  1904  	keyBytes := (*[unsafe.Sizeof(k)]byte)(unsafe.Pointer(unsafe.Pointer(&k)))
  1905  	c.hasher.Write(keyBytes[:])
  1906  	return c.hasher.Sum64()
  1907  }
  1908  
  1909  func (g *compute) layer(viewport image.Point, texOps []textureOp) {
  1910  	// Sort ops from previous frames by hash.
  1911  	c := &g.collector
  1912  	prevOps := c.prevFrame.ops
  1913  	c.order = c.order[:0]
  1914  	for i, op := range prevOps {
  1915  		c.order = append(c.order, hashIndex{
  1916  			index: i,
  1917  			hash:  op.hash,
  1918  		})
  1919  	}
  1920  	sort.Slice(c.order, func(i, j int) bool {
  1921  		return c.order[i].hash < c.order[j].hash
  1922  	})
  1923  	// Split layers with different materials atlas; the compute stage has only
  1924  	// one materials slot.
  1925  	splitLayer := func(ops []paintOp, prevLayerIdx int) {
  1926  		for len(ops) > 0 {
  1927  			var materials *textureAtlas
  1928  			idx := 0
  1929  			for idx < len(ops) {
  1930  				if i := ops[idx].texOpIdx; i != -1 {
  1931  					omats := texOps[i].matAlloc.alloc.atlas
  1932  					if materials != nil && omats != nil && omats != materials {
  1933  						break
  1934  					}
  1935  					materials = omats
  1936  				}
  1937  				idx++
  1938  			}
  1939  			l := layer{ops: ops[:idx], materials: materials}
  1940  			if prevLayerIdx != -1 {
  1941  				prev := c.prevFrame.layers[prevLayerIdx]
  1942  				if !prev.alloc.dead && len(prev.ops) == len(l.ops) {
  1943  					l.alloc = prev.alloc
  1944  					l.materials = prev.materials
  1945  					g.touchAlloc(l.alloc)
  1946  				}
  1947  			}
  1948  			for i, op := range l.ops {
  1949  				l.rect = l.rect.Union(op.intersect.Round())
  1950  				l.ops[i].layer = len(c.frame.layers)
  1951  			}
  1952  			c.frame.layers = append(c.frame.layers, l)
  1953  			ops = ops[idx:]
  1954  		}
  1955  	}
  1956  	ops := c.frame.ops
  1957  	idx := 0
  1958  	for idx < len(ops) {
  1959  		op := ops[idx]
  1960  		// Search for longest matching op sequence.
  1961  		// start is the earliest index of a match.
  1962  		start := searchOp(c.order, op.hash)
  1963  		layerOps, prevLayerIdx := longestLayer(prevOps, c.order[start:], ops[idx:])
  1964  		if len(layerOps) == 0 {
  1965  			idx++
  1966  			continue
  1967  		}
  1968  		if unmatched := ops[:idx]; len(unmatched) > 0 {
  1969  			// Flush layer of unmatched ops.
  1970  			splitLayer(unmatched, -1)
  1971  			ops = ops[idx:]
  1972  			idx = 0
  1973  		}
  1974  		splitLayer(layerOps, prevLayerIdx)
  1975  		ops = ops[len(layerOps):]
  1976  	}
  1977  	if len(ops) > 0 {
  1978  		splitLayer(ops, -1)
  1979  	}
  1980  }
  1981  
  1982  func longestLayer(prev []paintOp, order []hashIndex, ops []paintOp) ([]paintOp, int) {
  1983  	longest := 0
  1984  	longestIdx := -1
  1985  outer:
  1986  	for len(order) > 0 {
  1987  		first := order[0]
  1988  		order = order[1:]
  1989  		match := prev[first.index:]
  1990  		// Potential match found. Now find longest matching sequence.
  1991  		end := 0
  1992  		layer := match[0].layer
  1993  		off := match[0].offset.Sub(ops[0].offset)
  1994  		for end < len(match) && end < len(ops) {
  1995  			m := match[end]
  1996  			o := ops[end]
  1997  			// End layers on previous match.
  1998  			if m.layer != layer {
  1999  				break
  2000  			}
  2001  			// End layer when the next op doesn't match.
  2002  			if m.hash != o.hash {
  2003  				if end == 0 {
  2004  					// Hashes are sorted so if the first op doesn't match, no
  2005  					// more matches are possible.
  2006  					break outer
  2007  				}
  2008  				break
  2009  			}
  2010  			if !opEqual(off, m, o) {
  2011  				break
  2012  			}
  2013  			end++
  2014  		}
  2015  		if end > longest {
  2016  			longest = end
  2017  			longestIdx = layer
  2018  
  2019  		}
  2020  	}
  2021  	return ops[:longest], longestIdx
  2022  }
  2023  
  2024  func searchOp(order []hashIndex, hash uint64) int {
  2025  	lo, hi := 0, len(order)
  2026  	for lo < hi {
  2027  		mid := (lo + hi) / 2
  2028  		if order[mid].hash < hash {
  2029  			lo = mid + 1
  2030  		} else {
  2031  			hi = mid
  2032  		}
  2033  	}
  2034  	return lo
  2035  }
  2036  
  2037  func opEqual(off image.Point, o1 paintOp, o2 paintOp) bool {
  2038  	if len(o1.clipStack) != len(o2.clipStack) {
  2039  		return false
  2040  	}
  2041  	if o1.state != o2.state {
  2042  		return false
  2043  	}
  2044  	if o1.offset.Sub(o2.offset) != off {
  2045  		return false
  2046  	}
  2047  	for i, cl1 := range o1.clipStack {
  2048  		cl2 := o2.clipStack[i]
  2049  		if len(cl1.path) != len(cl2.path) {
  2050  			return false
  2051  		}
  2052  		if cl1.state != cl2.state {
  2053  			return false
  2054  		}
  2055  		if cl1.pathKey != cl2.pathKey && !bytes.Equal(cl1.path, cl2.path) {
  2056  			return false
  2057  		}
  2058  	}
  2059  	return true
  2060  }
  2061  
  2062  func encodeLayer(l layer, pos image.Point, viewport image.Point, enc *encoder, texOps []textureOp) {
  2063  	off := pos.Sub(l.rect.Min)
  2064  	offf := layout.FPt(off)
  2065  
  2066  	enc.transform(f32.Affine2D{}.Offset(offf))
  2067  	for _, op := range l.ops {
  2068  		encodeOp(viewport, off, enc, texOps, op)
  2069  	}
  2070  	enc.transform(f32.Affine2D{}.Offset(offf.Mul(-1)))
  2071  }
  2072  
  2073  func encodeOp(viewport image.Point, absOff image.Point, enc *encoder, texOps []textureOp, op paintOp) {
  2074  	// Fill in clip bounds, which the shaders expect to be the union
  2075  	// of all affected bounds.
  2076  	var union f32.Rectangle
  2077  	for i, cl := range op.clipStack {
  2078  		union = union.Union(cl.absBounds)
  2079  		op.clipStack[i].union = union
  2080  	}
  2081  
  2082  	absOfff := layout.FPt(absOff)
  2083  	fillMode := scene.FillModeNonzero
  2084  	opOff := layout.FPt(op.offset)
  2085  	inv := f32.Affine2D{}.Offset(opOff)
  2086  	enc.transform(inv)
  2087  	for i := len(op.clipStack) - 1; i >= 0; i-- {
  2088  		cl := op.clipStack[i]
  2089  		if w := cl.state.strokeWidth; w > 0 {
  2090  			enc.fillMode(scene.FillModeStroke)
  2091  			enc.lineWidth(w)
  2092  			fillMode = scene.FillModeStroke
  2093  		} else if fillMode != scene.FillModeNonzero {
  2094  			enc.fillMode(scene.FillModeNonzero)
  2095  			fillMode = scene.FillModeNonzero
  2096  		}
  2097  		enc.transform(cl.state.relTrans)
  2098  		inv = inv.Mul(cl.state.relTrans)
  2099  		if len(cl.path) == 0 {
  2100  			enc.rect(cl.state.bounds)
  2101  		} else {
  2102  			enc.encodePath(cl.path, fillMode)
  2103  		}
  2104  		if i != 0 {
  2105  			enc.beginClip(cl.union.Add(absOfff))
  2106  		}
  2107  	}
  2108  	if len(op.clipStack) == 0 {
  2109  		// No clipping; fill the entire view.
  2110  		enc.rect(f32.Rectangle{Max: layout.FPt(viewport)})
  2111  	}
  2112  
  2113  	switch op.state.matType {
  2114  	case materialTexture:
  2115  		texOp := texOps[op.texOpIdx]
  2116  		off := texOp.matAlloc.alloc.rect.Min.Add(texOp.matAlloc.offset).Sub(texOp.off).Sub(absOff)
  2117  		enc.fillImage(0, off)
  2118  	case materialColor:
  2119  		enc.fillColor(f32color.NRGBAToRGBA(op.state.color))
  2120  	case materialLinearGradient:
  2121  		// TODO: implement.
  2122  		enc.fillColor(f32color.NRGBAToRGBA(op.state.color1))
  2123  	default:
  2124  		panic("not implemented")
  2125  	}
  2126  	enc.transform(inv.Invert())
  2127  	// Pop the clip stack, except the first entry used for fill.
  2128  	for i := 1; i < len(op.clipStack); i++ {
  2129  		cl := op.clipStack[i]
  2130  		enc.endClip(cl.union.Add(absOfff))
  2131  	}
  2132  	if fillMode != scene.FillModeNonzero {
  2133  		enc.fillMode(scene.FillModeNonzero)
  2134  	}
  2135  }
  2136  
  2137  func (c *collector) save(id int, state f32.Affine2D) {
  2138  	if extra := id - len(c.states) + 1; extra > 0 {
  2139  		c.states = append(c.states, make([]f32.Affine2D, extra)...)
  2140  	}
  2141  	c.states[id] = state
  2142  }
  2143  
  2144  func transformBounds(t f32.Affine2D, bounds f32.Rectangle) rectangle {
  2145  	return rectangle{
  2146  		t.Transform(bounds.Min), t.Transform(f32.Pt(bounds.Max.X, bounds.Min.Y)),
  2147  		t.Transform(bounds.Max), t.Transform(f32.Pt(bounds.Min.X, bounds.Max.Y)),
  2148  	}
  2149  }
  2150  
  2151  func separateTransform(t f32.Affine2D) (f32.Affine2D, image.Point) {
  2152  	sx, hx, ox, hy, sy, oy := t.Elems()
  2153  	intx, fracx := math.Modf(float64(ox))
  2154  	inty, fracy := math.Modf(float64(oy))
  2155  	t = f32.NewAffine2D(sx, hx, float32(fracx), hy, sy, float32(fracy))
  2156  	return t, image.Pt(int(intx), int(inty))
  2157  }
  2158  
  2159  func transformRect(t f32.Affine2D, r rectangle) rectangle {
  2160  	var tr rectangle
  2161  	for i, c := range r {
  2162  		tr[i] = t.Transform(c)
  2163  	}
  2164  	return tr
  2165  }
  2166  
  2167  func (r rectangle) In(b f32.Rectangle) bool {
  2168  	for _, c := range r {
  2169  		inside := b.Min.X <= c.X && c.X <= b.Max.X &&
  2170  			b.Min.Y <= c.Y && c.Y <= b.Max.Y
  2171  		if !inside {
  2172  			return false
  2173  		}
  2174  	}
  2175  	return true
  2176  }
  2177  
  2178  func (r rectangle) Contains(b f32.Rectangle) bool {
  2179  	return true
  2180  }
  2181  
  2182  func (r rectangle) Bounds() f32.Rectangle {
  2183  	bounds := f32.Rectangle{
  2184  		Min: f32.Pt(math.MaxFloat32, math.MaxFloat32),
  2185  		Max: f32.Pt(-math.MaxFloat32, -math.MaxFloat32),
  2186  	}
  2187  	for _, c := range r {
  2188  		if c.X < bounds.Min.X {
  2189  			bounds.Min.X = c.X
  2190  		}
  2191  		if c.Y < bounds.Min.Y {
  2192  			bounds.Min.Y = c.Y
  2193  		}
  2194  		if c.X > bounds.Max.X {
  2195  			bounds.Max.X = c.X
  2196  		}
  2197  		if c.Y > bounds.Max.Y {
  2198  			bounds.Max.Y = c.Y
  2199  		}
  2200  	}
  2201  	return bounds
  2202  }