gioui.org@v0.6.1-0.20240506124620-7a9ce51988ce/gpu/compute.go (about)

     1  // SPDX-License-Identifier: Unlicense OR MIT
     2  
     3  package gpu
     4  
     5  import (
     6  	"bytes"
     7  	"encoding/binary"
     8  	"errors"
     9  	"fmt"
    10  	"hash/maphash"
    11  	"image"
    12  	"image/color"
    13  	"image/draw"
    14  	"image/png"
    15  	"math"
    16  	"math/bits"
    17  	"os"
    18  	"runtime"
    19  	"sort"
    20  	"time"
    21  	"unsafe"
    22  
    23  	"gioui.org/cpu"
    24  	"gioui.org/gpu/internal/driver"
    25  	"gioui.org/internal/byteslice"
    26  	"gioui.org/internal/f32"
    27  	"gioui.org/internal/f32color"
    28  	"gioui.org/internal/ops"
    29  	"gioui.org/internal/scene"
    30  	"gioui.org/layout"
    31  	"gioui.org/op"
    32  	"gioui.org/shader"
    33  	"gioui.org/shader/gio"
    34  	"gioui.org/shader/piet"
    35  )
    36  
// compute holds the state of the compute program based renderer: the
// driver device, the per-frame op collector and scene encoder, the compute
// programs with their buffers, the texture atlases that store images,
// materials and layers, and the resources used to blit layers to the
// output.
type compute struct {
	ctx driver.Device

	collector collector
	enc       encoder
	// texOps is reset and refilled by collect every frame.
	texOps   []textureOp
	viewport image.Point
	// maxTextureDim is the device texture size limit, clamped by newCompute.
	maxTextureDim int
	srgb          bool
	atlases       []*textureAtlas
	// frameCount is incremented by every call to Frame and is compared
	// with allocation and atlas frame stamps to determine their age.
	frameCount uint
	// moves is scratch space for compactAllocs.
	moves []atlasMove

	programs struct {
		elements   computeProgram
		tileAlloc  computeProgram
		pathCoarse computeProgram
		backdrop   computeProgram
		binning    computeProgram
		coarse     computeProgram
		kernel4    computeProgram
	}
	buffers struct {
		config sizedBuffer
		scene  sizedBuffer
		state  sizedBuffer
		memory sizedBuffer
	}
	// output holds the resources blitLayers uses to draw layers to the
	// frame's output.
	output struct {
		blitPipeline driver.Pipeline

		buffer sizedBuffer

		uniforms *copyUniforms
		uniBuf   driver.Buffer

		layerVertices []layerVertex
		// descriptors is only set by newCompute when useCPU is true.
		descriptors *piet.Kernel4DescriptorSetLayout

		// nullMaterials is a 1x1 texture that newCompute creates only
		// when the CPU fallback is not in use.
		nullMaterials driver.Texture
	}
	// imgAllocs maps imageOpData.handles to allocs.
	imgAllocs map[interface{}]*atlasAlloc
	// materials contains the pre-processed materials (transformed images for
	// now, gradients etc. later) packed in a texture atlas. The atlas is used
	// as source in kernel4.
	materials struct {
		// allocs maps texture ops to their atlases and FillImage offsets.
		allocs map[textureKey]materialAlloc

		pipeline driver.Pipeline
		buffer   sizedBuffer
		quads    []materialVertex
		uniforms struct {
			u   *materialUniforms
			buf driver.Buffer
		}
	}
	// timers are only created by code in frame that is currently disabled.
	timers struct {
		t       *timers
		compact *timer
		render  *timer
		blit    *timer
	}

	// CPU fallback fields.
	useCPU     bool
	dispatcher *dispatcher

	// The following fields hold scratch space to avoid garbage.
	zeroSlice []byte
	memHeader *memoryHeader
	conf      *config
}
   111  
// materialAlloc is the placement of a pre-processed material in an atlas.
// renderMaterials sets offset to the negated minimum corner of the
// material's bounds.
type materialAlloc struct {
	alloc  *atlasAlloc
	offset image.Point
}
   116  
// layer is a group of paint ops that is rendered into an atlas allocation
// and then blitted to the output. rect determines both the size of the
// allocation and the position of the blitted quad. alloc is nil until
// renderLayers has placed the layer. materials is the materials atlas the
// layer is rendered with, if any; a compute pass uses a single materials
// atlas.
type layer struct {
	rect      image.Rectangle
	alloc     *atlasAlloc
	ops       []paintOp
	materials *textureAtlas
}
   123  
// allocQuery describes an allocation request passed to compute.atlasAlloc.
//
// Callers pass in atlas the atlas returned by the previous allocation of
// the same pass (nil for the first one), and size, format and bindings of
// the space they need.
// NOTE(review): empty presumably requests an atlas without existing
// allocations and nocompact presumably excludes atlases marked for
// compaction; atlasAlloc is not visible here, confirm there.
type allocQuery struct {
	atlas     *textureAtlas
	size      image.Point
	empty     bool
	format    driver.TextureFormat
	bindings  driver.BufferBinding
	nocompact bool
}
   132  
// atlasAlloc is a rectangle allocated in a textureAtlas.
//
// cpu selects the CPU image of the atlas as the source when the allocation
// is moved by compactAllocs. frameCount is compared with
// compute.frameCount to determine the age of the allocation; compactAllocs
// sets dead on allocations it drops for being too old.
type atlasAlloc struct {
	atlas      *textureAtlas
	rect       image.Rectangle
	cpu        bool
	dead       bool
	frameCount uint
}
   140  
// atlasMove records the relocation of an allocation by compactAllocs: the
// pixels of srcRect in src are copied to dstPos in the destination atlas.
// If cpu is set the copy is between the CPU images of the atlases,
// otherwise between their textures.
type atlasMove struct {
	src     *textureAtlas
	dstPos  image.Point
	srcRect image.Rectangle
	cpu     bool
}
   147  
// textureAtlas is a texture, and optionally a CPU image, whose area is
// handed out as atlasAllocs by a packer.
//
// compact marks the atlas as a source for compactAllocs, which moves the
// live allocations elsewhere and resets the atlas. lastFrame is compared
// with compute.frameCount to determine the age of the atlas.
// NOTE(review): realized presumably tracks whether the backing storage
// exists for the current packer size; realizeAtlas is not visible here.
type textureAtlas struct {
	image     driver.Texture
	format    driver.TextureFormat
	bindings  driver.BufferBinding
	hasCPU    bool
	cpuImage  cpu.ImageDescriptor
	size      image.Point
	allocs    []*atlasAlloc
	packer    packer
	realized  bool
	lastFrame uint
	compact   bool
}
   161  
// copyUniforms is the uniform data uploaded by blitLayers for the blit
// pipeline. scale and pos transform vertex positions to clip space and
// uvScale transforms atlas pixel coordinates to texture space.
type copyUniforms struct {
	scale   [2]float32
	pos     [2]float32
	uvScale [2]float32
	_       [8]byte // Pad the 24 bytes above to a multiple of 16 bytes.
}
   168  
// materialUniforms is the uniform data uploaded by renderMaterials for the
// materials pipeline. emulatesRGB is set to 1 when the device lacks sRGB
// support.
type materialUniforms struct {
	scale       [2]float32
	pos         [2]float32
	emulatesRGB float32
	_           [12]byte // Pad the 20 bytes above to a multiple of 16 bytes.
}
   175  
// collector gathers the rendering state of a frame from an op list.
//
// clear and clearColor are set by compute.Clear and consumed by
// compute.frame. frame.layers is the list of layers rendered by
// renderLayers and blitted by blitLayers.
// NOTE(review): prevFrame presumably holds the previous frame's result for
// reuse; the collect implementation is not visible here.
type collector struct {
	hasher     maphash.Hash
	reader     ops.Reader
	states     []f32.Affine2D
	clear      bool
	clearColor f32color.RGBA
	clipStates []clipState
	order      []hashIndex
	transStack []transEntry
	prevFrame  opsCollector
	frame      opsCollector
}
   188  
// transEntry is the element type of collector.transStack. It stores a pair
// of affine transforms.
// NOTE(review): t is presumably the absolute transform and relTrans the
// transform relative to the enclosing clip; confirm against the collector.
type transEntry struct {
	t        f32.Affine2D
	relTrans f32.Affine2D
}
   193  
// hashIndex pairs an index with a hash. It is the element type of
// collector.order.
type hashIndex struct {
	index int
	hash  uint64
}
   198  
// opsCollector holds the path data, clip commands, paint ops and layers
// collected for a single frame. The collector keeps one for the current
// frame and one named prevFrame.
type opsCollector struct {
	paths    []byte
	clipCmds []clipCmd
	ops      []paintOp
	layers   []layer
}
   205  
// paintOp is a paint operation collected from the op list, along with the
// stack of clip commands that applies to it.
// NOTE(review): layer and texOpIdx are presumably indices into the frame's
// layers and into compute.texOps; confirm against the collector.
type paintOp struct {
	clipStack []clipCmd
	offset    image.Point
	state     paintKey
	intersect f32.Rectangle
	hash      uint64
	layer     int
	texOpIdx  int
}
   215  
// clipCmd describes a clipping command ready to be used for the compute
// pipeline.
type clipCmd struct {
	// union is the union of the bounds of the operations that are clipped.
	union     f32.Rectangle
	state     clipKey
	path      []byte
	pathKey   ops.Key
	absBounds f32.Rectangle
}
   226  
// encoderState combines a paintKey with a relative transform and the
// innermost clip state.
// NOTE(review): presumably the current drawing state tracked while
// collecting ops; its users are not visible here.
type encoderState struct {
	relTrans f32.Affine2D
	clip     *clipState

	paintKey
}
   233  
// clipKey completely describes a clip operation (along with its path) and is
// appropriate for hashing and equality checks. The path itself is
// represented by its hash, pathHash.
type clipKey struct {
	bounds      f32.Rectangle
	strokeWidth float32
	relTrans    f32.Affine2D
	pathHash    uint64
}
   242  
// paintKey completely defines a paint operation. It is suitable for hashing
// and equality checks.
type paintKey struct {
	t       f32.Affine2D
	matType materialType
	// Current paint.ImageOp, if any.
	image imageOpData
	// Current paint.ColorOp, if any.
	color color.NRGBA

	// Current paint.LinearGradientOp, if any: the two stop positions and
	// their colors.
	stop1  f32.Point
	stop2  f32.Point
	color1 color.NRGBA
	color2 color.NRGBA
}
   259  
// clipState is a node in a chain of clip operations linked through parent.
// It embeds the clipKey identifying the operation and carries the path
// data and bounds that belong to it.
type clipState struct {
	absBounds f32.Rectangle
	parent    *clipState
	path      []byte
	pathKey   ops.Key
	intersect f32.Rectangle

	clipKey
}
   269  
// layerVertex describes a vertex of a quad used to blit a layer to the
// output. blitLayers fills posX and posY from the layer rectangle and u and
// v from the layer's position in its atlas; both are scaled in the shader
// by the values in copyUniforms.
type layerVertex struct {
	posX, posY float32
	u, v       float32
}
   274  
// materialVertex describes a vertex of a quad used to render a transformed
// material. It has the same layout as layerVertex: a position followed by a
// texture coordinate, each two float32 values.
type materialVertex struct {
	posX, posY float32
	u, v       float32
}
   281  
// textureKey identifies a textureOp. It is the key of the map of material
// allocations, compute.materials.allocs, so that equal texture ops share
// one rendered material.
type textureKey struct {
	handle    interface{}
	transform f32.Affine2D
	bounds    image.Rectangle
}
   288  
// textureOp represents a paintOp that requires texture space.
type textureOp struct {
	img imageOpData
	key textureKey
	// off is the integer offset separated from key.transform to increase cache hit rate.
	off image.Point
	// matAlloc is the atlas placement for the material.
	matAlloc materialAlloc
	// imgAlloc is the atlas placement for the source image.
	imgAlloc *atlasAlloc
}
   300  
// encoder accumulates scene commands for a compute pass. renderLayers
// resets it before encoding the layers of a pass.
// NOTE(review): npath, npathseg and ntrans are presumably running counts of
// the encoded paths, path segments and transforms; the encoder methods are
// not visible here.
type encoder struct {
	scene    []scene.Command
	npath    int
	npathseg int
	ntrans   int
}
   307  
// sizedBuffer holds a GPU buffer, or its equivalent CPU memory, together
// with its size.
type sizedBuffer struct {
	size   int
	buffer driver.Buffer
	// cpuBuf is initialized when useCPU is true.
	cpuBuf cpu.BufferDescriptor
}
   315  
// computeProgram holds a compute program, or its equivalent CPU implementation.
// newCompute sets prog when running on the GPU; otherwise it sets the CPU
// fields.
type computeProgram struct {
	prog driver.Program

	// CPU fields.
	progInfo *cpu.ProgramInfo
	// descriptors points to the program specific descriptor set layout
	// allocated by newCompute; buffers are the buffer bindings of that
	// layout.
	descriptors unsafe.Pointer
	buffers     []*cpu.BufferDescriptor
}
   325  
// config matches Config in setup.h.
// NOTE(review): the field names, order and sizes presumably must stay in
// sync with the header; confirm before changing them.
type config struct {
	n_elements      uint32 // paths
	n_pathseg       uint32
	width_in_tiles  uint32
	height_in_tiles uint32
	tile_alloc      memAlloc
	bin_alloc       memAlloc
	ptcl_alloc      memAlloc
	pathseg_alloc   memAlloc
	anno_alloc      memAlloc
	trans_alloc     memAlloc
}
   339  
// memAlloc matches Alloc in mem.h. Only the offset is represented; the size
// field is commented out.
type memAlloc struct {
	offset uint32
	//size   uint32
}
   345  
// memoryHeader matches the header of Memory in mem.h.
// NOTE(review): mem_error presumably holds one of the memNoError or
// memMallocFailed constants; confirm against its reader.
type memoryHeader struct {
	mem_offset uint32
	mem_error  uint32
}
   351  
// rectangle is an oriented rectangle, represented by four points.
type rectangle [4]f32.Point
   354  
// Buffer bindings of the atlas textures. renderLayers and renderMaterials
// both request combinedBindings for their allocations.
const (
	layersBindings    = driver.BufferBindingShaderStorageWrite | driver.BufferBindingTexture
	materialsBindings = driver.BufferBindingFramebuffer | driver.BufferBindingShaderStorageRead
	// Materials and layers can share texture storage if their bindings match.
	combinedBindings = layersBindings | materialsBindings
)
   361  
// GPU structure sizes and constants.
//
// tileWidthPx and tileHeightPx are the dimensions of a tile in pixels;
// renderLayers rounds the output size up to a whole number of tiles.
// NOTE(review): the remaining sizes presumably mirror structure sizes in
// the shader sources; confirm there before changing them.
const (
	tileWidthPx       = 32
	tileHeightPx      = 32
	ptclInitialAlloc  = 1024
	kernel4OutputUnit = 2
	kernel4AtlasUnit  = 3

	pathSize    = 12
	binSize     = 8
	pathsegSize = 52
	annoSize    = 32
	transSize   = 24
	stateSize   = 60
	stateStride = 4 + 2*stateSize
)
   378  
// mem.h constants. The trailing comments name the corresponding
// identifiers in the header.
const (
	memNoError      = 0 // NO_ERROR
	memMallocFailed = 1 // ERR_MALLOC_FAILED
)
   384  
   385  func newCompute(ctx driver.Device) (*compute, error) {
   386  	caps := ctx.Caps()
   387  	maxDim := caps.MaxTextureSize
   388  	// Large atlas textures cause artifacts due to precision loss in
   389  	// shaders.
   390  	if cap := 8192; maxDim > cap {
   391  		maxDim = cap
   392  	}
   393  	// The compute programs can only span 128x64 tiles. Limit to 64 for now, and leave the
   394  	// complexity of a rectangular limit for later.
   395  	if computeCap := 4096; maxDim > computeCap {
   396  		maxDim = computeCap
   397  	}
   398  	g := &compute{
   399  		ctx:           ctx,
   400  		maxTextureDim: maxDim,
   401  		srgb:          caps.Features.Has(driver.FeatureSRGB),
   402  		conf:          new(config),
   403  		memHeader:     new(memoryHeader),
   404  	}
   405  	shaders := []struct {
   406  		prog *computeProgram
   407  		src  shader.Sources
   408  		info *cpu.ProgramInfo
   409  	}{
   410  		{&g.programs.elements, piet.Shader_elements_comp, piet.ElementsProgramInfo},
   411  		{&g.programs.tileAlloc, piet.Shader_tile_alloc_comp, piet.Tile_allocProgramInfo},
   412  		{&g.programs.pathCoarse, piet.Shader_path_coarse_comp, piet.Path_coarseProgramInfo},
   413  		{&g.programs.backdrop, piet.Shader_backdrop_comp, piet.BackdropProgramInfo},
   414  		{&g.programs.binning, piet.Shader_binning_comp, piet.BinningProgramInfo},
   415  		{&g.programs.coarse, piet.Shader_coarse_comp, piet.CoarseProgramInfo},
   416  		{&g.programs.kernel4, piet.Shader_kernel4_comp, piet.Kernel4ProgramInfo},
   417  	}
   418  	if !caps.Features.Has(driver.FeatureCompute) {
   419  		if !cpu.Supported {
   420  			return nil, errors.New("gpu: missing support for compute programs")
   421  		}
   422  		g.useCPU = true
   423  	}
   424  	if g.useCPU {
   425  		g.dispatcher = newDispatcher(runtime.NumCPU())
   426  	} else {
   427  		null, err := ctx.NewTexture(driver.TextureFormatRGBA8, 1, 1, driver.FilterNearest, driver.FilterNearest, driver.BufferBindingShaderStorageRead)
   428  		if err != nil {
   429  			g.Release()
   430  			return nil, err
   431  		}
   432  		g.output.nullMaterials = null
   433  	}
   434  
   435  	copyVert, copyFrag, err := newShaders(ctx, gio.Shader_copy_vert, gio.Shader_copy_frag)
   436  	if err != nil {
   437  		g.Release()
   438  		return nil, err
   439  	}
   440  	defer copyVert.Release()
   441  	defer copyFrag.Release()
   442  	pipe, err := ctx.NewPipeline(driver.PipelineDesc{
   443  		VertexShader:   copyVert,
   444  		FragmentShader: copyFrag,
   445  		VertexLayout: driver.VertexLayout{
   446  			Inputs: []driver.InputDesc{
   447  				{Type: shader.DataTypeFloat, Size: 2, Offset: 0},
   448  				{Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2},
   449  			},
   450  			Stride: int(unsafe.Sizeof(g.output.layerVertices[0])),
   451  		},
   452  		PixelFormat: driver.TextureFormatOutput,
   453  		BlendDesc: driver.BlendDesc{
   454  			Enable:    true,
   455  			SrcFactor: driver.BlendFactorOne,
   456  			DstFactor: driver.BlendFactorOneMinusSrcAlpha,
   457  		},
   458  		Topology: driver.TopologyTriangles,
   459  	})
   460  	if err != nil {
   461  		g.Release()
   462  		return nil, err
   463  	}
   464  	g.output.blitPipeline = pipe
   465  	g.output.uniforms = new(copyUniforms)
   466  
   467  	buf, err := ctx.NewBuffer(driver.BufferBindingUniforms, int(unsafe.Sizeof(*g.output.uniforms)))
   468  	if err != nil {
   469  		g.Release()
   470  		return nil, err
   471  	}
   472  	g.output.uniBuf = buf
   473  
   474  	materialVert, materialFrag, err := newShaders(ctx, gio.Shader_material_vert, gio.Shader_material_frag)
   475  	if err != nil {
   476  		g.Release()
   477  		return nil, err
   478  	}
   479  	defer materialVert.Release()
   480  	defer materialFrag.Release()
   481  	pipe, err = ctx.NewPipeline(driver.PipelineDesc{
   482  		VertexShader:   materialVert,
   483  		FragmentShader: materialFrag,
   484  		VertexLayout: driver.VertexLayout{
   485  			Inputs: []driver.InputDesc{
   486  				{Type: shader.DataTypeFloat, Size: 2, Offset: 0},
   487  				{Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2},
   488  			},
   489  			Stride: int(unsafe.Sizeof(g.materials.quads[0])),
   490  		},
   491  		PixelFormat: driver.TextureFormatRGBA8,
   492  		Topology:    driver.TopologyTriangles,
   493  	})
   494  	if err != nil {
   495  		g.Release()
   496  		return nil, err
   497  	}
   498  	g.materials.pipeline = pipe
   499  	g.materials.uniforms.u = new(materialUniforms)
   500  
   501  	buf, err = ctx.NewBuffer(driver.BufferBindingUniforms, int(unsafe.Sizeof(*g.materials.uniforms.u)))
   502  	if err != nil {
   503  		g.Release()
   504  		return nil, err
   505  	}
   506  	g.materials.uniforms.buf = buf
   507  
   508  	for _, shader := range shaders {
   509  		if !g.useCPU {
   510  			p, err := ctx.NewComputeProgram(shader.src)
   511  			if err != nil {
   512  				g.Release()
   513  				return nil, err
   514  			}
   515  			shader.prog.prog = p
   516  		} else {
   517  			shader.prog.progInfo = shader.info
   518  		}
   519  	}
   520  	if g.useCPU {
   521  		{
   522  			desc := new(piet.ElementsDescriptorSetLayout)
   523  			g.programs.elements.descriptors = unsafe.Pointer(desc)
   524  			g.programs.elements.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1(), desc.Binding2(), desc.Binding3()}
   525  		}
   526  		{
   527  			desc := new(piet.Tile_allocDescriptorSetLayout)
   528  			g.programs.tileAlloc.descriptors = unsafe.Pointer(desc)
   529  			g.programs.tileAlloc.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()}
   530  		}
   531  		{
   532  			desc := new(piet.Path_coarseDescriptorSetLayout)
   533  			g.programs.pathCoarse.descriptors = unsafe.Pointer(desc)
   534  			g.programs.pathCoarse.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()}
   535  		}
   536  		{
   537  			desc := new(piet.BackdropDescriptorSetLayout)
   538  			g.programs.backdrop.descriptors = unsafe.Pointer(desc)
   539  			g.programs.backdrop.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()}
   540  		}
   541  		{
   542  			desc := new(piet.BinningDescriptorSetLayout)
   543  			g.programs.binning.descriptors = unsafe.Pointer(desc)
   544  			g.programs.binning.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()}
   545  		}
   546  		{
   547  			desc := new(piet.CoarseDescriptorSetLayout)
   548  			g.programs.coarse.descriptors = unsafe.Pointer(desc)
   549  			g.programs.coarse.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()}
   550  		}
   551  		{
   552  			desc := new(piet.Kernel4DescriptorSetLayout)
   553  			g.programs.kernel4.descriptors = unsafe.Pointer(desc)
   554  			g.programs.kernel4.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()}
   555  			g.output.descriptors = desc
   556  		}
   557  	}
   558  	return g, nil
   559  }
   560  
   561  func newShaders(ctx driver.Device, vsrc, fsrc shader.Sources) (vert driver.VertexShader, frag driver.FragmentShader, err error) {
   562  	vert, err = ctx.NewVertexShader(vsrc)
   563  	if err != nil {
   564  		return
   565  	}
   566  	frag, err = ctx.NewFragmentShader(fsrc)
   567  	if err != nil {
   568  		vert.Release()
   569  	}
   570  	return
   571  }
   572  
   573  func (g *compute) Frame(frameOps *op.Ops, target RenderTarget, viewport image.Point) error {
   574  	g.frameCount++
   575  	g.collect(viewport, frameOps)
   576  	return g.frame(target)
   577  }
   578  
   579  func (g *compute) collect(viewport image.Point, ops *op.Ops) {
   580  	g.viewport = viewport
   581  	g.collector.reset()
   582  
   583  	g.texOps = g.texOps[:0]
   584  	g.collector.collect(ops, viewport, &g.texOps)
   585  }
   586  
   587  func (g *compute) Clear(col color.NRGBA) {
   588  	g.collector.clear = true
   589  	g.collector.clearColor = f32color.LinearFromSRGB(col)
   590  }
   591  
// frame draws the content gathered by collect to target. In order, it
// uploads images, renders materials, calls g.layer with the texture ops,
// renders the layers into atlases, blits them to the frame's output and
// compacts the atlas allocations. The first error aborts the frame.
func (g *compute) frame(target RenderTarget) error {
	viewport := g.viewport
	defFBO := g.ctx.BeginFrame(target, g.collector.clear, viewport)
	// EndFrame runs on every return path, including the error returns.
	defer g.ctx.EndFrame()

	t := &g.timers
	// Timer creation is disabled by the constant false operand.
	if false && t.t == nil && g.ctx.Caps().Features.Has(driver.FeatureTimers) {
		t.t = newTimers(g.ctx)
		t.compact = t.t.newTimer()
		t.render = t.t.newTimer()
		t.blit = t.t.newTimer()
	}

	if err := g.uploadImages(); err != nil {
		return err
	}
	if err := g.renderMaterials(); err != nil {
		return err
	}
	g.layer(viewport, g.texOps)
	// NOTE(review): with timer creation disabled above, the timers are nil
	// here; begin and end presumably accept nil receivers. Confirm before
	// relying on it.
	t.render.begin()
	if err := g.renderLayers(viewport); err != nil {
		return err
	}
	t.render.end()
	d := driver.LoadDesc{
		ClearColor: g.collector.clearColor,
	}
	// A clear requested through Clear applies to a single frame only.
	if g.collector.clear {
		g.collector.clear = false
		d.Action = driver.LoadActionClear
	}
	t.blit.begin()
	g.blitLayers(d, defFBO, viewport)
	t.blit.end()
	t.compact.begin()
	if err := g.compactAllocs(); err != nil {
		return err
	}
	t.compact.end()
	// Profile reporting is disabled as well; the body only rounds the
	// timings and discards them.
	if false && t.t.ready() {
		com, ren, blit := t.compact.Elapsed, t.render.Elapsed, t.blit.Elapsed
		ft := com + ren + blit
		q := 100 * time.Microsecond
		ft = ft.Round(q)
		com, ren, blit = com.Round(q), ren.Round(q), blit.Round(q)
		// t.profile = fmt.Sprintf("ft:%7s com: %7s ren:%7s blit:%7s", ft, com, ren, blit)
	}
	return nil
}
   642  
   643  func (g *compute) dumpAtlases() {
   644  	for i, a := range g.atlases {
   645  		dump := image.NewRGBA(image.Rectangle{Max: a.size})
   646  		err := driver.DownloadImage(g.ctx, a.image, dump)
   647  		if err != nil {
   648  			panic(err)
   649  		}
   650  		nrgba := image.NewNRGBA(dump.Bounds())
   651  		draw.Draw(nrgba, image.Rectangle{}, dump, image.Point{}, draw.Src)
   652  		var buf bytes.Buffer
   653  		if err := png.Encode(&buf, nrgba); err != nil {
   654  			panic(err)
   655  		}
   656  		if err := os.WriteFile(fmt.Sprintf("dump-%d.png", i), buf.Bytes(), 0600); err != nil {
   657  			panic(err)
   658  		}
   659  	}
   660  }
   661  
   662  func (g *compute) compactAllocs() error {
   663  	const (
   664  		maxAllocAge = 3
   665  		maxAtlasAge = 10
   666  	)
   667  	atlases := g.atlases
   668  	for _, a := range atlases {
   669  		if len(a.allocs) > 0 && g.frameCount-a.lastFrame > maxAtlasAge {
   670  			a.compact = true
   671  		}
   672  	}
   673  	for len(atlases) > 0 {
   674  		var (
   675  			dstAtlas *textureAtlas
   676  			format   driver.TextureFormat
   677  			bindings driver.BufferBinding
   678  		)
   679  		g.moves = g.moves[:0]
   680  		addedLayers := false
   681  		useCPU := false
   682  	fill:
   683  		for len(atlases) > 0 {
   684  			srcAtlas := atlases[0]
   685  			allocs := srcAtlas.allocs
   686  			if !srcAtlas.compact {
   687  				atlases = atlases[1:]
   688  				continue
   689  			}
   690  			if addedLayers && (format != srcAtlas.format || srcAtlas.bindings&bindings != srcAtlas.bindings) {
   691  				break
   692  			}
   693  			format = srcAtlas.format
   694  			bindings = srcAtlas.bindings
   695  			for len(srcAtlas.allocs) > 0 {
   696  				a := srcAtlas.allocs[0]
   697  				n := len(srcAtlas.allocs)
   698  				if g.frameCount-a.frameCount > maxAllocAge {
   699  					a.dead = true
   700  					srcAtlas.allocs[0] = srcAtlas.allocs[n-1]
   701  					srcAtlas.allocs = srcAtlas.allocs[:n-1]
   702  					continue
   703  				}
   704  				size := a.rect.Size()
   705  				alloc, fits := g.atlasAlloc(allocQuery{
   706  					atlas:     dstAtlas,
   707  					size:      size,
   708  					format:    format,
   709  					bindings:  bindings,
   710  					nocompact: true,
   711  				})
   712  				if !fits {
   713  					break fill
   714  				}
   715  				dstAtlas = alloc.atlas
   716  				allocs = append(allocs, a)
   717  				addedLayers = true
   718  				useCPU = useCPU || a.cpu
   719  				dstAtlas.allocs = append(dstAtlas.allocs, a)
   720  				pos := alloc.rect.Min
   721  				g.moves = append(g.moves, atlasMove{
   722  					src: srcAtlas, dstPos: pos, srcRect: a.rect, cpu: a.cpu,
   723  				})
   724  				a.atlas = dstAtlas
   725  				a.rect = image.Rectangle{Min: pos, Max: pos.Add(a.rect.Size())}
   726  				srcAtlas.allocs[0] = srcAtlas.allocs[n-1]
   727  				srcAtlas.allocs = srcAtlas.allocs[:n-1]
   728  			}
   729  			srcAtlas.compact = false
   730  			srcAtlas.realized = false
   731  			srcAtlas.packer.clear()
   732  			srcAtlas.packer.newPage()
   733  			srcAtlas.packer.maxDims = image.Pt(g.maxTextureDim, g.maxTextureDim)
   734  			atlases = atlases[1:]
   735  		}
   736  		if !addedLayers {
   737  			break
   738  		}
   739  		outputSize := dstAtlas.packer.sizes[0]
   740  		if err := g.realizeAtlas(dstAtlas, useCPU, outputSize); err != nil {
   741  			return err
   742  		}
   743  		for _, move := range g.moves {
   744  			if !move.cpu {
   745  				g.ctx.CopyTexture(dstAtlas.image, move.dstPos, move.src.image, move.srcRect)
   746  			} else {
   747  				src := move.src.cpuImage.Data()
   748  				dst := dstAtlas.cpuImage.Data()
   749  				sstride := move.src.size.X * 4
   750  				dstride := dstAtlas.size.X * 4
   751  				copyImage(dst, dstride, move.dstPos, src, sstride, move.srcRect)
   752  			}
   753  		}
   754  	}
   755  	for i := len(g.atlases) - 1; i >= 0; i-- {
   756  		a := g.atlases[i]
   757  		if len(a.allocs) == 0 && g.frameCount-a.lastFrame > maxAtlasAge {
   758  			a.Release()
   759  			n := len(g.atlases)
   760  			g.atlases[i] = g.atlases[n-1]
   761  			g.atlases = g.atlases[:n-1]
   762  		}
   763  	}
   764  	return nil
   765  }
   766  
   767  func copyImage(dst []byte, dstStride int, dstPos image.Point, src []byte, srcStride int, srcRect image.Rectangle) {
   768  	sz := srcRect.Size()
   769  	soff := srcRect.Min.Y*srcStride + srcRect.Min.X*4
   770  	doff := dstPos.Y*dstStride + dstPos.X*4
   771  	rowLen := sz.X * 4
   772  	for y := 0; y < sz.Y; y++ {
   773  		srow := src[soff : soff+rowLen]
   774  		drow := dst[doff : doff+rowLen]
   775  		copy(drow, srow)
   776  		soff += srcStride
   777  		doff += dstStride
   778  	}
   779  }
   780  
// renderLayers renders the layers of the current frame that have no atlas
// allocation yet. The layers are processed in passes; a pass covers as many
// consecutive layers as fit into a single output atlas and share a single
// materials atlas.
//
// It returns the first error from realizing the output atlas or from
// rendering a pass.
func (g *compute) renderLayers(viewport image.Point) error {
	layers := g.collector.frame.layers
	for len(layers) > 0 {
		var materials, dst *textureAtlas
		addedLayers := false
		g.enc.reset()
		for len(layers) > 0 {
			l := &layers[0]
			// Layers that already have an allocation are skipped.
			if l.alloc != nil {
				layers = layers[1:]
				continue
			}
			if materials != nil {
				if l.materials != nil && materials != l.materials {
					// Only one materials texture per compute pass.
					break
				}
			} else {
				materials = l.materials
			}
			size := l.rect.Size()
			alloc, fits := g.atlasAlloc(allocQuery{
				atlas:    dst,
				empty:    true,
				format:   driver.TextureFormatRGBA8,
				bindings: combinedBindings,
				// Pad to avoid overlap.
				size: size.Add(image.Pt(1, 1)),
			})
			if !fits {
				// Only one output atlas per compute pass.
				break
			}
			dst = alloc.atlas
			// Atlases with rendered layers are candidates for compaction.
			dst.compact = true
			addedLayers = true
			l.alloc = &alloc
			dst.allocs = append(dst.allocs, l.alloc)
			encodeLayer(*l, alloc.rect.Min, viewport, &g.enc, g.texOps)
			layers = layers[1:]
		}
		// NOTE(review): if the first unallocated layer of a pass does not
		// fit, the loop ends here and that layer keeps a nil alloc, which
		// blitLayers dereferences. Presumably layers never exceed the
		// maximum atlas size; confirm.
		if !addedLayers {
			break
		}
		// Round the output size up to a whole number of tiles.
		outputSize := dst.packer.sizes[0]
		tileDims := image.Point{
			X: (outputSize.X + tileWidthPx - 1) / tileWidthPx,
			Y: (outputSize.Y + tileHeightPx - 1) / tileHeightPx,
		}
		w, h := tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx
		if err := g.realizeAtlas(dst, g.useCPU, image.Pt(w, h)); err != nil {
			return err
		}
		// The last argument is the byte length of a row of the output at 4
		// bytes per pixel.
		if err := g.render(materials, dst.image, dst.cpuImage, tileDims, dst.size.X*4); err != nil {
			return err
		}
	}
	return nil
}
   840  
// blitLayers draws the layers of the current frame from their atlases to
// fbo in a single render pass started with the load description d.
// Consecutive layers stored in the same atlas are drawn with a single draw
// call. The render pass is begun and ended even when there are no layers,
// so that the load action in d is applied.
func (g *compute) blitLayers(d driver.LoadDesc, fbo driver.Texture, viewport image.Point) {
	layers := g.collector.frame.layers
	g.output.layerVertices = g.output.layerVertices[:0]
	for _, l := range layers {
		// Positions are in viewport pixels and texture coordinates in atlas
		// pixels; both are scaled by the uniforms set below.
		placef := layout.FPt(l.alloc.rect.Min)
		sizef := layout.FPt(l.rect.Size())
		r := f32.FRect(l.rect)
		quad := [4]layerVertex{
			{posX: float32(r.Min.X), posY: float32(r.Min.Y), u: placef.X, v: placef.Y},
			{posX: float32(r.Max.X), posY: float32(r.Min.Y), u: placef.X + sizef.X, v: placef.Y},
			{posX: float32(r.Max.X), posY: float32(r.Max.Y), u: placef.X + sizef.X, v: placef.Y + sizef.Y},
			{posX: float32(r.Min.X), posY: float32(r.Max.Y), u: placef.X, v: placef.Y + sizef.Y},
		}
		// Draw the quad as two triangles.
		g.output.layerVertices = append(g.output.layerVertices, quad[0], quad[1], quad[3], quad[3], quad[2], quad[1])
		g.ctx.PrepareTexture(l.alloc.atlas.image)
	}
	if len(g.output.layerVertices) > 0 {
		vertexData := byteslice.Slice(g.output.layerVertices)
		// NOTE(review): any result of ensureCapacity is not checked here;
		// confirm whether it can fail.
		g.output.buffer.ensureCapacity(false, g.ctx, driver.BufferBindingVertices, len(vertexData))
		g.output.buffer.buffer.Upload(vertexData)
	}
	g.ctx.BeginRenderPass(fbo, d)
	defer g.ctx.EndRenderPass()
	if len(layers) == 0 {
		return
	}
	g.ctx.Viewport(0, 0, viewport.X, viewport.Y)
	g.ctx.BindPipeline(g.output.blitPipeline)
	g.ctx.BindVertexBuffer(g.output.buffer.buffer, 0)
	// start is the index of the first vertex of the current batch.
	start := 0
	for len(layers) > 0 {
		// Gather the run of consecutive layers that share an atlas.
		count := 0
		atlas := layers[0].alloc.atlas
		for len(layers) > 0 {
			l := layers[0]
			if l.alloc.atlas != atlas {
				break
			}
			layers = layers[1:]
			const verticesPerQuad = 6
			count += verticesPerQuad
		}

		// Transform positions to clip space: [-1, -1] - [1, 1], and texture
		// coordinates to texture space: [0, 0] - [1, 1].
		clip := f32.Affine2D{}.Scale(f32.Pt(0, 0), f32.Pt(2/float32(viewport.X), 2/float32(viewport.Y))).Offset(f32.Pt(-1, -1))
		sx, _, ox, _, sy, oy := clip.Elems()
		g.output.uniforms.scale = [2]float32{sx, sy}
		g.output.uniforms.pos = [2]float32{ox, oy}
		g.output.uniforms.uvScale = [2]float32{1 / float32(atlas.size.X), 1 / float32(atlas.size.Y)}
		g.output.uniBuf.Upload(byteslice.Struct(g.output.uniforms))
		g.ctx.BindUniforms(g.output.uniBuf)
		g.ctx.BindTexture(0, atlas.image)
		g.ctx.DrawArrays(start, count)
		start += count
	}
}
   898  
   899  func (g *compute) renderMaterials() error {
   900  	m := &g.materials
   901  	for k, place := range m.allocs {
   902  		if place.alloc.dead {
   903  			delete(m.allocs, k)
   904  		}
   905  	}
   906  	texOps := g.texOps
   907  	for len(texOps) > 0 {
   908  		m.quads = m.quads[:0]
   909  		var (
   910  			atlas    *textureAtlas
   911  			imgAtlas *textureAtlas
   912  		)
   913  		// A material is clipped to avoid drawing outside its atlas bounds.
   914  		// However, imprecision in the clipping may cause a single pixel
   915  		// overflow.
   916  		var padding = image.Pt(1, 1)
   917  		var allocStart int
   918  		for len(texOps) > 0 {
   919  			op := &texOps[0]
   920  			if a, exists := m.allocs[op.key]; exists {
   921  				g.touchAlloc(a.alloc)
   922  				op.matAlloc = a
   923  				texOps = texOps[1:]
   924  				continue
   925  			}
   926  
   927  			if imgAtlas != nil && op.imgAlloc.atlas != imgAtlas {
   928  				// Only one image atlas per render pass.
   929  				break
   930  			}
   931  			imgAtlas = op.imgAlloc.atlas
   932  			quad := g.materialQuad(imgAtlas.size, op.key.transform, op.img, op.imgAlloc.rect.Min)
   933  			boundsf := quadBounds(quad)
   934  			bounds := boundsf.Round()
   935  			bounds = bounds.Intersect(op.key.bounds)
   936  
   937  			size := bounds.Size()
   938  			alloc, fits := g.atlasAlloc(allocQuery{
   939  				atlas:    atlas,
   940  				size:     size.Add(padding),
   941  				format:   driver.TextureFormatRGBA8,
   942  				bindings: combinedBindings,
   943  			})
   944  			if !fits {
   945  				break
   946  			}
   947  			if atlas == nil {
   948  				allocStart = len(alloc.atlas.allocs)
   949  			}
   950  			atlas = alloc.atlas
   951  			alloc.cpu = g.useCPU
   952  			offsetf := layout.FPt(bounds.Min.Mul(-1))
   953  			scale := f32.Pt(float32(size.X), float32(size.Y))
   954  			for i := range quad {
   955  				// Position quad to match place.
   956  				quad[i].posX += offsetf.X
   957  				quad[i].posY += offsetf.Y
   958  				// Scale to match viewport [0, 1].
   959  				quad[i].posX /= scale.X
   960  				quad[i].posY /= scale.Y
   961  			}
   962  			// Draw quad as two triangles.
   963  			m.quads = append(m.quads, quad[0], quad[1], quad[3], quad[3], quad[1], quad[2])
   964  			if m.allocs == nil {
   965  				m.allocs = make(map[textureKey]materialAlloc)
   966  			}
   967  			atlasAlloc := materialAlloc{
   968  				alloc:  &alloc,
   969  				offset: bounds.Min.Mul(-1),
   970  			}
   971  			atlas.allocs = append(atlas.allocs, atlasAlloc.alloc)
   972  			m.allocs[op.key] = atlasAlloc
   973  			op.matAlloc = atlasAlloc
   974  			texOps = texOps[1:]
   975  		}
   976  		if len(m.quads) == 0 {
   977  			break
   978  		}
   979  		realized := atlas.realized
   980  		if err := g.realizeAtlas(atlas, g.useCPU, atlas.packer.sizes[0]); err != nil {
   981  			return err
   982  		}
   983  		// Transform to clip space: [-1, -1] - [1, 1].
   984  		*m.uniforms.u = materialUniforms{
   985  			scale: [2]float32{2, 2},
   986  			pos:   [2]float32{-1, -1},
   987  		}
   988  		if !g.srgb {
   989  			m.uniforms.u.emulatesRGB = 1.0
   990  		}
   991  		m.uniforms.buf.Upload(byteslice.Struct(m.uniforms.u))
   992  		vertexData := byteslice.Slice(m.quads)
   993  		n := pow2Ceil(len(vertexData))
   994  		m.buffer.ensureCapacity(false, g.ctx, driver.BufferBindingVertices, n)
   995  		m.buffer.buffer.Upload(vertexData)
   996  		var d driver.LoadDesc
   997  		if !realized {
   998  			d.Action = driver.LoadActionClear
   999  		}
  1000  		g.ctx.PrepareTexture(imgAtlas.image)
  1001  		g.ctx.BeginRenderPass(atlas.image, d)
  1002  		g.ctx.BindTexture(0, imgAtlas.image)
  1003  		g.ctx.BindPipeline(m.pipeline)
  1004  		g.ctx.BindUniforms(m.uniforms.buf)
  1005  		g.ctx.BindVertexBuffer(m.buffer.buffer, 0)
  1006  		newAllocs := atlas.allocs[allocStart:]
  1007  		for i, a := range newAllocs {
  1008  			sz := a.rect.Size().Sub(padding)
  1009  			g.ctx.Viewport(a.rect.Min.X, a.rect.Min.Y, sz.X, sz.Y)
  1010  			g.ctx.DrawArrays(i*6, 6)
  1011  		}
  1012  		g.ctx.EndRenderPass()
  1013  		if !g.useCPU {
  1014  			continue
  1015  		}
  1016  		src := atlas.image
  1017  		data := atlas.cpuImage.Data()
  1018  		for _, a := range newAllocs {
  1019  			stride := atlas.size.X * 4
  1020  			col := a.rect.Min.X * 4
  1021  			row := stride * a.rect.Min.Y
  1022  			off := col + row
  1023  			src.ReadPixels(a.rect, data[off:], stride)
  1024  		}
  1025  	}
  1026  	return nil
  1027  }
  1028  
  1029  func (g *compute) uploadImages() error {
  1030  	for k, a := range g.imgAllocs {
  1031  		if a.dead {
  1032  			delete(g.imgAllocs, k)
  1033  		}
  1034  	}
  1035  	type upload struct {
  1036  		pos image.Point
  1037  		img *image.RGBA
  1038  	}
  1039  	var uploads []upload
  1040  	format := driver.TextureFormatSRGBA
  1041  	if !g.srgb {
  1042  		format = driver.TextureFormatRGBA8
  1043  	}
  1044  	// padding is the number of pixels added to the right and below
  1045  	// images, to avoid atlas filtering artifacts.
  1046  	const padding = 1
  1047  	texOps := g.texOps
  1048  	for len(texOps) > 0 {
  1049  		uploads = uploads[:0]
  1050  		var atlas *textureAtlas
  1051  		for len(texOps) > 0 {
  1052  			op := &texOps[0]
  1053  			if a, exists := g.imgAllocs[op.img.handle]; exists {
  1054  				g.touchAlloc(a)
  1055  				op.imgAlloc = a
  1056  				texOps = texOps[1:]
  1057  				continue
  1058  			}
  1059  			size := op.img.src.Bounds().Size().Add(image.Pt(padding, padding))
  1060  			alloc, fits := g.atlasAlloc(allocQuery{
  1061  				atlas:    atlas,
  1062  				size:     size,
  1063  				format:   format,
  1064  				bindings: driver.BufferBindingTexture | driver.BufferBindingFramebuffer,
  1065  			})
  1066  			if !fits {
  1067  				break
  1068  			}
  1069  			atlas = alloc.atlas
  1070  			if g.imgAllocs == nil {
  1071  				g.imgAllocs = make(map[interface{}]*atlasAlloc)
  1072  			}
  1073  			op.imgAlloc = &alloc
  1074  			atlas.allocs = append(atlas.allocs, op.imgAlloc)
  1075  			g.imgAllocs[op.img.handle] = op.imgAlloc
  1076  			uploads = append(uploads, upload{pos: alloc.rect.Min, img: op.img.src})
  1077  			texOps = texOps[1:]
  1078  		}
  1079  		if len(uploads) == 0 {
  1080  			break
  1081  		}
  1082  		if err := g.realizeAtlas(atlas, false, atlas.packer.sizes[0]); err != nil {
  1083  			return err
  1084  		}
  1085  		for _, u := range uploads {
  1086  			size := u.img.Bounds().Size()
  1087  			driver.UploadImage(atlas.image, u.pos, u.img)
  1088  			rightPadding := image.Pt(padding, size.Y)
  1089  			atlas.image.Upload(image.Pt(u.pos.X+size.X, u.pos.Y), rightPadding, g.zeros(rightPadding.X*rightPadding.Y*4), 0)
  1090  			bottomPadding := image.Pt(size.X, padding)
  1091  			atlas.image.Upload(image.Pt(u.pos.X, u.pos.Y+size.Y), bottomPadding, g.zeros(bottomPadding.X*bottomPadding.Y*4), 0)
  1092  		}
  1093  	}
  1094  	return nil
  1095  }
  1096  
  1097  func pow2Ceil(v int) int {
  1098  	exp := bits.Len(uint(v))
  1099  	if bits.OnesCount(uint(v)) == 1 {
  1100  		exp--
  1101  	}
  1102  	return 1 << exp
  1103  }
  1104  
  1105  // materialQuad constructs a quad that represents the transformed image. It returns the quad
  1106  // and its bounds.
  1107  func (g *compute) materialQuad(imgAtlasSize image.Point, M f32.Affine2D, img imageOpData, uvPos image.Point) [4]materialVertex {
  1108  	imgSize := layout.FPt(img.src.Bounds().Size())
  1109  	sx, hx, ox, hy, sy, oy := M.Elems()
  1110  	transOff := f32.Pt(ox, oy)
  1111  	// The 4 corners of the image rectangle transformed by M, excluding its offset, are:
  1112  	//
  1113  	// q0: M * (0, 0)   q3: M * (w, 0)
  1114  	// q1: M * (0, h)   q2: M * (w, h)
  1115  	//
  1116  	// Note that q0 = M*0 = 0, q2 = q1 + q3.
  1117  	q0 := f32.Pt(0, 0)
  1118  	q1 := f32.Pt(hx*imgSize.Y, sy*imgSize.Y)
  1119  	q3 := f32.Pt(sx*imgSize.X, hy*imgSize.X)
  1120  	q2 := q1.Add(q3)
  1121  	q0 = q0.Add(transOff)
  1122  	q1 = q1.Add(transOff)
  1123  	q2 = q2.Add(transOff)
  1124  	q3 = q3.Add(transOff)
  1125  
  1126  	uvPosf := layout.FPt(uvPos)
  1127  	atlasScale := f32.Pt(1/float32(imgAtlasSize.X), 1/float32(imgAtlasSize.Y))
  1128  	uvBounds := f32.Rectangle{
  1129  		Min: uvPosf,
  1130  		Max: uvPosf.Add(imgSize),
  1131  	}
  1132  	uvBounds.Min.X *= atlasScale.X
  1133  	uvBounds.Min.Y *= atlasScale.Y
  1134  	uvBounds.Max.X *= atlasScale.X
  1135  	uvBounds.Max.Y *= atlasScale.Y
  1136  	quad := [4]materialVertex{
  1137  		{posX: q0.X, posY: q0.Y, u: uvBounds.Min.X, v: uvBounds.Min.Y},
  1138  		{posX: q1.X, posY: q1.Y, u: uvBounds.Min.X, v: uvBounds.Max.Y},
  1139  		{posX: q2.X, posY: q2.Y, u: uvBounds.Max.X, v: uvBounds.Max.Y},
  1140  		{posX: q3.X, posY: q3.Y, u: uvBounds.Max.X, v: uvBounds.Min.Y},
  1141  	}
  1142  	return quad
  1143  }
  1144  
  1145  func quadBounds(q [4]materialVertex) f32.Rectangle {
  1146  	q0 := f32.Pt(q[0].posX, q[0].posY)
  1147  	q1 := f32.Pt(q[1].posX, q[1].posY)
  1148  	q2 := f32.Pt(q[2].posX, q[2].posY)
  1149  	q3 := f32.Pt(q[3].posX, q[3].posY)
  1150  	return f32.Rectangle{
  1151  		Min: min(min(q0, q1), min(q2, q3)),
  1152  		Max: max(max(q0, q1), max(q2, q3)),
  1153  	}
  1154  }
  1155  
  1156  func max(p1, p2 f32.Point) f32.Point {
  1157  	p := p1
  1158  	if p2.X > p.X {
  1159  		p.X = p2.X
  1160  	}
  1161  	if p2.Y > p.Y {
  1162  		p.Y = p2.Y
  1163  	}
  1164  	return p
  1165  }
  1166  
  1167  func min(p1, p2 f32.Point) f32.Point {
  1168  	p := p1
  1169  	if p2.X < p.X {
  1170  		p.X = p2.X
  1171  	}
  1172  	if p2.Y < p.Y {
  1173  		p.Y = p2.Y
  1174  	}
  1175  	return p
  1176  }
  1177  
  1178  func (enc *encoder) encodePath(verts []byte, fillMode int) {
  1179  	for ; len(verts) >= scene.CommandSize+4; verts = verts[scene.CommandSize+4:] {
  1180  		cmd := ops.DecodeCommand(verts[4:])
  1181  		if cmd.Op() == scene.OpGap {
  1182  			if fillMode != scene.FillModeNonzero {
  1183  				// Skip gaps in strokes.
  1184  				continue
  1185  			}
  1186  			// Replace them by a straight line in outlines.
  1187  			cmd = scene.Line(scene.DecodeGap(cmd))
  1188  		}
  1189  		enc.scene = append(enc.scene, cmd)
  1190  		enc.npathseg++
  1191  	}
  1192  }
  1193  
  1194  func (g *compute) render(images *textureAtlas, dst driver.Texture, cpuDst cpu.ImageDescriptor, tileDims image.Point, stride int) error {
  1195  	const (
  1196  		// wgSize is the largest and most common workgroup size.
  1197  		wgSize = 128
  1198  		// PARTITION_SIZE from elements.comp
  1199  		partitionSize = 32 * 4
  1200  	)
  1201  	widthInBins := (tileDims.X + 15) / 16
  1202  	heightInBins := (tileDims.Y + 7) / 8
  1203  	if widthInBins*heightInBins > wgSize {
  1204  		return fmt.Errorf("gpu: output too large (%dx%d)", tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx)
  1205  	}
  1206  
  1207  	enc := &g.enc
  1208  	// Pad scene with zeroes to avoid reading garbage in elements.comp.
  1209  	scenePadding := partitionSize - len(enc.scene)%partitionSize
  1210  	enc.scene = append(enc.scene, make([]scene.Command, scenePadding)...)
  1211  
  1212  	scene := byteslice.Slice(enc.scene)
  1213  	if s := len(scene); s > g.buffers.scene.size {
  1214  		paddedCap := s * 11 / 10
  1215  		if err := g.buffers.scene.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead, paddedCap); err != nil {
  1216  			return err
  1217  		}
  1218  	}
  1219  	g.buffers.scene.upload(scene)
  1220  
  1221  	// alloc is the number of allocated bytes for static buffers.
  1222  	var alloc uint32
  1223  	round := func(v, quantum int) int {
  1224  		return (v + quantum - 1) &^ (quantum - 1)
  1225  	}
  1226  	malloc := func(size int) memAlloc {
  1227  		size = round(size, 4)
  1228  		offset := alloc
  1229  		alloc += uint32(size)
  1230  		return memAlloc{offset /*, uint32(size)*/}
  1231  	}
  1232  
  1233  	*g.conf = config{
  1234  		n_elements:      uint32(enc.npath),
  1235  		n_pathseg:       uint32(enc.npathseg),
  1236  		width_in_tiles:  uint32(tileDims.X),
  1237  		height_in_tiles: uint32(tileDims.Y),
  1238  		tile_alloc:      malloc(enc.npath * pathSize),
  1239  		bin_alloc:       malloc(round(enc.npath, wgSize) * binSize),
  1240  		ptcl_alloc:      malloc(tileDims.X * tileDims.Y * ptclInitialAlloc),
  1241  		pathseg_alloc:   malloc(enc.npathseg * pathsegSize),
  1242  		anno_alloc:      malloc(enc.npath * annoSize),
  1243  		trans_alloc:     malloc(enc.ntrans * transSize),
  1244  	}
  1245  
  1246  	numPartitions := (enc.numElements() + 127) / 128
  1247  	// clearSize is the atomic partition counter plus flag and 2 states per partition.
  1248  	clearSize := 4 + numPartitions*stateStride
  1249  	if clearSize > g.buffers.state.size {
  1250  		paddedCap := clearSize * 11 / 10
  1251  		if err := g.buffers.state.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, paddedCap); err != nil {
  1252  			return err
  1253  		}
  1254  	}
  1255  
  1256  	confData := byteslice.Struct(g.conf)
  1257  	g.buffers.config.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead, len(confData))
  1258  	g.buffers.config.upload(confData)
  1259  
  1260  	minSize := int(unsafe.Sizeof(memoryHeader{})) + int(alloc)
  1261  	if minSize > g.buffers.memory.size {
  1262  		// Add space for dynamic GPU allocations.
  1263  		const sizeBump = 4 * 1024 * 1024
  1264  		minSize += sizeBump
  1265  		if err := g.buffers.memory.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, minSize); err != nil {
  1266  			return err
  1267  		}
  1268  	}
  1269  
  1270  	for {
  1271  		*g.memHeader = memoryHeader{
  1272  			mem_offset: alloc,
  1273  		}
  1274  		g.buffers.memory.upload(byteslice.Struct(g.memHeader))
  1275  		g.buffers.state.upload(g.zeros(clearSize))
  1276  
  1277  		if !g.useCPU {
  1278  			g.ctx.BeginCompute()
  1279  			g.ctx.BindImageTexture(kernel4OutputUnit, dst)
  1280  			img := g.output.nullMaterials
  1281  			if images != nil {
  1282  				img = images.image
  1283  			}
  1284  			g.ctx.BindImageTexture(kernel4AtlasUnit, img)
  1285  		} else {
  1286  			*g.output.descriptors.Binding2() = cpuDst
  1287  			if images != nil {
  1288  				*g.output.descriptors.Binding3() = images.cpuImage
  1289  			}
  1290  		}
  1291  
  1292  		g.bindBuffers()
  1293  		g.memoryBarrier()
  1294  		g.dispatch(g.programs.elements, numPartitions, 1, 1)
  1295  		g.memoryBarrier()
  1296  		g.dispatch(g.programs.tileAlloc, (enc.npath+wgSize-1)/wgSize, 1, 1)
  1297  		g.memoryBarrier()
  1298  		g.dispatch(g.programs.pathCoarse, (enc.npathseg+31)/32, 1, 1)
  1299  		g.memoryBarrier()
  1300  		g.dispatch(g.programs.backdrop, (enc.npath+wgSize-1)/wgSize, 1, 1)
  1301  		// No barrier needed between backdrop and binning.
  1302  		g.dispatch(g.programs.binning, (enc.npath+wgSize-1)/wgSize, 1, 1)
  1303  		g.memoryBarrier()
  1304  		g.dispatch(g.programs.coarse, widthInBins, heightInBins, 1)
  1305  		g.memoryBarrier()
  1306  		g.dispatch(g.programs.kernel4, tileDims.X, tileDims.Y, 1)
  1307  		g.memoryBarrier()
  1308  		if !g.useCPU {
  1309  			g.ctx.EndCompute()
  1310  		} else {
  1311  			g.dispatcher.Sync()
  1312  		}
  1313  
  1314  		if err := g.buffers.memory.download(byteslice.Struct(g.memHeader)); err != nil {
  1315  			if err == driver.ErrContentLost {
  1316  				continue
  1317  			}
  1318  			return err
  1319  		}
  1320  		switch errCode := g.memHeader.mem_error; errCode {
  1321  		case memNoError:
  1322  			if g.useCPU {
  1323  				w, h := tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx
  1324  				dst.Upload(image.Pt(0, 0), image.Pt(w, h), cpuDst.Data(), stride)
  1325  			}
  1326  			return nil
  1327  		case memMallocFailed:
  1328  			// Resize memory and try again.
  1329  			sz := g.buffers.memory.size * 15 / 10
  1330  			if err := g.buffers.memory.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, sz); err != nil {
  1331  				return err
  1332  			}
  1333  			continue
  1334  		default:
  1335  			return fmt.Errorf("compute: shader program failed with error %d", errCode)
  1336  		}
  1337  	}
  1338  }
  1339  
  1340  func (g *compute) memoryBarrier() {
  1341  	if g.useCPU {
  1342  		g.dispatcher.Barrier()
  1343  	}
  1344  }
  1345  
  1346  func (g *compute) dispatch(p computeProgram, x, y, z int) {
  1347  	if !g.useCPU {
  1348  		g.ctx.BindProgram(p.prog)
  1349  		g.ctx.DispatchCompute(x, y, z)
  1350  	} else {
  1351  		g.dispatcher.Dispatch(p.progInfo, p.descriptors, x, y, z)
  1352  	}
  1353  }
  1354  
  1355  // zeros returns a byte slice with size bytes of zeros.
  1356  func (g *compute) zeros(size int) []byte {
  1357  	if cap(g.zeroSlice) < size {
  1358  		g.zeroSlice = append(g.zeroSlice, make([]byte, size)...)
  1359  	}
  1360  	return g.zeroSlice[:size]
  1361  }
  1362  
  1363  func (g *compute) touchAlloc(a *atlasAlloc) {
  1364  	if a.dead {
  1365  		panic("re-use of dead allocation")
  1366  	}
  1367  	a.frameCount = g.frameCount
  1368  	a.atlas.lastFrame = a.frameCount
  1369  }
  1370  
  1371  func (g *compute) atlasAlloc(q allocQuery) (atlasAlloc, bool) {
  1372  	var (
  1373  		place placement
  1374  		fits  bool
  1375  		atlas = q.atlas
  1376  	)
  1377  	if atlas != nil {
  1378  		place, fits = atlas.packer.tryAdd(q.size)
  1379  		if !fits {
  1380  			atlas.compact = true
  1381  		}
  1382  	}
  1383  	if atlas == nil {
  1384  		// Look for matching atlas to re-use.
  1385  		for _, a := range g.atlases {
  1386  			if q.empty && len(a.allocs) > 0 {
  1387  				continue
  1388  			}
  1389  			if q.nocompact && a.compact {
  1390  				continue
  1391  			}
  1392  			if a.format != q.format || a.bindings&q.bindings != q.bindings {
  1393  				continue
  1394  			}
  1395  			place, fits = a.packer.tryAdd(q.size)
  1396  			if !fits {
  1397  				a.compact = true
  1398  				continue
  1399  			}
  1400  			atlas = a
  1401  			break
  1402  		}
  1403  	}
  1404  	if atlas == nil {
  1405  		atlas = &textureAtlas{
  1406  			format:   q.format,
  1407  			bindings: q.bindings,
  1408  		}
  1409  		atlas.packer.maxDims = image.Pt(g.maxTextureDim, g.maxTextureDim)
  1410  		atlas.packer.newPage()
  1411  		g.atlases = append(g.atlases, atlas)
  1412  		place, fits = atlas.packer.tryAdd(q.size)
  1413  		if !fits {
  1414  			panic(fmt.Errorf("compute: atlas allocation too large (%v)", q.size))
  1415  		}
  1416  	}
  1417  	if !fits {
  1418  		return atlasAlloc{}, false
  1419  	}
  1420  	atlas.lastFrame = g.frameCount
  1421  	return atlasAlloc{
  1422  		frameCount: g.frameCount,
  1423  		atlas:      atlas,
  1424  		rect:       image.Rectangle{Min: place.Pos, Max: place.Pos.Add(q.size)},
  1425  	}, true
  1426  }
  1427  
  1428  func (g *compute) realizeAtlas(atlas *textureAtlas, useCPU bool, size image.Point) error {
  1429  	defer func() {
  1430  		atlas.packer.maxDims = atlas.size
  1431  		atlas.realized = true
  1432  		atlas.ensureCPUImage(useCPU)
  1433  	}()
  1434  	if atlas.size.X >= size.X && atlas.size.Y >= size.Y {
  1435  		return nil
  1436  	}
  1437  	if atlas.realized {
  1438  		panic("resizing a realized atlas")
  1439  	}
  1440  	if err := atlas.resize(g.ctx, size); err != nil {
  1441  		return err
  1442  	}
  1443  	return nil
  1444  }
  1445  
  1446  func (a *textureAtlas) resize(ctx driver.Device, size image.Point) error {
  1447  	a.Release()
  1448  
  1449  	img, err := ctx.NewTexture(a.format, size.X, size.Y,
  1450  		driver.FilterNearest,
  1451  		driver.FilterNearest,
  1452  		a.bindings)
  1453  	if err != nil {
  1454  		return err
  1455  	}
  1456  	a.image = img
  1457  	a.size = size
  1458  	return nil
  1459  }
  1460  
  1461  func (a *textureAtlas) ensureCPUImage(useCPU bool) {
  1462  	if !useCPU || a.hasCPU {
  1463  		return
  1464  	}
  1465  	a.hasCPU = true
  1466  	a.cpuImage = cpu.NewImageRGBA(a.size.X, a.size.Y)
  1467  }
  1468  
  1469  func (g *compute) Release() {
  1470  	if g.useCPU {
  1471  		g.dispatcher.Stop()
  1472  	}
  1473  	type resource interface {
  1474  		Release()
  1475  	}
  1476  	res := []resource{
  1477  		g.output.nullMaterials,
  1478  		&g.programs.elements,
  1479  		&g.programs.tileAlloc,
  1480  		&g.programs.pathCoarse,
  1481  		&g.programs.backdrop,
  1482  		&g.programs.binning,
  1483  		&g.programs.coarse,
  1484  		&g.programs.kernel4,
  1485  		g.output.blitPipeline,
  1486  		&g.output.buffer,
  1487  		g.output.uniBuf,
  1488  		&g.buffers.scene,
  1489  		&g.buffers.state,
  1490  		&g.buffers.memory,
  1491  		&g.buffers.config,
  1492  		g.materials.pipeline,
  1493  		&g.materials.buffer,
  1494  		g.materials.uniforms.buf,
  1495  		g.timers.t,
  1496  	}
  1497  	for _, r := range res {
  1498  		if r != nil {
  1499  			r.Release()
  1500  		}
  1501  	}
  1502  	for _, a := range g.atlases {
  1503  		a.Release()
  1504  	}
  1505  	g.ctx.Release()
  1506  	*g = compute{}
  1507  }
  1508  
  1509  func (a *textureAtlas) Release() {
  1510  	if a.image != nil {
  1511  		a.image.Release()
  1512  		a.image = nil
  1513  	}
  1514  	a.cpuImage.Free()
  1515  	a.hasCPU = false
  1516  }
  1517  
  1518  func (g *compute) bindBuffers() {
  1519  	g.bindStorageBuffers(g.programs.elements, g.buffers.memory, g.buffers.config, g.buffers.scene, g.buffers.state)
  1520  	g.bindStorageBuffers(g.programs.tileAlloc, g.buffers.memory, g.buffers.config)
  1521  	g.bindStorageBuffers(g.programs.pathCoarse, g.buffers.memory, g.buffers.config)
  1522  	g.bindStorageBuffers(g.programs.backdrop, g.buffers.memory, g.buffers.config)
  1523  	g.bindStorageBuffers(g.programs.binning, g.buffers.memory, g.buffers.config)
  1524  	g.bindStorageBuffers(g.programs.coarse, g.buffers.memory, g.buffers.config)
  1525  	g.bindStorageBuffers(g.programs.kernel4, g.buffers.memory, g.buffers.config)
  1526  }
  1527  
  1528  func (p *computeProgram) Release() {
  1529  	if p.prog != nil {
  1530  		p.prog.Release()
  1531  	}
  1532  	*p = computeProgram{}
  1533  }
  1534  
  1535  func (b *sizedBuffer) Release() {
  1536  	if b.buffer != nil {
  1537  		b.buffer.Release()
  1538  	}
  1539  	b.cpuBuf.Free()
  1540  	*b = sizedBuffer{}
  1541  }
  1542  
  1543  func (b *sizedBuffer) ensureCapacity(useCPU bool, ctx driver.Device, binding driver.BufferBinding, size int) error {
  1544  	if b.size >= size {
  1545  		return nil
  1546  	}
  1547  	if b.buffer != nil {
  1548  		b.Release()
  1549  	}
  1550  	b.cpuBuf.Free()
  1551  	if !useCPU {
  1552  		buf, err := ctx.NewBuffer(binding, size)
  1553  		if err != nil {
  1554  			return err
  1555  		}
  1556  		b.buffer = buf
  1557  	} else {
  1558  		b.cpuBuf = cpu.NewBuffer(size)
  1559  	}
  1560  	b.size = size
  1561  	return nil
  1562  }
  1563  
  1564  func (b *sizedBuffer) download(data []byte) error {
  1565  	if b.buffer != nil {
  1566  		return b.buffer.Download(data)
  1567  	} else {
  1568  		copy(data, b.cpuBuf.Data())
  1569  		return nil
  1570  	}
  1571  }
  1572  
  1573  func (b *sizedBuffer) upload(data []byte) {
  1574  	if b.buffer != nil {
  1575  		b.buffer.Upload(data)
  1576  	} else {
  1577  		copy(b.cpuBuf.Data(), data)
  1578  	}
  1579  }
  1580  
  1581  func (g *compute) bindStorageBuffers(prog computeProgram, buffers ...sizedBuffer) {
  1582  	for i, buf := range buffers {
  1583  		if !g.useCPU {
  1584  			g.ctx.BindStorageBuffer(i, buf.buffer)
  1585  		} else {
  1586  			*prog.buffers[i] = buf.cpuBuf
  1587  		}
  1588  	}
  1589  }
  1590  
// bo is the byte order used to decode multi-byte values from encoded
// operation data.
var bo = binary.LittleEndian
  1592  
  1593  func (e *encoder) reset() {
  1594  	e.scene = e.scene[:0]
  1595  	e.npath = 0
  1596  	e.npathseg = 0
  1597  	e.ntrans = 0
  1598  }
  1599  
  1600  func (e *encoder) numElements() int {
  1601  	return len(e.scene)
  1602  }
  1603  
  1604  func (e *encoder) transform(m f32.Affine2D) {
  1605  	e.scene = append(e.scene, scene.Transform(m))
  1606  	e.ntrans++
  1607  }
  1608  
  1609  func (e *encoder) lineWidth(width float32) {
  1610  	e.scene = append(e.scene, scene.SetLineWidth(width))
  1611  }
  1612  
  1613  func (e *encoder) fillMode(mode scene.FillMode) {
  1614  	e.scene = append(e.scene, scene.SetFillMode(mode))
  1615  }
  1616  
  1617  func (e *encoder) beginClip(bbox f32.Rectangle) {
  1618  	e.scene = append(e.scene, scene.BeginClip(bbox))
  1619  	e.npath++
  1620  }
  1621  
  1622  func (e *encoder) endClip(bbox f32.Rectangle) {
  1623  	e.scene = append(e.scene, scene.EndClip(bbox))
  1624  	e.npath++
  1625  }
  1626  
  1627  func (e *encoder) rect(r f32.Rectangle) {
  1628  	// Rectangle corners, clock-wise.
  1629  	c0, c1, c2, c3 := r.Min, f32.Pt(r.Min.X, r.Max.Y), r.Max, f32.Pt(r.Max.X, r.Min.Y)
  1630  	e.line(c0, c1)
  1631  	e.line(c1, c2)
  1632  	e.line(c2, c3)
  1633  	e.line(c3, c0)
  1634  }
  1635  
  1636  func (e *encoder) fillColor(col color.RGBA) {
  1637  	e.scene = append(e.scene, scene.FillColor(col))
  1638  	e.npath++
  1639  }
  1640  
  1641  func (e *encoder) fillImage(index int, offset image.Point) {
  1642  	e.scene = append(e.scene, scene.FillImage(index, offset))
  1643  	e.npath++
  1644  }
  1645  
  1646  func (e *encoder) line(start, end f32.Point) {
  1647  	e.scene = append(e.scene, scene.Line(start, end))
  1648  	e.npathseg++
  1649  }
  1650  
  1651  func (c *collector) reset() {
  1652  	c.prevFrame, c.frame = c.frame, c.prevFrame
  1653  	c.clipStates = c.clipStates[:0]
  1654  	c.transStack = c.transStack[:0]
  1655  	c.frame.reset()
  1656  }
  1657  
  1658  func (c *opsCollector) reset() {
  1659  	c.paths = c.paths[:0]
  1660  	c.clipCmds = c.clipCmds[:0]
  1661  	c.ops = c.ops[:0]
  1662  	c.layers = c.layers[:0]
  1663  }
  1664  
  1665  func (c *collector) addClip(state *encoderState, viewport, bounds f32.Rectangle, path []byte, key ops.Key, hash uint64, strokeWidth float32, push bool) {
  1666  	// Rectangle clip regions.
  1667  	if len(path) == 0 && !push {
  1668  		// If the rectangular clip region contains a previous path it can be discarded.
  1669  		p := state.clip
  1670  		t := state.relTrans.Invert()
  1671  		for p != nil {
  1672  			// rect is the parent bounds transformed relative to the rectangle.
  1673  			rect := transformBounds(t, p.bounds)
  1674  			if rect.In(bounds) {
  1675  				return
  1676  			}
  1677  			t = p.relTrans.Invert().Mul(t)
  1678  			p = p.parent
  1679  		}
  1680  	}
  1681  
  1682  	absBounds := transformBounds(state.t, bounds).Bounds()
  1683  	intersect := absBounds
  1684  	if state.clip != nil {
  1685  		intersect = state.clip.intersect.Intersect(intersect)
  1686  	}
  1687  	c.clipStates = append(c.clipStates, clipState{
  1688  		parent:    state.clip,
  1689  		absBounds: absBounds,
  1690  		path:      path,
  1691  		pathKey:   key,
  1692  		intersect: intersect,
  1693  		clipKey: clipKey{
  1694  			bounds:      bounds,
  1695  			relTrans:    state.relTrans,
  1696  			strokeWidth: strokeWidth,
  1697  			pathHash:    hash,
  1698  		},
  1699  	})
  1700  	state.clip = &c.clipStates[len(c.clipStates)-1]
  1701  	state.relTrans = f32.Affine2D{}
  1702  }
  1703  
// collect decodes the operations in root and records a paintOp, with its
// flattened clip stack, in c.frame for every paint operation whose clip
// intersection is not empty. The whole viewport is added as the outermost
// clip.
//
// After decoding, the clip stack of each paint op is simplified, the integer
// offset of its outermost transform is separated into op.offset, the op is
// hashed, and for ops with a texture material a textureOp is appended to
// *texOps with its index stored in op.texOpIdx.
func (c *collector) collect(root *op.Ops, viewport image.Point, texOps *[]textureOp) {
	fview := f32.Rectangle{Max: layout.FPt(viewport)}
	var intOps *ops.Ops
	if root != nil {
		intOps = &root.Internal
	}
	c.reader.Reset(intOps)
	var state encoderState
	// reset restores state to its initial value: an opaque black color and
	// no clip or transform.
	reset := func() {
		state = encoderState{
			paintKey: paintKey{
				color: color.NRGBA{A: 0xff},
			},
		}
	}
	reset()
	r := &c.reader
	var (
		// pathData is the most recently decoded path, consumed by the next
		// clip operation.
		pathData struct {
			data []byte
			key  ops.Key
			hash uint64
		}
		// strWidth is the most recently decoded stroke width, consumed by
		// the next clip operation.
		strWidth float32
	)
	c.addClip(&state, fview, fview, nil, ops.Key{}, 0, 0, false)
	for encOp, ok := r.Decode(); ok; encOp, ok = r.Decode() {
		switch ops.OpType(encOp.Data[0]) {
		case ops.TypeTransform:
			dop, push := ops.DecodeTransform(encOp.Data)
			if push {
				c.transStack = append(c.transStack, transEntry{t: state.t, relTrans: state.relTrans})
			}
			state.t = state.t.Mul(dop)
			state.relTrans = state.relTrans.Mul(dop)
		case ops.TypePopTransform:
			n := len(c.transStack)
			st := c.transStack[n-1]
			c.transStack = c.transStack[:n-1]
			state.t = st.t
			state.relTrans = st.relTrans
		case ops.TypeStroke:
			strWidth = decodeStrokeOp(encOp.Data)
		case ops.TypePath:
			hash := bo.Uint64(encOp.Data[1:])
			// The path data is stored in the operation that follows.
			encOp, ok = r.Decode()
			if !ok {
				panic("unexpected end of path operation")
			}
			pathData.data = encOp.Data[ops.TypeAuxLen:]
			pathData.key = encOp.Key
			pathData.hash = hash
		case ops.TypeClip:
			var op ops.ClipOp
			op.Decode(encOp.Data)
			bounds := f32.FRect(op.Bounds)
			c.addClip(&state, fview, bounds, pathData.data, pathData.key, pathData.hash, strWidth, true)
			pathData.data = nil
			strWidth = 0
		case ops.TypePopClip:
			// Fold the transform of the popped clip back into the relative
			// transform.
			state.relTrans = state.clip.relTrans.Mul(state.relTrans)
			state.clip = state.clip.parent
		case ops.TypeColor:
			state.matType = materialColor
			state.color = decodeColorOp(encOp.Data)
		case ops.TypeLinearGradient:
			state.matType = materialLinearGradient
			op := decodeLinearGradientOp(encOp.Data)
			state.stop1 = op.stop1
			state.stop2 = op.stop2
			state.color1 = op.color1
			state.color2 = op.color2
		case ops.TypeImage:
			state.matType = materialTexture
			state.image = decodeImageOp(encOp.Data, encOp.Refs)
		case ops.TypePaint:
			// Work on a copy so that the implicit image clip does not leak
			// into later operations.
			paintState := state
			if paintState.matType == materialTexture {
				// Clip to the bounds of the image, to hide other images in the atlas.
				sz := state.image.src.Rect.Size()
				bounds := f32.Rectangle{Max: layout.FPt(sz)}
				c.addClip(&paintState, fview, bounds, nil, ops.Key{}, 0, 0, false)
			}
			// NOTE(review): paintState.clip is dereferenced here, before the
			// clip == nil check below, so that check looks unreachable; a
			// paint with a nil clip (state.clip is nil after TypeLoad calls
			// reset) would presumably panic here. Confirm.
			intersect := paintState.clip.intersect
			if intersect.Empty() {
				break
			}

			// If the paint is a uniform opaque color that takes up the whole
			// screen, it covers all previous paints and we can discard all
			// rendering commands recorded so far.
			if paintState.clip == nil && paintState.matType == materialColor && paintState.color.A == 255 {
				c.clearColor = f32color.LinearFromSRGB(paintState.color).Opaque()
				c.clear = true
				c.frame.reset()
				break
			}

			// Flatten clip stack.
			p := paintState.clip
			startIdx := len(c.frame.clipCmds)
			for p != nil {
				// Copy the path data into the frame's own storage.
				idx := len(c.frame.paths)
				c.frame.paths = append(c.frame.paths, make([]byte, len(p.path))...)
				path := c.frame.paths[idx:]
				copy(path, p.path)
				c.frame.clipCmds = append(c.frame.clipCmds, clipCmd{
					state:     p.clipKey,
					path:      path,
					pathKey:   p.pathKey,
					absBounds: p.absBounds,
				})
				p = p.parent
			}
			// clipStack is ordered from the innermost clip to the outermost.
			clipStack := c.frame.clipCmds[startIdx:]
			c.frame.ops = append(c.frame.ops, paintOp{
				clipStack: clipStack,
				state:     paintState.paintKey,
				intersect: intersect,
			})
		case ops.TypeSave:
			id := ops.DecodeSave(encOp.Data)
			c.save(id, state.t)
		case ops.TypeLoad:
			reset()
			id := ops.DecodeLoad(encOp.Data)
			state.t = c.states[id]
			state.relTrans = state.t
		}
	}
	for i := range c.frame.ops {
		op := &c.frame.ops[i]
		// For each clip, cull rectangular clip regions that contain its
		// (transformed) bounds. addClip already handled the converse case.
		// TODO: do better than O(n²) to efficiently deal with deep stacks.
		for j := 0; j < len(op.clipStack)-1; j++ {
			cl := op.clipStack[j]
			p := cl.state
			r := transformBounds(p.relTrans, p.bounds)
			for k := j + 1; k < len(op.clipStack); k++ {
				cl2 := op.clipStack[k]
				p2 := cl2.state
				if len(cl2.path) == 0 && r.In(cl2.state.bounds) {
					// Remove clip k and merge its relative transform into
					// the clip before it.
					op.clipStack = append(op.clipStack[:k], op.clipStack[k+1:]...)
					k--
					op.clipStack[k].state.relTrans = p2.relTrans.Mul(op.clipStack[k].state.relTrans)
				}
				r = transformRect(p2.relTrans, r)
			}
		}
		// Separate the integer offset from the first transform. Two ops that differ
		// only in integer offsets may share backing storage.
		if len(op.clipStack) > 0 {
			c := &op.clipStack[len(op.clipStack)-1]
			t := c.state.relTrans
			t, off := separateTransform(t)
			c.state.relTrans = t
			op.offset = off
			op.state.t = op.state.t.Offset(layout.FPt(off.Mul(-1)))
		}
		op.hash = c.hashOp(*op)
		// -1 marks ops without a texture op.
		op.texOpIdx = -1
		switch op.state.matType {
		case materialTexture:
			op.texOpIdx = len(*texOps)
			// Separate integer offset from transformation. TextureOps that have identical transforms
			// except for their integer offsets can share a transformed image.
			t := op.state.t.Offset(layout.FPt(op.offset))
			t, off := separateTransform(t)
			bounds := op.intersect.Round().Sub(off)
			*texOps = append(*texOps, textureOp{
				img: op.state.image,
				off: off,
				key: textureKey{
					bounds:    bounds,
					transform: t,
					handle:    op.state.image.handle,
				},
			})
		}
	}
}
  1886  
  1887  func (c *collector) hashOp(op paintOp) uint64 {
  1888  	c.hasher.Reset()
  1889  	for _, cl := range op.clipStack {
  1890  		k := cl.state
  1891  		keyBytes := (*[unsafe.Sizeof(k)]byte)(unsafe.Pointer(unsafe.Pointer(&k)))
  1892  		c.hasher.Write(keyBytes[:])
  1893  	}
  1894  	k := op.state
  1895  	keyBytes := (*[unsafe.Sizeof(k)]byte)(unsafe.Pointer(unsafe.Pointer(&k)))
  1896  	c.hasher.Write(keyBytes[:])
  1897  	return c.hasher.Sum64()
  1898  }
  1899  
  1900  func (g *compute) layer(viewport image.Point, texOps []textureOp) {
  1901  	// Sort ops from previous frames by hash.
  1902  	c := &g.collector
  1903  	prevOps := c.prevFrame.ops
  1904  	c.order = c.order[:0]
  1905  	for i, op := range prevOps {
  1906  		c.order = append(c.order, hashIndex{
  1907  			index: i,
  1908  			hash:  op.hash,
  1909  		})
  1910  	}
  1911  	sort.Slice(c.order, func(i, j int) bool {
  1912  		return c.order[i].hash < c.order[j].hash
  1913  	})
  1914  	// Split layers with different materials atlas; the compute stage has only
  1915  	// one materials slot.
  1916  	splitLayer := func(ops []paintOp, prevLayerIdx int) {
  1917  		for len(ops) > 0 {
  1918  			var materials *textureAtlas
  1919  			idx := 0
  1920  			for idx < len(ops) {
  1921  				if i := ops[idx].texOpIdx; i != -1 {
  1922  					omats := texOps[i].matAlloc.alloc.atlas
  1923  					if materials != nil && omats != nil && omats != materials {
  1924  						break
  1925  					}
  1926  					materials = omats
  1927  				}
  1928  				idx++
  1929  			}
  1930  			l := layer{ops: ops[:idx], materials: materials}
  1931  			if prevLayerIdx != -1 {
  1932  				prev := c.prevFrame.layers[prevLayerIdx]
  1933  				if !prev.alloc.dead && len(prev.ops) == len(l.ops) {
  1934  					l.alloc = prev.alloc
  1935  					l.materials = prev.materials
  1936  					g.touchAlloc(l.alloc)
  1937  				}
  1938  			}
  1939  			for i, op := range l.ops {
  1940  				l.rect = l.rect.Union(op.intersect.Round())
  1941  				l.ops[i].layer = len(c.frame.layers)
  1942  			}
  1943  			c.frame.layers = append(c.frame.layers, l)
  1944  			ops = ops[idx:]
  1945  		}
  1946  	}
  1947  	ops := c.frame.ops
  1948  	idx := 0
  1949  	for idx < len(ops) {
  1950  		op := ops[idx]
  1951  		// Search for longest matching op sequence.
  1952  		// start is the earliest index of a match.
  1953  		start := searchOp(c.order, op.hash)
  1954  		layerOps, prevLayerIdx := longestLayer(prevOps, c.order[start:], ops[idx:])
  1955  		if len(layerOps) == 0 {
  1956  			idx++
  1957  			continue
  1958  		}
  1959  		if unmatched := ops[:idx]; len(unmatched) > 0 {
  1960  			// Flush layer of unmatched ops.
  1961  			splitLayer(unmatched, -1)
  1962  			ops = ops[idx:]
  1963  			idx = 0
  1964  		}
  1965  		splitLayer(layerOps, prevLayerIdx)
  1966  		ops = ops[len(layerOps):]
  1967  	}
  1968  	if len(ops) > 0 {
  1969  		splitLayer(ops, -1)
  1970  	}
  1971  }
  1972  
  1973  func longestLayer(prev []paintOp, order []hashIndex, ops []paintOp) ([]paintOp, int) {
  1974  	longest := 0
  1975  	longestIdx := -1
  1976  outer:
  1977  	for len(order) > 0 {
  1978  		first := order[0]
  1979  		order = order[1:]
  1980  		match := prev[first.index:]
  1981  		// Potential match found. Now find longest matching sequence.
  1982  		end := 0
  1983  		layer := match[0].layer
  1984  		off := match[0].offset.Sub(ops[0].offset)
  1985  		for end < len(match) && end < len(ops) {
  1986  			m := match[end]
  1987  			o := ops[end]
  1988  			// End layers on previous match.
  1989  			if m.layer != layer {
  1990  				break
  1991  			}
  1992  			// End layer when the next op doesn't match.
  1993  			if m.hash != o.hash {
  1994  				if end == 0 {
  1995  					// Hashes are sorted so if the first op doesn't match, no
  1996  					// more matches are possible.
  1997  					break outer
  1998  				}
  1999  				break
  2000  			}
  2001  			if !opEqual(off, m, o) {
  2002  				break
  2003  			}
  2004  			end++
  2005  		}
  2006  		if end > longest {
  2007  			longest = end
  2008  			longestIdx = layer
  2009  
  2010  		}
  2011  	}
  2012  	return ops[:longest], longestIdx
  2013  }
  2014  
  2015  func searchOp(order []hashIndex, hash uint64) int {
  2016  	lo, hi := 0, len(order)
  2017  	for lo < hi {
  2018  		mid := (lo + hi) / 2
  2019  		if order[mid].hash < hash {
  2020  			lo = mid + 1
  2021  		} else {
  2022  			hi = mid
  2023  		}
  2024  	}
  2025  	return lo
  2026  }
  2027  
  2028  func opEqual(off image.Point, o1 paintOp, o2 paintOp) bool {
  2029  	if len(o1.clipStack) != len(o2.clipStack) {
  2030  		return false
  2031  	}
  2032  	if o1.state != o2.state {
  2033  		return false
  2034  	}
  2035  	if o1.offset.Sub(o2.offset) != off {
  2036  		return false
  2037  	}
  2038  	for i, cl1 := range o1.clipStack {
  2039  		cl2 := o2.clipStack[i]
  2040  		if len(cl1.path) != len(cl2.path) {
  2041  			return false
  2042  		}
  2043  		if cl1.state != cl2.state {
  2044  			return false
  2045  		}
  2046  		if cl1.pathKey != cl2.pathKey && !bytes.Equal(cl1.path, cl2.path) {
  2047  			return false
  2048  		}
  2049  	}
  2050  	return true
  2051  }
  2052  
  2053  func encodeLayer(l layer, pos image.Point, viewport image.Point, enc *encoder, texOps []textureOp) {
  2054  	off := pos.Sub(l.rect.Min)
  2055  	offf := layout.FPt(off)
  2056  
  2057  	enc.transform(f32.Affine2D{}.Offset(offf))
  2058  	for _, op := range l.ops {
  2059  		encodeOp(viewport, off, enc, texOps, op)
  2060  	}
  2061  	enc.transform(f32.Affine2D{}.Offset(offf.Mul(-1)))
  2062  }
  2063  
  2064  func encodeOp(viewport image.Point, absOff image.Point, enc *encoder, texOps []textureOp, op paintOp) {
  2065  	// Fill in clip bounds, which the shaders expect to be the union
  2066  	// of all affected bounds.
  2067  	var union f32.Rectangle
  2068  	for i, cl := range op.clipStack {
  2069  		union = union.Union(cl.absBounds)
  2070  		op.clipStack[i].union = union
  2071  	}
  2072  
  2073  	absOfff := layout.FPt(absOff)
  2074  	fillMode := scene.FillModeNonzero
  2075  	opOff := layout.FPt(op.offset)
  2076  	inv := f32.Affine2D{}.Offset(opOff)
  2077  	enc.transform(inv)
  2078  	for i := len(op.clipStack) - 1; i >= 0; i-- {
  2079  		cl := op.clipStack[i]
  2080  		if w := cl.state.strokeWidth; w > 0 {
  2081  			enc.fillMode(scene.FillModeStroke)
  2082  			enc.lineWidth(w)
  2083  			fillMode = scene.FillModeStroke
  2084  		} else if fillMode != scene.FillModeNonzero {
  2085  			enc.fillMode(scene.FillModeNonzero)
  2086  			fillMode = scene.FillModeNonzero
  2087  		}
  2088  		enc.transform(cl.state.relTrans)
  2089  		inv = inv.Mul(cl.state.relTrans)
  2090  		if len(cl.path) == 0 {
  2091  			enc.rect(cl.state.bounds)
  2092  		} else {
  2093  			enc.encodePath(cl.path, fillMode)
  2094  		}
  2095  		if i != 0 {
  2096  			enc.beginClip(cl.union.Add(absOfff))
  2097  		}
  2098  	}
  2099  	if len(op.clipStack) == 0 {
  2100  		// No clipping; fill the entire view.
  2101  		enc.rect(f32.Rectangle{Max: layout.FPt(viewport)})
  2102  	}
  2103  
  2104  	switch op.state.matType {
  2105  	case materialTexture:
  2106  		texOp := texOps[op.texOpIdx]
  2107  		off := texOp.matAlloc.alloc.rect.Min.Add(texOp.matAlloc.offset).Sub(texOp.off).Sub(absOff)
  2108  		enc.fillImage(0, off)
  2109  	case materialColor:
  2110  		enc.fillColor(f32color.NRGBAToRGBA(op.state.color))
  2111  	case materialLinearGradient:
  2112  		// TODO: implement.
  2113  		enc.fillColor(f32color.NRGBAToRGBA(op.state.color1))
  2114  	default:
  2115  		panic("not implemented")
  2116  	}
  2117  	enc.transform(inv.Invert())
  2118  	// Pop the clip stack, except the first entry used for fill.
  2119  	for i := 1; i < len(op.clipStack); i++ {
  2120  		cl := op.clipStack[i]
  2121  		enc.endClip(cl.union.Add(absOfff))
  2122  	}
  2123  	if fillMode != scene.FillModeNonzero {
  2124  		enc.fillMode(scene.FillModeNonzero)
  2125  	}
  2126  }
  2127  
  2128  func (c *collector) save(id int, state f32.Affine2D) {
  2129  	if extra := id - len(c.states) + 1; extra > 0 {
  2130  		c.states = append(c.states, make([]f32.Affine2D, extra)...)
  2131  	}
  2132  	c.states[id] = state
  2133  }
  2134  
  2135  func transformBounds(t f32.Affine2D, bounds f32.Rectangle) rectangle {
  2136  	return rectangle{
  2137  		t.Transform(bounds.Min), t.Transform(f32.Pt(bounds.Max.X, bounds.Min.Y)),
  2138  		t.Transform(bounds.Max), t.Transform(f32.Pt(bounds.Min.X, bounds.Max.Y)),
  2139  	}
  2140  }
  2141  
  2142  func separateTransform(t f32.Affine2D) (f32.Affine2D, image.Point) {
  2143  	sx, hx, ox, hy, sy, oy := t.Elems()
  2144  	intx, fracx := math.Modf(float64(ox))
  2145  	inty, fracy := math.Modf(float64(oy))
  2146  	t = f32.NewAffine2D(sx, hx, float32(fracx), hy, sy, float32(fracy))
  2147  	return t, image.Pt(int(intx), int(inty))
  2148  }
  2149  
  2150  func transformRect(t f32.Affine2D, r rectangle) rectangle {
  2151  	var tr rectangle
  2152  	for i, c := range r {
  2153  		tr[i] = t.Transform(c)
  2154  	}
  2155  	return tr
  2156  }
  2157  
  2158  func (r rectangle) In(b f32.Rectangle) bool {
  2159  	for _, c := range r {
  2160  		inside := b.Min.X <= c.X && c.X <= b.Max.X &&
  2161  			b.Min.Y <= c.Y && c.Y <= b.Max.Y
  2162  		if !inside {
  2163  			return false
  2164  		}
  2165  	}
  2166  	return true
  2167  }
  2168  
  2169  func (r rectangle) Contains(b f32.Rectangle) bool {
  2170  	return true
  2171  }
  2172  
  2173  func (r rectangle) Bounds() f32.Rectangle {
  2174  	bounds := f32.Rectangle{
  2175  		Min: f32.Pt(math.MaxFloat32, math.MaxFloat32),
  2176  		Max: f32.Pt(-math.MaxFloat32, -math.MaxFloat32),
  2177  	}
  2178  	for _, c := range r {
  2179  		if c.X < bounds.Min.X {
  2180  			bounds.Min.X = c.X
  2181  		}
  2182  		if c.Y < bounds.Min.Y {
  2183  			bounds.Min.Y = c.Y
  2184  		}
  2185  		if c.X > bounds.Max.X {
  2186  			bounds.Max.X = c.X
  2187  		}
  2188  		if c.Y > bounds.Max.Y {
  2189  			bounds.Max.Y = c.Y
  2190  		}
  2191  	}
  2192  	return bounds
  2193  }