github.com/cybriq/giocore@v0.0.7-0.20210703034601-cfb9cb5f3900/gpu/gpu.go (about)

     1  // SPDX-License-Identifier: Unlicense OR MIT
     2  
     3  /*
     4  Package gpu implements the rendering of Gio drawing operations. It
     5  is used by package app and package app/headless and is otherwise not
     6  useful except for integrating with external window implementations.
     7  */
     8  package gpu
     9  
    10  import (
    11  	"encoding/binary"
    12  	"errors"
    13  	"fmt"
    14  	"image"
    15  	"image/color"
    16  	"math"
    17  	"os"
    18  	"reflect"
    19  	"time"
    20  	"unsafe"
    21  
    22  	"github.com/cybriq/giocore/f32"
    23  	"github.com/cybriq/giocore/gpu/internal/driver"
    24  	"github.com/cybriq/giocore/internal/byteslice"
    25  	"github.com/cybriq/giocore/internal/f32color"
    26  	"github.com/cybriq/giocore/internal/opconst"
    27  	"github.com/cybriq/giocore/internal/ops"
    28  	"github.com/cybriq/giocore/internal/scene"
    29  	"github.com/cybriq/giocore/internal/stroke"
    30  	"github.com/cybriq/giocore/op"
    31  	"github.com/cybriq/giocore/op/clip"
    32  	layout "github.com/cybriq/giocore/utils"
    33  
    34  	// Register backends.
    35  	_ "github.com/cybriq/giocore/gpu/internal/d3d11"
    36  	_ "github.com/cybriq/giocore/gpu/internal/opengl"
    37  )
    38  
// GPU is the interface to a renderer of Gio drawing operations. A frame is
// produced by calling Collect followed by Frame. Values are created by New.
type GPU interface {
	// Release non-Go resources. The GPU is no longer valid after Release.
	Release()
	// Clear sets the clear color for the next Frame.
	Clear(color color.NRGBA)
	// Collect the graphics operations from frame, given the viewport.
	Collect(viewport image.Point, frame *op.Ops)
	// Frame clears the color buffer and draws the collected operations.
	Frame() error
	// Profile returns the last available profiling information. Profiling
	// information is requested when Collect sees a ProfileOp, and the result
	// is available through Profile at some later time.
	Profile() string
}
    53  
// gpu is the GPU implementation created by newGPU.
//
// profile holds the latest summary returned by Profile. timers and the four
// per-phase timers are created lazily by Collect the first time profiling is
// requested and the device reports timer support. drawOps carries the
// operations gathered by Collect and drawn by Frame. ctx is the device that
// is released together with the gpu.
type gpu struct {
	// cache holds device resources keyed by handle (see renderer.texHandle).
	cache *resourceCache

	profile                                           string
	timers                                            *timers
	frameStart                                        time.Time
	zopsTimer, stencilTimer, coverTimer, cleanupTimer *timer
	drawOps                                           drawOps
	ctx                                               driver.Device
	renderer                                          *renderer
}
    65  
// renderer groups the device with the helpers that issue draw calls.
type renderer struct {
	ctx     driver.Device
	blitter *blitter
	pather  *pather
	// packer allocates atlas space for path clips (see packStencils).
	packer packer
	// intersections allocates atlas space for images clipped by more than
	// one path (see packIntersections).
	intersections packer
}
    73  
// drawOps holds the state gathered from an operation list by collect. The
// per-frame slices are truncated, not reallocated, by reset.
type drawOps struct {
	// profile is set when a profile op is seen while collecting.
	profile bool
	reader  ops.Reader
	// states holds saved draw states, indexed by state id (see save).
	states    []drawState
	cache     *resourceCache
	vertCache []byte
	viewport  image.Point
	// clear and clearColor describe a pending clear of the color buffer.
	clear      bool
	clearColor f32color.RGBA
	// allImageOps is the combined list of imageOps and
	// zimageOps, in drawing order.
	allImageOps []imageOp
	imageOps    []imageOp
	// zimageOps are the rectangle clipped opaque images
	// that can use fast front-to-back rendering with z-test
	// and no blending.
	zimageOps []imageOp
	// pathOps lists the clip nodes that carry path data.
	pathOps []*pathOp
	// pathOpCache is the backing storage for pathOp values handed out by
	// newPathOp.
	pathOpCache []pathOp
	qs          quadSplitter
	pathCache   *opCache
	// hack for the compute renderer to access
	// converted path data.
	compute bool
}
    99  
// drawState is the drawing state tracked while walking an operation list:
// the accumulated clip and transform plus the current material.
type drawState struct {
	// clip is the intersection of all clip bounds seen so far.
	clip f32.Rectangle
	// t is the accumulated transform.
	t f32.Affine2D
	// cpath is the innermost clip node; its parent links lead outwards.
	cpath *pathOp
	// rect is true until a clip with path data is added (see addClipPath).
	rect bool

	// matType selects which of the material fields below is current.
	matType materialType
	// Current paint.ImageOp
	image imageOpData
	// Current paint.ColorOp, if any.
	color color.NRGBA

	// Current paint.LinearGradientOp.
	stop1  f32.Point
	stop2  f32.Point
	color1 color.NRGBA
	color2 color.NRGBA
}
   118  
// pathOp is one node in a chain of clip operations, linked from the
// innermost clip to the outermost through parent.
type pathOp struct {
	off f32.Point
	// clip is the union of all
	// later clip rectangles.
	clip   image.Rectangle
	bounds f32.Rectangle
	// pathKey identifies the path data in the path cache.
	pathKey ops.Key
	// path reports whether this node carries path data, as opposed to
	// being a plain rectangle.
	path bool
	// pathVerts is the path data; drawOps.collect clears it once the path
	// cache has been populated.
	pathVerts []byte
	parent    *pathOp
	// place is the atlas position assigned by renderer.packStencils.
	place placement

	// For compute
	trans  f32.Affine2D
	stroke clip.StrokeStyle
}
   135  
// imageOp is a single paint operation ready for drawing.
type imageOp struct {
	// z is the depth value assigned in paint order.
	z float32
	// path is the innermost clip node applying to the paint, or nil.
	path *pathOp
	// clip is the pixel bounds of the paint.
	clip     image.Rectangle
	material material
	// clipType and place are filled in by renderer.packIntersections.
	clipType clipType
	place    placement
}
   144  
   145  func decodeStrokeOp(data []byte) clip.StrokeStyle {
   146  	_ = data[4]
   147  	if opconst.OpType(data[0]) != opconst.TypeStroke {
   148  		panic("invalid op")
   149  	}
   150  	bo := binary.LittleEndian
   151  	return clip.StrokeStyle{
   152  		Width: math.Float32frombits(bo.Uint32(data[1:])),
   153  	}
   154  }
   155  
// quadsOp holds the auxiliary path data of a pending path operation together
// with the key used to cache it.
type quadsOp struct {
	key ops.Key
	aux []byte
}
   160  
// material describes how an imageOp is filled. The material field selects
// which of the type-specific fields are meaningful.
type material struct {
	material materialType
	opaque   bool
	// For materialColor.
	color f32color.RGBA
	// For materialLinearGradient.
	color1 f32color.RGBA
	color2 f32color.RGBA
	// For materialTexture.
	data    imageOpData
	uvTrans f32.Affine2D

	// For the compute backend.
	trans f32.Affine2D
}
   176  
// clipOp is the shadow of clip.Op. It is filled in from the encoded
// operation by decode.
type clipOp struct {
	// TODO: Use image.Rectangle?
	bounds f32.Rectangle
	// outline is decoded from the byte following the bounds.
	outline bool
}
   183  
// imageOpData is the shadow of paint.ImageOp.
type imageOpData struct {
	// src is the image pixels.
	src *image.RGBA
	// handle is the key under which the uploaded texture is cached
	// (see renderer.texHandle).
	handle interface{}
}
   189  
// linearGradientOpData is the decoded form of a linear gradient operation:
// two stop points, each with its color (see decodeLinearGradientOp).
type linearGradientOpData struct {
	stop1  f32.Point
	color1 color.NRGBA
	stop2  f32.Point
	color2 color.NRGBA
}
   196  
   197  func (op *clipOp) decode(data []byte) {
   198  	if opconst.OpType(data[0]) != opconst.TypeClip {
   199  		panic("invalid op")
   200  	}
   201  	bo := binary.LittleEndian
   202  	r := image.Rectangle{
   203  		Min: image.Point{
   204  			X: int(int32(bo.Uint32(data[1:]))),
   205  			Y: int(int32(bo.Uint32(data[5:]))),
   206  		},
   207  		Max: image.Point{
   208  			X: int(int32(bo.Uint32(data[9:]))),
   209  			Y: int(int32(bo.Uint32(data[13:]))),
   210  		},
   211  	}
   212  	*op = clipOp{
   213  		bounds:  layout.FRect(r),
   214  		outline: data[17] == 1,
   215  	}
   216  }
   217  
   218  func decodeImageOp(data []byte, refs []interface{}) imageOpData {
   219  	if opconst.OpType(data[0]) != opconst.TypeImage {
   220  		panic("invalid op")
   221  	}
   222  	handle := refs[1]
   223  	if handle == nil {
   224  		return imageOpData{}
   225  	}
   226  	return imageOpData{
   227  		src:    refs[0].(*image.RGBA),
   228  		handle: handle,
   229  	}
   230  }
   231  
   232  func decodeColorOp(data []byte) color.NRGBA {
   233  	if opconst.OpType(data[0]) != opconst.TypeColor {
   234  		panic("invalid op")
   235  	}
   236  	return color.NRGBA{
   237  		R: data[1],
   238  		G: data[2],
   239  		B: data[3],
   240  		A: data[4],
   241  	}
   242  }
   243  
   244  func decodeLinearGradientOp(data []byte) linearGradientOpData {
   245  	if opconst.OpType(data[0]) != opconst.TypeLinearGradient {
   246  		panic("invalid op")
   247  	}
   248  	bo := binary.LittleEndian
   249  	return linearGradientOpData{
   250  		stop1: f32.Point{
   251  			X: math.Float32frombits(bo.Uint32(data[1:])),
   252  			Y: math.Float32frombits(bo.Uint32(data[5:])),
   253  		},
   254  		stop2: f32.Point{
   255  			X: math.Float32frombits(bo.Uint32(data[9:])),
   256  			Y: math.Float32frombits(bo.Uint32(data[13:])),
   257  		},
   258  		color1: color.NRGBA{
   259  			R: data[17+0],
   260  			G: data[17+1],
   261  			B: data[17+2],
   262  			A: data[17+3],
   263  		},
   264  		color2: color.NRGBA{
   265  			R: data[21+0],
   266  			G: data[21+1],
   267  			B: data[21+2],
   268  			A: data[21+3],
   269  		},
   270  	}
   271  }
   272  
// clipType tells how an imageOp is clipped; see the clipType* constants.
type clipType uint8
   274  
// resource is implemented by values that hold something needing an explicit
// release, such as texture.
type resource interface {
	release()
}
   278  
// texture pairs an image with its device texture. tex stays nil until the
// image is first uploaded by renderer.texHandle.
type texture struct {
	src *image.RGBA
	tex driver.Texture
}
   283  
// blitter holds the programs, uniforms and vertex buffer created by
// newBlitter.
type blitter struct {
	ctx      driver.Device
	viewport image.Point
	// prog is indexed by materialType.
	prog   [3]*program
	layout driver.InputLayout
	// One uniform block per material type; their addresses are handed to
	// createColorPrograms.
	colUniforms            *blitColUniforms
	texUniforms            *blitTexUniforms
	linearGradientUniforms *blitLinearGradientUniforms
	// quadVerts is the four-vertex quad buffer built in newBlitter.
	quadVerts driver.Buffer
}
   294  
// blitColUniforms is the uniform data for the materialColor program:
// vert for the vertex stage and frag for the fragment stage.
type blitColUniforms struct {
	vert struct {
		blitUniforms
		_ [12]byte // Padding to a multiple of 16.
	}
	frag struct {
		colorUniforms
	}
}
   304  
// blitTexUniforms is the uniform data for the materialTexture program. It
// has a vertex part only; newBlitter passes nil for its fragment uniforms.
type blitTexUniforms struct {
	vert struct {
		blitUniforms
		_ [12]byte // Padding to a multiple of 16.
	}
}
   311  
// blitLinearGradientUniforms is the uniform data for the
// materialLinearGradient program: vert for the vertex stage and frag for the
// fragment stage.
type blitLinearGradientUniforms struct {
	vert struct {
		blitUniforms
		_ [12]byte // Padding to a multiple of 16.
	}
	frag struct {
		gradientUniforms
	}
}
   321  
// uniformBuffer pairs a device buffer with a byte slice.
// NOTE(review): ptr presumably aliases the Go-side uniform struct that is
// uploaded into buf — confirm against newUniformBuffer.
type uniformBuffer struct {
	buf driver.Buffer
	ptr []byte
}
   326  
// program bundles a device program with its optional vertex and fragment
// uniform buffers; either buffer may be nil (see createColorPrograms).
type program struct {
	prog         driver.Program
	vertUniforms *uniformBuffer
	fragUniforms *uniformBuffer
}
   332  
// blitUniforms is the vertex-stage uniform data shared by the blit programs.
// Its fields total 52 bytes, which the embedding structs pad to 64.
type blitUniforms struct {
	transform     [4]float32
	uvTransformR1 [4]float32
	uvTransformR2 [4]float32
	z             float32
}
   339  
// colorUniforms is the fragment-stage uniform data for a solid color.
type colorUniforms struct {
	color f32color.RGBA
}
   343  
// gradientUniforms is the fragment-stage uniform data for a two-color
// linear gradient.
type gradientUniforms struct {
	color1 f32color.RGBA
	color2 f32color.RGBA
}
   348  
// materialType identifies the kind of fill of a material. Its values also
// index the per-material program arrays (see createColorPrograms).
type materialType uint8
   350  
const (
	// clipTypeNone is the zero value; renderer.packIntersections leaves it
	// in place for paints with no path clip.
	clipTypeNone clipType = iota
	// clipTypePath marks a paint clipped by exactly one path.
	clipTypePath
	// clipTypeIntersection marks a paint clipped by two or more paths.
	clipTypeIntersection
)
   356  
// The material types. The values are used as array indices, so their order
// must match the arrays passed to createColorPrograms.
const (
	materialColor materialType = iota
	materialLinearGradient
	materialTexture
)
   362  
   363  func New(api API) (GPU, error) {
   364  	d, err := driver.NewDevice(api)
   365  	if err != nil {
   366  		return nil, err
   367  	}
   368  	d.BeginFrame(false, image.Point{})
   369  	defer d.EndFrame()
   370  	forceCompute := os.Getenv("GIORENDERER") == "forcecompute"
   371  	feats := d.Caps().Features
   372  	switch {
   373  	case !forceCompute && feats.Has(driver.FeatureFloatRenderTargets):
   374  		return newGPU(d)
   375  	case feats.Has(driver.FeatureCompute):
   376  		return newCompute(d)
   377  	default:
   378  		return nil, errors.New("gpu: no support for float render targets nor compute")
   379  	}
   380  }
   381  
   382  func newGPU(ctx driver.Device) (*gpu, error) {
   383  	g := &gpu{
   384  		cache: newResourceCache(),
   385  	}
   386  	g.drawOps.pathCache = newOpCache()
   387  	if err := g.init(ctx); err != nil {
   388  		return nil, err
   389  	}
   390  	return g, nil
   391  }
   392  
   393  func (g *gpu) init(ctx driver.Device) error {
   394  	g.ctx = ctx
   395  	g.renderer = newRenderer(ctx)
   396  	return nil
   397  }
   398  
   399  func (g *gpu) Clear(col color.NRGBA) {
   400  	g.drawOps.clear = true
   401  	g.drawOps.clearColor = f32color.LinearFromSRGB(col)
   402  }
   403  
   404  func (g *gpu) Release() {
   405  	g.renderer.release()
   406  	g.drawOps.pathCache.release()
   407  	g.cache.release()
   408  	if g.timers != nil {
   409  		g.timers.release()
   410  	}
   411  	g.ctx.Release()
   412  }
   413  
   414  func (g *gpu) Collect(viewport image.Point, frameOps *op.Ops) {
   415  	g.renderer.blitter.viewport = viewport
   416  	g.renderer.pather.viewport = viewport
   417  	g.drawOps.reset(g.cache, viewport)
   418  	g.drawOps.collect(g.ctx, g.cache, frameOps, viewport)
   419  	g.frameStart = time.Now()
   420  	if g.drawOps.profile && g.timers == nil && g.ctx.Caps().Features.Has(driver.FeatureTimers) {
   421  		g.timers = newTimers(g.ctx)
   422  		g.zopsTimer = g.timers.newTimer()
   423  		g.stencilTimer = g.timers.newTimer()
   424  		g.coverTimer = g.timers.newTimer()
   425  		g.cleanupTimer = g.timers.newTimer()
   426  	}
   427  }
   428  
// Frame draws the operations gathered by the last Collect into the default
// framebuffer. It runs in four phases — opaque z-tested images, stencil and
// intersection atlases, blended cover pass, cache cleanup — and updates the
// profile string when profiling is on and the timers report ready. It
// currently always returns nil.
//
// NOTE(review): only zopsTimer.begin is guarded by the profile flag; the
// remaining timer calls run unconditionally, so the timer methods presumably
// tolerate nil receivers — confirm against the timer implementation.
func (g *gpu) Frame() error {
	viewport := g.renderer.blitter.viewport
	defFBO := g.ctx.BeginFrame(g.drawOps.clear, viewport)
	defer g.ctx.EndFrame()
	for _, img := range g.drawOps.imageOps {
		expandPathOp(img.path, img.clip)
	}
	if g.drawOps.profile {
		g.zopsTimer.begin()
	}
	g.ctx.BindFramebuffer(defFBO)
	g.ctx.DepthFunc(driver.DepthFuncGreater)
	// Note that Clear must be before ClearDepth if nothing else is rendered
	// (len(zimageOps) == 0). If not, the Fairphone 2 will corrupt the depth buffer.
	if g.drawOps.clear {
		g.drawOps.clear = false
		g.ctx.Clear(g.drawOps.clearColor.Float32())
	}
	g.ctx.ClearDepth(0.0)
	g.ctx.Viewport(0, 0, viewport.X, viewport.Y)
	// Phase 1: opaque, rectangle clipped images.
	g.renderer.drawZOps(g.cache, g.drawOps.zimageOps)
	g.zopsTimer.end()
	// Phase 2: render clip paths and their intersections into atlases.
	g.stencilTimer.begin()
	g.ctx.SetBlend(true)
	g.renderer.packStencils(&g.drawOps.pathOps)
	g.renderer.stencilClips(g.drawOps.pathCache, g.drawOps.pathOps)
	g.renderer.packIntersections(g.drawOps.imageOps)
	g.renderer.intersect(g.drawOps.imageOps)
	g.stencilTimer.end()
	// Phase 3: draw the remaining images to the default framebuffer, which
	// must be rebound because phase 2 bound atlas framebuffers.
	g.coverTimer.begin()
	g.ctx.BindFramebuffer(defFBO)
	g.ctx.Viewport(0, 0, viewport.X, viewport.Y)
	g.renderer.drawOps(g.cache, g.drawOps.imageOps)
	g.ctx.SetBlend(false)
	g.renderer.pather.stenciler.invalidateFBO()
	g.coverTimer.end()
	g.ctx.BindFramebuffer(defFBO)
	// Phase 4: per-frame cache maintenance.
	g.cleanupTimer.begin()
	g.cache.frame()
	g.drawOps.pathCache.frame()
	g.cleanupTimer.end()
	if g.drawOps.profile && g.timers.ready() {
		zt, st, covt, cleant := g.zopsTimer.Elapsed, g.stencilTimer.Elapsed, g.coverTimer.Elapsed, g.cleanupTimer.Elapsed
		// The gpu total includes the cleanup time even though it is not
		// reported on its own.
		ft := zt + st + covt + cleant
		q := 100 * time.Microsecond
		zt, st, covt = zt.Round(q), st.Round(q), covt.Round(q)
		frameDur := time.Since(g.frameStart).Round(q)
		ft = ft.Round(q)
		g.profile = fmt.Sprintf("draw:%7s gpu:%7s zt:%7s st:%7s cov:%7s", frameDur, ft, zt, st, covt)
	}
	return nil
}
   481  
// Profile returns the most recent profiling summary written by Frame, or the
// empty string if none has been produced yet.
func (g *gpu) Profile() string {
	return g.profile
}
   485  
   486  func (r *renderer) texHandle(cache *resourceCache, data imageOpData) driver.Texture {
   487  	var tex *texture
   488  	t, exists := cache.get(data.handle)
   489  	if !exists {
   490  		t = &texture{
   491  			src: data.src,
   492  		}
   493  		cache.put(data.handle, t)
   494  	}
   495  	tex = t.(*texture)
   496  	if tex.tex != nil {
   497  		return tex.tex
   498  	}
   499  	handle, err := r.ctx.NewTexture(driver.TextureFormatSRGB, data.src.Bounds().Dx(), data.src.Bounds().Dy(), driver.FilterLinear, driver.FilterLinear, driver.BufferBindingTexture)
   500  	if err != nil {
   501  		panic(err)
   502  	}
   503  	driver.UploadImage(handle, image.Pt(0, 0), data.src)
   504  	tex.tex = handle
   505  	return tex.tex
   506  }
   507  
   508  func (t *texture) release() {
   509  	if t.tex != nil {
   510  		t.tex.Release()
   511  	}
   512  }
   513  
   514  func newRenderer(ctx driver.Device) *renderer {
   515  	r := &renderer{
   516  		ctx:     ctx,
   517  		blitter: newBlitter(ctx),
   518  		pather:  newPather(ctx),
   519  	}
   520  
   521  	maxDim := ctx.Caps().MaxTextureSize
   522  	// Large atlas textures cause artifacts due to precision loss in
   523  	// shaders.
   524  	if cap := 8192; maxDim > cap {
   525  		maxDim = cap
   526  	}
   527  
   528  	r.packer.maxDim = maxDim
   529  	r.intersections.maxDim = maxDim
   530  	return r
   531  }
   532  
// release frees the resources held by the pather and the blitter.
func (r *renderer) release() {
	r.pather.release()
	r.blitter.release()
}
   537  
   538  func newBlitter(ctx driver.Device) *blitter {
   539  	quadVerts, err := ctx.NewImmutableBuffer(driver.BufferBindingVertices,
   540  		byteslice.Slice([]float32{
   541  			-1, +1, 0, 0,
   542  			+1, +1, 1, 0,
   543  			-1, -1, 0, 1,
   544  			+1, -1, 1, 1,
   545  		}),
   546  	)
   547  	if err != nil {
   548  		panic(err)
   549  	}
   550  	b := &blitter{
   551  		ctx:       ctx,
   552  		quadVerts: quadVerts,
   553  	}
   554  	b.colUniforms = new(blitColUniforms)
   555  	b.texUniforms = new(blitTexUniforms)
   556  	b.linearGradientUniforms = new(blitLinearGradientUniforms)
   557  	prog, layout, err := createColorPrograms(ctx, shader_blit_vert, shader_blit_frag,
   558  		[3]interface{}{&b.colUniforms.vert, &b.linearGradientUniforms.vert, &b.texUniforms.vert},
   559  		[3]interface{}{&b.colUniforms.frag, &b.linearGradientUniforms.frag, nil},
   560  	)
   561  	if err != nil {
   562  		panic(err)
   563  	}
   564  	b.prog = prog
   565  	b.layout = layout
   566  	return b
   567  }
   568  
   569  func (b *blitter) release() {
   570  	b.quadVerts.Release()
   571  	for _, p := range b.prog {
   572  		p.Release()
   573  	}
   574  	b.layout.Release()
   575  }
   576  
   577  func createColorPrograms(b driver.Device, vsSrc driver.ShaderSources, fsSrc [3]driver.ShaderSources, vertUniforms, fragUniforms [3]interface{}) ([3]*program, driver.InputLayout, error) {
   578  	var progs [3]*program
   579  	{
   580  		prog, err := b.NewProgram(vsSrc, fsSrc[materialTexture])
   581  		if err != nil {
   582  			return progs, nil, err
   583  		}
   584  		var vertBuffer, fragBuffer *uniformBuffer
   585  		if u := vertUniforms[materialTexture]; u != nil {
   586  			vertBuffer = newUniformBuffer(b, u)
   587  			prog.SetVertexUniforms(vertBuffer.buf)
   588  		}
   589  		if u := fragUniforms[materialTexture]; u != nil {
   590  			fragBuffer = newUniformBuffer(b, u)
   591  			prog.SetFragmentUniforms(fragBuffer.buf)
   592  		}
   593  		progs[materialTexture] = newProgram(prog, vertBuffer, fragBuffer)
   594  	}
   595  	{
   596  		var vertBuffer, fragBuffer *uniformBuffer
   597  		prog, err := b.NewProgram(vsSrc, fsSrc[materialColor])
   598  		if err != nil {
   599  			progs[materialTexture].Release()
   600  			return progs, nil, err
   601  		}
   602  		if u := vertUniforms[materialColor]; u != nil {
   603  			vertBuffer = newUniformBuffer(b, u)
   604  			prog.SetVertexUniforms(vertBuffer.buf)
   605  		}
   606  		if u := fragUniforms[materialColor]; u != nil {
   607  			fragBuffer = newUniformBuffer(b, u)
   608  			prog.SetFragmentUniforms(fragBuffer.buf)
   609  		}
   610  		progs[materialColor] = newProgram(prog, vertBuffer, fragBuffer)
   611  	}
   612  	{
   613  		var vertBuffer, fragBuffer *uniformBuffer
   614  		prog, err := b.NewProgram(vsSrc, fsSrc[materialLinearGradient])
   615  		if err != nil {
   616  			progs[materialTexture].Release()
   617  			progs[materialColor].Release()
   618  			return progs, nil, err
   619  		}
   620  		if u := vertUniforms[materialLinearGradient]; u != nil {
   621  			vertBuffer = newUniformBuffer(b, u)
   622  			prog.SetVertexUniforms(vertBuffer.buf)
   623  		}
   624  		if u := fragUniforms[materialLinearGradient]; u != nil {
   625  			fragBuffer = newUniformBuffer(b, u)
   626  			prog.SetFragmentUniforms(fragBuffer.buf)
   627  		}
   628  		progs[materialLinearGradient] = newProgram(prog, vertBuffer, fragBuffer)
   629  	}
   630  	layout, err := b.NewInputLayout(vsSrc, []driver.InputDesc{
   631  		{Type: driver.DataTypeFloat, Size: 2, Offset: 0},
   632  		{Type: driver.DataTypeFloat, Size: 2, Offset: 4 * 2},
   633  	})
   634  	if err != nil {
   635  		progs[materialTexture].Release()
   636  		progs[materialColor].Release()
   637  		progs[materialLinearGradient].Release()
   638  		return progs, nil, err
   639  	}
   640  	return progs, layout, nil
   641  }
   642  
   643  func (r *renderer) stencilClips(pathCache *opCache, ops []*pathOp) {
   644  	if len(r.packer.sizes) == 0 {
   645  		return
   646  	}
   647  	fbo := -1
   648  	r.pather.begin(r.packer.sizes)
   649  	for _, p := range ops {
   650  		if fbo != p.place.Idx {
   651  			fbo = p.place.Idx
   652  			f := r.pather.stenciler.cover(fbo)
   653  			r.ctx.BindFramebuffer(f.fbo)
   654  			r.ctx.Clear(0.0, 0.0, 0.0, 0.0)
   655  		}
   656  		v, _ := pathCache.get(p.pathKey)
   657  		r.pather.stencilPath(p.clip, p.off, p.place.Pos, v.data)
   658  	}
   659  }
   660  
   661  func (r *renderer) intersect(ops []imageOp) {
   662  	if len(r.intersections.sizes) == 0 {
   663  		return
   664  	}
   665  	fbo := -1
   666  	r.pather.stenciler.beginIntersect(r.intersections.sizes)
   667  	r.ctx.BindVertexBuffer(r.blitter.quadVerts, 4*4, 0)
   668  	r.ctx.BindInputLayout(r.pather.stenciler.iprog.layout)
   669  	for _, img := range ops {
   670  		if img.clipType != clipTypeIntersection {
   671  			continue
   672  		}
   673  		if fbo != img.place.Idx {
   674  			fbo = img.place.Idx
   675  			f := r.pather.stenciler.intersections.fbos[fbo]
   676  			r.ctx.BindFramebuffer(f.fbo)
   677  			r.ctx.Clear(1.0, 0.0, 0.0, 0.0)
   678  		}
   679  		r.ctx.Viewport(img.place.Pos.X, img.place.Pos.Y, img.clip.Dx(), img.clip.Dy())
   680  		r.intersectPath(img.path, img.clip)
   681  	}
   682  }
   683  
   684  func (r *renderer) intersectPath(p *pathOp, clip image.Rectangle) {
   685  	if p.parent != nil {
   686  		r.intersectPath(p.parent, clip)
   687  	}
   688  	if !p.path {
   689  		return
   690  	}
   691  	uv := image.Rectangle{
   692  		Min: p.place.Pos,
   693  		Max: p.place.Pos.Add(p.clip.Size()),
   694  	}
   695  	o := clip.Min.Sub(p.clip.Min)
   696  	sub := image.Rectangle{
   697  		Min: o,
   698  		Max: o.Add(clip.Size()),
   699  	}
   700  	fbo := r.pather.stenciler.cover(p.place.Idx)
   701  	r.ctx.BindTexture(0, fbo.tex)
   702  	coverScale, coverOff := texSpaceTransform(layout.FRect(uv), fbo.size)
   703  	subScale, subOff := texSpaceTransform(layout.FRect(sub), p.clip.Size())
   704  	r.pather.stenciler.iprog.uniforms.vert.uvTransform = [4]float32{coverScale.X, coverScale.Y, coverOff.X, coverOff.Y}
   705  	r.pather.stenciler.iprog.uniforms.vert.subUVTransform = [4]float32{subScale.X, subScale.Y, subOff.X, subOff.Y}
   706  	r.pather.stenciler.iprog.prog.UploadUniforms()
   707  	r.ctx.DrawArrays(driver.DrawModeTriangleStrip, 0, 4)
   708  }
   709  
   710  func (r *renderer) packIntersections(ops []imageOp) {
   711  	r.intersections.clear()
   712  	for i, img := range ops {
   713  		var npaths int
   714  		var onePath *pathOp
   715  		for p := img.path; p != nil; p = p.parent {
   716  			if p.path {
   717  				onePath = p
   718  				npaths++
   719  			}
   720  		}
   721  		switch npaths {
   722  		case 0:
   723  		case 1:
   724  			place := onePath.place
   725  			place.Pos = place.Pos.Sub(onePath.clip.Min).Add(img.clip.Min)
   726  			ops[i].place = place
   727  			ops[i].clipType = clipTypePath
   728  		default:
   729  			sz := image.Point{X: img.clip.Dx(), Y: img.clip.Dy()}
   730  			place, ok := r.intersections.add(sz)
   731  			if !ok {
   732  				panic("internal error: if the intersection fit, the intersection should fit as well")
   733  			}
   734  			ops[i].clipType = clipTypeIntersection
   735  			ops[i].place = place
   736  		}
   737  	}
   738  }
   739  
   740  func (r *renderer) packStencils(pops *[]*pathOp) {
   741  	r.packer.clear()
   742  	ops := *pops
   743  	// Allocate atlas space for cover textures.
   744  	var i int
   745  	for i < len(ops) {
   746  		p := ops[i]
   747  		if p.clip.Empty() {
   748  			ops[i] = ops[len(ops)-1]
   749  			ops = ops[:len(ops)-1]
   750  			continue
   751  		}
   752  		sz := image.Point{X: p.clip.Dx(), Y: p.clip.Dy()}
   753  		place, ok := r.packer.add(sz)
   754  		if !ok {
   755  			// The clip area is at most the entire screen. Hopefully no
   756  			// screen is larger than GL_MAX_TEXTURE_SIZE.
   757  			panic(fmt.Errorf("clip area %v is larger than maximum texture size %dx%d", p.clip, r.packer.maxDim, r.packer.maxDim))
   758  		}
   759  		p.place = place
   760  		i++
   761  	}
   762  	*pops = ops
   763  }
   764  
   765  // boundRectF returns a bounding image.Rectangle for a f32.Rectangle.
   766  func boundRectF(r f32.Rectangle) image.Rectangle {
   767  	return image.Rectangle{
   768  		Min: image.Point{
   769  			X: int(floor(r.Min.X)),
   770  			Y: int(floor(r.Min.Y)),
   771  		},
   772  		Max: image.Point{
   773  			X: int(ceil(r.Max.X)),
   774  			Y: int(ceil(r.Max.Y)),
   775  		},
   776  	}
   777  }
   778  
   779  func ceil(v float32) int {
   780  	return int(math.Ceil(float64(v)))
   781  }
   782  
   783  func floor(v float32) int {
   784  	return int(math.Floor(float64(v)))
   785  }
   786  
   787  func (d *drawOps) reset(cache *resourceCache, viewport image.Point) {
   788  	d.profile = false
   789  	d.cache = cache
   790  	d.viewport = viewport
   791  	d.imageOps = d.imageOps[:0]
   792  	d.allImageOps = d.allImageOps[:0]
   793  	d.zimageOps = d.zimageOps[:0]
   794  	d.pathOps = d.pathOps[:0]
   795  	d.pathOpCache = d.pathOpCache[:0]
   796  	d.vertCache = d.vertCache[:0]
   797  }
   798  
   799  func (d *drawOps) collect(ctx driver.Device, cache *resourceCache, root *op.Ops, viewport image.Point) {
   800  	clip := f32.Rectangle{
   801  		Max: f32.Point{X: float32(viewport.X), Y: float32(viewport.Y)},
   802  	}
   803  	d.reader.Reset(root)
   804  	state := drawState{
   805  		clip:  clip,
   806  		rect:  true,
   807  		color: color.NRGBA{A: 0xff},
   808  	}
   809  	d.collectOps(&d.reader, state)
   810  	for _, p := range d.pathOps {
   811  		if v, exists := d.pathCache.get(p.pathKey); !exists || v.data.data == nil {
   812  			data := buildPath(ctx, p.pathVerts)
   813  			var computePath encoder
   814  			if d.compute {
   815  				computePath = encodePath(p.pathVerts)
   816  			}
   817  			d.pathCache.put(p.pathKey, opCacheValue{
   818  				data:        data,
   819  				bounds:      p.bounds,
   820  				computePath: computePath,
   821  			})
   822  		}
   823  		p.pathVerts = nil
   824  	}
   825  }
   826  
   827  func (d *drawOps) newPathOp() *pathOp {
   828  	d.pathOpCache = append(d.pathOpCache, pathOp{})
   829  	return &d.pathOpCache[len(d.pathOpCache)-1]
   830  }
   831  
   832  func (d *drawOps) addClipPath(state *drawState, aux []byte, auxKey ops.Key, bounds f32.Rectangle, off f32.Point, tr f32.Affine2D, stroke clip.StrokeStyle) {
   833  	npath := d.newPathOp()
   834  	*npath = pathOp{
   835  		parent: state.cpath,
   836  		bounds: bounds,
   837  		off:    off,
   838  		trans:  tr,
   839  		stroke: stroke,
   840  	}
   841  	state.cpath = npath
   842  	if len(aux) > 0 {
   843  		state.rect = false
   844  		state.cpath.pathKey = auxKey
   845  		state.cpath.path = true
   846  		state.cpath.pathVerts = aux
   847  		d.pathOps = append(d.pathOps, state.cpath)
   848  	}
   849  }
   850  
   851  // split a transform into two parts, one which is pur offset and the
   852  // other representing the scaling, shearing and rotation part
   853  func splitTransform(t f32.Affine2D) (srs f32.Affine2D, offset f32.Point) {
   854  	sx, hx, ox, hy, sy, oy := t.Elems()
   855  	offset = f32.Point{X: ox, Y: oy}
   856  	srs = f32.NewAffine2D(sx, hx, 0, hy, sy, 0)
   857  	return
   858  }
   859  
   860  func (d *drawOps) save(id int, state drawState) {
   861  	if extra := id - len(d.states) + 1; extra > 0 {
   862  		d.states = append(d.states, make([]drawState, extra)...)
   863  	}
   864  	d.states[id] = state
   865  }
   866  
   867  func (d *drawOps) collectOps(r *ops.Reader, state drawState) {
   868  	var (
   869  		quads quadsOp
   870  		str   clip.StrokeStyle
   871  		z     int
   872  	)
   873  	d.save(opconst.InitialStateID, state)
   874  loop:
   875  	for encOp, ok := r.Decode(); ok; encOp, ok = r.Decode() {
   876  		switch opconst.OpType(encOp.Data[0]) {
   877  		case opconst.TypeProfile:
   878  			d.profile = true
   879  		case opconst.TypeTransform:
   880  			dop := ops.DecodeTransform(encOp.Data)
   881  			state.t = state.t.Mul(dop)
   882  
   883  		case opconst.TypeStroke:
   884  			str = decodeStrokeOp(encOp.Data)
   885  
   886  		case opconst.TypePath:
   887  			encOp, ok = r.Decode()
   888  			if !ok {
   889  				break loop
   890  			}
   891  			quads.aux = encOp.Data[opconst.TypeAuxLen:]
   892  			quads.key = encOp.Key
   893  
   894  		case opconst.TypeClip:
   895  			var op clipOp
   896  			op.decode(encOp.Data)
   897  			bounds := op.bounds
   898  			trans, off := splitTransform(state.t)
   899  			if len(quads.aux) > 0 {
   900  				// There is a clipping path, build the gpu data and update the
   901  				// cache key such that it will be equal only if the transform is the
   902  				// same also. Use cached data if we have it.
   903  				quads.key = quads.key.SetTransform(trans)
   904  				if v, ok := d.pathCache.get(quads.key); ok {
   905  					// Since the GPU data exists in the cache aux will not be used.
   906  					// Why is this not used for the offset shapes?
   907  					op.bounds = v.bounds
   908  				} else {
   909  					pathData, bounds := d.buildVerts(
   910  						quads.aux, trans, op.outline, str,
   911  					)
   912  					op.bounds = bounds
   913  					if !d.compute {
   914  						quads.aux = pathData
   915  					}
   916  					// add it to the cache, without GPU data, so the transform can be
   917  					// reused.
   918  					d.pathCache.put(quads.key, opCacheValue{bounds: op.bounds})
   919  				}
   920  			} else {
   921  				quads.aux, op.bounds, _ = d.boundsForTransformedRect(bounds, trans)
   922  				quads.key = encOp.Key
   923  				quads.key.SetTransform(trans)
   924  			}
   925  			state.clip = state.clip.Intersect(op.bounds.Add(off))
   926  			d.addClipPath(&state, quads.aux, quads.key, op.bounds, off, state.t, str)
   927  			quads = quadsOp{}
   928  			str = clip.StrokeStyle{}
   929  
   930  		case opconst.TypeColor:
   931  			state.matType = materialColor
   932  			state.color = decodeColorOp(encOp.Data)
   933  		case opconst.TypeLinearGradient:
   934  			state.matType = materialLinearGradient
   935  			op := decodeLinearGradientOp(encOp.Data)
   936  			state.stop1 = op.stop1
   937  			state.stop2 = op.stop2
   938  			state.color1 = op.color1
   939  			state.color2 = op.color2
   940  		case opconst.TypeImage:
   941  			state.matType = materialTexture
   942  			state.image = decodeImageOp(encOp.Data, encOp.Refs)
   943  		case opconst.TypePaint:
   944  			// Transform (if needed) the painting rectangle and if so generate a clip path,
   945  			// for those cases also compute a partialTrans that maps texture coordinates between
   946  			// the new bounding rectangle and the transformed original paint rectangle.
   947  			trans, off := splitTransform(state.t)
   948  			// Fill the clip area, unless the material is a (bounded) image.
   949  			// TODO: Find a tighter bound.
   950  			inf := float32(1e6)
   951  			dst := f32.Rect(-inf, -inf, inf, inf)
   952  			if state.matType == materialTexture {
   953  				dst = layout.FRect(state.image.src.Rect)
   954  			}
   955  			clipData, bnd, partialTrans := d.boundsForTransformedRect(dst, trans)
   956  			cl := state.clip.Intersect(bnd.Add(off))
   957  			if cl.Empty() {
   958  				continue
   959  			}
   960  
   961  			wasrect := state.rect
   962  			if clipData != nil {
   963  				// The paint operation is sheared or rotated, add a clip path representing
   964  				// this transformed rectangle.
   965  				encOp.Key.SetTransform(trans)
   966  				d.addClipPath(&state, clipData, encOp.Key, bnd, off, state.t, clip.StrokeStyle{})
   967  			}
   968  
   969  			bounds := boundRectF(cl)
   970  			mat := state.materialFor(bnd, off, partialTrans, bounds, state.t)
   971  
   972  			if bounds.Min == (image.Point{}) && bounds.Max == d.viewport && state.rect && mat.opaque && (mat.material == materialColor) {
   973  				// The image is a uniform opaque color and takes up the whole screen.
   974  				// Scrap images up to and including this image and set clear color.
   975  				d.allImageOps = d.allImageOps[:0]
   976  				d.zimageOps = d.zimageOps[:0]
   977  				d.imageOps = d.imageOps[:0]
   978  				z = 0
   979  				d.clearColor = mat.color.Opaque()
   980  				d.clear = true
   981  				continue
   982  			}
   983  			z++
   984  			if z != int(uint16(z)) {
   985  				// TODO(eliasnaur) github.com/cybriq/giocore/issue/127.
   986  				panic("more than 65k paint objects not supported")
   987  			}
   988  			// Assume 16-bit depth buffer.
   989  			const zdepth = 1 << 16
   990  			// Convert z to window-space, assuming depth range [0;1].
   991  			zf := float32(z)*2/zdepth - 1.0
   992  			img := imageOp{
   993  				z:        zf,
   994  				path:     state.cpath,
   995  				clip:     bounds,
   996  				material: mat,
   997  			}
   998  
   999  			d.allImageOps = append(d.allImageOps, img)
  1000  			if state.rect && img.material.opaque {
  1001  				d.zimageOps = append(d.zimageOps, img)
  1002  			} else {
  1003  				d.imageOps = append(d.imageOps, img)
  1004  			}
  1005  			if clipData != nil {
  1006  				// we added a clip path that should not remain
  1007  				state.cpath = state.cpath.parent
  1008  				state.rect = wasrect
  1009  			}
  1010  		case opconst.TypeSave:
  1011  			id := ops.DecodeSave(encOp.Data)
  1012  			d.save(id, state)
  1013  		case opconst.TypeLoad:
  1014  			id, mask := ops.DecodeLoad(encOp.Data)
  1015  			s := d.states[id]
  1016  			if mask&opconst.TransformState != 0 {
  1017  				state.t = s.t
  1018  			}
  1019  			if mask&^opconst.TransformState != 0 {
  1020  				state = s
  1021  			}
  1022  		}
  1023  	}
  1024  }
  1025  
  1026  func expandPathOp(p *pathOp, clip image.Rectangle) {
  1027  	for p != nil {
  1028  		pclip := p.clip
  1029  		if !pclip.Empty() {
  1030  			clip = clip.Union(pclip)
  1031  		}
  1032  		p.clip = clip
  1033  		p = p.parent
  1034  	}
  1035  }
  1036  
  1037  func (d *drawState) materialFor(rect f32.Rectangle, off f32.Point, partTrans f32.Affine2D, clip image.Rectangle, trans f32.Affine2D) material {
  1038  	var m material
  1039  	switch d.matType {
  1040  	case materialColor:
  1041  		m.material = materialColor
  1042  		m.color = f32color.LinearFromSRGB(d.color)
  1043  		m.opaque = m.color.A == 1.0
  1044  	case materialLinearGradient:
  1045  		m.material = materialLinearGradient
  1046  
  1047  		m.color1 = f32color.LinearFromSRGB(d.color1)
  1048  		m.color2 = f32color.LinearFromSRGB(d.color2)
  1049  		m.opaque = m.color1.A == 1.0 && m.color2.A == 1.0
  1050  
  1051  		m.uvTrans = partTrans.Mul(gradientSpaceTransform(clip, off, d.stop1, d.stop2))
  1052  	case materialTexture:
  1053  		m.material = materialTexture
  1054  		dr := boundRectF(rect.Add(off))
  1055  		sz := d.image.src.Bounds().Size()
  1056  		sr := f32.Rectangle{
  1057  			Max: f32.Point{
  1058  				X: float32(sz.X),
  1059  				Y: float32(sz.Y),
  1060  			},
  1061  		}
  1062  		dx := float32(dr.Dx())
  1063  		sdx := sr.Dx()
  1064  		sr.Min.X += float32(clip.Min.X-dr.Min.X) * sdx / dx
  1065  		sr.Max.X -= float32(dr.Max.X-clip.Max.X) * sdx / dx
  1066  		dy := float32(dr.Dy())
  1067  		sdy := sr.Dy()
  1068  		sr.Min.Y += float32(clip.Min.Y-dr.Min.Y) * sdy / dy
  1069  		sr.Max.Y -= float32(dr.Max.Y-clip.Max.Y) * sdy / dy
  1070  		uvScale, uvOffset := texSpaceTransform(sr, sz)
  1071  		m.uvTrans = partTrans.Mul(f32.Affine2D{}.Scale(f32.Point{}, uvScale).Offset(uvOffset))
  1072  		m.trans = trans
  1073  		m.data = d.image
  1074  	}
  1075  	return m
  1076  }
  1077  
  1078  func (r *renderer) drawZOps(cache *resourceCache, ops []imageOp) {
  1079  	r.ctx.SetDepthTest(true)
  1080  	r.ctx.BindVertexBuffer(r.blitter.quadVerts, 4*4, 0)
  1081  	r.ctx.BindInputLayout(r.blitter.layout)
  1082  	// Render front to back.
  1083  	for i := len(ops) - 1; i >= 0; i-- {
  1084  		img := ops[i]
  1085  		m := img.material
  1086  		switch m.material {
  1087  		case materialTexture:
  1088  			r.ctx.BindTexture(0, r.texHandle(cache, m.data))
  1089  		}
  1090  		drc := img.clip
  1091  		scale, off := clipSpaceTransform(drc, r.blitter.viewport)
  1092  		r.blitter.blit(img.z, m.material, m.color, m.color1, m.color2, scale, off, m.uvTrans)
  1093  	}
  1094  	r.ctx.SetDepthTest(false)
  1095  }
  1096  
  1097  func (r *renderer) drawOps(cache *resourceCache, ops []imageOp) {
  1098  	r.ctx.SetDepthTest(true)
  1099  	r.ctx.DepthMask(false)
  1100  	r.ctx.BlendFunc(driver.BlendFactorOne, driver.BlendFactorOneMinusSrcAlpha)
  1101  	r.ctx.BindVertexBuffer(r.blitter.quadVerts, 4*4, 0)
  1102  	r.ctx.BindInputLayout(r.pather.coverer.layout)
  1103  	var coverTex driver.Texture
  1104  	for _, img := range ops {
  1105  		m := img.material
  1106  		switch m.material {
  1107  		case materialTexture:
  1108  			r.ctx.BindTexture(0, r.texHandle(cache, m.data))
  1109  		}
  1110  		drc := img.clip
  1111  
  1112  		scale, off := clipSpaceTransform(drc, r.blitter.viewport)
  1113  		var fbo stencilFBO
  1114  		switch img.clipType {
  1115  		case clipTypeNone:
  1116  			r.blitter.blit(img.z, m.material, m.color, m.color1, m.color2, scale, off, m.uvTrans)
  1117  			continue
  1118  		case clipTypePath:
  1119  			fbo = r.pather.stenciler.cover(img.place.Idx)
  1120  		case clipTypeIntersection:
  1121  			fbo = r.pather.stenciler.intersections.fbos[img.place.Idx]
  1122  		}
  1123  		if coverTex != fbo.tex {
  1124  			coverTex = fbo.tex
  1125  			r.ctx.BindTexture(1, coverTex)
  1126  		}
  1127  		uv := image.Rectangle{
  1128  			Min: img.place.Pos,
  1129  			Max: img.place.Pos.Add(drc.Size()),
  1130  		}
  1131  		coverScale, coverOff := texSpaceTransform(layout.FRect(uv), fbo.size)
  1132  		r.pather.cover(img.z, m.material, m.color, m.color1, m.color2, scale, off, m.uvTrans, coverScale, coverOff)
  1133  	}
  1134  	r.ctx.DepthMask(true)
  1135  	r.ctx.SetDepthTest(false)
  1136  }
  1137  
  1138  func (b *blitter) blit(z float32, mat materialType, col f32color.RGBA, col1, col2 f32color.RGBA, scale, off f32.Point, uvTrans f32.Affine2D) {
  1139  	p := b.prog[mat]
  1140  	b.ctx.BindProgram(p.prog)
  1141  	var uniforms *blitUniforms
  1142  	switch mat {
  1143  	case materialColor:
  1144  		b.colUniforms.frag.color = col
  1145  		uniforms = &b.colUniforms.vert.blitUniforms
  1146  	case materialTexture:
  1147  		t1, t2, t3, t4, t5, t6 := uvTrans.Elems()
  1148  		b.texUniforms.vert.blitUniforms.uvTransformR1 = [4]float32{t1, t2, t3, 0}
  1149  		b.texUniforms.vert.blitUniforms.uvTransformR2 = [4]float32{t4, t5, t6, 0}
  1150  		uniforms = &b.texUniforms.vert.blitUniforms
  1151  	case materialLinearGradient:
  1152  		b.linearGradientUniforms.frag.color1 = col1
  1153  		b.linearGradientUniforms.frag.color2 = col2
  1154  
  1155  		t1, t2, t3, t4, t5, t6 := uvTrans.Elems()
  1156  		b.linearGradientUniforms.vert.blitUniforms.uvTransformR1 = [4]float32{t1, t2, t3, 0}
  1157  		b.linearGradientUniforms.vert.blitUniforms.uvTransformR2 = [4]float32{t4, t5, t6, 0}
  1158  		uniforms = &b.linearGradientUniforms.vert.blitUniforms
  1159  	}
  1160  	uniforms.z = z
  1161  	uniforms.transform = [4]float32{scale.X, scale.Y, off.X, off.Y}
  1162  	p.UploadUniforms()
  1163  	b.ctx.DrawArrays(driver.DrawModeTriangleStrip, 0, 4)
  1164  }
  1165  
  1166  // newUniformBuffer creates a new GPU uniform buffer backed by the
  1167  // structure uniformBlock points to.
  1168  func newUniformBuffer(b driver.Device, uniformBlock interface{}) *uniformBuffer {
  1169  	ref := reflect.ValueOf(uniformBlock)
  1170  	// Determine the size of the uniforms structure, *uniforms.
  1171  	size := ref.Elem().Type().Size()
  1172  	// Map the uniforms structure as a byte slice.
  1173  	ptr := (*[1 << 30]byte)(unsafe.Pointer(ref.Pointer()))[:size:size]
  1174  	ubuf, err := b.NewBuffer(driver.BufferBindingUniforms, len(ptr))
  1175  	if err != nil {
  1176  		panic(err)
  1177  	}
  1178  	return &uniformBuffer{buf: ubuf, ptr: ptr}
  1179  }
  1180  
// Upload uploads the bytes referenced by u.ptr to the underlying buffer
// u.buf.
func (u *uniformBuffer) Upload() {
	u.buf.Upload(u.ptr)
}
  1184  
// Release releases the underlying buffer and clears the reference to it.
// The uniformBuffer must not be used for uploads afterwards.
func (u *uniformBuffer) Release() {
	u.buf.Release()
	u.buf = nil
}
  1189  
  1190  func newProgram(prog driver.Program, vertUniforms, fragUniforms *uniformBuffer) *program {
  1191  	if vertUniforms != nil {
  1192  		prog.SetVertexUniforms(vertUniforms.buf)
  1193  	}
  1194  	if fragUniforms != nil {
  1195  		prog.SetFragmentUniforms(fragUniforms.buf)
  1196  	}
  1197  	return &program{prog: prog, vertUniforms: vertUniforms, fragUniforms: fragUniforms}
  1198  }
  1199  
  1200  func (p *program) UploadUniforms() {
  1201  	if p.vertUniforms != nil {
  1202  		p.vertUniforms.Upload()
  1203  	}
  1204  	if p.fragUniforms != nil {
  1205  		p.fragUniforms.Upload()
  1206  	}
  1207  }
  1208  
// Release releases the wrapped program and then any uniform buffers that are
// set, clearing each reference once it has been released.
func (p *program) Release() {
	p.prog.Release()
	p.prog = nil
	// The uniform buffers are optional; only release the ones present.
	if p.vertUniforms != nil {
		p.vertUniforms.Release()
		p.vertUniforms = nil
	}
	if p.fragUniforms != nil {
		p.fragUniforms.Release()
		p.fragUniforms = nil
	}
}
  1221  
  1222  // texSpaceTransform return the scale and offset that transforms the given subimage
  1223  // into quad texture coordinates.
  1224  func texSpaceTransform(r f32.Rectangle, bounds image.Point) (f32.Point, f32.Point) {
  1225  	size := f32.Point{X: float32(bounds.X), Y: float32(bounds.Y)}
  1226  	scale := f32.Point{X: r.Dx() / size.X, Y: r.Dy() / size.Y}
  1227  	offset := f32.Point{X: r.Min.X / size.X, Y: r.Min.Y / size.Y}
  1228  	return scale, offset
  1229  }
  1230  
  1231  // gradientSpaceTransform transforms stop1 and stop2 to [(0,0), (1,1)].
  1232  func gradientSpaceTransform(clip image.Rectangle, off f32.Point, stop1, stop2 f32.Point) f32.Affine2D {
  1233  	d := stop2.Sub(stop1)
  1234  	l := float32(math.Sqrt(float64(d.X*d.X + d.Y*d.Y)))
  1235  	a := float32(math.Atan2(float64(-d.Y), float64(d.X)))
  1236  
  1237  	// TODO: optimize
  1238  	zp := f32.Point{}
  1239  	return f32.Affine2D{}.
  1240  		Scale(zp, layout.FPt(clip.Size())).            // scale to pixel space
  1241  		Offset(zp.Sub(off).Add(layout.FPt(clip.Min))). // offset to clip space
  1242  		Offset(zp.Sub(stop1)).                         // offset to first stop point
  1243  		Rotate(zp, a).                                 // rotate to align gradient
  1244  		Scale(zp, f32.Pt(1/l, 1/l))                    // scale gradient to right size
  1245  }
  1246  
  1247  // clipSpaceTransform returns the scale and offset that transforms the given
  1248  // rectangle from a viewport into OpenGL clip space.
  1249  func clipSpaceTransform(r image.Rectangle, viewport image.Point) (f32.Point, f32.Point) {
  1250  	// First, transform UI coordinates to OpenGL coordinates:
  1251  	//
  1252  	//	[(-1, +1) (+1, +1)]
  1253  	//	[(-1, -1) (+1, -1)]
  1254  	//
  1255  	x, y := float32(r.Min.X), float32(r.Min.Y)
  1256  	w, h := float32(r.Dx()), float32(r.Dy())
  1257  	vx, vy := 2/float32(viewport.X), 2/float32(viewport.Y)
  1258  	x = x*vx - 1
  1259  	y = 1 - y*vy
  1260  	w *= vx
  1261  	h *= vy
  1262  
  1263  	// Then, compute the transformation from the fullscreen quad to
  1264  	// the rectangle at (x, y) and dimensions (w, h).
  1265  	scale := f32.Point{X: w * .5, Y: h * .5}
  1266  	offset := f32.Point{X: x + w*.5, Y: y - h*.5}
  1267  
  1268  	return scale, offset
  1269  }
  1270  
  1271  // Fill in maximal Y coordinates of the NW and NE corners.
  1272  func fillMaxY(verts []byte) {
  1273  	contour := 0
  1274  	bo := binary.LittleEndian
  1275  	for len(verts) > 0 {
  1276  		maxy := float32(math.Inf(-1))
  1277  		i := 0
  1278  		for ; i+vertStride*4 <= len(verts); i += vertStride * 4 {
  1279  			vert := verts[i : i+vertStride]
  1280  			// MaxY contains the integer contour index.
  1281  			pathContour := int(bo.Uint32(vert[int(unsafe.Offsetof(((*vertex)(nil)).MaxY)):]))
  1282  			if contour != pathContour {
  1283  				contour = pathContour
  1284  				break
  1285  			}
  1286  			fromy := math.Float32frombits(bo.Uint32(vert[int(unsafe.Offsetof(((*vertex)(nil)).FromY)):]))
  1287  			ctrly := math.Float32frombits(bo.Uint32(vert[int(unsafe.Offsetof(((*vertex)(nil)).CtrlY)):]))
  1288  			toy := math.Float32frombits(bo.Uint32(vert[int(unsafe.Offsetof(((*vertex)(nil)).ToY)):]))
  1289  			if fromy > maxy {
  1290  				maxy = fromy
  1291  			}
  1292  			if ctrly > maxy {
  1293  				maxy = ctrly
  1294  			}
  1295  			if toy > maxy {
  1296  				maxy = toy
  1297  			}
  1298  		}
  1299  		fillContourMaxY(maxy, verts[:i])
  1300  		verts = verts[i:]
  1301  	}
  1302  }
  1303  
  1304  func fillContourMaxY(maxy float32, verts []byte) {
  1305  	bo := binary.LittleEndian
  1306  	for i := 0; i < len(verts); i += vertStride {
  1307  		off := int(unsafe.Offsetof(((*vertex)(nil)).MaxY))
  1308  		bo.PutUint32(verts[i+off:], math.Float32bits(maxy))
  1309  	}
  1310  }
  1311  
  1312  func (d *drawOps) writeVertCache(n int) []byte {
  1313  	d.vertCache = append(d.vertCache, make([]byte, n)...)
  1314  	return d.vertCache[len(d.vertCache)-n:]
  1315  }
  1316  
  1317  // transform, split paths as needed, calculate maxY, bounds and create GPU vertices.
  1318  func (d *drawOps) buildVerts(pathData []byte, tr f32.Affine2D, outline bool, str clip.StrokeStyle) (verts []byte, bounds f32.Rectangle) {
  1319  	inf := float32(math.Inf(+1))
  1320  	d.qs.bounds = f32.Rectangle{
  1321  		Min: f32.Point{X: inf, Y: inf},
  1322  		Max: f32.Point{X: -inf, Y: -inf},
  1323  	}
  1324  	d.qs.d = d
  1325  	startLength := len(d.vertCache)
  1326  
  1327  	switch {
  1328  	case str.Width > 0:
  1329  		// Stroke path.
  1330  		ss := stroke.StrokeStyle{
  1331  			Width: str.Width,
  1332  			Miter: str.Miter,
  1333  			Cap:   stroke.StrokeCap(str.Cap),
  1334  			Join:  stroke.StrokeJoin(str.Join),
  1335  		}
  1336  		quads := stroke.StrokePathCommands(ss, stroke.DashOp{}, pathData)
  1337  		for _, quad := range quads {
  1338  			d.qs.contour = quad.Contour
  1339  			quad.Quad = quad.Quad.Transform(tr)
  1340  
  1341  			d.qs.splitAndEncode(quad.Quad)
  1342  		}
  1343  
  1344  	case outline:
  1345  		decodeToOutlineQuads(&d.qs, tr, pathData)
  1346  	}
  1347  
  1348  	fillMaxY(d.vertCache[startLength:])
  1349  	return d.vertCache[startLength:], d.qs.bounds
  1350  }
  1351  
  1352  // decodeOutlineQuads decodes scene commands, splits them into quadratic béziers
  1353  // as needed and feeds them to the supplied splitter.
  1354  func decodeToOutlineQuads(qs *quadSplitter, tr f32.Affine2D, pathData []byte) {
  1355  	for len(pathData) >= scene.CommandSize+4 {
  1356  		qs.contour = bo.Uint32(pathData)
  1357  		cmd := ops.DecodeCommand(pathData[4:])
  1358  		switch cmd.Op() {
  1359  		case scene.OpLine:
  1360  			var q stroke.QuadSegment
  1361  			q.From, q.To = scene.DecodeLine(cmd)
  1362  			q.Ctrl = q.From.Add(q.To).Mul(.5)
  1363  			q = q.Transform(tr)
  1364  			qs.splitAndEncode(q)
  1365  		case scene.OpQuad:
  1366  			var q stroke.QuadSegment
  1367  			q.From, q.Ctrl, q.To = scene.DecodeQuad(cmd)
  1368  			q = q.Transform(tr)
  1369  			qs.splitAndEncode(q)
  1370  		case scene.OpCubic:
  1371  			for _, q := range stroke.SplitCubic(scene.DecodeCubic(cmd)) {
  1372  				q = q.Transform(tr)
  1373  				qs.splitAndEncode(q)
  1374  			}
  1375  		default:
  1376  			panic("unsupported scene command")
  1377  		}
  1378  		pathData = pathData[scene.CommandSize+4:]
  1379  	}
  1380  }
  1381  
  1382  // create GPU vertices for transformed r, find the bounds and establish texture transform.
  1383  func (d *drawOps) boundsForTransformedRect(r f32.Rectangle, tr f32.Affine2D) (aux []byte, bnd f32.Rectangle, ptr f32.Affine2D) {
  1384  	if isPureOffset(tr) {
  1385  		// fast-path to allow blitting of pure rectangles
  1386  		_, _, ox, _, _, oy := tr.Elems()
  1387  		off := f32.Pt(ox, oy)
  1388  		bnd.Min = r.Min.Add(off)
  1389  		bnd.Max = r.Max.Add(off)
  1390  		return
  1391  	}
  1392  
  1393  	// transform all corners, find new bounds
  1394  	corners := [4]f32.Point{
  1395  		tr.Transform(r.Min), tr.Transform(f32.Pt(r.Max.X, r.Min.Y)),
  1396  		tr.Transform(r.Max), tr.Transform(f32.Pt(r.Min.X, r.Max.Y)),
  1397  	}
  1398  	bnd.Min = f32.Pt(math.MaxFloat32, math.MaxFloat32)
  1399  	bnd.Max = f32.Pt(-math.MaxFloat32, -math.MaxFloat32)
  1400  	for _, c := range corners {
  1401  		if c.X < bnd.Min.X {
  1402  			bnd.Min.X = c.X
  1403  		}
  1404  		if c.Y < bnd.Min.Y {
  1405  			bnd.Min.Y = c.Y
  1406  		}
  1407  		if c.X > bnd.Max.X {
  1408  			bnd.Max.X = c.X
  1409  		}
  1410  		if c.Y > bnd.Max.Y {
  1411  			bnd.Max.Y = c.Y
  1412  		}
  1413  	}
  1414  
  1415  	// build the GPU vertices
  1416  	l := len(d.vertCache)
  1417  	if !d.compute {
  1418  		d.vertCache = append(d.vertCache, make([]byte, vertStride*4*4)...)
  1419  		aux = d.vertCache[l:]
  1420  		encodeQuadTo(aux, 0, corners[0], corners[0].Add(corners[1]).Mul(0.5), corners[1])
  1421  		encodeQuadTo(aux[vertStride*4:], 0, corners[1], corners[1].Add(corners[2]).Mul(0.5), corners[2])
  1422  		encodeQuadTo(aux[vertStride*4*2:], 0, corners[2], corners[2].Add(corners[3]).Mul(0.5), corners[3])
  1423  		encodeQuadTo(aux[vertStride*4*3:], 0, corners[3], corners[3].Add(corners[0]).Mul(0.5), corners[0])
  1424  		fillMaxY(aux)
  1425  	} else {
  1426  		d.vertCache = append(d.vertCache, make([]byte, (scene.CommandSize+4)*4)...)
  1427  		aux = d.vertCache[l:]
  1428  		buf := aux
  1429  		bo := binary.LittleEndian
  1430  		bo.PutUint32(buf, 0) // Contour
  1431  		ops.EncodeCommand(buf[4:], scene.Line(r.Min, f32.Pt(r.Max.X, r.Min.Y)))
  1432  		buf = buf[4+scene.CommandSize:]
  1433  		bo.PutUint32(buf, 0)
  1434  		ops.EncodeCommand(buf[4:], scene.Line(f32.Pt(r.Max.X, r.Min.Y), r.Max))
  1435  		buf = buf[4+scene.CommandSize:]
  1436  		bo.PutUint32(buf, 0)
  1437  		ops.EncodeCommand(buf[4:], scene.Line(r.Max, f32.Pt(r.Min.X, r.Max.Y)))
  1438  		buf = buf[4+scene.CommandSize:]
  1439  		bo.PutUint32(buf, 0)
  1440  		ops.EncodeCommand(buf[4:], scene.Line(f32.Pt(r.Min.X, r.Max.Y), r.Min))
  1441  	}
  1442  
  1443  	// establish the transform mapping from bounds rectangle to transformed corners
  1444  	var P1, P2, P3 f32.Point
  1445  	P1.X = (corners[1].X - bnd.Min.X) / (bnd.Max.X - bnd.Min.X)
  1446  	P1.Y = (corners[1].Y - bnd.Min.Y) / (bnd.Max.Y - bnd.Min.Y)
  1447  	P2.X = (corners[2].X - bnd.Min.X) / (bnd.Max.X - bnd.Min.X)
  1448  	P2.Y = (corners[2].Y - bnd.Min.Y) / (bnd.Max.Y - bnd.Min.Y)
  1449  	P3.X = (corners[3].X - bnd.Min.X) / (bnd.Max.X - bnd.Min.X)
  1450  	P3.Y = (corners[3].Y - bnd.Min.Y) / (bnd.Max.Y - bnd.Min.Y)
  1451  	sx, sy := P2.X-P3.X, P2.Y-P3.Y
  1452  	ptr = f32.NewAffine2D(sx, P2.X-P1.X, P1.X-sx, sy, P2.Y-P1.Y, P1.Y-sy).Invert()
  1453  
  1454  	return
  1455  }
  1456  
  1457  func isPureOffset(t f32.Affine2D) bool {
  1458  	a, b, _, d, e, _ := t.Elems()
  1459  	return a == 1 && b == 0 && d == 0 && e == 1
  1460  }