9fans.net/go@v0.0.5/draw/memdraw/draw.go (about)

     1  // #include <u.h>
     2  // #include <libc.h>
     3  // #include <draw.h>
     4  // #include <memdraw.h>
     5  
     6  package memdraw
     7  
     8  import (
     9  	"fmt"
    10  	"os"
    11  	"reflect"
    12  	"runtime"
    13  	"strings"
    14  	"unsafe"
    15  
    16  	"9fans.net/go/draw"
    17  )
    18  
// _DBG is a compile-time switch: when true, the drawing routines in this
// file print trace output to standard error.
const _DBG = false

// drawdebug is a run-time debug flag; alphadraw prints extra trace output
// to standard error when it is non-zero.
var drawdebug int

// tablesbuilt is set to 1 by mktables once the conversion tables have been
// filled in, so that later calls return immediately.
var tablesbuilt int
    23  
    24  /* perfect approximation to NTSC = .299r+.587g+.114b when 0 ≤ r,g,b < 256 */
    25  func _RGB2K(r, g, b uint8) uint8 {
    26  //	fmt.Printf("RGB2K %#x %#x %#x -> %#x\n%s", r, g, b,
    27  //		uint8((156763*int(r) + 307758*int(g) + 59769*int(b)) >> 19),
    28  //		string(debug.Stack()))
    29  	return uint8((156763*int(r) + 307758*int(g) + 59769*int(b)) >> 19)
    30  }
    31  
    32  /*
    33   * For 16-bit values, x / 255 == (t = x+1, (t+(t>>8)) >> 8).
    34   * We add another 127 to round to the nearest value rather
    35   * than truncate.
    36   *
    37   * CALCxy does x bytewise calculations on y input images (x=1,4; y=1,2).
    38   * CALC2x does two parallel 16-bit calculations on y input images (y=1,2).
    39   */
    40  func _CALC11(a, v uint8) uint8 {
    41  	t := uint32(a)*uint32(v) + 128
    42  	return uint8((t - 1) / 255)
    43  }
    44  
    45  func _CALC12(a1, v1, a2, v2 uint8) uint8 {
    46  	t := uint32(a1)*uint32(v1) + uint32(a2)*uint32(v2) + 128
    47  	return uint8((t - 1) / 255)
    48  }
    49  
    50  const _MASK = 0x00FF00FF
    51  
    52  func _CALC21(a uint8, vvuu uint32) uint32 {
    53  	// vvuu is masked by MASK
    54  	panic("CALC21")
    55  	t := uint32(a)*vvuu + 0x0080_0080
    56  	return ((t + ((t >> 8) & _MASK)) >> 8) & _MASK
    57  }
    58  
    59  func _CALC41(a uint8, rgba uint32) uint32 {
    60  	return _CALC21(a, rgba&_MASK) | _CALC21(a, (rgba>>8)&_MASK)<<8
    61  }
    62  
    63  func _CALC22(a1 uint8, vvuu1 uint32, a2 uint8, vvuu2 uint32) uint32 {
    64  	// vvuu is masked by MASK
    65  	panic("CALC22")
    66  	t := uint32(a1)*vvuu1 + uint32(a2)*vvuu2 + 0x0080_0080
    67  	return ((t + ((t >> 8) & _MASK)) >> 8) & _MASK
    68  }
    69  
    70  func _CALC42(a1 uint8, rgba1 uint32, a2 uint8, rgba2 uint32) uint32 {
    71  	return uint32(_CALC12(a1, uint8(rgba1>>24), a2, uint8(rgba2>>24)))<<24 |
    72  		uint32(_CALC12(a1, uint8(rgba1>>16), a2, uint8(rgba2>>16)))<<16 |
    73  		uint32(_CALC12(a1, uint8(rgba1>>8), a2, uint8(rgba2>>8)))<<8 |
    74  		uint32(_CALC12(a1, uint8(rgba1>>0), a2, uint8(rgba2>>0)))<<0
    75  
    76  	return _CALC22(a1, rgba1&_MASK, a2, rgba2&_MASK) | _CALC22(a1, (rgba1>>8)&_MASK, a2, (rgba2>>8)&_MASK)<<8
    77  }
    78  
// _Subdraw is the signature of a sub-drawing routine: it receives the
// prepared draw parameters and returns an int status.
// NOTE(review): presumably non-zero means "handled", matching how
// _memimagedraw tests hwdraw, memoptdraw and chardraw — confirm.
type _Subdraw func(*memDrawParam) int

// The images below are created by Init.  memones and memzeros are 1×1
// replicated GREY1 images whose first data byte is all ones and zero
// respectively; the other four names are aliases for one of those two.
var memones *Image
var memzeros *Image
var memwhite *Image       // same image as memones
var Black *Image          // same image as memzeros
var memtransparent *Image // same image as memzeros
var Opaque *Image         // same image as memones; default mask in _memimagedrawsetup

// memimageinit_didinit is non-zero once Init has run, making later calls
// to Init no-ops.
var memimageinit_didinit int = 0
    89  
    90  func Init() {
    91  
    92  	if memimageinit_didinit != 0 {
    93  		return
    94  	}
    95  
    96  	memimageinit_didinit = 1
    97  
    98  	mktables()
    99  	_memmkcmap()
   100  
   101  	var err error
   102  	memones, err = AllocImage(draw.Rect(0, 0, 1, 1), draw.GREY1)
   103  	if err != nil {
   104  		panic("cannot initialize memimage library")
   105  	}
   106  	memones.Flags |= Frepl
   107  	memones.Clipr = draw.Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF)
   108  	byteaddr(memones, draw.ZP)[0] = ^uint8(0)
   109  
   110  	memzeros, err = AllocImage(draw.Rect(0, 0, 1, 1), draw.GREY1)
   111  	if err != nil {
   112  		panic("cannot initialize memimage library")
   113  	}
   114  	memzeros.Flags |= Frepl
   115  	memzeros.Clipr = draw.Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF)
   116  	byteaddr(memzeros, draw.ZP)[0] = 0
   117  
   118  	memwhite = memones
   119  	Black = memzeros
   120  	Opaque = memones
   121  	memtransparent = memzeros
   122  }
   123  
   124  // #define DBG drawdebug
   125  var par memDrawParam
   126  
   127  func _memimagedrawsetup(dst *Image, r draw.Rectangle, src *Image, p0 draw.Point, mask *Image, p1 draw.Point, op draw.Op) *memDrawParam {
   128  	if mask == nil {
   129  		mask = Opaque
   130  	}
   131  
   132  	if _DBG {
   133  		fmt.Fprintf(os.Stderr, "memimagedraw %p/%X %v @ %p %p/%X %v %p/%X %v... ", dst, dst.Pix, r, dst.Data.Bdata, src, src.Pix, p0, mask, mask.Pix, p1)
   134  	}
   135  
   136  	if drawclip(dst, &r, src, &p0, mask, &p1, &par.sr, &par.mr) == 0 {
   137  		/*		if(drawdebug) */
   138  		/*			fmt.Fprintf(os.Stderr, "empty clipped rectangle\n"); */
   139  		return nil
   140  	}
   141  
   142  	if _DBG {
   143  		fmt.Fprintf(os.Stderr, "->clip %v %v %v\n", r, p0, p1)
   144  	}
   145  
   146  	if op < draw.Clear || op > draw.SoverD {
   147  		/*		if(drawdebug) */
   148  		/*			fmt.Fprintf(os.Stderr, "op out of range: %d\n", op); */
   149  		return nil
   150  	}
   151  
   152  	par.op = op
   153  	par.dst = dst
   154  	par.r = r
   155  	par.src = src
   156  	/* par.sr set by drawclip */
   157  	par.mask = mask
   158  	/* par.mr set by drawclip */
   159  
   160  	par.state = 0
   161  	if src.Flags&Frepl != 0 {
   162  		par.state |= _Replsrc
   163  		if src.R.Dx() == 1 && src.R.Dy() == 1 {
   164  			par.sval = pixelbits(src, src.R.Min)
   165  			par.state |= _Simplesrc
   166  			par.srgba = _imgtorgba(src, par.sval)
   167  			par.sdval = _rgbatoimg(dst, par.srgba)
   168  			if par.srgba&0xFF == 0 && op&draw.DoutS != 0 {
   169  				/*				if (drawdebug) fmt.Fprintf(os.Stderr, "fill with transparent source\n"); */
   170  				return nil /* no-op successfully handled */
   171  			}
   172  			if par.srgba&0xFF == 0xFF {
   173  				par.state |= _Fullsrc
   174  			}
   175  		}
   176  	}
   177  
   178  	if mask.Flags&Frepl != 0 {
   179  		par.state |= _Replmask
   180  		if mask.R.Dx() == 1 && mask.R.Dy() == 1 {
   181  			par.mval = pixelbits(mask, mask.R.Min)
   182  			if par.mval == 0 && op&draw.DoutS != 0 {
   183  				/*				if(drawdebug) fmt.Fprintf(os.Stderr, "fill with zero mask\n"); */
   184  				return nil /* no-op successfully handled */
   185  			}
   186  			par.state |= _Simplemask
   187  			if ^par.mval == 0 {
   188  				par.state |= _Fullmask
   189  			}
   190  			par.mrgba = _imgtorgba(mask, par.mval)
   191  		}
   192  	}
   193  
   194  	/*	if(drawdebug) */
   195  	/*		fmt.Fprintf(os.Stderr, "dr %v sr %v mr %v...", r, par.sr, par.mr); */
   196  	if _DBG {
   197  		fmt.Fprintf(os.Stderr, "draw dr %v sr %v mr %v %x\n", r, par.sr, par.mr, par.state)
   198  	}
   199  
   200  	return &par
   201  }
   202  
   203  func _memimagedraw(par *memDrawParam) {
   204  	/*
   205  	 * Now that we've clipped the parameters down to be consistent, we
   206  	 * simply try sub-drawing routines in order until we find one that was able
   207  	 * to handle us.  If the sub-drawing routine returns zero, it means it was
   208  	 * unable to satisfy the request, so we do not return.
   209  	 */
   210  
   211  	/*
   212  	 * Hardware support.  Each video driver provides this function,
   213  	 * which checks to see if there is anything it can help with.
   214  	 * There could be an if around this checking to see if dst is in video memory.
   215  	 */
   216  	if _DBG {
   217  		fmt.Fprintf(os.Stderr, "test hwdraw\n")
   218  	}
   219  	if hwdraw(par) != 0 {
   220  		/*if(drawdebug) fmt.Fprintf(os.Stderr, "hw handled\n"); */
   221  		if _DBG {
   222  			fmt.Fprintf(os.Stderr, "hwdraw handled\n")
   223  		}
   224  		return
   225  	}
   226  	/*
   227  	 * Optimizations using memmove and memset.
   228  	 */
   229  	if _DBG {
   230  		fmt.Fprintf(os.Stderr, "test memoptdraw\n")
   231  	}
   232  	if memoptdraw(par) != 0 {
   233  		/*if(drawdebug) fmt.Fprintf(os.Stderr, "memopt handled\n"); */
   234  		if _DBG {
   235  			fmt.Fprintf(os.Stderr, "memopt handled\n")
   236  		}
   237  		return
   238  	}
   239  
   240  	/*
   241  	 * Character drawing.
   242  	 * Solid source color being painted through a boolean mask onto a high res image.
   243  	 */
   244  	if _DBG {
   245  		fmt.Fprintf(os.Stderr, "test chardraw\n")
   246  	}
   247  	if chardraw(par) != 0 {
   248  		/*if(drawdebug) fmt.Fprintf(os.Stderr, "chardraw handled\n"); */
   249  		if _DBG {
   250  			fmt.Fprintf(os.Stderr, "chardraw handled\n")
   251  		}
   252  		return
   253  	}
   254  
   255  	/*
   256  	 * General calculation-laden case that does alpha for each pixel.
   257  	 */
   258  	if _DBG {
   259  		fmt.Fprintf(os.Stderr, "do alphadraw\n")
   260  	}
   261  	alphadraw(par)
   262  	/*if(drawdebug) fmt.Fprintf(os.Stderr, "alphadraw handled\n"); */
   263  	if _DBG {
   264  		fmt.Fprintf(os.Stderr, "alphadraw handled\n")
   265  	}
   266  }
   267  
   268  // #undef DBG
   269  
   270  func assert(b bool) {
   271  	if !b {
   272  		panic("assert failed")
   273  	}
   274  }
   275  
/*
 * Clip the destination rectangle further based on the properties of the
 * source and mask rectangles.  Once the destination rectangle is properly
 * clipped, adjust the source and mask rectangles to be the same size.
 * Then if source or mask is replicated, move its clipped rectangle
 * so that its minimum point falls within the repl rectangle.
 *
 * r, p0 and p1 are both inputs and outputs: on success *r is the clipped
 * destination rectangle and *p0, *p1 are the matching points in the source
 * and mask.  *sr and *mr are pure outputs: the source and mask rectangles
 * corresponding to *r.  On a zero return the pointed-to values may have
 * been partially updated.
 *
 * Return zero if the final rectangle is null.
 */
func drawclip(dst *Image, r *draw.Rectangle, src *Image, p0 *draw.Point, mask *Image, p1 *draw.Point, sr *draw.Rectangle, mr *draw.Rectangle) int {
	if r.Min.X >= r.Max.X || r.Min.Y >= r.Max.Y {
		return 0
	}
	/* if source and mask points differ, the mask rectangle must be tracked separately */
	splitcoords := (p0.X != p1.X) || (p0.Y != p1.Y)
	/* clip to destination */
	rmin := r.Min
	if !draw.RectClip(r, dst.R) || !draw.RectClip(r, dst.Clipr) {
		return 0
	}
	/* move mask point */
	p1.X += r.Min.X - rmin.X
	p1.Y += r.Min.Y - rmin.Y
	/* move source point */
	p0.X += r.Min.X - rmin.X
	p0.Y += r.Min.Y - rmin.Y
	/* map destination rectangle into source */
	sr.Min = *p0
	sr.Max.X = p0.X + r.Dx()
	sr.Max.Y = p0.Y + r.Dy()
	/* sr is r in source coordinates; clip to source */
	/* (a replicated source is clipped only to its clip rectangle, not to its own rectangle) */
	if src.Flags&Frepl == 0 && !draw.RectClip(sr, src.R) {
		return 0
	}
	if !draw.RectClip(sr, src.Clipr) {
		return 0
	}
	/* compute and clip rectangle in mask */
	if splitcoords {
		/* move mask point with source */
		p1.X += sr.Min.X - p0.X
		p1.Y += sr.Min.Y - p0.Y
		mr.Min = *p1
		mr.Max.X = p1.X + sr.Dx()
		mr.Max.Y = p1.Y + sr.Dy()
		/* remember the unclipped mask rectangle so the amount clipped can be measured */
		omr := *mr
		/* mr is now rectangle in mask; clip it */
		if mask.Flags&Frepl == 0 && !draw.RectClip(mr, mask.R) {
			return 0
		}
		if !draw.RectClip(mr, mask.Clipr) {
			return 0
		}
		/* reflect any clips back to source */
		sr.Min.X += mr.Min.X - omr.Min.X
		sr.Min.Y += mr.Min.Y - omr.Min.Y
		sr.Max.X += mr.Max.X - omr.Max.X
		sr.Max.Y += mr.Max.Y - omr.Max.Y
		*p1 = mr.Min
	} else {
		/* source and mask share coordinates, so clip sr against the mask directly */
		if mask.Flags&Frepl == 0 && !draw.RectClip(sr, mask.R) {
			return 0
		}
		if !draw.RectClip(sr, mask.Clipr) {
			return 0
		}
		*p1 = sr.Min
	}
	var delta draw.Point

	/* move source clipping back to destination */
	delta.X = r.Min.X - p0.X
	delta.Y = r.Min.Y - p0.Y
	r.Min.X = sr.Min.X + delta.X
	r.Min.Y = sr.Min.Y + delta.Y
	r.Max.X = sr.Max.X + delta.X
	r.Max.Y = sr.Max.Y + delta.Y

	/* move source rectangle so sr->min is in src->r */
	if src.Flags&Frepl != 0 {
		delta.X = draw.ReplXY(src.R.Min.X, src.R.Max.X, sr.Min.X) - sr.Min.X
		delta.Y = draw.ReplXY(src.R.Min.Y, src.R.Max.Y, sr.Min.Y) - sr.Min.Y
		sr.Min.X += delta.X
		sr.Min.Y += delta.Y
		sr.Max.X += delta.X
		sr.Max.Y += delta.Y
	}
	*p0 = sr.Min

	/* move mask point so it is in mask->r */
	*p1 = draw.Repl(mask.R, *p1)
	mr.Min = *p1
	mr.Max.X = p1.X + sr.Dx()
	mr.Max.Y = p1.Y + sr.Dy()

	/* all three rectangles must now have the same size, with their origins inside their images */
	assert(sr.Dx() == mr.Dx() && mr.Dx() == r.Dx())
	assert(sr.Dy() == mr.Dy() && mr.Dy() == r.Dy())
	assert(p0.In(src.R))
	assert(p1.In(mask.R))
	assert(r.Min.In(dst.R))

	return 1
}
   378  
/*
 * Conversion tables, filled in by mktables.
 */
var replbit [1 + 8][256]uint8 /* replbit[x][y] is the replication of the x-bit quantity y to 8-bit depth */
var conv18 [256][8]uint8      /* conv18[x][y] is the yth pixel in the depth-1 pixel x */
var conv28 [256][4]uint8      /* conv28[x][y] is the yth 2-bit pixel of byte x, replicated to 8 bits */
var conv48 [256][2]uint8      /* conv48[x][y] is the yth 4-bit pixel of byte x, replicated to 8 bits */
   386  
/*
 * bitmap of how to replicate n bits to fill 8, for 1 ≤ n ≤ 8.
 * the X's are where to put the bottom (ones) bit of the n-bit pattern
 * (written as 1 bits in the binary literals below).
 * only the top 8 bits of the result are actually used.
 * (the lower 8 bits are needed to get bits in the right place
 * when n is not a divisor of 8.)
 *
 * mktables multiplies an n-bit value by replmul[n] and keeps bits 8-15
 * of the product as the replicated 8-bit value.
 *
 * Should check to see if it's easier to just refer to replmul than
 * use the precomputed values in replbit.  On PCs it may well
 * be; on machines with slow multiply instructions it probably isn't.
 */
var replmul = [1 + 8]uint32{
	0,
	0b1111111111111111,
	0b0101010101010101,
	0b0010010010010010,
	0b0001000100010001,
	0b0000100001000010,
	0b0000010000010000,
	0b0000001000000100,
	0b0000000100000001,
}
   409  
   410  func mktables() {
   411  	if tablesbuilt != 0 {
   412  		return
   413  	}
   414  
   415  	tablesbuilt = 1
   416  
   417  	/* bit replication up to 8 bits */
   418  	for i := uint32(0); i < 256; i++ {
   419  		for j := uint32(0); j <= 8; j++ { /* j <= 8 [sic] */
   420  			small := i & ((1 << j) - 1)
   421  			replbit[j][i] = uint8((small * replmul[j]) >> 8)
   422  		}
   423  	}
   424  
   425  	/* bit unpacking up to 8 bits, only powers of 2 */
   426  	for i := uint32(0); i < 256; i++ {
   427  		j := uint32(0)
   428  		sh := uint(7)
   429  		mask := uint32(1)
   430  		for ; j < 8; func() { j++; sh-- }() {
   431  			conv18[i][j] = replbit[1][(i>>sh)&mask]
   432  		}
   433  
   434  		j = 0
   435  		sh = 6
   436  		mask = 3
   437  		for ; j < 4; func() { j++; sh -= 2 }() {
   438  			conv28[i][j] = replbit[2][(i>>sh)&mask]
   439  		}
   440  
   441  		j = 0
   442  		sh = 4
   443  		mask = 15
   444  		for ; j < 2; func() { j++; sh -= 4 }() {
   445  			conv48[i][j] = replbit[4][(i>>sh)&mask]
   446  		}
   447  	}
   448  }
   449  
// ones is a run of 0xff bytes.  The calculation routines compare
// &buf.alpha[0] against &ones[0] to recognise a buffer whose alpha slice is
// ones rather than real per-pixel data.
var ones = [8]uint8{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}

/*
 * General alpha drawing case.  Can handle anything.
 */

// _Buffer describes one scan line of pixels expanded into byte channels.
// For each channel slice, element 0 is the value for the current pixel and
// the slice is advanced by delta to step to the next pixel.
type _Buffer struct {
	red    []uint8 // red channel
	grn    []uint8 // green channel
	blu    []uint8 // blue channel
	alpha  []uint8 // alpha channel, or ones[:] (see ones)
	grey   []uint8 // grey channel; dumpbuf prints it instead of red/grn/blu when non-empty
	rgba   []uint8 // pixel bytes; viewed as 32-bit words via uint8words when delta is 4
	delta  int     // step, in bytes, from one pixel to the next in the channel slices
	m      []uint8 // NOTE(review): not used in the visible part of this file — confirm meaning
	mskip  int     // NOTE(review): as above
	bm     []uint8 // NOTE(review): as above
	bmskip int     // NOTE(review): as above
	em     []uint8 // NOTE(review): as above
	emskip int     // NOTE(review): as above
}
   471  
// _Readfn reads one scan line.  alphadraw calls it with the image's
// drawParam, that parameter's buffer (bufbase) and the line's y offset from
// the top of the image rectangle, and uses the returned _Buffer.
type _Readfn func(*drawParam, []uint8, int) _Buffer

// _Writefn writes a computed scan line.  alphadraw calls it with the
// destination's drawParam, the destination bytes starting at the target
// line, and the _Buffer returned by the calculation function.
type _Writefn func(*drawParam, []uint8, _Buffer)

// _Calcfn combines one scan line.  alphadraw calls it as
// calc(dst, src, mask, dx, isgrey, op) and passes the returned _Buffer to
// the write function.
type _Calcfn func(_Buffer, _Buffer, _Buffer, int, bool, draw.Op) _Buffer

const (
	// _MAXBCACHE is the size of drawParam.bcache and the tallest replicated
	// image (in lines) for which getparam enables line caching.
	_MAXBCACHE = 16
)
   479  
/* giant rathole to customize functions with */
// drawParam carries the per-image state (one each for source, mask and
// destination) that the read, write and calculation functions use.
// getparam resets and fills in the basic fields; alphadraw sets the rest.
type drawParam struct {
	replcall      _Readfn         // underlying reader, set by alphadraw when replread is installed in its place
	greymaskcall  _Readfn         // mask reader, set by alphadraw when greymaskread is installed
	convreadcall  _Readfn         // NOTE(review): not set in the visible code; presumably used by convfn — confirm
	convwritecall _Writefn        // NOTE(review): as above
	img           *Image          // image being read or written
	r             draw.Rectangle  // rectangle of img involved in the draw
	dx            int             // r.Dx()
	needbuf       bool            // as passed to getparam; alphadraw passes src.Data == dst.Data
	convgrey      bool            // as passed to getparam; alphadraw passes whether dst has Fgrey (and sets it for grey masks)
	alphaonly     bool            // set by alphadraw for a mask that has an alpha channel
	bytey0s       []uint8         // byteaddr of (img.R.Min.X, img.R.Min.Y)
	bytermin      []uint8         // byteaddr of (r.Min.X, img.R.Min.Y)
	bytey0e       []uint8         // byteaddr of (img.R.Max.X, img.R.Min.Y)
	bwidth        int             // 4 * img.Width
	replcache     int             // 1 when getparam decided the replicated image's lines can be cached
	bcache        [_MAXBCACHE]_Buffer // NOTE(review): presumably the cached lines used by replread — confirm
	bfilled       uint32          // NOTE(review): presumably which bcache entries are valid — confirm
	bufbase       []uint8         // drawbuf[bufoff:], set by alphadraw after allocdrawbuf
	bufoff        int             // offset of this parameter's buffer within drawbuf
	bufdelta      int             // 4 * dx: bytes reserved per buffered line
	dir           int             // 1 or -1: the y direction alphadraw iterates in
	convbufoff    int             // offset of the conversion buffer within drawbuf
	convbuf       []uint8         // drawbuf[convbufoff:], set by alphadraw for the source
	convdpar      *drawParam      // NOTE(review): not set in the visible code; presumably used by convfn — confirm
	convdx        int             // NOTE(review): as above
}
   508  
// drawbuf is the shared scratch buffer for alphadraw.  getparam reserves
// space in it by advancing ndrawbuf; allocdrawbuf then makes drawbuf
// exactly ndrawbuf bytes long.
var drawbuf []uint8

// ndrawbuf is the number of bytes of drawbuf reserved so far for the
// current alphadraw call; alphadraw resets it to 0 before calling getparam.
var ndrawbuf int

// spar, mpar and dpar are the source, mask and destination parameters used
// by alphadraw.
var spar drawParam
var mpar drawParam /* easier on the stacks */
var dpar drawParam
   514  
// alphacalc maps each compositing operator (indexed by draw.Op) to the
// general calculation function that alphadraw uses for it.
var alphacalc = [draw.Ncomp]_Calcfn{
	alphacalc0,    /* Clear */
	alphacalc14,   /* DoutS */
	alphacalc2810, /* SoutD */
	alphacalc3679, /* DxorS */
	alphacalc14,   /* DinS */
	alphacalc5,    /* D */
	alphacalc3679, /* DatopS */
	alphacalc3679, /* DoverS */
	alphacalc2810, /* SinD */
	alphacalc3679, /* SatopD */
	alphacalc2810, /* S */
	alphacalc11,   /* SoverD */
}

// boolcalc is the counterpart of alphacalc that alphadraw selects when the
// mask is GREY1 and the source has no alpha channel.
var boolcalc = [draw.Ncomp]_Calcfn{
	alphacalc0,     /* Clear */
	boolcalc14,     /* DoutS */
	boolcalc236789, /* SoutD */
	boolcalc236789, /* DxorS */
	boolcalc14,     /* DinS */
	alphacalc5,     /* D */
	boolcalc236789, /* DatopS */
	boolcalc236789, /* DoverS */
	boolcalc236789, /* SinD */
	boolcalc236789, /* SatopD */
	boolcalc1011,   /* S */
	boolcalc1011,   /* SoverD */
}
   544  
   545  func allocdrawbuf() {
   546  	for cap(drawbuf) < ndrawbuf {
   547  		drawbuf = append(drawbuf[:cap(drawbuf)], 0)
   548  	}
   549  	drawbuf = drawbuf[:ndrawbuf]
   550  }
   551  
   552  func getparam(p *drawParam, img *Image, r draw.Rectangle, convgrey, needbuf bool) {
   553  	*p = drawParam{}
   554  	p.img = img
   555  	p.r = r
   556  	p.dx = r.Dx()
   557  	p.needbuf = needbuf
   558  	p.convgrey = convgrey
   559  
   560  	assert(img.R.Min.X <= r.Min.X && r.Min.X < img.R.Max.X)
   561  
   562  	p.bytey0s = byteaddr(img, draw.Pt(img.R.Min.X, img.R.Min.Y))
   563  	p.bytermin = byteaddr(img, draw.Pt(r.Min.X, img.R.Min.Y))
   564  	p.bytey0e = byteaddr(img, draw.Pt(img.R.Max.X, img.R.Min.Y))
   565  	p.bwidth = int(4 * img.Width)
   566  
   567  	assert(len(p.bytey0s) >= len(p.bytermin) && len(p.bytermin) >= len(p.bytey0e))
   568  
   569  	if p.r.Min.X == p.img.R.Min.X {
   570  		assert(len(p.bytermin) == len(p.bytey0s))
   571  	}
   572  
   573  	nbuf := 1
   574  	if img.Flags&Frepl != 0 && img.R.Dy() <= _MAXBCACHE && img.R.Dy() < r.Dy() {
   575  		p.replcache = 1
   576  		nbuf = img.R.Dy()
   577  	}
   578  	p.bufdelta = 4 * p.dx
   579  	p.bufoff = ndrawbuf
   580  	ndrawbuf += p.bufdelta * nbuf
   581  }
   582  
   583  func clipy(img *Image, y *int) {
   584  	dy := img.R.Dy()
   585  	if *y == dy {
   586  		*y = 0
   587  	} else if *y == -1 {
   588  		*y = dy - 1
   589  	}
   590  	assert(0 <= *y && *y < dy)
   591  }
   592  
   593  func dumpbuf(s string, b _Buffer, n int) {
   594  	fmt.Fprintf(os.Stderr, "%s", s)
   595  	for i := 0; i < n; i++ {
   596  		fmt.Fprintf(os.Stderr, " ")
   597  		p := b.grey
   598  		if len(p) != 0 {
   599  			fmt.Fprintf(os.Stderr, " k%.2X", p[0])
   600  			b.grey = b.grey[b.delta:]
   601  		} else {
   602  			p = b.red
   603  			if len(p) != 0 {
   604  				fmt.Fprintf(os.Stderr, " r%.2X", p[0])
   605  				b.red = b.red[b.delta:]
   606  			}
   607  			p = b.grn
   608  			if len(p) != 0 {
   609  				fmt.Fprintf(os.Stderr, " g%.2X", p[0])
   610  				b.grn = b.grn[b.delta:]
   611  			}
   612  			p = b.blu
   613  			if len(p) != 0 {
   614  				fmt.Fprintf(os.Stderr, " b%.2X", p[0])
   615  				b.blu = b.blu[b.delta:]
   616  			}
   617  		}
   618  		p = b.alpha
   619  		if &p[0] != &ones[0] {
   620  			fmt.Fprintf(os.Stderr, " α%.2X", p[0])
   621  			b.alpha = b.alpha[b.delta:]
   622  		}
   623  	}
   624  	fmt.Fprintf(os.Stderr, "\n")
   625  }
   626  
   627  /*
   628   * For each scan line, we expand the pixels from source, mask, and destination
   629   * into byte-aligned red, green, blue, alpha, and grey channels.  If buffering is not
   630   * needed and the channels were already byte-aligned (grey8, rgb24, rgba32, rgb32),
   631   * the readers need not copy the data: they can simply return pointers to the data.
   632   * If the destination image is grey and the source is not, it is converted using the NTSC
   633   * formula.
   634   *
   635   * Once we have all the channels, we call either rgbcalc or greycalc, depending on
   636   * whether the destination image is color.  This is allowed to overwrite the dst buffer (perhaps
   637   * the actual data, perhaps a copy) with its result.  It should only overwrite the dst buffer
   638   * with the same format (i.e. red bytes with red bytes, etc.)  A new buffer is returned from
   639   * the calculator, and that buffer is passed to a function to write it to the destination.
   640   * If the buffer is already pointing at the destination, the writing function is a no-op.
   641   */
   642  // #define DBG drawdebug
// alphadraw is the general drawing routine: it processes par.r one scan
// line at a time, reading the source, mask and destination lines, combining
// them with the calculation function selected for par.op, and writing the
// result to the destination.  It always returns 1.
//
// It uses the package-level spar, mpar, dpar, drawbuf and ndrawbuf.
func alphadraw(par *memDrawParam) int {
	if drawdebug != 0 {
		fmt.Fprintf(os.Stderr, "alphadraw %v\n", par.r)
	}
	r := par.r
	dx := r.Dx()
	dy := r.Dy()

	if _DBG {
		fmt.Fprintf(os.Stderr, "alphadraw %v\n", r)
	}
	/* start with an empty draw buffer; each getparam call below reserves its share */
	ndrawbuf = 0

	src := par.src
	mask := par.mask
	dst := par.dst
	sr := par.sr
	mr := par.mr
	op := par.op

	isgrey := dst.Flags&Fgrey != 0

	/*
	 * Buffering when src and dst are the same bitmap is sufficient but not
	 * necessary.  There are stronger conditions we could use.  We could
	 * check to see if the rectangles intersect, and if simply moving in the
	 * correct y direction can avoid the need to buffer.
	 */
	needbuf := src.Data == dst.Data

	getparam(&spar, src, sr, isgrey, needbuf)
	getparam(&dpar, dst, r, isgrey, needbuf)
	getparam(&mpar, mask, mr, false, needbuf)

	/*
	 * When buffering, go bottom-up if the destination starts later in
	 * memory than the source (fewer bytes remain after its address).
	 */
	dir := 1
	if needbuf && len(byteaddr(dst, r.Min)) < len(byteaddr(src, sr.Min)) {
		dir = -1
	}
	dpar.dir = dir
	mpar.dir = dpar.dir
	spar.dir = mpar.dir
	var rdsrc _Readfn
	var rdmask _Readfn
	var rddst _Readfn
	var calc _Calcfn
	var wrdst _Writefn

	/*
	 * If the mask is purely boolean, we can convert from src to dst format
	 * when we read src, and then just copy it to dst where the mask tells us to.
	 * This requires a boolean (1-bit grey) mask and lack of a source alpha channel.
	 *
	 * The computation is accomplished by assigning the function pointers as follows:
	 *	rdsrc - read and convert source into dst format in a buffer
	 * 	rdmask - convert mask to bytes, set pointer to it
	 * 	rddst - fill with pointer to real dst data, but do no reads
	 *	calc - copy src onto dst when mask says to.
	 *	wrdst - do nothing
	 * This is slightly sleazy, since things aren't doing exactly what their names say,
	 * but it avoids a fair amount of code duplication to make this a case here
	 * rather than have a separate booldraw.
	 */
	/*if(drawdebug) fmt.Fprintf(os.Stderr, "flag %lud mchan %x=?%x dd %d\n", src->flags&Falpha, mask->chan, GREY1, dst->depth); */
	if src.Flags&Falpha == 0 && mask.Pix == draw.GREY1 && dst.Depth >= 8 && op == draw.SoverD {
		/*if(drawdebug) fmt.Fprintf(os.Stderr, "boolcopy..."); */
		rdsrc = convfn(dst, &dpar, src, &spar)
		rddst = readptr
		rdmask = readfn(mask)
		calc = boolcopyfn(dst, mask)
		wrdst = nullwrite
	} else {
		/* usual alphadraw parameter fetching */
		rdsrc = readfn(src)
		rddst = readfn(dst)
		wrdst = writefn(dst)
		calc = alphacalc[op]

		/*
		 * If there is no alpha channel, we'll ask for a grey channel
		 * and pretend it is the alpha.
		 */
		if mask.Flags&Falpha != 0 {
			rdmask = readalphafn(mask)
			mpar.alphaonly = true
		} else {
			mpar.greymaskcall = readfn(mask)
			mpar.convgrey = true
			rdmask = greymaskread

			/*
			 * Should really be above, but then boolcopyfns would have
			 * to deal with bit alignment, and I haven't written that.
			 *
			 * This is a common case for things like ellipse drawing.
			 * When there's no alpha involved and the mask is boolean,
			 * we can avoid all the division and multiplication.
			 */
			if mask.Pix == draw.GREY1 && src.Flags&Falpha == 0 {
				calc = boolcalc[op]
			} else if op == draw.SoverD && src.Flags&Falpha == 0 {
				calc = alphacalcS
			}
		}
	}

	/*
	 * If the image has a small enough repl rectangle,
	 * we can just read each line once and cache them.
	 */
	if spar.replcache != 0 {
		spar.replcall = rdsrc
		rdsrc = replread
	}
	if mpar.replcache != 0 {
		mpar.replcall = rdmask
		rdmask = replread
	}

	allocdrawbuf()

	/*
	 * Before we were saving only offsets from drawbuf in the parameter
	 * structures; now that drawbuf has been grown to accommodate us,
	 * we can fill in the pointers.
	 */
	spar.bufbase = drawbuf[spar.bufoff:]
	mpar.bufbase = drawbuf[mpar.bufoff:]
	dpar.bufbase = drawbuf[dpar.bufoff:]
	spar.convbuf = drawbuf[spar.convbufoff:]
	var starty int
	var endy int

	/* endy is one step past the last line in the direction of travel */
	if dir == 1 {
		starty = 0
		endy = dy
	} else {
		starty = dy - 1
		endy = -1
	}

	/*
	 * srcy, masky, and dsty are offsets from the top of their
	 * respective Rectangles.  they need to be contained within
	 * the rectangles, so clipy can keep them there without division.
	 */
	srcy := (starty + sr.Min.Y - src.R.Min.Y) % src.R.Dy()
	masky := (starty + mr.Min.Y - mask.R.Min.Y) % mask.R.Dy()
	dsty := starty + r.Min.Y - dst.R.Min.Y

	assert(0 <= srcy && srcy < src.R.Dy())
	assert(0 <= masky && masky < mask.R.Dy())
	assert(0 <= dsty && dsty < dst.R.Dy())

	if drawdebug != 0 {
		fmt.Fprintf(os.Stderr, "alphadraw: rdsrc=%p rdmask=%p rddst=%p calc=%p wrdst=%p\n", rdsrc, rdmask, rddst, calc, wrdst)
	}
	/* the post statement steps y and all three line offsets together by dir */
	for y := starty; y != endy; func() { y += dir; srcy += dir; masky += dir; dsty += dir }() {
		/* wrap offsets that have stepped off either end of their image */
		clipy(src, &srcy)
		clipy(dst, &dsty)
		clipy(mask, &masky)

		bsrc := rdsrc(&spar, spar.bufbase, srcy)
		if _DBG {
			fmt.Fprintf(os.Stderr, "[")
		}
		bmask := rdmask(&mpar, mpar.bufbase, masky)
		if _DBG {
			fmt.Fprintf(os.Stderr, "]\n")
		}
		bdst := rddst(&dpar, dpar.bufbase, dsty)
		if _DBG {
			fmt.Fprintf(os.Stderr, "src %v %+v mask %v dst %v calc %v write %v\n", nameof(rdsrc), spar, nameof(rdmask), nameof(rddst), nameof(calc), nameof(wrdst))
			dumpbuf("src", bsrc, dx)
			dumpbuf("mask", bmask, dx)
			dumpbuf("dst", bdst, dx)
		}
		bdst = calc(bdst, bsrc, bmask, dx, isgrey, op)
		if _DBG {
			dumpbuf("bdst", bdst, dx)
		}
		/* write the result at line dsty, starting from r's left edge */
		wrdst(&dpar, dpar.bytermin[dsty*dpar.bwidth:], bdst)
	}

	return 1
}
   828  
   829  type eface struct {
   830  	_type unsafe.Pointer
   831  	data  unsafe.Pointer
   832  }
   833  
   834  func funcPC(f interface{}) uintptr {
   835  	return *(*uintptr)(efaceOf(&f).data)
   836  }
   837  func efaceOf(ep *interface{}) *eface {
   838  	return (*eface)(unsafe.Pointer(ep))
   839  }
   840  
   841  func nameof(x interface{}) string {
   842  	f := runtime.FuncForPC(funcPC(x))
   843  	i := strings.LastIndex(f.Name(), ".")
   844  	return f.Name()[i+1:]
   845  }
   846  
   847  // #undef DBG
   848  
   849  func uint8words(b []uint8) []uint32 {
   850  	var w []uint32
   851  	h := (*reflect.SliceHeader)(unsafe.Pointer(&w))
   852  	h.Data = uintptr(unsafe.Pointer(&b[0]))
   853  	h.Len = len(b) / 4
   854  	h.Cap = cap(b) / 4
   855  	if h.Data&3 != 0 {
   856  		panic("unaligned")
   857  	}
   858  	return w
   859  }
   860  
   861  func uint8shorts(b []uint8) []uint16 {
   862  	var w []uint16
   863  	h := (*reflect.SliceHeader)(unsafe.Pointer(&w))
   864  	h.Data = uintptr(unsafe.Pointer(&b[0]))
   865  	h.Len = len(b) / 2
   866  	h.Cap = cap(b) / 2
   867  	if h.Data&1 != 0 {
   868  		panic("unaligned")
   869  	}
   870  	return w
   871  }
   872  
   873  func alphacalc0(bdst _Buffer, b1 _Buffer, b2 _Buffer, dx int, grey bool, op draw.Op) _Buffer {
   874  	b := bdst.rgba
   875  	for i := range b[:dx*bdst.delta] {
   876  		b[i] = 0
   877  	}
   878  	return bdst
   879  }
   880  
   881  /*
   882   * Do the channels in the buffers match enough
   883   * that we can do word-at-a-time operations
   884   * on the pixels?
   885   */
   886  func chanmatch(bdst *_Buffer, bsrc *_Buffer) int {
   887  	/*
   888  	 * first, r, g, b must be in the same place
   889  	 * in the rgba word.
   890  	 */
   891  	drgb := bdst.rgba
   892  	srgb := bsrc.rgba
   893  	if len(bdst.red)-len(drgb) != len(bsrc.red)-len(srgb) || len(bdst.blu)-len(drgb) != len(bsrc.blu)-len(srgb) || len(bdst.grn)-len(drgb) != len(bsrc.grn)-len(srgb) {
   894  		return 0
   895  	}
   896  
   897  	/*
   898  	 * that implies alpha is in the same place,
   899  	 * if it is there at all (it might be == ones[:]).
   900  	 * if the destination is ones[:], we can scribble
   901  	 * over the rgba slot just fine.
   902  	 */
   903  	if &bdst.alpha[0] == &ones[0] {
   904  		return 1
   905  	}
   906  
   907  	/*
   908  	 * if the destination is not ones but the src is,
   909  	 * then the simultaneous calculation will use
   910  	 * bogus bytes from the src's rgba.  no good.
   911  	 */
   912  	if &bsrc.alpha[0] == &ones[0] {
   913  		return 0
   914  	}
   915  
   916  	/*
   917  	 * otherwise, alphas are in the same place.
   918  	 */
   919  	return 1
   920  }
   921  
   922  func alphacalc14(bdst _Buffer, bsrc _Buffer, bmask _Buffer, dx int, grey bool, op draw.Op) _Buffer {
   923  	obdst := bdst
   924  	sadelta := bsrc.delta
   925  	if &bsrc.alpha[0] == &ones[0] {
   926  		sadelta = 0
   927  	}
   928  	q := bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc) != 0
   929  	var drgba, srgba []uint32
   930  	if q {
   931  		drgba = uint8words(bdst.rgba)
   932  		srgba = uint8words(bsrc.rgba)
   933  	}
   934  
   935  	for i := 0; i < dx; i++ {
   936  		sa := bsrc.alpha[0]
   937  		ma := bmask.alpha[0]
   938  		fd := _CALC11(sa, ma)
   939  		if op == draw.DoutS {
   940  			fd = 255 - fd
   941  		}
   942  
   943  		if grey {
   944  			bdst.grey[0] = _CALC11(fd, bdst.grey[0])
   945  			bsrc.grey = bsrc.grey[bsrc.delta:]
   946  			bdst.grey = bdst.grey[bdst.delta:]
   947  		} else {
   948  			if q {
   949  				drgba[0] = _CALC41(fd, drgba[0])
   950  				srgba = srgba[1:]
   951  				drgba = drgba[1:]
   952  				bsrc.alpha = bsrc.alpha[sadelta:]
   953  				bmask.alpha = bmask.alpha[bmask.delta:]
   954  				continue
   955  			}
   956  			bdst.red[0] = _CALC11(fd, bdst.red[0])
   957  			bdst.grn[0] = _CALC11(fd, bdst.grn[0])
   958  			bdst.blu[0] = _CALC11(fd, bdst.blu[0])
   959  			bsrc.red = bsrc.red[bsrc.delta:]
   960  			bsrc.blu = bsrc.blu[bsrc.delta:]
   961  			bsrc.grn = bsrc.grn[bsrc.delta:]
   962  			bdst.red = bdst.red[bdst.delta:]
   963  			bdst.blu = bdst.blu[bdst.delta:]
   964  			bdst.grn = bdst.grn[bdst.delta:]
   965  		}
   966  		if &bdst.alpha[0] != &ones[0] {
   967  			bdst.alpha[0] = _CALC11(fd, bdst.alpha[0])
   968  			bdst.alpha = bdst.alpha[bdst.delta:]
   969  		}
   970  		bmask.alpha = bmask.alpha[bmask.delta:]
   971  		bsrc.alpha = bsrc.alpha[sadelta:]
   972  	}
   973  	return obdst
   974  }
   975  
   976  func alphacalc2810(bdst _Buffer, bsrc _Buffer, bmask _Buffer, dx int, grey bool, op draw.Op) _Buffer {
   977  	obdst := bdst
   978  	sadelta := bsrc.delta
   979  	if &bsrc.alpha[0] == &ones[0] {
   980  		sadelta = 0
   981  	}
   982  	q := bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc) != 0
   983  	var drgba, srgba []uint32
   984  	if q {
   985  		drgba = uint8words(bdst.rgba)
   986  		srgba = uint8words(bsrc.rgba)
   987  	}
   988  
   989  	for i := 0; i < dx; i++ {
   990  		ma := bmask.alpha[0]
   991  		da := bdst.alpha[0]
   992  		if op == draw.SoutD {
   993  			da = 255 - da
   994  		}
   995  		fs := ma
   996  		if op != draw.S {
   997  			fs = _CALC11(fs, da)
   998  		}
   999  
  1000  		if grey {
  1001  			bdst.grey[0] = _CALC11(fs, bsrc.grey[0])
  1002  			bsrc.grey = bsrc.grey[bsrc.delta:]
  1003  			bdst.grey = bdst.grey[bdst.delta:]
  1004  		} else {
  1005  			if q {
  1006  				drgba[0] = _CALC41(fs, srgba[0])
  1007  				srgba = srgba[1:]
  1008  				drgba = drgba[1:]
  1009  				bmask.alpha = bmask.alpha[bmask.delta:]
  1010  				bdst.alpha = bdst.alpha[bdst.delta:]
  1011  				continue
  1012  			}
  1013  			bdst.red[0] = _CALC11(fs, bsrc.red[0])
  1014  			bdst.grn[0] = _CALC11(fs, bsrc.grn[0])
  1015  			bdst.blu[0] = _CALC11(fs, bsrc.blu[0])
  1016  			bsrc.red = bsrc.red[bsrc.delta:]
  1017  			bsrc.blu = bsrc.blu[bsrc.delta:]
  1018  			bsrc.grn = bsrc.grn[bsrc.delta:]
  1019  			bdst.red = bdst.red[bdst.delta:]
  1020  			bdst.blu = bdst.blu[bdst.delta:]
  1021  			bdst.grn = bdst.grn[bdst.delta:]
  1022  		}
  1023  		if &bdst.alpha[0] != &ones[0] {
  1024  			bdst.alpha[0] = _CALC11(fs, bsrc.alpha[0])
  1025  			bdst.alpha = bdst.alpha[bdst.delta:]
  1026  		}
  1027  		bmask.alpha = bmask.alpha[bmask.delta:]
  1028  		bsrc.alpha = bsrc.alpha[sadelta:]
  1029  	}
  1030  	return obdst
  1031  }
  1032  
  1033  func alphacalc3679(bdst _Buffer, bsrc _Buffer, bmask _Buffer, dx int, grey bool, op draw.Op) _Buffer {
  1034  	obdst := bdst
  1035  	sadelta := bsrc.delta
  1036  	if &bsrc.alpha[0] == &ones[0] {
  1037  		sadelta = 0
  1038  	}
  1039  	q := bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc) != 0
  1040  	var drgba, srgba []uint32
  1041  	if q {
  1042  		drgba = uint8words(bdst.rgba)
  1043  		srgba = uint8words(bsrc.rgba)
  1044  	}
  1045  
  1046  	for i := 0; i < dx; i++ {
  1047  		sa := bsrc.alpha[0]
  1048  		ma := bmask.alpha[0]
  1049  		da := bdst.alpha[0]
  1050  		var fs uint8
  1051  		if op == draw.SatopD {
  1052  			fs = _CALC11(ma, da)
  1053  		} else {
  1054  			fs = _CALC11(ma, 255-da)
  1055  		}
  1056  		var fd uint8
  1057  		if op == draw.DoverS {
  1058  			fd = 255
  1059  		} else {
  1060  			fd = _CALC11(sa, ma)
  1061  			if op != draw.DatopS {
  1062  				fd = 255 - fd
  1063  			}
  1064  		}
  1065  
  1066  		if grey {
  1067  			bdst.grey[0] = _CALC12(fs, bsrc.grey[0], fd, bdst.grey[0])
  1068  			bsrc.grey = bsrc.grey[bsrc.delta:]
  1069  			bdst.grey = bdst.grey[bdst.delta:]
  1070  		} else {
  1071  			if q {
  1072  				drgba[0] = _CALC42(fs, srgba[0], fd, drgba[0])
  1073  				srgba = srgba[1:]
  1074  				drgba = drgba[1:]
  1075  				bsrc.alpha = bsrc.alpha[sadelta:]
  1076  				bmask.alpha = bmask.alpha[bmask.delta:]
  1077  				bdst.alpha = bdst.alpha[bdst.delta:]
  1078  				continue
  1079  			}
  1080  			bdst.red[0] = _CALC12(fs, bsrc.red[0], fd, bdst.red[0])
  1081  			bdst.grn[0] = _CALC12(fs, bsrc.grn[0], fd, bdst.grn[0])
  1082  			bdst.blu[0] = _CALC12(fs, bsrc.blu[0], fd, bdst.blu[0])
  1083  			bsrc.red = bsrc.red[bsrc.delta:]
  1084  			bsrc.blu = bsrc.blu[bsrc.delta:]
  1085  			bsrc.grn = bsrc.grn[bsrc.delta:]
  1086  			bdst.red = bdst.red[bdst.delta:]
  1087  			bdst.blu = bdst.blu[bdst.delta:]
  1088  			bdst.grn = bdst.grn[bdst.delta:]
  1089  		}
  1090  		if &bdst.alpha[0] != &ones[0] {
  1091  			bdst.alpha[0] = _CALC12(fs, sa, fd, da)
  1092  			bdst.alpha = bdst.alpha[bdst.delta:]
  1093  		}
  1094  		bmask.alpha = bmask.alpha[bmask.delta:]
  1095  		bsrc.alpha = bsrc.alpha[sadelta:]
  1096  	}
  1097  	return obdst
  1098  }
  1099  
  1100  func alphacalc5(bdst _Buffer, b1 _Buffer, b2 _Buffer, dx int, grey bool, op draw.Op) _Buffer {
  1101  	return bdst
  1102  }
  1103  
  1104  func alphacalc11(bdst _Buffer, bsrc _Buffer, bmask _Buffer, dx int, grey bool, op draw.Op) _Buffer {
  1105  	obdst := bdst
  1106  	sadelta := bsrc.delta
  1107  	if &bsrc.alpha[0] == &ones[0] {
  1108  		sadelta = 0
  1109  	}
  1110  	q := bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc) != 0
  1111  	var drgba, srgba []uint32
  1112  	if q {
  1113  		drgba = uint8words(bdst.rgba)
  1114  		srgba = uint8words(bsrc.rgba)
  1115  	}
  1116  
  1117  	for i := 0; i < dx; i++ {
  1118  		di := i * bdst.delta
  1119  		si := i * bsrc.delta
  1120  		ai := i * bmask.delta
  1121  		asi := i * sadelta
  1122  		ma := bmask.alpha[ai]
  1123  		sa := bsrc.alpha[asi]
  1124  		fd := 255 - _CALC11(sa, ma)
  1125  
  1126  		if grey {
  1127  			bdst.grey[di] = _CALC12(ma, bsrc.grey[si], fd, bdst.grey[di])
  1128  		} else {
  1129  			if q {
  1130  				drgba[di/4] = _CALC42(ma, srgba[si/4], fd, drgba[di/4])
  1131  				continue
  1132  			}
  1133  			if _DBG {
  1134  				fmt.Fprintf(os.Stderr, "%x %x %x * %x / %x %x %x * %x",
  1135  					bdst.red[0], bdst.grn[0], bdst.blu[0], fd, bsrc.red[0], bsrc.grn[0], bsrc.blu[0], ma)
  1136  			}
  1137  			bdst.red[di] = _CALC12(ma, bsrc.red[si], fd, bdst.red[di])
  1138  			bdst.grn[di] = _CALC12(ma, bsrc.grn[si], fd, bdst.grn[di])
  1139  			bdst.blu[di] = _CALC12(ma, bsrc.blu[si], fd, bdst.blu[di])
  1140  			if _DBG {
  1141  				fmt.Fprintf(os.Stderr, " -> %x %x %x\n",
  1142  					bdst.red[0], bdst.grn[0], bdst.blu[0])
  1143  			}
  1144  		}
  1145  		if &bdst.alpha[0] != &ones[0] {
  1146  			bdst.alpha[di] = _CALC12(ma, sa, fd, bdst.alpha[di])
  1147  		}
  1148  	}
  1149  	return obdst
  1150  }
  1151  
  1152  /*
  1153  not used yet
  1154  source and mask alpha 1
  1155  static Buffer
  1156  alphacalcS0(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
  1157  {
  1158  	Buffer obdst;
  1159  	int i;
  1160  
  1161  	USED(op);
  1162  	obdst = bdst;
  1163  	if(bsrc.delta == bdst.delta){
  1164  		memmove(bdst.rgba, bsrc.rgba, dx*bdst.delta);
  1165  		return obdst;
  1166  	}
  1167  	for(i=0; i<dx; i++){
  1168  		if(grey){
  1169  			bdst.grey[0] = bsrc.grey[0];
  1170  			bsrc.grey = bsrc.grey[bsrc.delta;:]
  1171  			bdst.grey = bdst.grey[bdst.delta;:]
  1172  		}else{
  1173  			bdst.red[0] = bsrc.red[0];
  1174  			bdst.grn[0] = bsrc.grn[0];
  1175  			bdst.blu[0] = bsrc.blu[0];
  1176  			bsrc.red = bsrc.red[bsrc.delta;:]
  1177  			bsrc.blu = bsrc.blu[bsrc.delta;:]
  1178  			bsrc.grn = bsrc.grn[bsrc.delta;:]
  1179  			bdst.red = bdst.red[bdst.delta;:]
  1180  			bdst.blu = bdst.blu[bdst.delta;:]
  1181  			bdst.grn = bdst.grn[bdst.delta;:]
  1182  		}
  1183  		if(&bdst.alpha[0] != &ones[0]){
  1184  			bdst.alpha[0] = 255;
  1185  			bdst.alpha = bdst.alpha[bdst.delta;:]
  1186  		}
  1187  	}
  1188  	return obdst;
  1189  }
  1190  */
  1191  
  1192  /* source alpha 1 */
  1193  func alphacalcS(bdst _Buffer, bsrc _Buffer, bmask _Buffer, dx int, grey bool, op draw.Op) _Buffer {
  1194  	obdst := bdst
  1195  
  1196  	for i := 0; i < dx; i++ {
  1197  		di := i * bdst.delta
  1198  		si := i * bsrc.delta
  1199  		ai := i * bmask.delta
  1200  		ma := bmask.alpha[ai]
  1201  		fd := 255 - ma
  1202  		if grey {
  1203  			bdst.grey[di] = _CALC12(ma, bsrc.grey[si], fd, bdst.grey[di])
  1204  		} else {
  1205  			if _DBG {
  1206  				fmt.Fprintf(os.Stderr, "calc %x %x %x * %x / %x %x %x * %x -> ", bdst.red[di], bdst.grn[di], bdst.blu[di], fd, bsrc.red[si], bsrc.grn[si], bsrc.blu[si], ma)
  1207  			}
  1208  			bdst.red[di] = _CALC12(ma, bsrc.red[si], fd, bdst.red[di])
  1209  			bdst.grn[di] = _CALC12(ma, bsrc.grn[si], fd, bdst.grn[di])
  1210  			bdst.blu[di] = _CALC12(ma, bsrc.blu[si], fd, bdst.blu[di])
  1211  			if _DBG {
  1212  				fmt.Fprintf(os.Stderr, "-> %x %x %x\n", bdst.red[di], bdst.grn[di], bdst.blu[di])
  1213  			}
  1214  		}
  1215  		if &bdst.alpha[0] != &ones[0] {
  1216  			bdst.alpha[di] = ma + _CALC11(fd, bdst.alpha[di])
  1217  		}
  1218  	}
  1219  	return obdst
  1220  }
  1221  
  1222  func boolcalc14(bdst _Buffer, b1 _Buffer, bmask _Buffer, dx int, grey bool, op draw.Op) _Buffer {
  1223  	obdst := bdst
  1224  
  1225  	for i := 0; i < dx; i++ {
  1226  		ma := bmask.alpha[0]
  1227  		var zero bool
  1228  		if ma != 0 {
  1229  			zero = op == draw.DoutS
  1230  		} else {
  1231  			zero = op == draw.DinS
  1232  		}
  1233  
  1234  		if grey {
  1235  			if zero {
  1236  				bdst.grey[0] = 0
  1237  			}
  1238  			bdst.grey = bdst.grey[bdst.delta:]
  1239  		} else {
  1240  			if zero {
  1241  				bdst.blu[0] = 0
  1242  				bdst.grn[0] = bdst.blu[0]
  1243  				bdst.red[0] = bdst.grn[0]
  1244  			}
  1245  			bdst.red = bdst.red[bdst.delta:]
  1246  			bdst.blu = bdst.blu[bdst.delta:]
  1247  			bdst.grn = bdst.grn[bdst.delta:]
  1248  		}
  1249  		bmask.alpha = bmask.alpha[bmask.delta:]
  1250  		if &bdst.alpha[0] != &ones[0] {
  1251  			if zero {
  1252  				bdst.alpha[0] = 0
  1253  			}
  1254  			bdst.alpha = bdst.alpha[bdst.delta:]
  1255  		}
  1256  	}
  1257  	return obdst
  1258  }
  1259  
  1260  func boolcalc236789(bdst _Buffer, bsrc _Buffer, bmask _Buffer, dx int, grey bool, op draw.Op) _Buffer {
  1261  	obdst := bdst
  1262  	zero := op&1 == 0
  1263  
  1264  	for i := 0; i < dx; i++ {
  1265  		ma := bmask.alpha[0]
  1266  		da := bdst.alpha[0]
  1267  		fs := da
  1268  		if op&2 != 0 {
  1269  			fs = 255 - da
  1270  		}
  1271  		fd := uint8(0)
  1272  		if op&4 != 0 {
  1273  			fd = 255
  1274  		}
  1275  
  1276  		if grey {
  1277  			if ma != 0 {
  1278  				bdst.grey[0] = _CALC12(fs, bsrc.grey[0], fd, bdst.grey[0])
  1279  			} else if zero {
  1280  				bdst.grey[0] = 0
  1281  			}
  1282  			bsrc.grey = bsrc.grey[bsrc.delta:]
  1283  			bdst.grey = bdst.grey[bdst.delta:]
  1284  		} else {
  1285  			if ma != 0 {
  1286  				bdst.red[0] = _CALC12(fs, bsrc.red[0], fd, bdst.red[0])
  1287  				bdst.grn[0] = _CALC12(fs, bsrc.grn[0], fd, bdst.grn[0])
  1288  				bdst.blu[0] = _CALC12(fs, bsrc.blu[0], fd, bdst.blu[0])
  1289  			} else if zero {
  1290  				bdst.blu[0] = 0
  1291  				bdst.grn[0] = bdst.blu[0]
  1292  				bdst.red[0] = bdst.grn[0]
  1293  			}
  1294  			bsrc.red = bsrc.red[bsrc.delta:]
  1295  			bsrc.blu = bsrc.blu[bsrc.delta:]
  1296  			bsrc.grn = bsrc.grn[bsrc.delta:]
  1297  			bdst.red = bdst.red[bdst.delta:]
  1298  			bdst.blu = bdst.blu[bdst.delta:]
  1299  			bdst.grn = bdst.grn[bdst.delta:]
  1300  		}
  1301  		bmask.alpha = bmask.alpha[bmask.delta:]
  1302  		if &bdst.alpha[0] != &ones[0] {
  1303  			if ma != 0 {
  1304  				bdst.alpha[0] = fs + _CALC11(fd, da)
  1305  			} else if zero {
  1306  				bdst.alpha[0] = 0
  1307  			}
  1308  			bdst.alpha = bdst.alpha[bdst.delta:]
  1309  		}
  1310  	}
  1311  	return obdst
  1312  }
  1313  
  1314  func boolcalc1011(bdst _Buffer, bsrc _Buffer, bmask _Buffer, dx int, grey bool, op draw.Op) _Buffer {
  1315  	obdst := bdst
  1316  	zero := op&1 == 0
  1317  
  1318  	for i := 0; i < dx; i++ {
  1319  		ma := bmask.alpha[0]
  1320  
  1321  		if grey {
  1322  			if ma != 0 {
  1323  				bdst.grey[0] = bsrc.grey[0]
  1324  			} else if zero {
  1325  				bdst.grey[0] = 0
  1326  			}
  1327  			bsrc.grey = bsrc.grey[bsrc.delta:]
  1328  			bdst.grey = bdst.grey[bdst.delta:]
  1329  		} else {
  1330  			if ma != 0 {
  1331  				bdst.red[0] = bsrc.red[0]
  1332  				bdst.grn[0] = bsrc.grn[0]
  1333  				bdst.blu[0] = bsrc.blu[0]
  1334  			} else if zero {
  1335  				bdst.blu[0] = 0
  1336  				bdst.grn[0] = bdst.blu[0]
  1337  				bdst.red[0] = bdst.grn[0]
  1338  			}
  1339  			bsrc.red = bsrc.red[bsrc.delta:]
  1340  			bsrc.blu = bsrc.blu[bsrc.delta:]
  1341  			bsrc.grn = bsrc.grn[bsrc.delta:]
  1342  			bdst.red = bdst.red[bdst.delta:]
  1343  			bdst.blu = bdst.blu[bdst.delta:]
  1344  			bdst.grn = bdst.grn[bdst.delta:]
  1345  		}
  1346  		bmask.alpha = bmask.alpha[bmask.delta:]
  1347  		if &bdst.alpha[0] != &ones[0] {
  1348  			if ma != 0 {
  1349  				bdst.alpha[0] = 255
  1350  			} else if zero {
  1351  				bdst.alpha[0] = 0
  1352  			}
  1353  			bdst.alpha = bdst.alpha[bdst.delta:]
  1354  		}
  1355  	}
  1356  	return obdst
  1357  }
  1358  
  1359  /*
  1360   * Replicated cached scan line read.  Call the function listed in the drawParam,
  1361   * but cache the result so that for replicated images we only do the work once.
  1362   */
  1363  func replread(p *drawParam, s []uint8, y int) _Buffer {
  1364  	b := &p.bcache[y]
  1365  	if p.bfilled&(1<<y) == 0 {
  1366  		p.bfilled |= 1 << y
  1367  		*b = p.replcall(p, p.bufbase[y*p.bufdelta:], y)
  1368  	}
  1369  	return *b
  1370  }
  1371  
  1372  /*
  1373   * Alpha reading function that simply relabels the grey pointer.
  1374   */
  1375  func greymaskread(p *drawParam, buf []uint8, y int) _Buffer {
  1376  	b := p.greymaskcall(p, buf, y)
  1377  	b.alpha = b.grey
  1378  	return b
  1379  }
  1380  
  1381  // #define DBG 0
  1382  func readnbit(p *drawParam, buf []uint8, y int) _Buffer {
  1383  	var b _Buffer
  1384  	b.rgba = buf
  1385  	w := buf
  1386  	b.grey = w
  1387  	b.grn = w
  1388  	b.blu = b.grn
  1389  	b.red = b.blu
  1390  	b.alpha = ones[:]
  1391  	b.delta = 1
  1392  
  1393  	dx := p.dx
  1394  	img := p.img
  1395  	depth := img.Depth
  1396  	repl := &replbit[depth]
  1397  	npack := 8 / depth
  1398  	sh := 8 - depth
  1399  
  1400  	/* copy from p->r.min.x until end of repl rectangle */
  1401  	x := p.r.Min.X
  1402  	n := dx
  1403  	if n > p.img.R.Max.X-x {
  1404  		n = p.img.R.Max.X - x
  1405  	}
  1406  
  1407  	r := p.bytermin[y*p.bwidth:]
  1408  	if _DBG {
  1409  		fmt.Fprintf(os.Stderr, "readnbit dx %d %p=%p+%d*%d, *r=%d fetch %d ", dx, r, p.bytermin, y, p.bwidth, r[0], n)
  1410  	}
  1411  	bits := r[0]
  1412  	r = r[1:]
  1413  	nbits := 8
  1414  	i := x & (npack - 1)
  1415  	if i != 0 {
  1416  		if _DBG {
  1417  			fmt.Fprintf(os.Stderr, "throwaway %d...", i)
  1418  		}
  1419  		bits <<= depth * i
  1420  		nbits -= depth * i
  1421  	}
  1422  	for i = 0; i < n; i++ {
  1423  		if nbits == 0 {
  1424  			if _DBG {
  1425  				fmt.Fprintf(os.Stderr, "(%.2x)...", r[0])
  1426  			}
  1427  			bits = r[0]
  1428  			r = r[1:]
  1429  			nbits = 8
  1430  		}
  1431  		w[0] = repl[bits>>sh]
  1432  		w = w[1:]
  1433  		if _DBG {
  1434  			fmt.Fprintf(os.Stderr, "bit %x...", repl[bits>>sh])
  1435  		}
  1436  		bits <<= depth
  1437  		nbits -= depth
  1438  	}
  1439  	dx -= n
  1440  	if dx == 0 {
  1441  		return b
  1442  	}
  1443  
  1444  	assert(x+i == p.img.R.Max.X)
  1445  
  1446  	/* copy from beginning of repl rectangle until where we were before. */
  1447  	x = p.img.R.Min.X
  1448  	n = dx
  1449  	if n > p.r.Min.X-x {
  1450  		n = p.r.Min.X - x
  1451  	}
  1452  
  1453  	r = p.bytey0s[y*p.bwidth:]
  1454  	if _DBG {
  1455  		fmt.Fprintf(os.Stderr, "x=%d r=%p...", x, r)
  1456  	}
  1457  	bits = r[0]
  1458  	r = r[1:]
  1459  	nbits = 8
  1460  	i = x & (npack - 1)
  1461  	if i != 0 {
  1462  		bits <<= depth * i
  1463  		nbits -= depth * i
  1464  	}
  1465  	if _DBG {
  1466  		fmt.Fprintf(os.Stderr, "nbits=%d...", nbits)
  1467  	}
  1468  	for i = 0; i < n; i++ {
  1469  		if nbits == 0 {
  1470  			bits = r[0]
  1471  			r = r[1:]
  1472  			nbits = 8
  1473  		}
  1474  		w[0] = repl[bits>>sh]
  1475  		w = w[1:]
  1476  		if _DBG {
  1477  			fmt.Fprintf(os.Stderr, "bit %x...", repl[bits>>sh])
  1478  		}
  1479  		bits <<= depth
  1480  		nbits -= depth
  1481  		if _DBG {
  1482  			fmt.Fprintf(os.Stderr, "bits %x nbits %d...", bits, nbits)
  1483  		}
  1484  	}
  1485  	dx -= n
  1486  	if dx == 0 {
  1487  		return b
  1488  	}
  1489  
  1490  	assert(dx > 0)
  1491  	/* now we have exactly one full scan line: just replicate the buffer itself until we are done */
  1492  	ow := buf
  1493  	for {
  1494  		tmp8 := dx
  1495  		dx--
  1496  		if tmp8 == 0 {
  1497  			break
  1498  		}
  1499  		w[0] = ow[0]
  1500  		w = w[1:]
  1501  		ow = ow[1:]
  1502  	}
  1503  
  1504  	return b
  1505  }
  1506  
  1507  // #undef DBG
  1508  
  1509  // #define DBG 0
  1510  func writenbit(p *drawParam, w []uint8, src _Buffer) {
  1511  	assert(src.grey != nil && src.delta == 1)
  1512  
  1513  	x := p.r.Min.X
  1514  	ex := x + p.dx
  1515  	depth := p.img.Depth
  1516  	npack := 8 / depth
  1517  
  1518  	i := x & (npack - 1)
  1519  	bits := uint8(0)
  1520  	if i != 0 {
  1521  		bits = w[0] >> (8 - depth*i)
  1522  	}
  1523  	nbits := depth * i
  1524  	sh := 8 - depth
  1525  	r := src.grey
  1526  
  1527  	for ; x < ex; x++ {
  1528  		bits <<= depth
  1529  		if _DBG {
  1530  			fmt.Fprintf(os.Stderr, " %x", r[0])
  1531  		}
  1532  		bits |= r[0] >> sh
  1533  		r = r[1:]
  1534  		nbits += depth
  1535  		if nbits == 8 {
  1536  			w[0] = bits
  1537  			w = w[1:]
  1538  			nbits = 0
  1539  		}
  1540  	}
  1541  
  1542  	if nbits != 0 {
  1543  		sh = 8 - nbits
  1544  		bits <<= sh
  1545  		bits |= w[0] & ((1 << sh) - 1)
  1546  		w[0] = bits
  1547  	}
  1548  	if _DBG {
  1549  		fmt.Fprintf(os.Stderr, "\n")
  1550  	}
  1551  	return
  1552  }
  1553  
  1554  // #undef DBG
  1555  
  1556  func readcmap(p *drawParam, buf []uint8, y int) _Buffer {
  1557  	var b _Buffer
  1558  	begin := p.bytey0s[y*p.bwidth:]
  1559  	r := p.bytermin[y*p.bwidth:]
  1560  	end := p.bytey0e[y*p.bwidth:]
  1561  	r = r[:len(r)-len(end)]
  1562  	begin = begin[:len(begin)-len(end)]
  1563  	cmap := p.img.cmap.Cmap2rgb[:]
  1564  	convgrey := p.convgrey
  1565  	copyalpha := p.img.Flags&Falpha != 0
  1566  
  1567  	w := buf
  1568  	dx := p.dx
  1569  	if copyalpha {
  1570  		b.alpha = buf
  1571  		buf = buf[1:]
  1572  		a := p.img.shift[draw.CAlpha] / 8
  1573  		m := p.img.shift[draw.CMap] / 8
  1574  		for i := 0; i < dx; i++ {
  1575  			w[0] = r[a]
  1576  			w = w[1:]
  1577  			q := cmap[int(r[m])*3:]
  1578  			if _DBG {
  1579  				fmt.Fprintf(os.Stderr, "A %x -> %x %x %x\n", r[m], q[0], q[1], q[2])
  1580  			}
  1581  			r = r[2:]
  1582  			if len(r) == 0 {
  1583  				r = begin
  1584  			}
  1585  			if convgrey {
  1586  				w[0] = _RGB2K(q[0], q[1], q[2])
  1587  				w = w[1:]
  1588  			} else { /* blue */
  1589  				w[0] = q[2]
  1590  				w = w[1:] /* green */
  1591  				w[0] = q[1]
  1592  				w = w[1:] /* red */
  1593  				w[0] = q[0]
  1594  				w = w[1:]
  1595  			}
  1596  		}
  1597  	} else {
  1598  		b.alpha = ones[:]
  1599  		for i := 0; i < dx; i++ {
  1600  			q := cmap[int(r[0])*3:]
  1601  			if _DBG {
  1602  				fmt.Fprintf(os.Stderr, "D %p %x -> %x %x %x\n", cmap, r[0], q[0], q[1], q[2])
  1603  			}
  1604  			r = r[1:]
  1605  			if len(r) == 0 {
  1606  				r = begin
  1607  			}
  1608  			if convgrey {
  1609  				w[0] = _RGB2K(q[0], q[1], q[2])
  1610  				w = w[1:]
  1611  			} else { /* blue */
  1612  				w[0] = q[2]
  1613  				w = w[1:] /* green */
  1614  				w[0] = q[1]
  1615  				w = w[1:] /* red */
  1616  				w[0] = q[0]
  1617  				w = w[1:]
  1618  			}
  1619  		}
  1620  	}
  1621  
  1622  	b.rgba = nil // (*uint32)(buf - copyalpha)
  1623  
  1624  	if convgrey {
  1625  		b.grey = buf
  1626  		b.grn = buf
  1627  		b.blu = b.grn
  1628  		b.red = b.blu
  1629  		b.delta = 1
  1630  		if copyalpha {
  1631  			b.delta++
  1632  		}
  1633  	} else {
  1634  		b.blu = buf
  1635  		b.grn = buf[1:]
  1636  		b.red = buf[2:]
  1637  		b.grey = nil
  1638  		b.delta = 3
  1639  		if copyalpha {
  1640  			b.delta++
  1641  		}
  1642  	}
  1643  	return b
  1644  }
  1645  
  1646  func writecmap(p *drawParam, w []uint8, src _Buffer) {
  1647  	cmap := p.img.cmap.Rgb2cmap[:]
  1648  
  1649  	delta := src.delta
  1650  	red := src.red
  1651  	grn := src.grn
  1652  	blu := src.blu
  1653  
  1654  	dx := p.dx
  1655  	for i := 0; i < dx; func() { i++; red = red[delta:]; grn = grn[delta:]; blu = blu[delta:] }() {
  1656  		w[0] = cmap[(uint32(red[0])>>4)*256+(uint32(grn[0])>>4)*16+(uint32(blu[0])>>4)]
  1657  		if _DBG {
  1658  			fmt.Fprintf(os.Stderr, "%x %x %x -> %x\n", red[0], grn[0], blu[0], w[0])
  1659  		}
  1660  		w = w[1:]
  1661  	}
  1662  }
  1663  
  1664  // #define DBG drawdebug
  1665  func readbyte(p *drawParam, buf []uint8, y int) _Buffer {
  1666  	img := p.img
  1667  	begin := p.bytey0s[y*p.bwidth:]
  1668  	r := p.bytermin[y*p.bwidth:]
  1669  	end := p.bytey0e[y*p.bwidth:]
  1670  	r = r[:len(r)-len(end)]
  1671  	begin = begin[:len(begin)-len(end)]
  1672  
  1673  	if _DBG {
  1674  		fmt.Fprintf(os.Stderr, "readbyte dx=%d begin %p r %p end %p len %d buf %p\n",
  1675  			p.dx, begin, p.bytermin[y*p.bwidth:], end, len(r), buf)
  1676  	}
  1677  
  1678  	w := buf
  1679  	dx := p.dx
  1680  	nb := img.Depth / 8
  1681  
  1682  	convgrey := p.convgrey /* convert rgb to grey */
  1683  	isgrey := img.Flags & Fgrey
  1684  	alphaonly := p.alphaonly
  1685  	copyalpha := img.Flags&Falpha != 0
  1686  
  1687  	/* if we can, avoid processing everything */
  1688  	if img.Flags&Frepl == 0 && !convgrey && img.Flags&Fbytes != 0 {
  1689  		var b _Buffer
  1690  		if p.needbuf {
  1691  			copy(buf[:dx*nb], r[:dx*nb])
  1692  			r = buf[:dx*nb]
  1693  		}
  1694  		b.rgba = r
  1695  		if copyalpha {
  1696  			b.alpha = r[img.shift[draw.CAlpha]/8:]
  1697  		} else {
  1698  			b.alpha = ones[:]
  1699  		}
  1700  		if isgrey != 0 {
  1701  			b.grey = r[img.shift[draw.CGrey]/8:]
  1702  			b.blu = b.grey
  1703  			b.grn = b.blu
  1704  			b.red = b.grn
  1705  		} else {
  1706  			b.red = r[img.shift[draw.CRed]/8:]
  1707  			b.grn = r[img.shift[draw.CGreen]/8:]
  1708  			b.blu = r[img.shift[draw.CBlue]/8:]
  1709  		}
  1710  		b.delta = nb
  1711  		return b
  1712  	}
  1713  
  1714  	rrepl := replbit[img.nbits[draw.CRed]]
  1715  	grepl := replbit[img.nbits[draw.CGreen]]
  1716  	brepl := replbit[img.nbits[draw.CBlue]]
  1717  	arepl := replbit[img.nbits[draw.CAlpha]]
  1718  	krepl := replbit[img.nbits[draw.CGrey]]
  1719  
  1720  	for i := 0; i < dx; i++ {
  1721  		var u uint32
  1722  		if img.Depth == 32 {
  1723  			u = uint32(r[0]) | uint32(r[1])<<8 | uint32(r[2])<<16 | uint32(r[3])<<24
  1724  		} else if img.Depth == 24 {
  1725  			u = uint32(r[0]) | uint32(r[1])<<8 | uint32(r[2])<<16
  1726  		} else if img.Depth > 8 {
  1727  			u = uint32(r[0]) | uint32(r[1])<<8
  1728  		} else {
  1729  			u = uint32(r[0])
  1730  		}
  1731  		if copyalpha {
  1732  			w[0] = arepl[(u>>img.shift[draw.CAlpha])&img.mask[draw.CAlpha]]
  1733  			w = w[1:]
  1734  		}
  1735  
  1736  		if isgrey != 0 {
  1737  			w[0] = krepl[(u>>img.shift[draw.CGrey])&img.mask[draw.CGrey]]
  1738  			w = w[1:]
  1739  		} else if !alphaonly {
  1740  			ured := rrepl[(u>>img.shift[draw.CRed])&img.mask[draw.CRed]]
  1741  			ugrn := grepl[(u>>img.shift[draw.CGreen])&img.mask[draw.CGreen]]
  1742  			ublu := brepl[(u>>img.shift[draw.CBlue])&img.mask[draw.CBlue]]
  1743  			if convgrey {
  1744  				w[0] = _RGB2K(ured, ugrn, ublu)
  1745  				w = w[1:]
  1746  			} else {
  1747  				w[0] = brepl[(u>>img.shift[draw.CBlue])&img.mask[draw.CBlue]]
  1748  				w = w[1:]
  1749  				w[0] = grepl[(u>>img.shift[draw.CGreen])&img.mask[draw.CGreen]]
  1750  				w = w[1:]
  1751  				w[0] = rrepl[(u>>img.shift[draw.CRed])&img.mask[draw.CRed]]
  1752  				w = w[1:]
  1753  			}
  1754  		}
  1755  		r = r[nb:]
  1756  		if len(r) == 0 {
  1757  			r = begin
  1758  		}
  1759  	}
  1760  
  1761  	var b _Buffer
  1762  	if copyalpha {
  1763  		b.alpha = buf
  1764  	} else {
  1765  		b.alpha = ones[:]
  1766  	}
  1767  	b.rgba = buf
  1768  	if alphaonly {
  1769  		b.grey = nil
  1770  		b.blu = b.grey
  1771  		b.grn = b.blu
  1772  		b.red = b.grn
  1773  		if !copyalpha {
  1774  			b.rgba = nil
  1775  		}
  1776  		b.delta = 1
  1777  	} else if isgrey != 0 || convgrey {
  1778  		a := 0
  1779  		if copyalpha {
  1780  			a = 1
  1781  		}
  1782  		b.grey = buf[a:]
  1783  		b.blu = buf[a:]
  1784  		b.grn = b.blu
  1785  		b.red = b.grn
  1786  		b.delta = a + 1
  1787  	} else {
  1788  		a := 0
  1789  		if copyalpha {
  1790  			a = 1
  1791  		}
  1792  		b.blu = buf[a:]
  1793  		b.grn = buf[a+1:]
  1794  		b.grey = nil
  1795  		b.red = buf[a+2:]
  1796  		b.delta = a + 3
  1797  	}
  1798  
  1799  	if _DBG {
  1800  		fmt.Fprintf(os.Stderr, "END readbyte buf %p w %p (%x %x %x %x) grey %p alpha %p\n",
  1801  			buf, w, buf[0], buf[1], buf[2], buf[3], b.grey, b.alpha)
  1802  		dumpbuf("readbyte", b, dx)
  1803  	}
  1804  
  1805  	return b
  1806  }
  1807  
  1808  // #undef DBG
  1809  
  1810  // #define DBG drawdebug
  1811  func writebyte(p *drawParam, w []uint8, src _Buffer) {
  1812  	img := p.img
  1813  
  1814  	red := src.red
  1815  	grn := src.grn
  1816  	blu := src.blu
  1817  	alpha := src.alpha
  1818  	delta := src.delta
  1819  	grey := src.grey
  1820  	dx := p.dx
  1821  
  1822  	nb := img.Depth / 8
  1823  	var mask uint32
  1824  	if nb == 4 {
  1825  		mask = 0
  1826  	} else {
  1827  		mask = ^((1 << img.Depth) - 1)
  1828  	}
  1829  
  1830  	isalpha := img.Flags & Falpha
  1831  	isgrey := img.Flags & Fgrey
  1832  	adelta := src.delta
  1833  
  1834  	if isalpha != 0 && alpha == nil {
  1835  		alpha = ones[:]
  1836  		adelta = 0
  1837  	}
  1838  
  1839  	for i := 0; i < dx; i++ {
  1840  		di := i * delta
  1841  		ai := i * adelta
  1842  		var u uint32
  1843  		if nb == 4 {
  1844  			u = uint32(w[0]) | uint32(w[1])<<8 | uint32(w[2])<<16 | uint32(w[3])<<24
  1845  		} else if nb == 3 {
  1846  			u = uint32(w[0]) | uint32(w[1])<<8 | uint32(w[2])<<16
  1847  		} else if nb == 2 {
  1848  			u = uint32(w[0]) | uint32(w[1])<<8
  1849  		} else {
  1850  			u = uint32(w[0])
  1851  		}
  1852  		if _DBG {
  1853  			fmt.Fprintf(os.Stderr, "u %.8x...", u)
  1854  		}
  1855  		u &= mask
  1856  		if _DBG {
  1857  			fmt.Fprintf(os.Stderr, "&mask %.8x...", u)
  1858  		}
  1859  		if isgrey != 0 {
  1860  			u |= ((uint32(grey[di]) >> (8 - img.nbits[draw.CGrey])) & img.mask[draw.CGrey]) << img.shift[draw.CGrey]
  1861  			if _DBG {
  1862  				fmt.Fprintf(os.Stderr, "|grey %.8x...", u)
  1863  			}
  1864  		} else {
  1865  			u |= ((uint32(red[di]) >> (8 - img.nbits[draw.CRed])) & img.mask[draw.CRed]) << img.shift[draw.CRed]
  1866  			u |= ((uint32(grn[di]) >> (8 - img.nbits[draw.CGreen])) & img.mask[draw.CGreen]) << img.shift[draw.CGreen]
  1867  			u |= ((uint32(blu[di]) >> (8 - img.nbits[draw.CBlue])) & img.mask[draw.CBlue]) << img.shift[draw.CBlue]
  1868  			if _DBG {
  1869  				fmt.Fprintf(os.Stderr, "|rgb %.8x...", u)
  1870  			}
  1871  		}
  1872  
  1873  		if isalpha != 0 {
  1874  			u |= ((uint32(alpha[ai]) >> (8 - img.nbits[draw.CAlpha])) & img.mask[draw.CAlpha]) << img.shift[draw.CAlpha]
  1875  			if _DBG {
  1876  				fmt.Fprintf(os.Stderr, "|alpha %.8x...", u)
  1877  			}
  1878  		}
  1879  
  1880  		if nb == 4 {
  1881  			w[0] = uint8(u)
  1882  			w[1] = uint8(u >> 8)
  1883  			w[2] = uint8(u >> 16)
  1884  			w[3] = uint8(u >> 24)
  1885  		} else if nb == 3 {
  1886  			w[0] = uint8(u)
  1887  			w[1] = uint8(u >> 8)
  1888  			w[2] = uint8(u >> 16)
  1889  		} else if nb == 2 {
  1890  			w[0] = uint8(u)
  1891  			w[1] = uint8(u >> 8)
  1892  		} else {
  1893  			w[0] = uint8(u)
  1894  		}
  1895  		if _DBG {
  1896  			fmt.Fprintf(os.Stderr, "write back %.8x...", u)
  1897  		}
  1898  		w = w[nb:]
  1899  	}
  1900  }
  1901  
  1902  // #undef DBG
  1903  
  1904  func readfn(img *Image) _Readfn {
  1905  	if img.Depth < 8 {
  1906  		return readnbit
  1907  	}
  1908  	if img.nbits[draw.CMap] == 8 {
  1909  		return readcmap
  1910  	}
  1911  	return readbyte
  1912  }
  1913  
  1914  func readalphafn(m *Image) _Readfn {
  1915  	return readbyte
  1916  }
  1917  
  1918  func writefn(img *Image) _Writefn {
  1919  	if img.Depth < 8 {
  1920  		return writenbit
  1921  	}
  1922  	if img.Pix == draw.CMAP8 {
  1923  		return writecmap
  1924  	}
  1925  	return writebyte
  1926  }
  1927  
  1928  func nullwrite(p *drawParam, s []uint8, b _Buffer) {
  1929  }
  1930  
  1931  func readptr(p *drawParam, s []uint8, y int) _Buffer {
  1932  	var b _Buffer
  1933  	q := p.bytermin[y*p.bwidth:]
  1934  	b.red = q /* ptr to data */
  1935  	b.alpha = nil
  1936  	b.grey = b.alpha
  1937  	b.blu = b.grey
  1938  	b.grn = b.blu
  1939  	b.rgba = q
  1940  	b.delta = p.img.Depth / 8
  1941  	return b
  1942  }
  1943  
  1944  func boolmemmove(bdst _Buffer, bsrc _Buffer, b1 _Buffer, dx int, i bool, o draw.Op) _Buffer {
  1945  	copy(bdst.red[:dx*bdst.delta], bsrc.red[:dx*bdst.delta])
  1946  	return bdst
  1947  }
  1948  
  1949  func boolcopy8(bdst _Buffer, bsrc _Buffer, bmask _Buffer, dx int, i bool, o draw.Op) _Buffer {
  1950  	m := bmask.grey
  1951  	w := bdst.red
  1952  	r := bsrc.red
  1953  	for i := 0; i < dx; i++ {
  1954  		if m[i] != 0 {
  1955  			w[i] = r[i]
  1956  		}
  1957  	}
  1958  	return bdst /* not used */
  1959  }
  1960  
  1961  func boolcopy16(bdst _Buffer, bsrc _Buffer, bmask _Buffer, dx int, i bool, o draw.Op) _Buffer {
  1962  	m := bmask.grey
  1963  	w := uint8shorts(bdst.red)
  1964  	r := uint8shorts(bsrc.red)
  1965  	for i := 0; i < dx; i++ {
  1966  		if m[i] != 0 {
  1967  			w[i] = r[i]
  1968  		}
  1969  	}
  1970  	return bdst /* not used */
  1971  }
  1972  
  1973  func boolcopy24(bdst _Buffer, bsrc _Buffer, bmask _Buffer, dx int, i bool, o draw.Op) _Buffer {
  1974  	m := bmask.grey
  1975  	w := bdst.red
  1976  	r := bsrc.red
  1977  	for i, j := 0, 0; i < dx; i, j = i+1, j+3 {
  1978  		if m[i] != 0 {
  1979  			w[j] = r[j]
  1980  			w[j+1] = r[j+1]
  1981  			w[j+2] = r[j+2]
  1982  		}
  1983  	}
  1984  	return bdst /* not used */
  1985  }
  1986  
  1987  func boolcopy32(bdst _Buffer, bsrc _Buffer, bmask _Buffer, dx int, i bool, o draw.Op) _Buffer {
  1988  	m := bmask.grey
  1989  	w := uint8words(bdst.red)
  1990  	r := uint8words(bsrc.red)
  1991  	for i := 0; i < dx; i++ {
  1992  		if m[i] != 0 {
  1993  			w[i] = r[i]
  1994  		}
  1995  	}
  1996  	return bdst /* not used */
  1997  }
  1998  
  1999  func genconv(p *drawParam, buf []uint8, y int) _Buffer {
  2000  	/* read from source into RGB format in convbuf */
  2001  	b := p.convreadcall(p, p.convbuf, y)
  2002  
  2003  	/* write RGB format into dst format in buf */
  2004  	p.convwritecall(p.convdpar, buf, b)
  2005  
  2006  	if p.convdx != 0 {
  2007  		nb := p.convdpar.img.Depth / 8
  2008  		r := buf
  2009  		w := buf[nb*p.dx : nb*p.convdx]
  2010  		copy(w, r)
  2011  	}
  2012  
  2013  	b.red = buf
  2014  	b.alpha = nil
  2015  	b.grey = b.alpha
  2016  	b.grn = b.grey
  2017  	b.blu = b.grn
  2018  	b.rgba = buf
  2019  	b.delta = 0
  2020  
  2021  	return b
  2022  }
  2023  
  2024  func convfn(dst *Image, dpar *drawParam, src *Image, spar *drawParam) _Readfn {
  2025  	if dst.Pix == src.Pix && src.Flags&Frepl == 0 {
  2026  		/*if(drawdebug) fmt.Fprintf(os.Stderr, "readptr..."); */
  2027  		return readptr
  2028  	}
  2029  
  2030  	if dst.Pix == draw.CMAP8 && (src.Pix == draw.GREY1 || src.Pix == draw.GREY2 || src.Pix == draw.GREY4) {
  2031  		/* cheat because we know the replicated value is exactly the color map entry. */
  2032  		/*if(drawdebug) fmt.Fprintf(os.Stderr, "Readnbit..."); */
  2033  		return readnbit
  2034  	}
  2035  
  2036  	spar.convreadcall = readfn(src)
  2037  	spar.convwritecall = writefn(dst)
  2038  	spar.convdpar = dpar
  2039  
  2040  	/* allocate a conversion buffer */
  2041  	spar.convbufoff = ndrawbuf
  2042  	ndrawbuf += spar.dx * 4
  2043  
  2044  	if spar.dx > spar.img.R.Dx() {
  2045  		spar.convdx = spar.dx
  2046  		spar.dx = spar.img.R.Dx()
  2047  	}
  2048  
  2049  	/*if(drawdebug) fmt.Fprintf(os.Stderr, "genconv..."); */
  2050  	return genconv
  2051  }
  2052  
  2053  /*
  2054   * Do NOT call this directly.  pixelbits is a wrapper
  2055   * around this that fetches the bits from the X server
  2056   * when necessary.
  2057   */
  2058  func _pixelbits(i *Image, pt draw.Point) uint32 {
  2059  	val := uint32(0)
  2060  	p := byteaddr(i, pt)
  2061  	bpp := i.Depth
  2062  	var off int
  2063  	var npack int
  2064  	switch bpp {
  2065  	case 1, 2, 4:
  2066  		npack = 8 / bpp
  2067  		off = pt.X % npack
  2068  		val = uint32(p[0]) >> (bpp * (npack - 1 - off))
  2069  		val &= (1 << bpp) - 1
  2070  	case 8:
  2071  		val = uint32(p[0])
  2072  	case 16:
  2073  		val = uint32(p[0]) | uint32(p[1])<<8
  2074  	case 24:
  2075  		val = uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16
  2076  	case 32:
  2077  		val = uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
  2078  	}
  2079  	for bpp < 32 {
  2080  		val |= val << bpp
  2081  		bpp *= 2
  2082  	}
  2083  	return val
  2084  }
  2085  
  2086  func boolcopyfn(img *Image, mask *Image) _Calcfn {
  2087  	if mask.Flags&Frepl != 0 && mask.R.Dx() == 1 && mask.R.Dy() == 1 && ^pixelbits(mask, mask.R.Min) == 0 {
  2088  		return boolmemmove
  2089  	}
  2090  
  2091  	switch img.Depth {
  2092  	case 8:
  2093  		return boolcopy8
  2094  	case 16:
  2095  		return boolcopy16
  2096  	case 24:
  2097  		return boolcopy24
  2098  	case 32:
  2099  		return boolcopy32
  2100  	default:
  2101  		panic("boolcopyfn")
  2102  	}
  2103  	return nil
  2104  }
  2105  
  2106  /*
  2107   * Optimized draw for filling and scrolling; uses memset and memmove.
  2108   */
  2109  func memsets(vp []byte, val uint16, n int) {
  2110  	p := uint8shorts(vp)[:n]
  2111  	for i := range p {
  2112  		p[i] = val
  2113  	}
  2114  }
  2115  
  2116  func memsetl(vp []byte, val uint32, n int) {
  2117  	p := uint8words(vp)[:n]
  2118  	for i := range p {
  2119  		p[i] = val
  2120  	}
  2121  }
  2122  
  2123  func memset24(vp []byte, val uint32, n int) {
  2124  	p := vp
  2125  	a := uint8(val)
  2126  	b := uint8(val >> 8)
  2127  	c := uint8(val >> 16)
  2128  	n *= 3
  2129  	for j := 0; j < n; j += 3 {
  2130  		p[j] = a
  2131  		p[j+1] = b
  2132  		p[j+2] = c
  2133  	}
  2134  }
  2135  
  2136  func _NBITS(c draw.Pix) uint { return uint(c & 15) }
  2137  func _TYPE(c draw.Pix) int   { return int((c >> 4) & 15) }
  2138  
  2139  func _imgtorgba(img *Image, val uint32) draw.Color {
  2140  	a := uint32(0xFF) /* garbage */
  2141  	b := uint32(0xAA)
  2142  	g := uint32(b)
  2143  	r := uint32(g)
  2144  	for chan_ := img.Pix; chan_ != 0; chan_ >>= 8 {
  2145  		nb := _NBITS(chan_)
  2146  		v := val & ((1 << nb) - 1)
  2147  		ov := v
  2148  		val >>= nb
  2149  
  2150  		for nb < 8 {
  2151  			v |= v << nb
  2152  			nb *= 2
  2153  		}
  2154  		v >>= (nb - 8)
  2155  
  2156  		switch _TYPE(chan_) {
  2157  		case draw.CRed:
  2158  			r = v
  2159  		case draw.CGreen:
  2160  			g = v
  2161  		case draw.CBlue:
  2162  			b = v
  2163  		case draw.CAlpha:
  2164  			a = v
  2165  		case draw.CGrey:
  2166  			b = v
  2167  			g = b
  2168  			r = g
  2169  		case draw.CMap:
  2170  			p := img.cmap.Cmap2rgb[3*ov:]
  2171  			r = uint32(p[0])
  2172  			g = uint32(p[1])
  2173  			b = uint32(p[2])
  2174  			if _DBG {
  2175  				fmt.Fprintf(os.Stderr, "%x -> %x %x %x\n", ov, r, g, b)
  2176  			}
  2177  		}
  2178  	}
  2179  	return draw.Color(r<<24 | g<<16 | b<<8 | a)
  2180  }
  2181  
  2182  func _rgbatoimg(img *Image, rgba draw.Color) uint32 {
  2183  	v := uint32(0)
  2184  	r := uint32(rgba>>24) & 0xFF
  2185  	g := uint32(rgba>>16) & 0xFF
  2186  	b := uint32(rgba>>8) & 0xFF
  2187  	a := uint32(rgba) & 0xFF
  2188  	d := uint(0)
  2189  	for chan_ := img.Pix; chan_ != 0; chan_ >>= 8 {
  2190  		nb := _NBITS(chan_)
  2191  		var m uint32
  2192  		switch _TYPE(chan_) {
  2193  		case draw.CRed:
  2194  			v |= (r >> (8 - nb)) << d
  2195  		case draw.CGreen:
  2196  			v |= (g >> (8 - nb)) << d
  2197  		case draw.CBlue:
  2198  			v |= (b >> (8 - nb)) << d
  2199  		case draw.CAlpha:
  2200  			v |= (a >> (8 - nb)) << d
  2201  		case draw.CMap:
  2202  			p := img.cmap.Rgb2cmap[:]
  2203  			m = uint32(p[(uint32(r)>>4)*256+(uint32(g)>>4)*16+(uint32(b)>>4)])
  2204  			if _DBG {
  2205  				fmt.Fprintf(os.Stderr, "%x %x %x -> %x\n", r, g, b, m)
  2206  			}
  2207  			v |= (m >> (8 - nb)) << d
  2208  		case draw.CGrey:
  2209  			m = uint32(_RGB2K(uint8(r), uint8(g), uint8(b)))
  2210  			v |= (m >> (8 - nb)) << d
  2211  		}
  2212  		d += nb
  2213  	}
  2214  	/*	fmt.Fprintf(os.Stderr, "rgba2img %.8x = %.*lux\n", rgba, 2*d/8, v); */
  2215  	return v
  2216  }
  2217  
  2218  // #define DBG 0
  2219  func memoptdraw(par *memDrawParam) int {
  2220  	dx := par.r.Dx()
  2221  	dy := par.r.Dy()
  2222  	src := par.src
  2223  	dst := par.dst
  2224  	op := par.op
  2225  
  2226  	if _DBG {
  2227  		fmt.Fprintf(os.Stderr, "state %x mval %x dd %d\n", par.state, par.mval, dst.Depth)
  2228  	}
  2229  	/*
  2230  	 * If we have an opaque mask and source is one opaque pixel we can convert to the
  2231  	 * destination format and just replicate with memset.
  2232  	 */
  2233  	m := uint32(_Simplesrc | _Simplemask | _Fullmask)
  2234  	if par.state&m == m && par.srgba&0xFF == 0xFF && (op == draw.S || op == draw.SoverD) {
  2235  		if _DBG {
  2236  			fmt.Fprintf(os.Stderr, "memopt, dst %p, dst->data->bdata %p\n", dst, dst.Data.Bdata)
  2237  		}
  2238  		dwid := int(dst.Width) * 4
  2239  		dp := byteaddr(dst, par.r.Min)
  2240  		v := par.sdval
  2241  		if _DBG {
  2242  			fmt.Fprintf(os.Stderr, "sdval %lud, depth %d\n", v, dst.Depth)
  2243  		}
  2244  		switch dst.Depth {
  2245  		case 1, 2, 4:
  2246  			for d := dst.Depth; d < 8; d *= 2 {
  2247  				v |= v << d
  2248  			}
  2249  			ppb := 8 / dst.Depth /* pixels per byte */
  2250  			m := ppb - 1
  2251  			/* left edge */
  2252  			np := par.r.Min.X & m /* no. pixels unused on left side of word */
  2253  			dx -= (ppb - np)
  2254  			nb := 8 - np*dst.Depth /* no. bits used on right side of word */
  2255  			lm := (uint8(1) << nb) - 1
  2256  			if _DBG {
  2257  				fmt.Fprintf(os.Stderr, "np %d x %d nb %d lm %x ppb %d m %x\n", np, par.r.Min.X, nb, lm, ppb, m)
  2258  			}
  2259  
  2260  			/* right edge */
  2261  			np = par.r.Max.X & m /* no. pixels used on left side of word */
  2262  			dx -= np
  2263  			nb = 8 - np*dst.Depth /* no. bits unused on right side of word */
  2264  			rm := ^((uint8(1) << nb) - 1)
  2265  			if _DBG {
  2266  				fmt.Fprintf(os.Stderr, "np %d x %d nb %d rm %x ppb %d m %x\n", np, par.r.Max.X, nb, rm, ppb, m)
  2267  			}
  2268  
  2269  			if _DBG {
  2270  				fmt.Fprintf(os.Stderr, "dx %d Dx %d\n", dx, par.r.Dx())
  2271  			}
  2272  			/* lm, rm are masks that are 1 where we should touch the bits */
  2273  			if dx < 0 { /* just one byte */
  2274  				lm &= rm
  2275  				for y := 0; y < dy; y++ {
  2276  					dp[0] ^= (uint8(v) ^ dp[0]) & lm
  2277  					dp = dp[dwid:]
  2278  				}
  2279  			} else if dx == 0 { /* no full bytes */
  2280  				if lm != 0 {
  2281  					dwid--
  2282  				}
  2283  				for y := 0; y < dy; y++ {
  2284  					if lm != 0 {
  2285  						if _DBG {
  2286  							fmt.Fprintf(os.Stderr, "dp %p v %x lm %x (v ^ *dp) & lm %x\n", dp, v, lm, (uint8(v)^dp[0])&lm)
  2287  						}
  2288  						dp[0] ^= (uint8(v) ^ dp[0]) & lm
  2289  						dp = dp[1:]
  2290  					}
  2291  					dp[0] ^= (uint8(v) ^ dp[0]) & rm
  2292  					dp = dp[dwid:]
  2293  				}
  2294  			} else { /* full bytes in middle */
  2295  				dx /= ppb
  2296  				if lm != 0 {
  2297  					dwid--
  2298  				}
  2299  				dwid -= dx
  2300  
  2301  				for y := 0; y < dy; y++ {
  2302  					if lm != 0 {
  2303  						dp[0] ^= (uint8(v) ^ dp[0]) & lm
  2304  						dp = dp[1:]
  2305  					}
  2306  					row := dp[:dx]
  2307  					for i := range row {
  2308  						row[i] = uint8(v)
  2309  					}
  2310  					dp = dp[dx:]
  2311  					dp[0] ^= (uint8(v) ^ dp[0]) & rm
  2312  					dp = dp[dwid:]
  2313  				}
  2314  			}
  2315  			return 1
  2316  		case 8:
  2317  			for y := 0; y < dy; y++ {
  2318  				row := dp[:dx]
  2319  				for i := range row {
  2320  					row[i] = uint8(v)
  2321  				}
  2322  				dp = dp[dwid:]
  2323  			}
  2324  			return 1
  2325  		case 16:
  2326  			var p [2]uint8
  2327  			p[0] = uint8(v) /* make little endian */
  2328  			p[1] = uint8(v >> 8)
  2329  			v := *(*uint16)(unsafe.Pointer(&p[0]))
  2330  			if _DBG {
  2331  				fmt.Fprintf(os.Stderr, "dp=%p; dx=%d; for(y=0; y<%d; y++, dp+=%d)\nmemsets(dp, v, dx);\n", dp, dx, dy, dwid)
  2332  			}
  2333  			for y := 0; y < dy; y++ {
  2334  				memsets(dp, v, dx)
  2335  				dp = dp[dwid:]
  2336  			}
  2337  			return 1
  2338  		case 24:
  2339  			for y := 0; y < dy; y++ {
  2340  				memset24(dp, v, dx)
  2341  				dp = dp[dwid:]
  2342  			}
  2343  			return 1
  2344  		case 32:
  2345  			var p [4]uint8
  2346  			p[0] = uint8(v) /* make little endian */
  2347  			p[1] = uint8(v >> 8)
  2348  			p[2] = uint8(v >> 16)
  2349  			p[3] = uint8(v >> 24)
  2350  			v := *(*uint32)(unsafe.Pointer(&p[0]))
  2351  			for y := 0; y < dy; y++ {
  2352  				memsetl(dp[y*dwid:], v, dx)
  2353  			}
  2354  			return 1
  2355  		default:
  2356  			panic("bad dest depth in memoptdraw")
  2357  		}
  2358  	}
  2359  
  2360  	/*
  2361  	 * If no source alpha, an opaque mask, we can just copy the
  2362  	 * source onto the destination.  If the channels are the same and
  2363  	 * the source is not replicated, memmove suffices.
  2364  	 */
  2365  	m = _Simplemask | _Fullmask
  2366  	if par.state&(m|_Replsrc) == m && src.Depth >= 8 && src.Pix == dst.Pix && src.Flags&Falpha == 0 && (op == draw.S || op == draw.SoverD) {
  2367  		var dir int
  2368  		if src.Data == dst.Data && len(byteaddr(dst, par.r.Min)) < len(byteaddr(src, par.sr.Min)) {
  2369  			dir = -1
  2370  		} else {
  2371  			dir = 1
  2372  		}
  2373  
  2374  		swid := int(src.Width) * 4
  2375  		dwid := int(dst.Width) * 4
  2376  		sp := byteaddr(src, par.sr.Min)
  2377  		dp := byteaddr(dst, par.r.Min)
  2378  		nb := (dx * src.Depth) / 8
  2379  
  2380  		if dir == -1 {
  2381  			for y := dy - 1; y >= 0; y-- {
  2382  				copy(dp[dwid*y:dwid*y+nb], sp[swid*y:swid*y+nb])
  2383  			}
  2384  		} else {
  2385  			for y := 0; y < dy; y++ {
  2386  				copy(dp[dwid*y:dwid*y+nb], sp[swid*y:swid*y+nb])
  2387  			}
  2388  		}
  2389  		return 1
  2390  	}
  2391  
  2392  	/*
  2393  	 * If we have a 1-bit mask, 1-bit source, and 1-bit destination, and
  2394  	 * they're all bit aligned, we can just use bit operators.  This happens
  2395  	 * when we're manipulating boolean masks, e.g. in the arc code.
  2396  	 */
  2397  	if par.state&(_Simplemask|_Simplesrc|_Replmask|_Replsrc) == 0 && dst.Pix == draw.GREY1 && src.Pix == draw.GREY1 && par.mask.Pix == draw.GREY1 && par.r.Min.X&7 == par.sr.Min.X&7 && par.r.Min.X&7 == par.mr.Min.X&7 {
  2398  		sp := byteaddr(src, par.sr.Min)
  2399  		dp := byteaddr(dst, par.r.Min)
  2400  		mp := byteaddr(par.mask, par.mr.Min)
  2401  		swid := int(src.Width) * 4
  2402  		dwid := int(dst.Width) * 4
  2403  		mwid := int(par.mask.Width) * 4
  2404  		var dir int
  2405  
  2406  		if src.Data == dst.Data && len(byteaddr(dst, par.r.Min)) < len(byteaddr(src, par.sr.Min)) {
  2407  			dir = -1
  2408  		} else {
  2409  			dir = 1
  2410  		}
  2411  
  2412  		lm := uint8(0xFF) >> (par.r.Min.X & 7)
  2413  		rm := uint8(0xFF) << (8 - (par.r.Max.X & 7))
  2414  		dx -= (8 - (par.r.Min.X & 7)) + (par.r.Max.X & 7)
  2415  
  2416  		if dx < 0 { /* one byte wide */
  2417  			lm &= rm
  2418  			if dir == -1 {
  2419  				for y := dy - 1; y >= 0; y-- {
  2420  					dp[y*dwid] ^= (dp[y*dwid] ^ sp[y*swid]) & mp[y*mwid] & lm
  2421  				}
  2422  			} else {
  2423  				for y := 0; y < dy; y++ {
  2424  					dp[y*dwid] ^= (dp[y*dwid] ^ sp[y*swid]) & mp[y*mwid] & lm
  2425  				}
  2426  			}
  2427  			return 1
  2428  		}
  2429  
  2430  		dx /= 8
  2431  		if dir == 1 {
  2432  			for y := 0; y < dy; y++ {
  2433  				j := 0
  2434  				if lm != 0 {
  2435  					dp[y*dwid] ^= (dp[y*dwid] ^ sp[y*swid]) & mp[y*mwid] & lm
  2436  					j = 1
  2437  				}
  2438  				for x := 0; x < dx; x++ {
  2439  					dp[y*dwid+j+x] ^= (dp[y*dwid+j+x] ^ sp[y*swid+j+x]) & mp[y*mwid+j+x]
  2440  				}
  2441  				if rm != 0 {
  2442  					dp[y*dwid+j+dx] ^= (dp[y*dwid+j+dx] ^ sp[y*swid+j+dx]) & mp[y*mwid+j+dx] & rm
  2443  				}
  2444  			}
  2445  			return 1
  2446  		} else {
  2447  			/* dir == -1 */
  2448  			for y := dy - 1; y >= 0; y-- {
  2449  				j := 0
  2450  				if lm != 0 {
  2451  					j = 1
  2452  				}
  2453  				if rm != 0 {
  2454  					dp[y*dwid+j+dx] ^= (dp[y*dwid+j+dx] ^ sp[y*swid+j+dx]) & mp[y*mwid+j+dx] & rm
  2455  				}
  2456  				for x := dx - 1; x >= 0; x-- {
  2457  					dp[y*dwid+j+x] ^= (dp[y*dwid+j+x] ^ sp[y*swid+j+x]) & mp[y*mwid+j+x]
  2458  				}
  2459  				if lm != 0 {
  2460  					dp[y*dwid] ^= (dp[y*dwid] ^ sp[y*swid]) & mp[y*mwid] & lm
  2461  				}
  2462  			}
  2463  		}
  2464  		return 1
  2465  	}
  2466  	return 0
  2467  }
  2468  
  2469  // #undef DBG
  2470  
  2471  /*
  2472   * Boolean character drawing.
  2473   * Solid opaque color through a 1-bit greyscale mask.
  2474   */
  2475  // #define DBG 0
  2476  func chardraw(par *memDrawParam) int {
  2477  	// black box to hide pointer conversions from gcc.
  2478  	// we'll see how long this works.
  2479  
  2480  	if 0 != 0 {
  2481  		if drawdebug != 0 {
  2482  			fmt.Fprintf(os.Stderr, "chardraw? mf %x md %d sf %x dxs %d dys %d dd %d ddat %p sdat %p\n", par.mask.Flags, par.mask.Depth, par.src.Flags, par.src.R.Dx(), par.src.R.Dy(), par.dst.Depth, par.dst.Data, par.src.Data)
  2483  		}
  2484  	}
  2485  
  2486  	mask := par.mask
  2487  	src := par.src
  2488  	dst := par.dst
  2489  	r := par.r
  2490  	mr := par.mr
  2491  	op := par.op
  2492  
  2493  	if par.state&(_Replsrc|_Simplesrc|_Fullsrc|_Replmask) != _Replsrc|_Simplesrc|_Fullsrc || mask.Depth != 1 || dst.Depth < 8 || dst.Data == src.Data || op != draw.SoverD {
  2494  		return 0
  2495  	}
  2496  
  2497  	/*if(drawdebug) fmt.Fprintf(os.Stderr, "chardraw..."); */
  2498  
  2499  	depth := mask.Depth
  2500  	maskwid := int(mask.Width) * 4
  2501  	rp := byteaddr(mask, mr.Min)
  2502  	npack := 8 / depth
  2503  	bsh := (mr.Min.X % npack) * depth
  2504  
  2505  	wp := byteaddr(dst, r.Min)
  2506  	dstwid := int(dst.Width) * 4
  2507  	if _DBG {
  2508  		fmt.Fprintf(os.Stderr, "bsh %d\n", bsh)
  2509  	}
  2510  	dy := r.Dy()
  2511  	dx := r.Dx()
  2512  
  2513  	ddepth := dst.Depth
  2514  
  2515  	/*
  2516  	 * for loop counts from bsh to bsh+dx
  2517  	 *
  2518  	 * we want the bottom bits to be the amount
  2519  	 * to shift the pixels down, so for n≡0 (mod 8) we want
  2520  	 * bottom bits 7.  for n≡1, 6, etc.
  2521  	 * the bits come from -n-1.
  2522  	 */
  2523  
  2524  	bx := -bsh - 1
  2525  	ex := -bsh - 1 - dx
  2526  	v := par.sdval
  2527  
  2528  	/* make little endian */
  2529  	var sp [4]uint8
  2530  	sp[0] = uint8(v)
  2531  	sp[1] = uint8(v >> 8)
  2532  	sp[2] = uint8(v >> 16)
  2533  	sp[3] = uint8(v >> 24)
  2534  
  2535  	/*fmt.Fprintf(os.Stderr, "sp %x %x %x %x\n", sp[0], sp[1], sp[2], sp[3]); */
  2536  	for y := 0; y < dy; {
  2537  		q := rp
  2538  		var bits uint32
  2539  		if bsh != 0 {
  2540  			bits = uint32(q[0])
  2541  			q = q[1:]
  2542  		}
  2543  
  2544  		switch ddepth {
  2545  		/*if(drawdebug) fmt.Fprintf(os.Stderr, "8loop..."); */
  2546  		case 8:
  2547  			wc := wp
  2548  			for x := bx; x > ex; x-- {
  2549  				i := x & 7
  2550  				if i == 8-1 {
  2551  					bits = uint32(q[0])
  2552  					q = q[1:]
  2553  				}
  2554  				if _DBG {
  2555  					fmt.Fprintf(os.Stderr, "bits %x sh %d...", bits, i)
  2556  				}
  2557  				if (bits>>i)&1 != 0 {
  2558  					wc[0] = uint8(v)
  2559  				}
  2560  				wc = wc[1:]
  2561  			}
  2562  		case 16:
  2563  			ws := uint8shorts(wp)
  2564  			v := *(*uint16)(unsafe.Pointer(&sp[0]))
  2565  			for x := bx; x > ex; x-- {
  2566  				i := x & 7
  2567  				if i == 8-1 {
  2568  					bits = uint32(q[0])
  2569  					q = q[1:]
  2570  				}
  2571  				if _DBG {
  2572  					fmt.Fprintf(os.Stderr, "bits %x sh %d...", bits, i)
  2573  				}
  2574  				if (bits>>i)&1 != 0 {
  2575  					ws[0] = v
  2576  				}
  2577  				ws = ws[1:]
  2578  			}
  2579  		case 24:
  2580  			wc := wp
  2581  			for x := bx; x > ex; x-- {
  2582  				i := x & 7
  2583  				if i == 8-1 {
  2584  					bits = uint32(q[0])
  2585  					q = q[1:]
  2586  				}
  2587  				if _DBG {
  2588  					fmt.Fprintf(os.Stderr, "bits %x sh %d...", bits, i)
  2589  				}
  2590  				if (bits>>i)&1 != 0 {
  2591  					wc[0] = sp[0]
  2592  					wc[1] = sp[1]
  2593  					wc[2] = sp[2]
  2594  				}
  2595  				wc = wc[3:]
  2596  			}
  2597  		case 32:
  2598  			wl := uint8words(wp)
  2599  			v := *(*uint32)(unsafe.Pointer(&sp[0]))
  2600  			for x := bx; x > ex; x-- {
  2601  				i := x & 7
  2602  				if i == 8-1 {
  2603  					bits = uint32(q[0])
  2604  					q = q[1:]
  2605  				}
  2606  				if _DBG {
  2607  					fmt.Fprintf(os.Stderr, "bits %x sh %d...", bits, i)
  2608  				}
  2609  				if (bits>>i)&1 != 0 {
  2610  					wl[0] = v
  2611  				}
  2612  				wl = wl[1:]
  2613  			}
  2614  		}
  2615  		if y++; y >= dy {
  2616  			break
  2617  		}
  2618  		rp = rp[maskwid:]
  2619  		wp = wp[dstwid:]
  2620  	}
  2621  
  2622  	if _DBG {
  2623  		fmt.Fprintf(os.Stderr, "\n")
  2624  	}
  2625  	return 1
  2626  }
  2627  
  2628  // #undef DBG
  2629  
  2630  /*
  2631   * Fill entire byte with replicated (if necessary) copy of source pixel,
  2632   * assuming destination ldepth is >= source ldepth.
  2633   *
  2634   * This code is just plain wrong for >8bpp.
  2635   *
  2636  u32int
  2637  membyteval(Memimage *src)
  2638  {
  2639  	int i, val, bpp;
  2640  	uchar uc;
  2641  
  2642  	unloadmemimage(src, src->r, &uc, 1);
  2643  	bpp = src->depth;
  2644  	uc <<= (src->r.min.x&(7/src->depth))*src->depth;
  2645  	uc &= ~(0xFF>>bpp);
  2646  	* pixel value is now in high part of byte. repeat throughout byte
  2647  	val = uc;
  2648  	for(i=bpp; i<8; i<<=1)
  2649  		val |= val>>i;
  2650  	return val;
  2651  }
  2652   *
  2653  */
  2654  
  2655  func _memfillcolor(i *Image, val draw.Color) {
  2656  	if val == draw.NoFill {
  2657  		return
  2658  	}
  2659  
  2660  	bits := _rgbatoimg(i, val)
  2661  	switch i.Depth {
  2662  	case 24: /* 24-bit images suck */
  2663  		for y := i.R.Min.Y; y < i.R.Max.Y; y++ {
  2664  			memset24(byteaddr(i, draw.Pt(i.R.Min.X, y)), bits, i.R.Dx())
  2665  		}
  2666  	default: /* 1, 2, 4, 8, 16, 32 */
  2667  		for d := i.Depth; d < 32; d *= 2 {
  2668  			bits = bits<<d | bits
  2669  		}
  2670  		var p [4]uint8
  2671  		p[0] = uint8(bits) /* make little endian */
  2672  		p[1] = uint8(bits >> 8)
  2673  		p[2] = uint8(bits >> 16)
  2674  		p[3] = uint8(bits >> 24)
  2675  		bits := *(*uint32)(unsafe.Pointer(&p[0]))
  2676  		memsetl(byteaddr(i, i.R.Min), bits, int(i.Width)*i.R.Dy())
  2677  	}
  2678  }