github.com/gop9/olt@v0.0.0-20200202132135-d956aad50b08/framework/drawfillover_avx.go (about)

     1  //+build amd64,go1.10
     2  
     3  package framework
     4  
     5  import (
     6  	"fmt"
     7  	"image"
     8  )
     9  
    // The three functions below are declared without a body: their
    // implementations are provided outside this Go file (presumably by an
    // accompanying amd64 assembly file -- not visible here).

    // drawFillOver_SIMD_internal is the vectorised counterpart of the scalar loop
    // in drawFillOver. As called from drawFillOver: base points at the first byte
    // of the destination pixel buffer, i0 and i1 are the byte offsets of the start
    // and end of the first row to fill, stride is the byte distance between rows,
    // n is the number of rows, adivm is the pre-divided blend factor a/m, and
    // sr, sg, sb, sa are the source colour channels.
    // NOTE(review): the exact rounding of the assembly relative to the scalar
    // path cannot be confirmed from this file.
    10  func drawFillOver_SIMD_internal(base *uint8, i0, i1 int, stride, n int, adivm, sr, sg, sb, sa uint32)
    // getCPUID1 returns two feature words that useSIMD treats as the EDX and ECX
    // results of the CPUID instruction executed with EAX = 0x01.
    11  func getCPUID1() (edx, ecx uint32)
    // getCPUID70 returns two feature words that useSIMD treats as the EBX and ECX
    // results of the CPUID instruction executed with EAX = 0x07, ECX = 0x00.
    12  func getCPUID70() (ebx, ecx uint32)
    13  
    // debugUseSIMD, when set to true, makes the useSIMD feature probe print the
    // raw CPUID register values and the reason the SIMD path was accepted or
    // rejected. It is a compile-time switch and is off by default.
    14  const debugUseSIMD = false
    15  
    16  var useSIMD = func() bool {
    17  	dbgnosimd := func(reason string) {
    18  		if !debugUseSIMD {
    19  			return
    20  		}
    21  		fmt.Printf("can not use SIMD, %s\n", reason)
    22  	}
    23  	if debugUseSIMD {
    24  		fmt.Printf("useSIMD check\n")
    25  	}
    26  	edx, ecx := getCPUID1()
    27  	if debugUseSIMD {
    28  		fmt.Printf("EAX = 0x01 -> EDX = %#04x ECX = %#04x\n", edx, ecx)
    29  	}
    30  	if edx&(1<<25) == 0 {
    31  		dbgnosimd("no SSE")
    32  		return false
    33  	}
    34  	if edx&(1<<26) == 0 {
    35  		dbgnosimd("no SSE2")
    36  		return false
    37  	}
    38  	if ecx&(1<<28) == 0 {
    39  		dbgnosimd("no AVX1")
    40  		return false
    41  	}
    42  
    43  	ebx, ecx := getCPUID70()
    44  	if debugUseSIMD {
    45  		fmt.Printf("EAX = 0x07 ECX = 0x00 -> EBX = %#04x ECX = %#04x\n", ebx, ecx)
    46  	}
    47  
    48  	if ebx&(1<<5) == 0 {
    49  		dbgnosimd("no AVX2")
    50  		return false
    51  	}
    52  
    53  	if debugUseSIMD {
    54  		fmt.Printf("can use SIMD for drawFillOver\n")
    55  	}
    56  
    57  	return true
    58  }()
    59  
    60  func drawFillOver(dst *image.RGBA, r image.Rectangle, sr, sg, sb, sa uint32) {
    61  	const m = 1<<16 - 1
    62  	a := (m - sa) * 0x101
    63  
    64  	if useSIMD {
    65  		adivm := a / m
    66  		i0 := dst.PixOffset(r.Min.X, r.Min.Y)
    67  		i1 := i0 + r.Dx()*4
    68  		drawFillOver_SIMD_internal(&dst.Pix[0], i0, i1, dst.Stride, r.Max.Y-r.Min.Y, adivm, sr, sg, sb, sa)
    69  		return
    70  	}
    71  
    72  	i0 := dst.PixOffset(r.Min.X, r.Min.Y)
    73  	i1 := i0 + r.Dx()*4
    74  	for y := r.Min.Y; y != r.Max.Y; y++ {
    75  		for i := i0; i < i1; i += 4 {
    76  			dr := &dst.Pix[i+0]
    77  			dg := &dst.Pix[i+1]
    78  			db := &dst.Pix[i+2]
    79  			da := &dst.Pix[i+3]
    80  
    81  			*dr = uint8((uint32(*dr)*a/m + sr) >> 8)
    82  			*dg = uint8((uint32(*dg)*a/m + sg) >> 8)
    83  			*db = uint8((uint32(*db)*a/m + sb) >> 8)
    84  			*da = uint8((uint32(*da)*a/m + sa) >> 8)
    85  		}
    86  		i0 += dst.Stride
    87  		i1 += dst.Stride
    88  	}
    89  }