github.com/outcaste-io/ristretto@v0.2.3/z/simd/asm2.go (about)

     1  // +build ignore
     2  
     3  package main
     4  
     5  import (
     6  	. "github.com/mmcloughlin/avo/build"
     7  	. "github.com/mmcloughlin/avo/operand"
     8  )
     9  
    10  //go:generate go run asm2.go -out search_amd64.s -stubs stub_search_amd64.go
    11  
    12  func main() {
    13  	TEXT("Search", NOSPLIT, "func(xs []uint64, k uint64) int16")
    14  	Doc("Search finds the first idx for which xs[idx] >= k in xs.")
    15  	ptr := Load(Param("xs").Base(), GP64())
    16  	n := Load(Param("xs").Len(), GP64())
    17  	key := Load(Param("k"), GP64())
    18  	retInd := ReturnIndex(0)
    19  	retVal, err := retInd.Resolve()
    20  	if err != nil {
    21  		panic(err)
    22  	}
    23  
    24  	Comment("Save n")
    25  	n2 := GP64()
    26  	MOVQ(n, n2)
    27  
    28  	Comment("Initialize idx register to zero.")
    29  	idx := GP64()
    30  	XORL(idx.As32(), idx.As32())
    31  
    32  	Label("loop")
    33  	m := Mem{Base: ptr, Index: idx, Scale: 8}
    34  
    35  	Comment("Unroll1")
    36  	CMPQ(m, key)
    37  	JAE(LabelRef("Found"))
    38  
    39  	Comment("Unroll2")
    40  	CMPQ(m.Offset(16), key)
    41  	JAE(LabelRef("Found2"))
    42  
    43  	Comment("Unroll3")
    44  	CMPQ(m.Offset(32), key)
    45  	JAE(LabelRef("Found3"))
    46  
    47  	Comment("Unroll4")
    48  	CMPQ(m.Offset(48), key)
    49  	JAE(LabelRef("Found4"))
    50  
    51  	Comment("plus8")
    52  	ADDQ(Imm(8), idx)
    53  	CMPQ(idx, n)
    54  	JB(LabelRef("loop"))
    55  	JMP(LabelRef("NotFound"))
    56  
    57  	Label("Found2")
    58  	ADDL(Imm(2), idx.As32())
    59  	JMP(LabelRef("Found"))
    60  
    61  	Label("Found3")
    62  	ADDL(Imm(4), idx.As32())
    63  	JMP(LabelRef("Found"))
    64  
    65  	Label("Found4")
    66  	ADDL(Imm(6), idx.As32())
    67  
    68  	Label("Found")
    69  	MOVL(idx.As32(), n2.As32()) // n2 is no longer being used
    70  
    71  	Label("NotFound")
    72  	MOVL(n2.As32(), idx.As32())
    73  	SHRL(Imm(31), idx.As32())
    74  	ADDL(n2.As32(), idx.As32())
    75  	SHRL(Imm(1), idx.As32())
    76  	MOVL(idx.As32(), retVal.Addr)
    77  	RET()
    78  
    79  	Generate()
    80  }