github.com/dgraph-io/simdjson-go@v0.3.0/flatten_bits_amd64.s (about)

     1  //+build !noasm !appengine gc
     2  
     3  #define MASK    AX  // 64-bit mask being scanned; shifted right as its set bits are consumed
     4  #define INDEX   BX  // element index used to address 4-byte slots at (DI)(INDEX*4)
     5  #define ZEROS   CX  // trailing-zero count produced by TZCNTQ; also used as the shift count
     6  #define CARRIED DX  // count carried in from the previous call and carried out to the next one
     7  #define SHIFTS  R8  // total number of bit positions shifted out of MASK during one call
     8  #define POSITION R10 // running sum of every value stored through DI
     9  
    10  TEXT ·_flatten_bits_incremental(SB), $0-40
    11  	// Stack-argument entry point: unpack the five arguments into the registers
        	// that ·__flatten_bits_incremental operates on, call it, then write the
        	// updated position, carried count and index back through their pointers.
        	// SI, R11 and R12 keep the pointers across the call; the callee does not
        	// modify them.
    12  	MOVQ base_ptr+0(FP), DI              // DI = base address for the stores
    13  	MOVQ mask+16(FP), MASK               // mask is passed by value
    14  	MOVQ pbase+8(FP), SI
    15  	MOVQ (SI), INDEX                     // INDEX = *pbase
    16  	MOVQ carried+24(FP), R11
    17  	MOVQ (R11), CARRIED                  // CARRIED = *carried
    18  	MOVQ position+32(FP), R12
    19  	MOVQ (R12), POSITION                 // POSITION = *position
    20  	CALL ·__flatten_bits_incremental(SB)
    21  	MOVQ POSITION, (R12)                 // *position = POSITION
    22  	MOVQ CARRIED, (R11)                  // *carried = CARRIED
    23  	MOVQ INDEX, (SI)                     // *pbase = INDEX
    24  	RET
    25  
    26  TEXT ·__flatten_bits_incremental(SB), $0
        	// Register-based worker; it reads no stack arguments.
        	//   In:      DI (store base), MASK, INDEX, CARRIED, POSITION
        	//   Out:     INDEX, CARRIED, POSITION updated; MASK is zero on return
        	//   Scratch: ZEROS, SHIFTS, R9
        	// For each set bit of MASK, lowest first, one 32-bit value is stored at
        	// (DI)(INDEX*4): the number of zero bits skipped to reach the bit, plus
        	// one. The first value additionally includes the incoming CARRIED.
    27  	XORQ SHIFTS, SHIFTS                  // nothing shifted out of MASK yet
    28  
    29  	// The first set bit is handled separately because it absorbs CARRIED
    30  	TZCNTQ MASK, ZEROS
    31  	JCS    finish                        // carry set: MASK is zero, nothing to emit
    32  
    33  	// Drop the zeros and the bit itself with two shifts: the combined count
        	// can reach 63+1, which does not fit in a 6-bit shift count
    34  	SHRQ ZEROS, MASK
    35  	SHRQ $1, MASK
    36  	INCQ ZEROS                           // zeros skipped + the bit itself
    37  	ADDQ ZEROS, SHIFTS
    38  	ADDQ CARRIED, ZEROS                  // first value also covers the carried count
    39  	XORQ CARRIED, CARRIED                // carried count has been used up
    40  	MOVL ZEROS, (DI)(INDEX*4)
    41  	ADDQ ZEROS, POSITION
    42  	INCQ INDEX
    43  	JMP  scan
    44  
    45  emit:
    46  	INCQ ZEROS                           // zeros skipped + the bit itself
    47  	MOVL ZEROS, (DI)(INDEX*4)
    48  	INCQ INDEX
    49  	ADDQ ZEROS, POSITION
    50  	ADDQ ZEROS, SHIFTS
    51  	SHRQ ZEROS, MASK                     // one shift suffices: MASK's top bit is already clear, so the count is at most 63
    52  scan:
    53  	TZCNTQ MASK, ZEROS
    54  	JCC    emit                          // carry clear: another set bit remains
    55  
    56  finish:
    57  	MOVQ $64, R9
    58  	SUBQ SHIFTS, R9
    59  	ADDQ R9, CARRIED                     // CARRIED += 64 - SHIFTS (bits left unconsumed, carried to the next call)
    60  	RET