// Source listing: github.com/dgraph-io/simdjson-go@v0.3.0/flatten_bits_amd64.s
//+build !noasm !appengine gc

// NOTE(review): space-separated terms in the build constraint above are ORed
// by the Go toolchain (comma means AND). If the intent is "not noasm AND not
// appengine AND gc", the terms should be comma-separated -- confirm against
// the constraints on the Go files that declare these symbols before changing.

// Register assignments shared by both routines in this file.
#define MASK     AX  // 64-bit mask whose set bits are consumed from the least significant end
#define INDEX    BX  // index of the next 32-bit slot to write in the output array
#define ZEROS    CX  // scratch: trailing-zero count / value to emit (CX because variable shifts take their count from CL)
#define CARRIED  DX  // bit distance carried in from the previous call / out to the next one
#define SHIFTS   R8  // total number of bits shifted out of MASK during this call
#define POSITION R10 // running sum of every value emitted

// ·_flatten_bits_incremental is the stack-argument entry point ($0-40: five
// 8-byte arguments, no results) around ·__flatten_bits_incremental.
//
// Arguments as read below:
//   base_ptr+0(FP)   base address of the output array (32-bit elements)
//   pbase+8(FP)      pointer to the current output index (read, then written back)
//   mask+16(FP)      the 64-bit mask to flatten
//   carried+24(FP)   pointer to the carried distance (read, then written back)
//   position+32(FP)  pointer to the running position (read, then written back)
//
// NOTE(review): the Go-side parameter types are not visible in this file;
// the three pointers are dereferenced as 64-bit values -- confirm the Go
// declaration matches.
TEXT ·_flatten_bits_incremental(SB), $0-40

    MOVQ base_ptr+0(FP), DI
    MOVQ pbase+8(FP), SI
    MOVQ mask+16(FP), MASK
    MOVQ carried+24(FP), R11
    MOVQ position+32(FP), R12
    // Load the in/out values into the registers the helper expects.
    MOVQ (SI), INDEX
    MOVQ (R11), CARRIED
    MOVQ (R12), POSITION
    // The helper does not touch SI, R11 or R12, so the pointers survive the call.
    CALL ·__flatten_bits_incremental(SB)
    // Store the updated values back through the caller's pointers.
    MOVQ POSITION, (R12)
    MOVQ CARRIED, (R11)
    MOVQ INDEX, (SI)
    RET

// ·__flatten_bits_incremental does the actual work using a register-only
// calling convention (no stack arguments).
//
// In:   DI       = base address of the output array (32-bit elements)
//       MASK     = bits to flatten
//       INDEX    = next output slot
//       CARRIED  = distance carried over from the previous call
//       POSITION = running position
// Out:  INDEX, CARRIED and POSITION updated.
// Clobbers: MASK, ZEROS, SHIFTS, R9 and the flags.
//
// For every set bit in MASK, lowest first, it stores a 32-bit value at
// (DI)(INDEX*4): the bit distance from the previously consumed bit
// (trailing zeros + 1). For the first set bit only, CARRIED is added to that
// value and CARRIED is then cleared. Each stored value is also added to
// POSITION, and INDEX advances by one per store. On exit, CARRIED is
// increased by the number of MASK bits that were not consumed (64 - SHIFTS);
// when MASK is zero on entry nothing is stored and CARRIED grows by 64.
//
// NOTE(review): detecting an all-zero MASK through the carry flag relies on
// real TZCNT (BMI1) semantics; presumably callers gate this path on CPU
// feature detection -- confirm.
TEXT ·__flatten_bits_incremental(SB), $0
    XORQ SHIFTS, SHIFTS

    // First iteration takes CARRIED into account
    TZCNTQ MASK, ZEROS
    JCS    done        // carry is set if ZEROS == 64

    // Two shifts required because maximum combined shift (63+1) exceeds 6-bits
    // (a CL shift count is taken modulo 64, so a single shift by 64 would not clear MASK)
    SHRQ $1, MASK
    SHRQ ZEROS, MASK
    INCQ ZEROS                  // ZEROS = distance to the first set bit, counted 1-based
    ADDQ ZEROS, SHIFTS          // account for the bits removed from MASK (before CARRIED is mixed in)
    ADDQ CARRIED, ZEROS         // first emitted value also spans the bits carried from the previous call
    MOVL ZEROS, (DI)(INDEX*4)   // store the low 32 bits of the value
    ADDQ $1, INDEX
    ADDQ ZEROS, POSITION
    XORQ CARRIED, CARRIED       // Reset CARRIED to 0 (since it has been used)

loop:
    TZCNTQ MASK, ZEROS
    JCS    done        // carry is set if ZEROS == 64

    // Bit 63 of MASK is zero here (at least one bit has already been shifted
    // out), so ZEROS <= 62 and a single shift by ZEROS+1 <= 63 is safe.
    INCQ ZEROS
    SHRQ ZEROS, MASK
    ADDQ ZEROS, SHIFTS
    MOVL ZEROS, (DI)(INDEX*4)
    ADDQ $1, INDEX
    ADDQ ZEROS, POSITION
    JMP  loop

done:
    MOVQ $64, R9
    SUBQ SHIFTS, R9
    ADDQ R9, CARRIED   // CARRIED += 64 - shifts (remaining empty bits to carry over to next call)
    RET