// +build amd64,!appengine,!go1.9

// Origin (from the listing header):
// github.com/coyove/sdss@v0.0.0-20231129015646-c2ec58cca6a2/contrib/roaring/popcnt_amd64.s
//
// Population-count kernels for amd64, written for the Go assembler
// (Plan 9 syntax: operands are "source, destination"; arguments and results
// live on the caller's frame and are addressed through the FP pseudo-register).
//
// Every TEXT directive below uses flag 4 (NOSPLIT in textflag.h) and a
// zero-byte local frame; none of the routines calls another function.
//
// The Go declarations are not in this file. The signatures quoted in the
// comments are inferred from the frame layouts and the 8-byte element stride
// used here -- confirm against the Go stubs.

// hasAsm reports whether the CPU advertises the POPCNT instruction.
// Presumably declared as: func hasAsm() bool
//
// It runs CPUID leaf 1 and returns bit 23 of ECX (the POPCNT feature flag)
// as a single byte holding 0 or 1.
// CPUID overwrites AX, BX, CX and DX.
TEXT ·hasAsm(SB),4,$0-1
	MOVQ	$1, AX			// CPUID leaf 1: processor info and feature bits
	CPUID
	SHRQ	$23, CX			// move the POPCNT feature bit down to bit 0
	ANDQ	$1, CX			// discard every other feature bit
	MOVB	CX, ret+0(FP)
	RET

// POPCNTQ DX, DX spelled out as raw bytes (F3 REX.W 0F B8 /r, ModRM 0xd2 =
// register-direct, reg=DX, rm=DX) so the file does not depend on the
// assembler knowing the mnemonic.
#define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2

// popcntSliceAsm returns the total number of set bits in s.
// Presumably declared as: func popcntSliceAsm(s []uint64) uint64
//
// Registers: AX = running total, SI = cursor into s, CX = words remaining,
// DX = scratch. An empty slice yields 0 without touching memory.
TEXT ·popcntSliceAsm(SB),4,$0-32
	XORQ	AX, AX
	MOVQ	s+0(FP), SI
	MOVQ	s_len+8(FP), CX
	TESTQ	CX, CX
	JZ	popcntSliceEnd
popcntSliceLoop:
	// Load first, then count register-to-register: this reuses the single
	// hand-encoded POPCNT above instead of a second, memory-operand encoding.
	MOVQ	(SI), DX
	POPCNTQ_DX_DX
	ADDQ	DX, AX
	ADDQ	$8, SI
	// DECQ/JNZ instead of the legacy LOOP instruction: same
	// "decrement CX, branch while non-zero" behaviour. No later instruction
	// reads the flags DECQ modifies.
	DECQ	CX
	JNZ	popcntSliceLoop
popcntSliceEnd:
	MOVQ	AX, ret+24(FP)
	RET

// popcntMaskSliceAsm returns the sum over i of popcount(s[i] AND NOT m[i]).
// Presumably declared as: func popcntMaskSliceAsm(s, m []uint64) uint64
//
// Only len(s) drives the loop; the length of m is never read, so the caller
// must pass an m holding at least len(s) elements.
// Registers: AX = running total, SI/DI = cursors into s/m, CX = words
// remaining, DX = scratch.
TEXT ·popcntMaskSliceAsm(SB),4,$0-56
	XORQ	AX, AX
	MOVQ	s+0(FP), SI
	MOVQ	s_len+8(FP), CX
	TESTQ	CX, CX
	JZ	popcntMaskSliceEnd
	MOVQ	m+24(FP), DI
popcntMaskSliceLoop:
	MOVQ	(DI), DX
	NOTQ	DX			// DX = ^m[i]
	ANDQ	(SI), DX		// DX = s[i] &^ m[i]
	POPCNTQ_DX_DX
	ADDQ	DX, AX
	ADDQ	$8, SI
	ADDQ	$8, DI
	DECQ	CX			// replaces LOOP; flags are not used afterwards
	JNZ	popcntMaskSliceLoop
popcntMaskSliceEnd:
	MOVQ	AX, ret+48(FP)
	RET

// popcntAndSliceAsm returns the sum over i of popcount(s[i] AND m[i]).
// Presumably declared as: func popcntAndSliceAsm(s, m []uint64) uint64
//
// Only len(s) drives the loop; the length of m is never read, so the caller
// must pass an m holding at least len(s) elements.
// Registers: AX = running total, SI/DI = cursors into s/m, CX = words
// remaining, DX = scratch.
TEXT ·popcntAndSliceAsm(SB),4,$0-56
	XORQ	AX, AX
	MOVQ	s+0(FP), SI
	MOVQ	s_len+8(FP), CX
	TESTQ	CX, CX
	JZ	popcntAndSliceEnd
	MOVQ	m+24(FP), DI
popcntAndSliceLoop:
	MOVQ	(DI), DX
	ANDQ	(SI), DX		// DX = s[i] & m[i]
	POPCNTQ_DX_DX
	ADDQ	DX, AX
	ADDQ	$8, SI
	ADDQ	$8, DI
	DECQ	CX			// replaces LOOP; flags are not used afterwards
	JNZ	popcntAndSliceLoop
popcntAndSliceEnd:
	MOVQ	AX, ret+48(FP)
	RET

// popcntOrSliceAsm returns the sum over i of popcount(s[i] OR m[i]).
// Presumably declared as: func popcntOrSliceAsm(s, m []uint64) uint64
//
// Only len(s) drives the loop; the length of m is never read, so the caller
// must pass an m holding at least len(s) elements.
// Registers: AX = running total, SI/DI = cursors into s/m, CX = words
// remaining, DX = scratch.
TEXT ·popcntOrSliceAsm(SB),4,$0-56
	XORQ	AX, AX
	MOVQ	s+0(FP), SI
	MOVQ	s_len+8(FP), CX
	TESTQ	CX, CX
	JZ	popcntOrSliceEnd
	MOVQ	m+24(FP), DI
popcntOrSliceLoop:
	MOVQ	(DI), DX
	ORQ	(SI), DX		// DX = s[i] | m[i]
	POPCNTQ_DX_DX
	ADDQ	DX, AX
	ADDQ	$8, SI
	ADDQ	$8, DI
	DECQ	CX			// replaces LOOP; flags are not used afterwards
	JNZ	popcntOrSliceLoop
popcntOrSliceEnd:
	MOVQ	AX, ret+48(FP)
	RET

// popcntXorSliceAsm returns the sum over i of popcount(s[i] XOR m[i]).
// Presumably declared as: func popcntXorSliceAsm(s, m []uint64) uint64
//
// Only len(s) drives the loop; the length of m is never read, so the caller
// must pass an m holding at least len(s) elements.
// Registers: AX = running total, SI/DI = cursors into s/m, CX = words
// remaining, DX = scratch.
TEXT ·popcntXorSliceAsm(SB),4,$0-56
	XORQ	AX, AX
	MOVQ	s+0(FP), SI
	MOVQ	s_len+8(FP), CX
	TESTQ	CX, CX
	JZ	popcntXorSliceEnd
	MOVQ	m+24(FP), DI
popcntXorSliceLoop:
	MOVQ	(DI), DX
	XORQ	(SI), DX		// DX = s[i] ^ m[i]
	POPCNTQ_DX_DX
	ADDQ	DX, AX
	ADDQ	$8, SI
	ADDQ	$8, DI
	DECQ	CX			// replaces LOOP; flags are not used afterwards
	JNZ	popcntXorSliceLoop
popcntXorSliceEnd:
	MOVQ	AX, ret+48(FP)
	RET