github.com/slayercat/go@v0.0.0-20170428012452-c51559813f61/src/hash/crc32/crc32_amd64p32.s (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // func castagnoliSSE42(crc uint32, p []byte) uint32
     8  // Folds p into crc using CRC32B/CRC32Q; crc is bit-inverted on entry and on exit.
     9  // Registers: AX = running CRC, SI = read cursor, CX = bytes remaining, BX = scratch.
    10  TEXT ·castagnoliSSE42(SB),NOSPLIT,$0
    11  	// Load the arguments; the 32-bit moves zero-extend into the 64-bit registers used below.
    12  	MOVL crc+0(FP), AX
    13  	MOVL p+4(FP), SI
    14  	MOVL p_len+8(FP), CX
    15  	NOTL AX
    16  
    17  	// Inputs shorter than 8 bytes skip alignment and go straight to the byte loop.
    18  	CMPQ CX, $8
    19  	JLT tail
    20  
    21  head:
    22  	// Consume single bytes until SI is a multiple of 8.
    23  	MOVQ SI, BX
    24  	ANDQ $7, BX
    25  	JEQ words
    26  	CRC32B (SI), AX
    27  	INCQ SI
    28  	DECQ CX
    29  	JMP head
    30  
    31  words:
    32  	// SI is 8-byte aligned here; take one quadword per pass while 8 or more bytes remain.
    33  	CMPQ CX, $8
    34  	JLT tail
    35  	CRC32Q (SI), AX
    36  	ADDQ $8, SI
    37  	SUBQ $8, CX
    38  	JMP words
    39  
    40  tail:
    41  	// Consume whatever is left one byte at a time.
    42  	TESTQ CX, CX
    43  	JEQ finish
    44  	CRC32B (SI), AX
    45  	INCQ SI
    46  	DECQ CX
    47  	JMP tail
    48  
    49  finish:
    50  	// Undo the entry inversion and store the result.
    51  	NOTL AX
    52  	MOVL AX, ret+16(FP)
    53  	RET
    54  
    55  // func haveSSE42() bool
    56  // Reports bit 20 of ECX from CPUID leaf 1 (the SSE4.2 feature flag).
    57  TEXT ·haveSSE42(SB),NOSPLIT,$0
    58  	MOVL $1, AX // select CPUID leaf 1
    59  	CPUID
    60  	SHRL $20, CX // move bit 20 down to bit 0
    61  	ANDL $1, CX // drop every other bit
    62  	MOVB CX, ret+0(FP)
    63  	RET
    64