github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/klauspost/crc32/crc32_amd64p32.s (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build gc
     6  
     7  #define NOSPLIT 4 // flag operand of the TEXT directives in this file; value presumably mirrors NOSPLIT in Go's textflag.h - confirm
     8  #define RODATA 8 // not referenced in the visible part of this file; value presumably mirrors RODATA in Go's textflag.h - confirm
     9  
    10  // func castagnoliSSE42(crc uint32, p []byte) uint32
        //
        // Folds the bytes of p into crc using the CRC32B/CRC32Q instructions and
        // returns the updated value. crc is bit-inverted on entry and the result
        // is bit-inverted again before it is stored.
        //
        // Argument layout, as read below: crc at 0(FP), p's data pointer at 4(FP),
        // len(p) at 8(FP), result at 16(FP). Pointer and length are loaded as
        // 32-bit values (MOVL), which zeroes the upper halves of SI and CX, so the
        // 64-bit compares and arithmetic performed on them below are safe.
        //
        // Registers: AX = running CRC, SI = read cursor, CX = bytes remaining,
        // BX = scratch for the alignment test.
        //
        // NOTE(review): nothing here checks that the CPU supports the CRC32
        // instruction; presumably callers gate on haveSSE42 first - confirm.
    11  TEXT ·castagnoliSSE42(SB), NOSPLIT, $0
    12  	MOVL crc+0(FP), AX   // CRC value
    13  	MOVL p+4(FP), SI     // data pointer
    14  	MOVL p_len+8(FP), CX // len(p)
    15  
    16  	NOTL AX // work on the inverted CRC; undone at done:
    17  
    18  	// With fewer than 8 bytes there is no full 8-byte chunk to process,
        	// so skip alignment and go straight to the byte-by-byte loop.
    19  	CMPQ CX, $8
    20  	JL   cleanup
    21  
    22  	// Process individual bytes until the read cursor is 8-byte aligned.
        	// This loop is entered only with CX >= 8 and consumes at most 7 bytes,
        	// so CX cannot reach zero here.
    23  startup:
    24  	MOVQ SI, BX
    25  	ANDQ $7, BX  // BX = SI mod 8; sets ZF when SI is already aligned
    26  	JZ   aligned
    27  
    28  	CRC32B (SI), AX // fold one byte into the CRC
    29  	DECQ   CX
    30  	INCQ   SI
    31  	JMP    startup
    32  
    33  aligned:
    34  	// The input is now 8-byte aligned and we can process 8-byte chunks
        	// for as long as at least 8 bytes remain.
    35  	CMPQ CX, $8
    36  	JL   cleanup // signed compare; CX is never negative on this path
    37  
    38  	CRC32Q (SI), AX // fold eight bytes into the CRC
    39  	ADDQ   $8, SI
    40  	SUBQ   $8, CX
    41  	JMP    aligned
    42  
    43  cleanup:
    44  	// Any bytes left over (fewer than 8 when arriving from either path
        	// above) are processed one at a time until CX reaches zero.
    45  	CMPQ CX, $0
    46  	JE   done
    47  
    48  	CRC32B (SI), AX // fold one byte into the CRC
    49  	INCQ   SI
    50  	DECQ   CX
    51  	JMP    cleanup
    52  
    53  done:
    54  	NOTL AX             // undo the inversion applied on entry
    55  	MOVL AX, ret+16(FP) // store the uint32 result
    56  	RET
    57  
    58  // func haveSSE42() bool
        //
        // Executes CPUID with AX = 1 and returns bit 20 of the resulting CX as a
        // bool. Per Intel's CPUID documentation, that bit of leaf 1 is the SSE4.2
        // feature flag. CPUID also writes AX, BX and DX; those results are
        // ignored here.
    59  TEXT ·haveSSE42(SB), NOSPLIT, $0
    60  	XORQ AX, AX
    61  	INCL AX           // AX = 1: the CPUID leaf to query
    62  	CPUID
    63  	SHRQ $20, CX      // move bit 20 of CX down to bit 0
    64  	ANDQ $1, CX       // keep only that bit, so CX is 0 or 1
    65  	MOVB CX, ret+0(FP) // store the low byte as the bool result
    66  	RET
    67