github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/klauspost/compress/flate/crc32_amd64.s (about)

     1  //+build !noasm !appengine
     2  
     3  // Copyright 2015, Klaus Post, see LICENSE for details.
     4  
     5  // func crc32sse(a []byte) hash -- zero-seeded CRC32 over the four bytes at a[0:4]
     6  TEXT ·crc32sse(SB), 7, $0
     7  	XORL BX, BX       // EBX = 0 seeds the CRC; a 32-bit write also clears the upper half of RBX
     8  	MOVQ a+0(FP), R10 // R10 = data pointer of a; the length word of a is never loaded
     9  
    10  	// crc32 ebx, dword ptr [r10] -- folds the four bytes at [R10] into EBX (hand-encoded)
    11  	BYTE $0xF2; BYTE $0x41; BYTE $0x0F; BYTE $0x38
    12  	BYTE $0xF1; BYTE $0x1A
    13  
    14  	MOVL BX, ret+24(FP) // hand the 32-bit result back
    15  	RET
    16  
    17  // func crc32sseAll(a []byte, dst []hash)
    18  TEXT ·crc32sseAll(SB), 7, $0
    19  	// Stores one zero-seeded CRC32 per 4-byte window a[i:i+4], i = 0..len(a)-4,
    20  	// into consecutive 32-bit slots of dst. The length of dst is never loaded.
    21  	MOVQ a_len+8(FP), R10 // R10: len(a)
    22  	MOVQ a+0(FP), R8      // R8: read cursor in a
    23  	MOVQ dst+24(FP), R9   // R9: write cursor in dst
    24  	SUBQ $4, R10          // R10: len(a) - 4
    25  	JZ   crcall_single    // len(a) == 4: exactly one window
    26  	JS   crcall_ret       // len(a) < 4: no window fits
    27  
    28  	// R10 becomes the number of four-window groups, R13 the windows left over.
    29  	MOVQ  R10, R13
    30  	ANDQ  $3, R13         // (len(a) - 4) & 3
    31  	ADDQ  $1, R13         // plus the last window: 1..4 leftovers
    32  	SHRQ  $2, R10         // (len(a) - 4) / 4
    33  	TESTQ R10, R10
    34  	JZ    crcall_tail
    35  
    36  crcall_quad:
    37  	// A single 8-byte load supplies the windows at byte offsets 0, 1, 2 and 3.
    38  	MOVQ (R8), AX         // EAX: bytes 0..3
    39  	MOVQ AX, CX
    40  	MOVQ AX, SI
    41  	MOVQ AX, R11
    42  	SHRQ $8, CX           // ECX: bytes 1..4
    43  	SHRQ $16, SI          // ESI: bytes 2..5
    44  	SHRQ $24, R11         // low half of R11: bytes 3..6
    45  	XORL BX, BX           // zero seed for each of the three accumulators
    46  	XORL DX, DX
    47  	XORL DI, DI
    48  
    49  	// crc32 ebx, eax
    50  	BYTE $0xF2; BYTE $0x0F; BYTE $0x38; BYTE $0xF1; BYTE $0xD8
    51  
    52  	// crc32 edx, ecx
    53  	BYTE $0xF2; BYTE $0x0F; BYTE $0x38; BYTE $0xF1; BYTE $0xD1
    54  
    55  	// crc32 edi, esi
    56  	BYTE $0xF2; BYTE $0x0F; BYTE $0x38; BYTE $0xF1; BYTE $0xFE
    57  
    58  	MOVL BX, 0(R9)
    59  	MOVL DX, 4(R9)
    60  	MOVL DI, 8(R9)
    61  
    62  	// The fourth window goes through EAX/EBX again, reusing the first encoding.
    63  	MOVL R11, AX
    64  	XORL BX, BX
    65  
    66  	// crc32 ebx, eax
    67  	BYTE $0xF2; BYTE $0x0F; BYTE $0x38; BYTE $0xF1; BYTE $0xD8
    68  	MOVL BX, 12(R9)
    69  
    70  	ADDQ $4, R8           // four windows done: step four input bytes
    71  	ADDQ $16, R9          // four 32-bit results written
    72  	SUBQ $1, R10
    73  	JNZ  crcall_quad
    74  
    75  crcall_tail:
    76  	// R13 >= 1 windows remain; each pass reseeds EBX and moves one byte ahead.
    77  	XORL BX, BX
    78  	MOVL (R8), AX
    79  
    80  	// crc32 ebx, eax
    81  	BYTE $0xF2; BYTE $0x0F; BYTE $0x38; BYTE $0xF1; BYTE $0xD8
    82  
    83  	MOVL BX, (R9)
    84  	ADDQ $1, R8
    85  	ADDQ $4, R9
    86  	SUBQ $1, R13
    87  	JNZ  crcall_tail
    88  
    89  crcall_ret:
    90  	RET
    91  
    92  crcall_single:
    93  	// len(a) == 4: run the tail loop for a single window.
    94  	MOVQ $1, R13
    95  	JMP  crcall_tail
    96  
    97  // func matchLenSSE4(a, b []byte, max int) int
    98  TEXT ·matchLenSSE4(SB), 7, $0
    99  	// Returns how many leading bytes of a and b are equal, examining at most
   100  	// max bytes. Neither slice length is loaded; max alone bounds the scan.
   101  	MOVQ  max+48(FP), R8 // R8: max, reduced to a count of 16-byte blocks below
   102  	MOVQ  a+0(FP), SI    // SI: cursor in a
   103  	MOVQ  b+24(FP), DI   // DI: cursor in b
   104  	MOVQ  R8, R13
   105  	ANDQ  $15, R13       // R13: max & 15, the bytes left for the tail compare
   106  	XORL  R9, R9         // R9: running match length
   107  	MOVL  $16, AX        // EAX and EDX: explicit operand lengths for PCMPESTRI
   108  	MOVL  $16, DX
   109  	SHRQ  $4, R8         // R8: max / 16
   110  	TESTQ R8, R8
   111  	JZ    mlen_tail
   112  
   113  mlen_block16:
   114  	MOVOU (DI), X1 // 16 bytes of b
   115  	MOVOU (SI), X0 // 16 bytes of a
   116  
   117  	// pcmpestri xmm0, xmm1, 0x18 (hand-encoded)
   118  	// 0x18 = unsigned bytes (0x0) | equal-each (0x8) | negative polarity (0x10):
   119  	// CF is set when some byte pair differs, ECX is the index of the first such pair.
   120  	BYTE $0x66; BYTE $0x0F; BYTE $0x3A; BYTE $0x61
   121  	BYTE $0xC1; BYTE $0x18
   122  
   123  	JC mlen_diff16
   124  
   125  	ADDQ $16, R9
   126  	ADDQ $16, SI
   127  	ADDQ $16, DI
   128  	SUBQ $1, R8
   129  	JNZ  mlen_block16
   130  
   131  mlen_tail:
   132  	// Fewer than 16 bytes remain; compare them with REP CMPSB.
   133  	TESTQ R13, R13
   134  	JZ    mlen_done
   135  	ADDQ  R13, R9 // assume the whole tail matches; corrected below if not
   136  	MOVQ  R13, CX // CX: byte count for the string compare
   137  
   138  	// Compares [SI] with [DI] one byte at a time, decrementing CX per byte,
   139  	// and stops at the first difference or when CX reaches zero.
   140  	CLD
   141  	REP; CMPSB
   142  
   143  	// ZF set here: the last byte compared was equal, so the tail matched fully.
   144  	JZ mlen_done
   145  
   146  	// CX counts the bytes after the differing one; remove them and that byte.
   147  	SUBQ CX, R9
   148  	SUBQ $1, R9
   149  	JMP  mlen_done
   150  
   151  mlen_diff16:
   152  	ADDQ CX, R9 // add the equal bytes that precede the difference in this block
   153  
   154  mlen_done:
   155  	MOVQ R9, ret+56(FP)
   156  	RET
   157  
   158  // func histogram(b []byte, h []int32)
   159  TEXT ·histogram(SB), 7, $0
   160  	// Adds 1 to the 32-bit counter h[v] for every byte v in b. The length of h
   161  	// is never loaded, so indices up to 255 are written without a bounds check.
   162  	MOVQ b_len+8(FP), CX // CX: len(b)
   163  	MOVQ b+0(FP), SI     // SI: cursor in b
   164  	MOVQ h+24(FP), DI    // DI: base of the counter table
   165  	XORL BX, BX          // BX: table index; only its low byte is written from here on
   166  	MOVQ CX, DX
   167  	SHRQ $3, DX          // DX: number of whole 8-byte words in b
   168  	JZ   hist_tail
   169  
   170  hist_words:
   171  	MOVQ (SI), AX // eight input bytes, consumed from the low end
   172  	MOVB AX, BX   // byte 0
   173  	SHRQ $8, AX
   174  	INCL (DI)(BX*4)
   175  
   176  	MOVB AX, BX // byte 1
   177  	SHRQ $8, AX
   178  	INCL (DI)(BX*4)
   179  
   180  	MOVB AX, BX // byte 2
   181  	SHRQ $8, AX
   182  	INCL (DI)(BX*4)
   183  
   184  	MOVB AX, BX // byte 3
   185  	SHRQ $8, AX
   186  	INCL (DI)(BX*4)
   187  
   188  	MOVB AX, BX // byte 4
   189  	SHRQ $8, AX
   190  	INCL (DI)(BX*4)
   191  
   192  	MOVB AX, BX // byte 5
   193  	SHRQ $8, AX
   194  	INCL (DI)(BX*4)
   195  
   196  	MOVB AX, BX // byte 6
   197  	SHRQ $8, AX
   198  	INCL (DI)(BX*4)
   199  
   200  	INCL (DI)(AX*4) // byte 7: after seven shifts AX holds nothing but the top byte
   201  
   202  	ADDQ $8, SI
   203  	SUBQ $1, DX
   204  	JNZ  hist_words
   205  
   206  hist_tail:
   207  	ANDQ $7, CX // CX: bytes left after the whole words
   208  	JZ   hist_ret
   209  
   210  hist_bytes:
   211  	MOVB (SI), BX // upper bits of BX are still zero, so BX is the byte value
   212  	INCL (DI)(BX*4)
   213  	ADDQ $1, SI
   214  	SUBQ $1, CX
   215  	JNZ  hist_bytes
   216  
   217  hist_ret:
   218  	RET