github.com/piotrnar/gocoin@v0.0.0-20240512203912-faa0448c5e96/lib/others/siphash/hash128_amd64.s (about)

     1  // +build amd64,!appengine,!gccgo
     2  
     3  // This is a translation of the gcc output of FloodyBerry's pure-C public
     4  // domain siphash implementation at https://github.com/floodyberry/siphash
     5  
     6  // This assembly code has been modified from the 64-bit output to produce the experimental 128-bit output.
     7  
     8  // SI = v0   (these four roles hold from afterSwitch through RET)
     9  // AX = v1
    10  // CX = v2
    11  // DX = v3
        //
        // Up to afterSwitch (setup, loopBody and the tail switch) the state is held
        // as R9 = v0, AX = v1, BX = v2, CX = v3 instead; the first round after
        // afterSwitch moves v0, v2 and v3 into SI, CX and DX.
        //
        // Other registers:
        //   SI  = b_base (until afterSwitch, where it is reused for v0)
        //   DI  = k1 during setup, then the byte offset into b
        //   R10 = len(b) rounded down to a multiple of 8
        //   R8  = message word currently being absorbed
        //   R11 = final message word: len(b)<<56 OR'ed with the 0..7 trailing bytes
        //   DX  = len(b), then the trailing byte count, then scratch in the switch
    12  
        // Hash128 hashes b under the key (k0, k1) using SipHash rounds: two rounds
        // per 8-byte message word (including the final length/tail word) and four
        // rounds before each of the two 64-bit result words. Each result word is
        // v0^v1^v2^v3; the first is stored at offset 40 and the second at offset 48
        // of the argument frame.
        // NOTE(review): the signature comment below names the results r0/r1 while
        // the stores use ret/ret1 -- confirm against the Go declaration.
    13  // func Hash128(k0, k1 uint64, b []byte) (r0 uint64, r1 uint64)
    14  TEXT	·Hash128(SB),4,$0-56	// flag 4 is NOSPLIT in Go's textflag.h; $0-56: no local frame, 56 bytes of args+results
    15  	MOVQ	k0+0(FP),CX	// CX = k0
    16  	MOVQ	$0x736F6D6570736575,R9	// ASCII "somepseu"
    17  	MOVQ	k1+8(FP),DI	// DI = k1 (DI becomes the byte offset once k1 is consumed)
    18  	MOVQ	$0x6C7967656E657261,BX	// ASCII "lygenera"
    19  	MOVQ	$0x646F72616E646F6D,AX	// ASCII "dorandom"
    20  	MOVQ	b_len+24(FP),DX	// DX = len(b)
    21  	XORQ	$0xEE,AX	// v1 ^= 0xEE; presumably the 128-bit-variant tweak -- confirm against the reference
    22  	MOVQ	DX,R11	// R11 = len(b); shifted into the top byte below
    23  	MOVQ	DX,R10	// R10 = len(b); rounded down below
    24  	XORQ	CX,R9	// v0 = "somepseu" ^ k0
    25  	XORQ	CX,BX	// v2 = "lygenera" ^ k0
    26  	MOVQ	$0x7465646279746573,CX	// ASCII "tedbytes" (k0 in CX is no longer needed)
    27  	XORQ	DI,AX	// v1 = "dorandom" ^ 0xEE ^ k1
    28  	XORQ	DI,CX	// v3 = "tedbytes" ^ k1
    29  	SHLQ	$0x38,R11	// R11 = len(b) << 56
    30  	XORQ	DI,DI	// DI = 0: byte offset into b
    31  	MOVQ	b_base+16(FP),SI	// SI = address of b[0]
    32  	ANDQ	$0xFFFFFFFFFFFFFFF8,R10	// R10 = len(b) &^ 7: bytes covered by whole 8-byte words
    33  	JE	afterLoop	// R10 == 0 (len(b) < 8): no whole word to absorb
    34  	XCHGQ	AX,AX	// exchanges AX with itself (state unchanged); presumably gcc's alignment padding
    35  loopBody:
    36  	MOVQ	0(SI)(DI*1),R8	// m = 8 bytes of b at offset DI (plain 64-bit load)
    37  	ADDQ	AX,R9	// round 1: v0 += v1
    38  	RORQ	$0x33,AX	// v1 = rotl(v1, 13)  (ror 51)
    39  	XORQ	R9,AX	// v1 ^= v0
    40  	RORQ	$0x20,R9	// v0 = rotl(v0, 32)
    41  	ADDQ	$0x8,DI	// offset += 8
    42  	XORQ	R8,CX	// v3 ^= m (before v3 is first used in round 1)
    43  	ADDQ	CX,BX	// v2 += v3
    44  	RORQ	$0x30,CX	// v3 = rotl(v3, 16)  (ror 48)
    45  	XORQ	BX,CX	// v3 ^= v2
    46  	ADDQ	AX,BX	// v2 += v1
    47  	RORQ	$0x2F,AX	// v1 = rotl(v1, 17)  (ror 47)
    48  	ADDQ	CX,R9	// v0 += v3
    49  	RORQ	$0x2B,CX	// v3 = rotl(v3, 21)  (ror 43)
    50  	XORQ	BX,AX	// v1 ^= v2
    51  	XORQ	R9,CX	// v3 ^= v0
    52  	RORQ	$0x20,BX	// v2 = rotl(v2, 32); end of round 1
    53  	ADDQ	AX,R9	// round 2: v0 += v1
    54  	ADDQ	CX,BX	// v2 += v3
    55  	RORQ	$0x33,AX	// v1 = rotl(v1, 13)
    56  	RORQ	$0x30,CX	// v3 = rotl(v3, 16)
    57  	XORQ	R9,AX	// v1 ^= v0
    58  	XORQ	BX,CX	// v3 ^= v2
    59  	RORQ	$0x20,R9	// v0 = rotl(v0, 32)
    60  	ADDQ	AX,BX	// v2 += v1
    61  	ADDQ	CX,R9	// v0 += v3
    62  	RORQ	$0x2F,AX	// v1 = rotl(v1, 17)
    63  	RORQ	$0x2B,CX	// v3 = rotl(v3, 21)
    64  	XORQ	BX,AX	// v1 ^= v2
    65  	RORQ	$0x20,BX	// v2 = rotl(v2, 32)
    66  	XORQ	R9,CX	// v3 ^= v0; end of round 2
    67  	XORQ	R8,R9	// v0 ^= m
    68  	CMPQ	R10,DI
    69  	JA	loopBody	// Go operand order: loop while R10 > DI (unsigned)
    70  afterLoop:
    71  	SUBQ	R10,DX	// DX = len(b) - R10 = number of trailing bytes (0..7)
    72  
    73  	CMPQ	DX,$0x7
    74  	JA	afterSwitch	// taken only if DX > 7, which cannot happen here; presumably gcc's switch range check
    75  
    76  	// no support for jump tables, so the switch on DX is a chain of compares
    77  
    78  	CMPQ	DX,$0x7
    79  	JE	sw7
    80  
    81  	CMPQ	DX,$0x6
    82  	JE	sw6
    83  
    84  	CMPQ	DX,$0x5
    85  	JE	sw5
    86  
    87  	CMPQ	DX,$0x4
    88  	JE	sw4
    89  
    90  	CMPQ	DX,$0x3
    91  	JE	sw3
    92  
    93  	CMPQ	DX,$0x2
    94  	JE	sw2
    95  
    96  	CMPQ	DX,$0x1
    97  	JE	sw1
    98  
    99  	JMP	afterSwitch	// DX == 0: no trailing bytes
   100  
        	// Fall-through chain: entering at swN ORs trailing bytes N-1 .. 0 into
        	// R11, byte i shifted left by 8*i bits. SI+DI addresses the first
        	// trailing byte (DI is 0 or has reached R10). DX is reused as scratch.
   101  sw7:	MOVBQZX	6(SI)(DI*1),DX
   102  	SHLQ	$0x30,DX
   103  	ORQ	DX,R11
   104  sw6:	MOVBQZX	0x5(SI)(DI*1),DX
   105  	SHLQ	$0x28,DX
   106  	ORQ	DX,R11
   107  sw5:	MOVBQZX	0x4(SI)(DI*1),DX
   108  	SHLQ	$0x20,DX
   109  	ORQ	DX,R11
   110  sw4:	MOVBQZX	0x3(SI)(DI*1),DX
   111  	SHLQ	$0x18,DX
   112  	ORQ	DX,R11
   113  sw3:	MOVBQZX	0x2(SI)(DI*1),DX
   114  	SHLQ	$0x10,DX
   115  	ORQ	DX,R11
   116  sw2:	MOVBQZX	0x1(SI)(DI*1),DX
   117  	SHLQ	$0x8,DX
   118  	ORQ	DX,R11
   119  sw1:	MOVBQZX	0(SI)(DI*1),DX
   120  	ORQ	DX,R11
   121  afterSwitch:
   122  	LEAQ	(AX)(R9*1),SI	// last word, round 1: SI = v0 + v1 (v0 moves from R9 to SI)
   123  	XORQ	R11,CX	// v3 ^= final word (v3 is still in CX here)
   124  	RORQ	$0x33,AX	// v1 = rotl(v1, 13)
   125  	ADDQ	CX,BX	// v2 += v3 (v2 is still in BX here)
   126  	MOVQ	CX,DX	// v3 moves from CX to DX
   127  	XORQ	SI,AX	// v1 ^= v0
   128  	RORQ	$0x30,DX	// v3 = rotl(v3, 16)
   129  	RORQ	$0x20,SI	// v0 = rotl(v0, 32)
   130  	LEAQ	0(BX)(AX*1),CX	// CX = v2 + v1 (v2 moves from BX to CX)
   131  	XORQ	BX,DX	// v3 ^= v2, using BX which still holds v2 from before the += v1
   132  	RORQ	$0x2F,AX	// v1 = rotl(v1, 17)
   133  	ADDQ	DX,SI	// v0 += v3
   134  	RORQ	$0x2B,DX	// v3 = rotl(v3, 21)
   135  	XORQ	CX,AX	// v1 ^= v2
   136  	XORQ	SI,DX	// v3 ^= v0
   137  	RORQ	$0x20,CX	// v2 = rotl(v2, 32); end of round 1
   138  	ADDQ	AX,SI	// last word, round 2 (same round, registers now SI/AX/CX/DX)
   139  	RORQ	$0x33,AX
   140  	ADDQ	DX,CX
   141  	XORQ	SI,AX
   142  	RORQ	$0x30,DX
   143  	RORQ	$0x20,SI
   144  	XORQ	CX,DX
   145  	ADDQ	AX,CX
   146  	RORQ	$0x2F,AX
   147  	ADDQ	DX,SI
   148  	XORQ	CX,AX
   149  	RORQ	$0x2B,DX
   150  	RORQ	$0x20,CX
   151  	XORQ	SI,DX	// end of round 2
   152  	XORQ	R11,SI	// v0 ^= final word
   153  	XORB	$0xEE,CL	// v2 ^= 0xEE (low byte only) before the first finalization
   154  	ADDQ	AX,SI	// first finalization, round 1 of 4
   155  	RORQ	$0x33,AX
   156  	ADDQ	DX,CX
   157  	RORQ	$0x30,DX
   158  	XORQ	SI,AX
   159  	XORQ	CX,DX
   160  	RORQ	$0x20,SI
   161  	ADDQ	AX,CX
   162  	ADDQ	DX,SI
   163  	RORQ	$0x2F,AX
   164  	RORQ	$0x2B,DX
   165  	XORQ	CX,AX
   166  	XORQ	SI,DX
   167  	RORQ	$0x20,CX
   168  	ADDQ	AX,SI	// first finalization, round 2 of 4
   169  	ADDQ	DX,CX
   170  	RORQ	$0x33,AX
   171  	RORQ	$0x30,DX
   172  	XORQ	SI,AX
   173  	RORQ	$0x20,SI
   174  	XORQ	CX,DX
   175  	ADDQ	AX,CX
   176  	RORQ	$0x2F,AX
   177  	ADDQ	DX,SI
   178  	RORQ	$0x2B,DX
   179  	XORQ	CX,AX
   180  	XORQ	SI,DX
   181  	RORQ	$0x20,CX
   182  	ADDQ	AX,SI	// first finalization, round 3 of 4
   183  	ADDQ	DX,CX
   184  	RORQ	$0x33,AX
   185  	RORQ	$0x30,DX
   186  	XORQ	CX,DX
   187  	XORQ	SI,AX
   188  	RORQ	$0x20,SI
   189  	ADDQ	DX,SI
   190  	ADDQ	AX,CX
   191  	RORQ	$0x2F,AX
   192  	XORQ	CX,AX
   193  	RORQ	$0x2B,DX
   194  	RORQ	$0x20,CX
   195  	XORQ	SI,DX
   196  
   197  	// gcc optimized the tail end of this function differently.  However,
   198  	// we need to preserve our registers (v0..v3 in SI, AX, CX, DX) to carry
   199  	// out the second stage of the finalization.  This is a duplicate of an
   200  	// earlier finalization round and serves as round 4 of 4.
   201  
   202  	ADDQ	AX,SI
   203  	RORQ	$0x33,AX
   204  	ADDQ	DX,CX
   205  	RORQ	$0x30,DX
   206  	XORQ	SI,AX
   207  	XORQ	CX,DX
   208  	RORQ	$0x20,SI
   209  	ADDQ	AX,CX
   210  	ADDQ	DX,SI
   211  	RORQ	$0x2F,AX
   212  	RORQ	$0x2B,DX
   213  	XORQ	CX,AX
   214  	XORQ	SI,DX
   215  	RORQ	$0x20,CX
   216  
   217  	// Stuff the result into BX instead of AX as gcc had done, so that
   218  	// AX (v1) stays intact for the second finalization.
   219  	MOVQ	SI,BX	// BX = v0
   220  	XORQ	AX,BX	// BX ^= v1
   221  	XORQ	DX,BX	// BX ^= v3
   222  	XORQ	CX,BX	// BX ^= v2
   223  	MOVQ	BX,ret+40(FP)	// first result word = v0 ^ v1 ^ v2 ^ v3
   224  
   225  	// Start the second finalization round
   226  
   227  	XORB	$0xDD,AL	// v1 ^= 0xDD (low byte only)
   228  	ADDQ	AX,SI	// second finalization, round 1 of 4
   229  	RORQ	$0x33,AX
   230  	ADDQ	DX,CX
   231  	RORQ	$0x30,DX
   232  	XORQ	SI,AX
   233  	XORQ	CX,DX
   234  	RORQ	$0x20,SI
   235  	ADDQ	AX,CX
   236  	ADDQ	DX,SI
   237  	RORQ	$0x2F,AX
   238  	RORQ	$0x2B,DX
   239  	XORQ	CX,AX
   240  	XORQ	SI,DX
   241  	RORQ	$0x20,CX
   242  	ADDQ	AX,SI	// second finalization, round 2 of 4
   243  	ADDQ	DX,CX
   244  	RORQ	$0x33,AX
   245  	RORQ	$0x30,DX
   246  	XORQ	SI,AX
   247  	RORQ	$0x20,SI
   248  	XORQ	CX,DX
   249  	ADDQ	AX,CX
   250  	RORQ	$0x2F,AX
   251  	ADDQ	DX,SI
   252  	RORQ	$0x2B,DX
   253  	XORQ	CX,AX
   254  	XORQ	SI,DX
   255  	RORQ	$0x20,CX
   256  	ADDQ	AX,SI	// second finalization, round 3 of 4
   257  	ADDQ	DX,CX
   258  	RORQ	$0x33,AX
   259  	RORQ	$0x30,DX
   260  	XORQ	CX,DX
   261  	XORQ	SI,AX
   262  	RORQ	$0x20,SI
   263  	ADDQ	DX,SI
   264  	ADDQ	AX,CX
   265  	RORQ	$0x2F,AX
   266  	XORQ	CX,AX
   267  	RORQ	$0x2B,DX
   268  	RORQ	$0x20,CX
   269  	XORQ	SI,DX
   270  
   271  	ADDQ	AX,SI	// second finalization, round 4 of 4
   272  	RORQ	$0x33,AX
   273  	ADDQ	DX,CX
   274  	RORQ	$0x30,DX
   275  	XORQ	SI,AX
   276  	XORQ	CX,DX
   277  	RORQ	$0x20,SI
   278  	ADDQ	AX,CX
   279  	ADDQ	DX,SI
   280  	RORQ	$0x2F,AX
   281  	RORQ	$0x2B,DX
   282  	XORQ	CX,AX
   283  	XORQ	SI,DX
   284  	RORQ	$0x20,CX
   285  
   286  	MOVQ	SI,BX	// BX = v0
   287  	XORQ	AX,BX	// BX ^= v1
   288  	XORQ	DX,BX	// BX ^= v3
   289  	XORQ	CX,BX	// BX ^= v2
   290  	MOVQ	BX,ret1+48(FP)	// second result word = v0 ^ v1 ^ v2 ^ v3
   291  
   292  	RET