github.com/primecitizens/pcz/std@v0.2.1/core/mem/equal_386.s (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  //
     4  // Copyright 2018 The Go Authors. All rights reserved.
     5  // Use of this source code is governed by a BSD-style
     6  // license that can be found in the LICENSE file.
     7  
     8  //go:build pcz && 386
     9  
    10  #include "textflag.h"
    11  
    12  // Equal(a, b unsafe.Pointer, size uintptr) bool
        //
        // Writes 1 to ret when the size bytes at a and at b are identical and 0
        // otherwise. When a and b are the same address the answer is stored
        // immediately and neither buffer is read; every other case is handed to
        // memeqbody<> by a tail jump, which stores the result and returns to
        // the caller of Equal.
    13  TEXT ·Equal(SB),NOSPLIT,$0-13
    14  	MOVL b+4(FP), DI
    15  	MOVL a+0(FP), SI
    16  	CMPL DI, SI
    17  	JNE compare		// distinct addresses: the bytes have to be inspected
    18  	MOVB $1, ret+12(FP)	// same address: equal by definition
    19  	RET
    20  compare:
    21  	MOVL size+8(FP), BX	// memeqbody<> expects the byte count in BX
    22  	LEAL ret+12(FP), AX	// ...and the address of the result byte in AX
    23  	JMP memeqbody<>(SB)
    24  
    25  // memequal_varlen(a, b unsafe.Pointer) bool
        //
        // Same contract as a fixed-size comparison, except that the byte count
        // is not an argument: it is read from offset 4 of the closure that DX
        // points to. Identical addresses are answered with 1 right away; any
        // other pair is handed to memeqbody<> by a tail jump.
    26  TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
    27  	MOVL    b+4(FP), DI
    28  	MOVL    a+0(FP), SI
    29  	CMPL    DI, SI
    30  	JNE     compare		// distinct addresses: the bytes have to be inspected
    31  	MOVB    $1, ret+8(FP)	// same address: equal by definition
    32  	RET
    33  compare:
    34  	LEAL    ret+8(FP), AX	// memeqbody<> expects the result address in AX
    35  	MOVL    4(DX), BX	// compiler stores size at offset 4 in the closure
    36  	JMP     memeqbody<>(SB)
    37  
    38  // a in SI
    39  // b in DI
    40  // count in BX
    41  // address of result byte in AX
        //
        // Shared comparison body: stores 1 in the result byte when the count
        // bytes at a and b are identical, 0 otherwise. It is reached by JMP
        // (not CALL) from the entry points in this file, so each RET below
        // returns to the caller of that entry point.
        //
        // Strategy, chosen by count:
        //   count >= 64 : 64-byte blocks with XMM registers (hugeloop)
        //   count >= 4  : 4-byte words (bigloop), then one final 4-byte load
        //                 that ends exactly at the last byte (leftover)
        //   count <  4  : a single 4-byte load per side, with the unwanted
        //                 bytes shifted out (small)
        //
        // Overwrites SI, DI, BX, CX, DX and the flags; the 64-byte loop also
        // overwrites X0-X7.
    42  TEXT memeqbody<>(SB),NOSPLIT,$0-0
    43  	CMPL BX, $4
    44  	JB small	// fewer than 4 bytes: overlapping 4-byte loads are not possible
    45  
    46  	// 64 bytes at a time using xmm registers
    47  hugeloop:
    48  	CMPL BX, $64
    49  	JB bigloop	// fewer than 64 bytes left: continue word by word
    50  #ifdef GO386_softfloat
    51  	JMP bigloop	// with GO386_softfloat defined the XMM loop is never entered
    52  #endif
    53  	MOVOU (SI), X0
    54  	MOVOU (DI), X1
    55  	MOVOU 16(SI), X2
    56  	MOVOU 16(DI), X3
    57  	MOVOU 32(SI), X4
    58  	MOVOU 32(DI), X5
    59  	MOVOU 48(SI), X6
    60  	MOVOU 48(DI), X7
    61  	PCMPEQB X1, X0	// each result byte is 0xff where the two inputs match, else 0
    62  	PCMPEQB X3, X2
    63  	PCMPEQB X5, X4
    64  	PCMPEQB X7, X6
    65  	PAND X2, X0	// fold the four 16-byte results into X0: a byte stays
    66  	PAND X6, X4	// 0xff only if that position matched in all four chunks
    67  	PAND X4, X0
    68  	PMOVMSKB X0, DX	// DX = one bit per byte of X0; 0xffff means all 64 bytes matched
    69  	ADDL $64, SI
    70  	ADDL $64, DI
    71  	SUBL $64, BX
    72  	CMPL DX, $0xffff	// tested after the pointer updates, which overwrite the flags
    73  	JEQ hugeloop
    74  	MOVB $0, (AX)	// mismatch somewhere in this 64-byte block
    75  	RET
    76  
    77  	// 4 bytes at a time using 32-bit register
    78  bigloop:
    79  	CMPL BX, $4
    80  	JBE leftover	// 4 or fewer bytes remain: finish with one overlapping load
    81  	MOVL (SI), CX
    82  	MOVL (DI), DX
    83  	ADDL $4, SI
    84  	ADDL $4, DI
    85  	SUBL $4, BX
    86  	CMPL CX, DX
    87  	JEQ bigloop
    88  	MOVB $0, (AX)	// mismatch in this word
    89  	RET
    90  
    91  	// remaining 0-4 bytes
        	// Compare the 4 bytes that end at the last byte of each buffer. When
        	// fewer than 4 bytes remain this re-reads bytes that already compared
        	// equal, which cannot change the outcome. The load never starts before
        	// the buffer because this path is only reached with an original count
        	// of at least 4.
    92  leftover:
    93  	MOVL -4(SI)(BX*1), CX
    94  	MOVL -4(DI)(BX*1), DX
    95  	CMPL CX, DX
    96  	SETEQ (AX)
    97  	RET
    98  
        	// count is 0..3 here.
    99  small:
   100  	CMPL BX, $0
   101  	JEQ equal	// empty input: ZF from this compare makes SETEQ store 1
   102  
        	// CX = -(8*count). The shifts below use only the low 5 bits of CX, so
        	// the effective shift is 32 - 8*count, i.e. 24, 16 or 8 bits.
   103  	LEAL 0(BX*8), CX
   104  	NEGL CX
   105  
   106  	MOVL SI, DX
   107  	CMPB DX, $0xfc	// inspect only the low byte of the address
   108  	JA si_high	// low byte is 0xfd..0xff: a 4-byte load from SI would run past a 256-byte boundary
   109  
   110  	// load at SI won't cross a page boundary.
        	// The wanted bytes land in the low end of SI; the higher bytes are
        	// whatever follows in memory and are discarded by the final SHLL.
   111  	MOVL (SI), SI
   112  	JMP si_finish
   113  si_high:
   114  	// address ends in 111111xx. Load up to bytes we want, move to correct position.
        	// The load ends exactly at the last wanted byte, so nothing past the
        	// buffer is read; the shift moves the wanted bytes to the low end.
   115  	MOVL -4(SI)(BX*1), SI
   116  	SHRL CX, SI
   117  si_finish:
   118  
   119  	// same for DI.
   120  	MOVL DI, DX
   121  	CMPB DX, $0xfc
   122  	JA di_high
   123  	MOVL (DI), DI
   124  	JMP di_finish
   125  di_high:
   126  	MOVL -4(DI)(BX*1), DI
   127  	SHRL CX, DI
   128  di_finish:
   129  
        	// The low count bytes of the difference are zero exactly when the low
        	// count bytes of both words agree. Shifting left by 32 - 8*count drops
        	// the unwanted high bytes and sets ZF from what is left (the shift
        	// amount is never 0 here, so the flags are always updated).
   130  	SUBL SI, DI
   131  	SHLL CX, DI
        	// Reached with ZF from either the SHLL above or the CMPL BX, $0 at small.
   132  equal:
   133  	SETEQ (AX)
   134  	RET