github.com/primecitizens/pcz/std@v0.2.1/core/cmp/bs_386.s (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  // 
     4  // Copyright 2018 The Go Authors. All rights reserved.
     5  // Use of this source code is governed by a BSD-style
     6  // license that can be found in the LICENSE file.
     7  
     8  //go:build pcz && 386
     9  
    10  #include "textflag.h"
    11  
    12  TEXT ·Bytes(SB),NOSPLIT,$0-28 // three-way comparison of a and b; the work is done by cmpbody<>
        	// Frame as addressed below: a at 0(FP) (base, len at +4), b at 12(FP) (base, len at +16),
        	// result word at 24(FP).
        	// NOTE(review): the 12-byte spacing between a and b looks like slice headers - confirm against the Go declaration.
        	// The loads are independent of each other; they only have to be done before the jump.
    13  	LEAL ret+24(FP), AX // AX = address of the result word cmpbody<> fills in (1/0/-1)
    14  	MOVL b_len+16(FP), DX // DX = blen
    15  	MOVL b_base+12(FP), DI // DI = b
    16  	MOVL a_len+4(FP), BX // BX = alen
    17  	MOVL a_base+0(FP), SI // SI = a
    18  	JMP cmpbody<>(SB) // tail jump: cmpbody<> stores the result through AX and executes the RET
    19  
    20  TEXT ·String(SB),NOSPLIT,$0-20 // three-way comparison of a and b; the work is done by cmpbody<>
        	// Frame as addressed below: a at 0(FP) (base, len at +4), b at 8(FP) (base, len at +12),
        	// result word at 16(FP).
        	// The loads are independent of each other; they only have to be done before the jump.
    21  	LEAL ret+16(FP), AX // AX = address of the result word cmpbody<> fills in (1/0/-1)
    22  	MOVL b_len+12(FP), DX // DX = blen
    23  	MOVL b_base+8(FP), DI // DI = b
    24  	MOVL a_len+4(FP), BX // BX = alen
    25  	MOVL a_base+0(FP), SI // SI = a
    26  	JMP cmpbody<>(SB) // tail jump: cmpbody<> stores the result through AX and executes the RET
    27  
    28  // cmpbody<> three-way compares the first min(alen, blen) bytes of a and b,
        // and falls back to comparing the lengths when those bytes are all equal.
        //
        // input:
    29  //   SI = a
    30  //   DI = b
    31  //   BX = alen
    32  //   DX = blen
    33  //   AX = address of return word (set to 1/0/-1)
        //
        // The word at (AX) is set to +1 if a > b, 0 if a == b, -1 if a < b, with
        // bytes ordered as unsigned values. AX itself is never modified; the other
        // input registers may be overwritten, and BP, CX, X0 and X1 are used as scratch.
    34  TEXT cmpbody<>(SB),NOSPLIT,$0-0
    35  	MOVL DX, BP // BP = blen for now
    36  	SUBL BX, DX // DX = blen-alen
    37  	JLE 2(PC) // blen <= alen: BP already holds the minimum, skip the next instruction
    38  	MOVL BX, BP // BP = min(alen, blen)
    39  	CMPL SI, DI
    40  	JEQ allsame // same start address: the common bytes are identical, only the lengths matter
    41  	CMPL BP, $4
    42  	JB small // fewer than 4 bytes in common
    43  #ifdef GO386_softfloat
    44  	JMP mediumloop // bypass largeloop, the only code in this function that touches X registers
    45  #endif
    46  largeloop: // compare 16 bytes per iteration
    47  	CMPL BP, $16
    48  	JB mediumloop // fewer than 16 bytes left
    49  	MOVOU (SI), X0 // unaligned 16-byte load from a
    50  	MOVOU (DI), X1 // unaligned 16-byte load from b
    51  	PCMPEQB X0, X1 // each byte of X1 = 0xff where a and b match, 0x00 where they differ
    52  	PMOVMSKB X1, BX // BX bit i = top bit of byte i of X1 (1 = equal)
    53  	XORL $0xffff, BX // convert EQ to NE
    54  	JNE diff16 // branch if at least one byte is not equal
    55  	ADDL $16, SI
    56  	ADDL $16, DI
    57  	SUBL $16, BP // BP = common bytes still to compare
    58  	JMP largeloop
    59  
    60  diff16: // BX = mask of differing byte positions within the current 16 bytes
    61  	BSFL BX, BX // index of first byte that differs
    62  	XORL DX, DX // clear DX, since SETHI below writes only its low byte
    63  	MOVB (SI)(BX*1), CX // CX = a's byte at that index
    64  	CMPB CX, (DI)(BX*1) // compare it with b's byte at the same index
    65  	SETHI DX // DX = 1 if a's byte is higher (unsigned), else 0
    66  	LEAL -1(DX*2), DX // convert 1/0 to +1/-1
    67  	MOVL DX, (AX) // store the result
    68  	RET
    69  
    70  mediumloop: // compare 4 bytes per iteration while more than 4 remain
    71  	CMPL BP, $4
    72  	JBE _0through4 // 4 or fewer bytes left: finish with a single tail load
    73  	MOVL (SI), BX // next 4 bytes of a
    74  	MOVL (DI), CX // next 4 bytes of b
    75  	CMPL BX, CX
    76  	JNE diff4
    77  	ADDL $4, SI
    78  	ADDL $4, DI
    79  	SUBL $4, BP // BP = common bytes still to compare
    80  	JMP mediumloop
    81  
    82  _0through4: // BP <= 4 here; this path is only reached when the common length was at least 4
    83  	MOVL -4(SI)(BP*1), BX // the 4 bytes ending at the end of a's common part; may overlap bytes already compared
    84  	MOVL -4(DI)(BP*1), CX // the same window of b
    85  	CMPL BX, CX
    86  	JEQ allsame // tail matches too: decide by length
    87  
    88  diff4: // BX, CX = 4-byte words of a and b that are known to differ
    89  	BSWAPL BX // reverse order of bytes, so the earliest byte in memory becomes the most significant
    90  	BSWAPL CX
    91  	XORL BX, CX // find bit differences
    92  	BSRL CX, CX // index of highest bit difference
    93  	SHRL CX, BX // move a's bit to bottom
    94  	ANDL $1, BX // mask bit: 1 means a has the 1 bit at the first difference, so a > b
    95  	LEAL -1(BX*2), BX // 1/0 => +1/-1
    96  	MOVL BX, (AX) // store the result
    97  	RET
    98  
    99  	// 0-3 bytes in common
   100  small:
   101  	LEAL (BP*8), CX // CX = number of bits in common
   102  	NEGL CX // CX = -8*BP; the shifts below use only the low 5 bits, i.e. 32-8*BP for BP = 1..3
   103  	JEQ allsame // BP == 0: there are no bytes to compare, decide by length
   104  
   105  	// load si: put the BP bytes at SI into the top bytes of SI, zero below.
        	// When the low byte of the address is above 0xfc, a 4-byte load at (SI)
        	// would run past the end of the current 256-byte-aligned block, so the
        	// 4 bytes ending at SI+BP are loaded instead.
        	// NOTE(review): presumably this avoids reading into a following page - confirm.
   106  	CMPB SI, $0xfc
   107  	JA si_high
   108  	MOVL (SI), SI // wanted bytes are the low BP bytes of the word
   109  	JMP si_finish
   110  si_high:
   111  	MOVL -4(SI)(BP*1), SI // wanted bytes are the top BP bytes of the word
   112  	SHRL CX, SI // bring them down to the bottom, dropping the bytes that precede a
   113  si_finish:
   114  	SHLL CX, SI // move the BP wanted bytes to the top, discarding everything else
   115  
   116  	// same for di
   117  	CMPB DI, $0xfc
   118  	JA di_high
   119  	MOVL (DI), DI // wanted bytes are the low BP bytes of the word
   120  	JMP di_finish
   121  di_high:
   122  	MOVL -4(DI)(BP*1), DI // wanted bytes are the top BP bytes of the word
   123  	SHRL CX, DI // bring them down to the bottom, dropping the bytes that precede b
   124  di_finish:
   125  	SHLL CX, DI // move the BP wanted bytes to the top, discarding everything else
   126  
   127  	BSWAPL SI // reverse order of bytes, so the earliest byte in memory becomes the most significant
   128  	BSWAPL DI
   129  	XORL SI, DI // find bit differences
   130  	JEQ allsame // no difference in the common bytes: decide by length
   131  	BSRL DI, CX // index of highest bit difference
   132  	SHRL CX, SI // move a's bit to bottom
   133  	ANDL $1, SI // mask bit: 1 means a has the 1 bit at the first difference, so a > b
   134  	LEAL -1(SI*2), BX // 1/0 => +1/-1
   135  	MOVL BX, (AX) // store the result
   136  	RET
   137  
   138  	// all the bytes in common are the same, so we just need
   139  	// to compare the lengths.
   140  allsame:
   141  	XORL BX, BX // zero BX and CX before the test: XORL changes the flags,
   142  	XORL CX, CX // and the SETcc instructions below write only the low byte
   143  	TESTL DX, DX // DX = blen-alen, computed at entry
   144  	SETLT BX // 1 if alen > blen
   145  	SETEQ CX // 1 if alen == blen
   146  	LEAL -1(CX)(BX*2), BX // 1,0,-1 result
   147  	MOVL BX, (AX) // store the result
   148  	RET