github.com/gopherd/gonum@v0.0.4/internal/asm/f64/l2normdist_amd64.s (about)

     1  // Copyright ©2019 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build !noasm,!gccgo,!safe
     6  
     7  #include "textflag.h"
     8  
     9  #define SUMSQ X0
    10  #define ABSX X1
    11  #define SCALE X2
    12  #define ZERO X3
    13  #define TMP X4
    14  #define ABSMASK X5
    15  #define INF X7
    16  #define INFMASK X11
    17  #define NANMASK X12
    18  #define IDX AX
    19  #define X_ DI
    20  #define Y_ BX
    21  #define LEN SI
    22  
    23  #define ABSMASK_DATA l2nrodata<>+0(SB)
    24  #define INF_DATA l2nrodata<>+8(SB)
    25  #define NAN_DATA l2nrodata<>+16(SB)
    26  // AbsMask
    27  DATA l2nrodata<>+0(SB)/8, $0x7FFFFFFFFFFFFFFF
    28  // Inf
    29  DATA l2nrodata<>+8(SB)/8, $0x7FF0000000000000
    30  // NaN
    31  DATA l2nrodata<>+16(SB)/8, $0xFFF8000000000000
    32  GLOBL l2nrodata<>+0(SB), RODATA, $24
    33  
    34  // L2DistanceUnitary returns the L2-norm of x-y.
    35  // func L2DistanceUnitary(x,y []float64) (norm float64)
    36  TEXT ·L2DistanceUnitary(SB), NOSPLIT, $0
    37  	MOVQ    x_base+0(FP), X_
    38  	MOVQ    y_base+24(FP), Y_
    39  	PXOR    ZERO, ZERO
    40  	MOVQ    x_len+8(FP), LEN  // LEN = min( len(x), len(y) )
    41  	CMPQ    y_len+32(FP), LEN
    42  	CMOVQLE y_len+32(FP), LEN
    43  	CMPQ    LEN, $0           // if LEN == 0 { return 0 }
    44  	JZ      retZero
    45  
    46  	PXOR  INFMASK, INFMASK
    47  	PXOR  NANMASK, NANMASK
    48  	MOVSD $1.0, SUMSQ           // ssq = 1
    49  	XORPS SCALE, SCALE
    50  	MOVSD ABSMASK_DATA, ABSMASK
    51  	MOVSD INF_DATA, INF
    52  	XORQ  IDX, IDX              // idx == 0
    53  
    54  initZero:  // for ;x[i]==0; i++ {}
    55  	// Skip all leading zeros, to avoid divide by zero NaN
    56  	MOVSD   (X_)(IDX*8), ABSX // absxi = x[i]
    57  	SUBSD   (Y_)(IDX*8), ABSX // absxi = x[i]-y[i]
    58  	UCOMISD ABSX, ZERO
    59  	JP      retNaN            // if isNaN(absxi) { return NaN }
    60  	JNE     loop              // if absxi != 0 { goto loop }
    61  	INCQ    IDX               // i++
    62  	CMPQ    IDX, LEN
    63  	JE      retZero           // if i == LEN { return 0 }
    64  	JMP     initZero
    65  
    66  loop:
    67  	MOVSD   (X_)(IDX*8), ABSX // absxi = x[i]
    68  	SUBSD   (Y_)(IDX*8), ABSX // absxi = x[i]-y[i]
    69  	MOVUPS  ABSX, TMP
    70  	CMPSD   ABSX, TMP, $3
    71  	ORPD    TMP, NANMASK      // NANMASK = NANMASK | IsNaN(absxi)
    72  	MOVSD   INF, TMP
    73  	ANDPD   ABSMASK, ABSX     // absxi == Abs(absxi)
    74  	CMPSD   ABSX, TMP, $0
    75  	ORPD    TMP, INFMASK      // INFMASK =  INFMASK | IsInf(absxi)
    76  	UCOMISD SCALE, ABSX
    77  	JA      adjScale          // IF SCALE > ABSXI { goto adjScale }
    78  
    79  	DIVSD SCALE, ABSX // absxi = scale / absxi
    80  	MULSD ABSX, ABSX  // absxi *= absxi
    81  	ADDSD ABSX, SUMSQ // sumsq += absxi
    82  	INCQ  IDX         // i++
    83  	CMPQ  IDX, LEN
    84  	JNE   loop        // if i < LEN { continue }
    85  	JMP   retSum      // if i == LEN { goto retSum }
    86  
    87  adjScale:  // Scale > Absxi
    88  	DIVSD  ABSX, SCALE  // tmp = absxi / scale
    89  	MULSD  SCALE, SUMSQ // sumsq *= tmp
    90  	MULSD  SCALE, SUMSQ // sumsq *= tmp
    91  	ADDSD  $1.0, SUMSQ  // sumsq += 1
    92  	MOVUPS ABSX, SCALE  // scale = absxi
    93  	INCQ   IDX          // i++
    94  	CMPQ   IDX, LEN
    95  	JNE    loop         // if i < LEN { continue }
    96  
    97  retSum:  // Calculate return value
    98  	SQRTSD  SUMSQ, SUMSQ     // sumsq = sqrt(sumsq)
    99  	MULSD   SCALE, SUMSQ     // sumsq += scale
   100  	MOVQ    SUMSQ, R10       // tmp = sumsq
   101  	UCOMISD ZERO, INFMASK
   102  	CMOVQPS INF_DATA, R10    // if INFMASK { tmp = INF }
   103  	UCOMISD ZERO, NANMASK
   104  	CMOVQPS NAN_DATA, R10    // if NANMASK { tmp = NaN }
   105  	MOVQ    R10, norm+48(FP) // return tmp
   106  	RET
   107  
   108  retZero:
   109  	MOVSD ZERO, norm+48(FP) // return 0
   110  	RET
   111  
   112  retNaN:
   113  	MOVSD NAN_DATA, TMP    // return NaN
   114  	MOVSD TMP, norm+48(FP)
   115  	RET