gonum.org/v1/gonum@v0.14.0/internal/asm/f64/l2norm_amd64.s (about)

     1  // Copyright ©2019 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build !noasm,!gccgo,!safe
     6  
     7  #include "textflag.h"
     8  
     9  #define SUMSQ X0
    10  #define ABSX X1
    11  #define SCALE X2
    12  #define ZERO X3
    13  #define TMP X4
    14  #define ABSMASK X5
    15  #define INF X7
    16  #define INFMASK X11
    17  #define NANMASK X12
    18  #define IDX AX
    19  #define LEN SI
    20  #define X_ DI
    21  
    22  #define ABSMASK_DATA l2nrodata<>+0(SB)
    23  #define INF_DATA l2nrodata<>+8(SB)
    24  #define NAN_DATA l2nrodata<>+16(SB)
    25  // AbsMask: every bit set except the sign bit; ANDing a float64 with it yields its absolute value.
    26  DATA l2nrodata<>+0(SB)/8, $0x7FFFFFFFFFFFFFFF
    27  // Inf: IEEE 754 bit pattern of +Inf (all exponent bits set, zero mantissa).
    28  DATA l2nrodata<>+8(SB)/8, $0x7FF0000000000000
    29  // NaN: a NaN bit pattern (all exponent bits and the top mantissa bit set, sign bit set).
    30  DATA l2nrodata<>+16(SB)/8, $0xFFF8000000000000
    31  GLOBL l2nrodata<>+0(SB), RODATA, $24
    32  
    33  // L2NormUnitary returns the L2-norm of x as scale*sqrt(sumsq) using a running scale; it returns 0 if x is empty or all zeros, NaN if any element is NaN, and otherwise +Inf if any element is ±Inf.
    34  // func L2NormUnitary(x []float64) (norm float64)
    35  TEXT ·L2NormUnitary(SB), NOSPLIT, $0
    36  	MOVQ x_len+8(FP), LEN // LEN = len(x)
    37  	MOVQ x_base+0(FP), X_ // X_ = &x[0]
    38  	PXOR ZERO, ZERO       // ZERO = 0
    39  	CMPQ LEN, $0          // if LEN == 0 { return 0 }
    40  	JZ   retZero
    41  
    42  	PXOR  INFMASK, INFMASK      // INFMASK = 0 (no Inf seen yet)
    43  	PXOR  NANMASK, NANMASK      // NANMASK = 0 (no NaN seen yet)
    44  	MOVSD $1.0, SUMSQ           // ssq = 1
    45  	XORPS SCALE, SCALE          // scale = 0
    46  	MOVSD ABSMASK_DATA, ABSMASK // ABSMASK = sign-clearing mask
    47  	MOVSD INF_DATA, INF         // INF = +Inf
    48  	XORQ  IDX, IDX              // idx = 0
    49  
    50  initZero:  // for ;x[i]==0; i++ {}
    51  	// Skip all leading zeros, to avoid divide by zero NaN
    52  	MOVSD   (X_)(IDX*8), ABSX // absxi = x[i]
    53  	UCOMISD ABSX, ZERO        // compare 0 with x[i]; PF is set when x[i] is NaN
    54  	JP      retNaN            // if isNaN(x[i]) { return NaN }
    55  	JNE     loop              // if x[i] != 0 { goto loop }
    56  	INCQ    IDX               // i++
    57  	CMPQ    IDX, LEN
    58  	JE      retZero           // if i == LEN { return 0 }
    59  	JMP     initZero
    60  
    61  loop:
    62  	MOVSD   (X_)(IDX*8), ABSX // absxi = x[i]
    63  	MOVUPS  ABSX, TMP         // tmp = absxi
    64  	CMPSD   ABSX, TMP, $3     // tmp = all ones if absxi is unordered with itself (NaN), else 0
    65  	ORPD    TMP, NANMASK      // NANMASK = NANMASK | IsNaN(absxi)
    66  	MOVSD   INF, TMP          // tmp = +Inf
    67  	ANDPD   ABSMASK, ABSX     // absxi = Abs(absxi)
    68  	CMPSD   ABSX, TMP, $0     // tmp = all ones if absxi == +Inf, else 0
    69  	ORPD    TMP, INFMASK      // INFMASK =  INFMASK | IsInf(absxi)
    70  	UCOMISD SCALE, ABSX       // compare absxi with scale
    71  	JA      adjScale          // if absxi > scale { goto adjScale }
    72  
    73  	DIVSD SCALE, ABSX // absxi = absxi / scale
    74  	MULSD ABSX, ABSX  // absxi *= absxi
    75  	ADDSD ABSX, SUMSQ // sumsq += absxi
    76  	INCQ  IDX         // i++
    77  	CMPQ  IDX, LEN
    78  	JNE   loop        // if i < LEN { continue }
    79  	JMP   retSum      // if i == LEN { goto retSum }
    80  
    81  adjScale:  // absxi > scale: rescale sumsq to the new, larger scale
    82  	DIVSD  ABSX, SCALE  // tmp = scale / absxi
    83  	MULSD  SCALE, SUMSQ // sumsq *= tmp
    84  	MULSD  SCALE, SUMSQ // sumsq *= tmp
    85  	ADDSD  $1.0, SUMSQ  // sumsq += 1
    86  	MOVUPS ABSX, SCALE  // scale = absxi
    87  	INCQ   IDX          // i++
    88  	CMPQ   IDX, LEN
    89  	JNE    loop         // if i < LEN { continue }
    90  
    91  retSum:  // Calculate return value
    92  	SQRTSD  SUMSQ, SUMSQ     // sumsq = sqrt(sumsq)
    93  	MULSD   SCALE, SUMSQ     // sumsq *= scale
    94  	MOVQ    SUMSQ, R10       // tmp = sumsq
    95  	UCOMISD ZERO, INFMASK    // PF is set when INFMASK is all ones (a NaN bit pattern)
    96  	CMOVQPS INF_DATA, R10    // if INFMASK { tmp = INF }
    97  	UCOMISD ZERO, NANMASK    // PF is set when NANMASK is all ones (a NaN bit pattern)
    98  	CMOVQPS NAN_DATA, R10    // if NANMASK { tmp = NaN }
    99  	MOVQ    R10, norm+24(FP) // return tmp
   100  	RET
   101  
   102  retZero:
   103  	MOVSD ZERO, norm+24(FP) // return 0
   104  	RET
   105  
   106  retNaN:
   107  	MOVSD NAN_DATA, TMP    // return NaN
   108  	MOVSD TMP, norm+24(FP)
   109  	RET