github.com/gopherd/gonum@v0.0.4/internal/asm/f64/abssum_amd64.s (about)

     1  // Copyright ©2016 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build !noasm,!gccgo,!safe
     6  
     7  #include "textflag.h"
     8  
     9  // func L1Norm(x []float64) float64
    10  //
    11  // L1Norm returns the sum of the absolute values of the elements of x.
    12  // An empty x yields 0.
    13  //
    14  // Arguments are read from x_base+0(FP) and x_len+8(FP); the result is
    15  // written to sum+24(FP).
    16  //
    17  // Register use:
    18  //   SI        base pointer of x
    19  //   AX        element index i
    20  //   CX        loop counter (blocks of 8 elements, then tail elements)
    21  //   BX        len(x) % 8, the number of tail elements
    22  //   X0 - X7   partial sums; X1, X3, X5, X7 shadow X0, X2, X4, X6
    23  //   X8 - X11  elements loaded from x
    24  //
    25  // No absolute value is taken explicitly: for every element both
    26  // p_sum + x[i] and p_sum - x[i] are formed and the larger one is kept.
    27  TEXT ·L1Norm(SB), NOSPLIT, $0
    28  	MOVQ  x_base+0(FP), SI // SI = base pointer of x
    29  	MOVQ  x_len+8(FP), CX  // CX = len(x)
    30  	XORQ  AX, AX           // i = 0
    31  	PXOR  X0, X0           // p_sum_0 ... p_sum_7 = 0
    32  	PXOR  X1, X1
    33  	PXOR  X2, X2
    34  	PXOR  X3, X3
    35  	PXOR  X4, X4
    36  	PXOR  X5, X5
    37  	PXOR  X6, X6
    38  	PXOR  X7, X7
    39  	TESTQ CX, CX           // if len(x) == 0 { return 0 }
    40  	JZ    absum_end
    41  	MOVQ  CX, BX
    42  	ANDQ  $7, BX           // BX = len(x) % 8
    43  	SHRQ  $3, CX           // CX = floor( len(x) / 8 )
    44  	JZ    absum_tail_start // if CX == 0 { goto absum_tail_start }
    45  
    46  absum_loop: // do {
    47  	// p_sum_k = max( p_sum_k + X, p_sum_k - X )  for k = 0, 2, 4, 6
    48  	MOVUPS (SI)(AX*8), X8    // X8  = x[i:i+2]
    49  	MOVUPS 16(SI)(AX*8), X9  // X9  = x[i+2:i+4]
    50  	MOVUPS 32(SI)(AX*8), X10 // X10 = x[i+4:i+6]
    51  	MOVUPS 48(SI)(AX*8), X11 // X11 = x[i+6:i+8]
    52  	ADDPD  X8, X0            // p_sum_k += X  ( kept for positive values )
    53  	ADDPD  X9, X2
    54  	ADDPD  X10, X4
    55  	ADDPD  X11, X6
    56  	SUBPD  X8, X1            // p_sum_(k+1) -= X  ( kept for negative values )
    57  	SUBPD  X9, X3
    58  	SUBPD  X10, X5
    59  	SUBPD  X11, X7
    60  	MAXPD  X1, X0            // p_sum_k = max( p_sum_k, p_sum_(k+1) )
    61  	MAXPD  X3, X2
    62  	MAXPD  X5, X4
    63  	MAXPD  X7, X6
    64  	MOVAPS X0, X1            // p_sum_(k+1) = p_sum_k
    65  	MOVAPS X2, X3
    66  	MOVAPS X4, X5
    67  	MOVAPS X6, X7
    68  	ADDQ   $8, AX            // i += 8
    69  	DECQ   CX                // DECQ + JNZ rather than the slower LOOP
    70  	JNZ    absum_loop        // } while --CX > 0
    71  
    72  	// p_sum_0 = p_sum_0 + p_sum_2 + p_sum_4 + p_sum_6
    73  	// ( X3, X5 and X7 hold copies of X2, X4 and X6 here )
    74  	ADDPD X3, X0
    75  	ADDPD X5, X7
    76  	ADDPD X7, X0
    77  
    78  	// p_sum_0[0] = p_sum_0[0] + p_sum_0[1]
    79  	MOVAPS X0, X1
    80  	SHUFPD $0x3, X0, X0 // lower( p_sum_0 ) = upper( p_sum_0 )
    81  	ADDSD  X1, X0
    82  	TESTQ  BX, BX
    83  	JZ     absum_end    // if BX == 0 { goto absum_end }
    84  
    85  absum_tail_start: // Reset loop counter
    86  	MOVQ BX, CX // Loop counter:  CX = BX
    87  
    88  absum_tail: // do {
    89  	// p_sum_0 = max( p_sum_0 + x[i], p_sum_0 - x[i] )
    90  	MOVSD (SI)(AX*8), X8 // X8 = x[i]
    91  	MOVSD X0, X1         // p_sum_1 = p_sum_0
    92  	ADDSD X8, X0         // p_sum_0 += X8
    93  	SUBSD X8, X1         // p_sum_1 -= X8
    94  	MAXSD X1, X0         // p_sum_0 = max( p_sum_0, p_sum_1 )
    95  	INCQ  AX             // i++
    96  	DECQ  CX
    97  	JNZ   absum_tail     // } while --CX > 0
    98  
    99  absum_end: // return p_sum_0
   100  	MOVSD X0, sum+24(FP)
   101  	RET