// Source: github.com/gopherd/gonum@v0.0.4/internal/asm/f64/abssum_amd64.s

// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !noasm,!gccgo,!safe

#include "textflag.h"

// func L1Norm(x []float64) float64
//
// L1Norm accumulates max(p + x[i], p - x[i]) over every element of x,
// which is p + |x[i]| for ordinary (non-NaN) values, and stores the
// total in the result slot sum+24(FP). An empty slice yields 0.
//
// Register roles:
//   SI       base address of x
//   AX       element index i
//   CX       number of remaining 8-element blocks
//   BX       number of remaining tail elements (len(x) % 8)
//   X0..X7   four accumulator pairs (X0/X1, X2/X3, X4/X5, X6/X7); the
//            two registers of a pair hold the same value between
//            iterations
//   X8..X11  elements loaded in the current iteration
TEXT ·L1Norm(SB), NOSPLIT, $0
	MOVQ  x_base+0(FP), SI // SI = &x[0]
	MOVQ  x_len+8(FP), CX  // CX = len(x)
	XORQ  AX, AX           // i = 0
	PXOR  X0, X0           // clear all partial sums
	PXOR  X1, X1
	PXOR  X2, X2
	PXOR  X3, X3
	PXOR  X4, X4
	PXOR  X5, X5
	PXOR  X6, X6
	PXOR  X7, X7
	TESTQ CX, CX
	JZ    absum_end        // if len(x) == 0 { return 0 }
	MOVQ  CX, BX
	ANDQ  $7, BX           // BX = len(x) % 8
	SHRQ  $3, CX           // CX = floor( len(x) / 8 ); must stay directly before JZ (sets ZF)
	JZ    absum_tail       // if CX == 0 { goto absum_tail }; BX != 0 here since len(x) != 0

absum_loop: // do {
	// p_sum = max( p_sum + x[i], p_sum - x[i] ), eight elements per pass
	MOVUPS (SI)(AX*8), X8   // X8  = x[i:i+2]
	MOVUPS 16(SI)(AX*8), X9 // X9  = x[i+2:i+4]
	MOVUPS 32(SI)(AX*8), X10 // X10 = x[i+4:i+6]
	MOVUPS 48(SI)(AX*8), X11 // X11 = x[i+6:i+8]
	ADDPD  X8, X0           // even register of each pair: p_sum + x
	ADDPD  X9, X2
	ADDPD  X10, X4
	ADDPD  X11, X6
	SUBPD  X8, X1           // odd register of each pair:  p_sum - x
	SUBPD  X9, X3
	SUBPD  X10, X5
	SUBPD  X11, X7
	MAXPD  X1, X0           // even = max( even, odd )
	MAXPD  X3, X2
	MAXPD  X5, X4
	MAXPD  X7, X6
	MOVAPS X0, X1           // odd = even, ready for the next pass
	MOVAPS X2, X3
	MOVAPS X4, X5
	MOVAPS X6, X7
	ADDQ   $8, AX           // i += 8
	DECQ   CX               // DECQ/JNZ instead of the slow legacy LOOP;
	JNZ    absum_loop       // } while --CX != 0

	// Fold the four pairs into X0. After the copies above X3 == X2,
	// X5 == X4 and X7 == X6, so this is X0 + X2 + (X6 + X4).
	ADDPD X3, X0
	ADDPD X5, X7
	ADDPD X7, X0

	// X0[0] = X0[0] + X0[1]
	MOVAPS X0, X1
	SHUFPD $0x3, X0, X0 // lower( X0 ) = upper( X0 )
	ADDSD  X1, X0
	TESTQ  BX, BX
	JZ     absum_end    // if BX == 0 { goto absum_end }

absum_tail: // do {
	// p_sum = max( p_sum + x[i], p_sum - x[i] ), one element per pass
	MOVSD (SI)(AX*8), X8 // X8 = x[i]
	MOVSD X0, X1         // p_sum_1 = p_sum_0
	ADDSD X8, X0         // p_sum_0 += X8
	SUBSD X8, X1         // p_sum_1 -= X8
	MAXSD X1, X0         // p_sum_0 = max( p_sum_0, p_sum_1 )
	INCQ  AX             // i++
	DECQ  BX
	JNZ   absum_tail     // } while --BX != 0

absum_end: // return p_sum_0
	MOVSD X0, sum+24(FP)
	RET