// Origin: gonum.org/v1/gonum@v0.14.0/internal/asm/f64/abssuminc_amd64.s

// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !noasm,!gccgo,!safe

#include "textflag.h"

// func L1NormInc(x []float64, n, incX int) (sum float64)
//
// L1NormInc sums the absolute values of n elements of x, reading one element
// every incX elements starting at x[0]. It returns 0 when n <= 0.
// No bounds checking is performed here: the strided reads must stay inside x.
//
// |x[i]| is never computed directly. Each partial sum p is updated as
//     p = max( p + x[i], p - x[i] )
// which equals p + |x[i]|.
//
// Register roles:
//   SI      read pointer into x
//   AX      byte stride           = incX * 8
//   DX      three times the stride = 3 * AX
//   CX      loop counter (n / 8 in the main loop, n % 8 in the tail)
//   BX      n % 8, elements left over for the scalar tail
//   X0,X2,X4,X6   packed partial sums (two lanes each)
//   X1,X3,X5,X7   working copies of the partial sums for the "p - x" side
//   X8..X11       loaded elements: low lane = element k, high lane = element k+4
TEXT ·L1NormInc(SB), NOSPLIT, $0
	MOVQ  x_base+0(FP), SI // SI = &x[0]
	MOVQ  n+24(FP), CX     // CX = n
	MOVQ  incX+32(FP), AX  // AX = incX
	SHLQ  $3, AX           // AX = incX * sizeof( float64 )  ( byte stride )
	MOVQ  AX, DX
	IMULQ $3, DX           // DX = 3 * AX
	PXOR  X0, X0           // all partial sums = 0
	PXOR  X1, X1
	PXOR  X2, X2
	PXOR  X3, X3
	PXOR  X4, X4
	PXOR  X5, X5
	PXOR  X6, X6
	PXOR  X7, X7
	TESTQ CX, CX
	JLE   absum_end        // if n <= 0 { return 0 }  ( signed: rejects negative n too )
	MOVQ  CX, BX
	ANDQ  $7, BX           // BX = n % 8
	SHRQ  $3, CX           // CX = n / 8
	JZ    absum_tail_start // if CX == 0 { goto absum_tail_start }

absum_loop: // do {  ( eight elements per iteration )
	// p_sum = max( p_sum + x[i], p_sum - x[i] )
	MOVSD  (SI), X8        // low lanes = elements 0..3 of this group
	MOVSD  (SI)(AX*1), X9
	MOVSD  (SI)(AX*2), X10
	MOVSD  (SI)(DX*1), X11
	LEAQ   (SI)(AX*4), SI  // SI += 4 * AX  ( advance four elements )
	MOVHPD (SI), X8        // high lanes = elements 4..7 of this group
	MOVHPD (SI)(AX*1), X9
	MOVHPD (SI)(AX*2), X10
	MOVHPD (SI)(DX*1), X11
	ADDPD  X8, X0          // even registers: p_sum + x
	ADDPD  X9, X2
	ADDPD  X10, X4
	ADDPD  X11, X6
	SUBPD  X8, X1          // odd registers:  p_sum - x
	SUBPD  X9, X3
	SUBPD  X10, X5
	SUBPD  X11, X7
	MAXPD  X1, X0          // p_sum = max( p_sum + x, p_sum - x )
	MAXPD  X3, X2
	MAXPD  X5, X4
	MAXPD  X7, X6
	MOVAPS X0, X1          // refresh the working copies for the next iteration
	MOVAPS X2, X3
	MOVAPS X4, X5
	MOVAPS X6, X7
	LEAQ   (SI)(AX*4), SI  // SI += 4 * AX  ( advance four more elements )
	DECQ   CX
	JNZ    absum_loop      // } while --CX != 0

	// Fold the four packed partial sums into X0.
	// The odd registers are copies of the even ones here, so this is
	// X0 = X0 + X2 + X4 + X6.
	ADDPD X3, X0
	ADDPD X5, X7
	ADDPD X7, X0

	// X0[0] = X0[0] + X0[1]
	MOVAPS X0, X1
	SHUFPD $0x3, X0, X0 // lower( X0 ) = upper( X0 )
	ADDSD  X1, X0
	TESTQ  BX, BX
	JZ     absum_end    // if BX == 0 { goto absum_end }

absum_tail_start: // Reset loop registers
	MOVQ  BX, CX // Loop counter: CX = n % 8
	XORPS X8, X8 // X8 = 0

absum_tail: // do {  ( one element per iteration )
	// p_sum = max( p_sum + x[i], p_sum - x[i] )
	MOVSD (SI), X8 // X8 = x[i]
	MOVSD X0, X1   // X1 = p_sum
	ADDSD X8, X0   // X0 = p_sum + x[i]
	SUBSD X8, X1   // X1 = p_sum - x[i]
	MAXSD X1, X0   // p_sum = max( X0, X1 )
	ADDQ  AX, SI   // SI += AX  ( next element )
	DECQ  CX
	JNZ   absum_tail // } while --CX != 0

absum_end: // return p_sum
	MOVSD X0, sum+40(FP)
	RET