// Source: github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/internal/asm/f64/addconst_amd64.s

// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !noasm,!gccgo,!safe

#include "textflag.h"

// func AddConst(alpha float64, x []float64)
//
// AddConst adds alpha to every element of x in place:
//
//	for i := range x { x[i] += alpha }
//
// Arguments (Go stack-based calling convention, offsets from FP):
//	alpha   +0   float64 addend
//	x_base  +8   pointer to the first element of x
//	x_len   +16  number of elements in x
// The slice capacity word completes the 32-byte argument area declared in
// the TEXT directive; it is never read.
//
// Register roles:
//	SI      base address of x
//	AX      element index i
//	CX      remaining iterations of the loop currently running
//	BX      len(x) % 8, the element count left for the scalar tail
//	X4, X5  { alpha, alpha }; two copies are alternated in the unrolled body
//	X0-X3   data being updated
//
// The routine uses no stack frame and makes no calls. All vector accesses use
// MOVUPS, so x does not need to be 16-byte aligned.
TEXT ·AddConst(SB), NOSPLIT, $0-32
	MOVQ   x_base+8(FP), SI // SI = &x[0]
	MOVQ   x_len+16(FP), CX // CX = len(x)
	TESTQ  CX, CX           // if len(x) == 0 { return }
	JZ     ac_end
	MOVSD  alpha+0(FP), X4  // low lane of X4 = alpha
	SHUFPD $0, X4, X4       // X4 = { alpha, alpha }
	MOVUPS X4, X5           // X5 = X4
	XORQ   AX, AX           // i = 0
	MOVQ   CX, BX
	ANDQ   $7, BX           // BX = len(x) % 8
	SHRQ   $3, CX           // CX = floor( len(x) / 8 ); sets ZF when the result is 0
	JZ     ac_tail_start    // fewer than 8 elements: only the scalar tail runs

ac_loop: // Unrolled 8x: each pass updates x[i:i+8]. do {
	MOVUPS (SI)(AX*8), X0   // X0 = x[i:i+2]
	MOVUPS 16(SI)(AX*8), X1 // X1 = x[i+2:i+4]
	MOVUPS 32(SI)(AX*8), X2 // X2 = x[i+4:i+6]
	MOVUPS 48(SI)(AX*8), X3 // X3 = x[i+6:i+8]
	ADDPD  X4, X0           // X_k += { alpha, alpha }
	ADDPD  X5, X1
	ADDPD  X4, X2
	ADDPD  X5, X3
	MOVUPS X0, (SI)(AX*8)   // x[i:i+8] = X0..X3
	MOVUPS X1, 16(SI)(AX*8)
	MOVUPS X2, 32(SI)(AX*8)
	MOVUPS X3, 48(SI)(AX*8)
	ADDQ   $8, AX           // i += 8
	DECQ   CX               // } while --CX != 0
	JNZ    ac_loop
	TESTQ  BX, BX           // if len(x) % 8 == 0 { return }
	JZ     ac_end

ac_tail_start: // BX != 0 here: len(x) != 0 and either len(x) < 8 or the check above passed
	MOVQ BX, CX // CX = number of tail elements

ac_tail: // One element per pass. do {
	MOVSD (SI)(AX*8), X0 // X0 = x[i]
	ADDSD X4, X0         // X0 += alpha
	MOVSD X0, (SI)(AX*8) // x[i] = X0
	INCQ  AX             // ++i
	DECQ  CX             // } while --CX != 0
	JNZ   ac_tail

ac_end:
	RET