github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/internal/asm/f64/addconst_amd64.s (about)

     1  // Copyright ©2016 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build !noasm,!gccgo,!safe
     6  
     7  #include "textflag.h"
     8  
     9  // func AddConst(alpha float64, x []float64)
    10  //
    11  // AddConst adds alpha to every element of x, in place:
    12  //
    13  //	for i := range x { x[i] += alpha }
    14  //
    15  // Arguments are read from the frame as alpha+0(FP), x_base+8(FP) and
    16  // x_len+16(FP). The bulk of x is processed 8 elements per iteration with
    17  // packed double-precision adds; the remaining len(x) % 8 elements are
    18  // handled one at a time. Only unaligned moves (MOVUPS) and scalar moves
    19  // are used, so x needs no particular alignment.
    20  //
    21  // Register roles:
    22  //	SI     = &x[0]
    23  //	AX     = i, index of the next element to process
    24  //	CX     = iteration counter of the current loop
    25  //	BX     = len(x) % 8, number of tail elements
    26  //	X4, X5 = { alpha, alpha }
    27  //	X0-X3  = data being updated
    28  TEXT ·AddConst(SB), NOSPLIT, $0
    29  	MOVQ   x_base+8(FP), SI // SI = &x[0]
    30  	MOVQ   x_len+16(FP), CX // CX = len(x)
    31  	TESTQ  CX, CX           // if len(x) == 0 { return }
    32  	JZ     ac_end
    33  	MOVSD  alpha+0(FP), X4  // low lane of X4 = alpha
    34  	SHUFPD $0, X4, X4       // X4 = { alpha, alpha }
    35  	MOVUPS X4, X5           // X5 = X4
    36  	XORQ   AX, AX           // i = 0
    37  	MOVQ   CX, BX
    38  	ANDQ   $7, BX           // BX = len(x) % 8
    39  	SHRQ   $3, CX           // CX = floor( len(x) / 8 )
    40  	JZ     ac_tail_start    // if CX == 0 { goto ac_tail_start }
    41  
    42  ac_loop: // Loop unrolled 8x   do {
    43  	MOVUPS (SI)(AX*8), X0   // X0 = x[i:i+2]
    44  	MOVUPS 16(SI)(AX*8), X1 // X1 = x[i+2:i+4]
    45  	MOVUPS 32(SI)(AX*8), X2 // X2 = x[i+4:i+6]
    46  	MOVUPS 48(SI)(AX*8), X3 // X3 = x[i+6:i+8]
    47  	ADDPD  X4, X0           // X0..X3 += { alpha, alpha }
    48  	ADDPD  X5, X1
    49  	ADDPD  X4, X2
    50  	ADDPD  X5, X3
    51  	MOVUPS X0, (SI)(AX*8)   // x[i:i+8] = X0..X3
    52  	MOVUPS X1, 16(SI)(AX*8)
    53  	MOVUPS X2, 32(SI)(AX*8)
    54  	MOVUPS X3, 48(SI)(AX*8)
    55  	ADDQ   $8, AX           // i += 8
    56  	DECQ   CX               // DECQ/JNZ rather than the slow, microcoded LOOP
    57  	JNZ    ac_loop          // } while --CX > 0
    58  	TESTQ  BX, BX           // if BX == 0 { return }
    59  	JZ     ac_end
    60  
    61  ac_tail_start: // Reset loop counters
    62  	MOVQ BX, CX // Loop counter: CX = BX (always > 0 here)
    63  
    64  ac_tail: // do {
    65  	MOVSD (SI)(AX*8), X0 // X0 = x[i]
    66  	ADDSD X4, X0         // X0 += alpha
    67  	MOVSD X0, (SI)(AX*8) // x[i] = X0
    68  	INCQ  AX             // ++i
    69  	DECQ  CX
    70  	JNZ   ac_tail        // } while --CX > 0
    71  
    72  ac_end:
    73  	RET