// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !noasm,!gccgo,!safe

#include "textflag.h"

// Register roles. The vector is scaled in place, so a single pointer
// serves as both the load and the store address.
#define X_PTR SI    // address of the current element of x
#define BLOCKS CX   // n on entry, then the number of 4-element blocks left
#define REM BX      // copy of n, later n % 4 (elements left for the tail loop)
#define STRIDE R9   // inc scaled to bytes (inc * 16)
#define STRIDE3 R10 // 3 * STRIDE
#define SCALE X0    // { alpha, alpha }
#define SCALE_B X1  // second copy of { alpha, alpha }

// Hand-encoded bytes (F2 0F 12 44 24 08) for: MOVDDUP 8(SP), X0
// This reads the alpha argument through the hardware stack pointer and
// duplicates it into both 64-bit lanes of X0.
#define LOAD_SCALE LONG $0x44120FF2; WORD $0x0824

// func DscalInc(alpha float64, x []complex128, n, inc uintptr)
//
// DscalInc multiplies both the real and the imaginary part of n elements
// of x by alpha, in place. It starts at x[0] and advances inc elements
// between consecutive accesses. Roughly equivalent Go:
//
//	var ix uintptr
//	for i := uintptr(0); i < n; i++ {
//		x[ix] = complex(alpha*real(x[ix]), alpha*imag(x[ix]))
//		ix += inc
//	}
//
// No bounds checking is performed here; the code reads and writes
// whatever addresses n and inc select relative to the base of x.
TEXT ·DscalInc(SB), NOSPLIT, $0
	MOVQ   x_base+8(FP), X_PTR         // X_PTR = &x[0]
	MOVQ   n+32(FP), BLOCKS            // BLOCKS = n
	TESTQ  BLOCKS, BLOCKS              // nothing to do when n == 0
	JZ     done

	LOAD_SCALE                         // SCALE = { alpha, alpha }
	MOVQ   inc+40(FP), STRIDE          // STRIDE = inc
	SHLQ   $4, STRIDE                  // STRIDE *= 16 (bytes per complex128)
	LEAQ   (STRIDE)(STRIDE*2), STRIDE3 // STRIDE3 = 3 * STRIDE
	MOVUPS SCALE, SCALE_B              // second copy of the multiplier for the unrolled body
	MOVQ   BLOCKS, REM                 // REM = n (reduced to n % 4 at the tail)
	SHRQ   $2, BLOCKS                  // BLOCKS = n / 4; ZF is set when fewer than 4 elements
	JZ     tail                        // skip the unrolled loop if there is no full block

block_loop:
	// Load four strided elements; every load is issued before any store.
	MOVUPS (X_PTR), X2
	MOVUPS (X_PTR)(STRIDE*1), X3
	MOVUPS (X_PTR)(STRIDE*2), X4
	MOVUPS (X_PTR)(STRIDE3*1), X5

	// Scale real and imaginary lanes of each element by alpha.
	MULPD  SCALE, X2
	MULPD  SCALE_B, X3
	MULPD  SCALE, X4
	MULPD  SCALE_B, X5

	// Write the results back to the addresses they were loaded from.
	MOVUPS X2, (X_PTR)
	MOVUPS X3, (X_PTR)(STRIDE*1)
	MOVUPS X4, (X_PTR)(STRIDE*2)
	MOVUPS X5, (X_PTR)(STRIDE3*1)

	LEAQ   (X_PTR)(STRIDE*4), X_PTR    // advance by four strides (LEAQ leaves flags alone)
	DECQ   BLOCKS
	JNZ    block_loop                  // repeat while blocks remain

tail:
	ANDQ   $3, REM                     // REM = n % 4
	JZ     done                        // no leftover elements

tail_loop:
	MOVUPS (X_PTR), X2                 // load one element
	MULPD  SCALE, X2                   // scale both lanes by alpha
	MOVUPS X2, (X_PTR)                 // store it back in place
	ADDQ   STRIDE, X_PTR               // advance by one stride
	DECQ   REM
	JNZ    tail_loop                   // repeat while leftovers remain

done:
	RET