// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !noasm,!gccgo,!safe

#include "textflag.h"

// Register roles. The slice is scaled in place, so a single pointer serves
// as both the load and the store base.
#define X_PTR   SI // base address of x
#define COUNT   CX // len(x) on entry, then the number of 4-element blocks
#define OFF     AX // running offset into x, counted in float64 (8-byte) units
#define REM     BX // number of elements left over after the 4-element blocks
#define SCALE   X0 // alpha duplicated into both 64-bit lanes
#define SCALE_B X1 // second copy of SCALE, alternated with it in the block loop

// Hand-encoded MOVDDUP 8(SP), X0 (bytes F2 0F 12 44 24 08).
// The routine declares a zero-size frame, so 8(SP) sits just above the
// return address, which is where the first argument (alpha) is passed.
#define LOAD_SCALE LONG $0x44120FF2; WORD $0x0824

// func DscalUnitary(alpha float64, x []complex128)
//
// Multiplies every element of x, in place, by the real scalar alpha.
// A complex128 is one 16-byte (real, imag) pair of float64 values, so a
// packed-double multiply by {alpha, alpha} scales both parts at once.
TEXT ·DscalUnitary(SB), NOSPLIT, $0
	MOVQ  x_len+16(FP), COUNT // COUNT = len(x)
	MOVQ  x_base+8(FP), X_PTR // X_PTR = &x[0]
	TESTQ COUNT, COUNT        // nothing to do for an empty slice
	JZ    done

	XORQ   OFF, OFF           // OFF = 0
	LOAD_SCALE                // SCALE = {alpha, alpha}
	MOVUPS SCALE, SCALE_B     // SCALE_B = SCALE
	MOVQ   COUNT, REM
	ANDQ   $3, REM            // REM = len(x) % 4
	SHRQ   $2, COUNT          // COUNT = len(x) / 4
	JZ     tail               // fewer than four elements: tail only

blocks_loop: // four complex128 values (64 bytes) per iteration
	MOVUPS (X_PTR)(OFF*8), X2 // load x[i], x[i+1], x[i+2], x[i+3]
	MOVUPS 16(X_PTR)(OFF*8), X3
	MOVUPS 32(X_PTR)(OFF*8), X4
	MOVUPS 48(X_PTR)(OFF*8), X5

	MULPD SCALE, X2           // scale real and imaginary parts together
	MULPD SCALE_B, X3
	MULPD SCALE, X4
	MULPD SCALE_B, X5

	MOVUPS X2, (X_PTR)(OFF*8) // store the scaled values back into x
	MOVUPS X3, 16(X_PTR)(OFF*8)
	MOVUPS X4, 32(X_PTR)(OFF*8)
	MOVUPS X5, 48(X_PTR)(OFF*8)

	ADDQ $8, OFF              // four elements = eight float64 slots
	DECQ COUNT
	JNZ  blocks_loop          // repeat until every full block is done

tail:
	TESTQ REM, REM            // any elements left over?
	JZ    done

tail_loop: // one complex128 value (16 bytes) per iteration
	MOVUPS (X_PTR)(OFF*8), X2 // load x[i]
	MULPD  SCALE, X2          // x[i] *= alpha
	MOVUPS X2, (X_PTR)(OFF*8) // store x[i]
	ADDQ   $2, OFF            // one element = two float64 slots
	DECQ   REM
	JNZ    tail_loop          // repeat until the remainder is consumed

done:
	RET