github.com/gopherd/gonum@v0.0.4/internal/asm/c128/dscalunitary_amd64.s (about)

     1  // Copyright ©2017 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build !noasm,!gccgo,!safe
     6  
     7  #include "textflag.h"
     8  
     9  #define SRC SI     // address of x's first element (loaded from x_base)
    10  #define DST SI     // same register as SRC: results are stored back over the inputs
    11  #define LEN CX     // len(x), then the number of four-element loop iterations
    12  #define IDX AX     // offset into x, counted in 8-byte units
    13  #define TAIL BX    // len(x), then len(x) % 4 for the remainder loop
    14  #define ALPHA X0   // alpha, duplicated into both 64-bit lanes by MOVDDUP_ALPHA
    15  #define ALPHA_2 X1 // copy of ALPHA
    16  
    17  #define MOVDDUP_ALPHA    LONG $0x44120FF2; WORD $0x0824 // raw encoding of MOVDDUP 8(SP), X0
    18  
    19  // func DscalUnitary(alpha float64, x []complex128)
    20  //
    21  // DscalUnitary multiplies both float64 halves of every element of x by
    22  // alpha, in place. Elements are handled four per pass first, then one
    23  // per pass for the remaining len(x) % 4.
    24  TEXT ·DscalUnitary(SB), NOSPLIT, $0
    25  	MOVQ  x_len+16(FP), LEN // LEN = len(x)
    26  	MOVQ  x_base+8(FP), SRC // SRC = address of x[0]
    27  	TESTQ LEN, LEN          // empty slice: nothing to scale
    28  	JZ    done
    29  
    30  	MOVQ   LEN, TAIL      // keep the full length for the remainder pass
    31  	XORQ   IDX, IDX       // start at offset 0
    32  	MOVDDUP_ALPHA         // ALPHA = alpha in both lanes
    33  	MOVUPS ALPHA, ALPHA_2 // second copy, used by every other multiply
    34  	SHRQ   $2, LEN        // LEN = number of four-element groups
    35  	JZ     remainder      // fewer than four elements: skip the unrolled loop
    36  
    37  unrolled: // four elements per pass
    38  	MOVUPS (SRC)(IDX*8), X2   // load four consecutive elements
    39  	MOVUPS 16(SRC)(IDX*8), X3
    40  	MOVUPS 32(SRC)(IDX*8), X4
    41  	MOVUPS 48(SRC)(IDX*8), X5
    42  
    43  	MULPD ALPHA, X2   // scale both halves of each element
    44  	MULPD ALPHA_2, X3
    45  	MULPD ALPHA, X4
    46  	MULPD ALPHA_2, X5
    47  
    48  	MOVUPS X2, (DST)(IDX*8)   // write the results back over the inputs
    49  	MOVUPS X3, 16(DST)(IDX*8)
    50  	MOVUPS X4, 32(DST)(IDX*8)
    51  	MOVUPS X5, 48(DST)(IDX*8)
    52  
    53  	ADDQ $8, IDX  // four elements = eight 8-byte units
    54  	DECQ LEN
    55  	JNZ  unrolled // repeat while groups remain
    56  
    57  remainder:
    58  	ANDQ $3, TAIL // TAIL = len(x) % 4
    59  	JZ   done     // length was a multiple of four
    60  
    61  single: // one element per pass
    62  	MOVUPS (SRC)(IDX*8), X2 // load one element
    63  	MULPD  ALPHA, X2        // scale both halves
    64  	MOVUPS X2, (DST)(IDX*8) // store it back in place
    65  	ADDQ   $2, IDX          // one element = two 8-byte units
    66  	DECQ   TAIL
    67  	JNZ    single           // repeat while elements remain
    68  
    69  done:
    70  	RET