github.com/gopherd/gonum@v0.0.4/internal/asm/c128/dscalinc_amd64.s (about)

     1  // Copyright ©2017 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build !noasm,!gccgo,!safe
     6  
     7  #include "textflag.h"
     8  
     9  #define SRC SI // read pointer into x (loaded from x_base)
    10  #define DST SI // write pointer; same register as SRC, so x is updated in place
    11  #define LEN CX // n on entry, then the number of 4-element loop iterations (n / 4)
    12  #define TAIL BX // elements left over after the 4-element loop (n % 4)
    13  #define INC R9 // distance between visited elements in bytes (inc * 16)
    14  #define INC3 R10 // 3 * INC, the offset of the fourth element of each group
    15  #define ALPHA X0 // alpha duplicated into both float64 lanes by MOVDDUP_ALPHA
    16  #define ALPHA_2 X1 // copy of ALPHA used by every second multiply in the 4-element loop
    17  
    18  #define MOVDDUP_ALPHA    LONG $0x44120FF2; WORD $0x0824 // MOVDDUP 8(SP), X0 (emitted as raw opcode bytes, so the 8(SP) source and the X0 destination are fixed in the encoding)
    19  
    20  // DscalInc multiplies the real and imaginary parts of n elements of x,
    21  // taken inc elements apart starting at x[0], by the real scalar alpha.
    22  // The elements are updated in place; n == 0 returns without touching x.
    23  //
    24  // func DscalInc(alpha float64, x []complex128, n, inc uintptr)
    25  TEXT ·DscalInc(SB), NOSPLIT, $0
    26  	MOVQ  n+32(FP), LEN     // LEN = n
    27  	MOVQ  x_base+8(FP), SRC // SRC = &x[0]
    28  	TESTQ LEN, LEN
    29  	JZ    done              // if n == 0 { return }
    30  
    31  	MOVQ   inc+40(FP), INC    // INC = inc
    32  	MOVDDUP_ALPHA             // ALPHA = { alpha, alpha }
    33  	MOVAPS ALPHA, ALPHA_2     // ALPHA_2 = ALPHA; alternate multiplies read the copy
    34  	SHLQ   $4, INC            // INC *= 16, the size of a complex128 in bytes
    35  	LEAQ   (INC)(INC*2), INC3 // INC3 = 3 * INC
    36  	MOVQ   LEN, TAIL
    37  	ANDQ   $3, TAIL           // TAIL = n % 4
    38  	SHRQ   $2, LEN            // LEN = n / 4
    39  	JZ     leftover           // fewer than four elements: skip the unrolled loop
    40  
    41  by_four: // do {
    42  	// All four loads happen before any store, so the result does not
    43  	// depend on whether the four addresses are distinct.
    44  	MOVUPS (SRC), X2          // X2 = x[i] * alpha
    45  	MULPD  ALPHA, X2
    46  	MOVUPS (SRC)(INC*1), X3   // X3 = x[i+inc] * alpha
    47  	MULPD  ALPHA_2, X3
    48  	MOVUPS (SRC)(INC*2), X4   // X4 = x[i+2*inc] * alpha
    49  	MULPD  ALPHA, X4
    50  	MOVUPS (SRC)(INC3*1), X5  // X5 = x[i+3*inc] * alpha
    51  	MULPD  ALPHA_2, X5
    52  
    53  	MOVUPS X2, (DST)          // write the four scaled elements back
    54  	MOVUPS X3, (DST)(INC*1)
    55  	MOVUPS X4, (DST)(INC*2)
    56  	MOVUPS X5, (DST)(INC3*1)
    57  
    58  	LEAQ (SRC)(INC*4), SRC    // advance by four strides
    59  	SUBQ $1, LEN
    60  	JNZ  by_four              // } while --LEN != 0
    61  
    62  leftover:
    63  	TESTQ TAIL, TAIL
    64  	JZ    done                // if n % 4 == 0 { return }
    65  
    66  one_by_one: // do {
    67  	MOVUPS (SRC), X2          // X2 = x[i]
    68  	MULPD  ALPHA, X2          // X2 *= alpha
    69  	MOVUPS X2, (DST)          // x[i] = X2
    70  	LEAQ   (SRC)(INC*1), SRC  // advance by one stride
    71  	SUBQ   $1, TAIL
    72  	JNZ    one_by_one         // } while --TAIL != 0
    73  
    74  done:
    75  	RET