// Source: github.com/gopherd/gonum@v0.0.4/internal/asm/f64/l1norm_amd64.s

// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !noasm,!gccgo,!safe

#include "textflag.h"

// func L1Dist(s, t []float64) float64
//
// L1Dist accumulates max(s[i]-t[i], t[i]-s[i]), i.e. |s[i]-t[i]|, over the
// first n = min(len(s), len(t)) elements and stores the sum in ret.
// For n == 0 the result is 0.
//
// Register roles:
//   DI = base pointer of s
//   SI = base pointer of t
//   AX = element index i
//   CX = number of remaining 2-element iterations (n / 2)
//   BX = n % 2 (1 if a single trailing element remains)
//   X3 = two-lane accumulator; its lanes are added together at l1_end
//   X0, X1, X2 = scratch
TEXT ·L1Dist(SB), NOSPLIT, $0
	MOVQ    s_base+0(FP), DI  // DI = &s[0]
	MOVQ    t_base+24(FP), SI // SI = &t[0]
	MOVQ    s_len+8(FP), CX   // CX = len(s)
	CMPQ    t_len+32(FP), CX  // CX = min( CX, len(t) )
	CMOVQLE t_len+32(FP), CX  //   (taken when len(t) <= CX)
	PXOR    X3, X3            // norm = 0 (both lanes)
	CMPQ    CX, $0            // if CX == 0 { return 0 }
	JE      l1_end
	XORQ    AX, AX            // i = 0
	MOVQ    CX, BX
	ANDQ    $1, BX            // BX = CX % 2
	SHRQ    $1, CX            // CX = floor( CX / 2 )
	JZ      l1_tail           // if CX == 0 { only one element: handle it in the tail }

l1_loop: // Loop unrolled 2x: do {
	MOVUPS (SI)(AX*8), X0 // X0 = t[i:i+2]
	MOVUPS (DI)(AX*8), X1 // X1 = s[i:i+2]
	MOVAPS X0, X2         // X2 = copy of t[i:i+2], needed after X0 is overwritten
	SUBPD  X1, X0         // X0 = t - s
	SUBPD  X2, X1         // X1 = s - t
	MAXPD  X1, X0         // X0 = max( t - s, s - t )
	ADDPD  X0, X3         // norm += X0 (lane-wise)
	ADDQ   $2, AX         // i += 2

	// DECQ/JNZ instead of the legacy LOOP instruction: same counter
	// semantics. The flags DECQ writes are overwritten by the CMPQ
	// below before anything reads them.
	DECQ CX
	JNZ  l1_loop          // } while --CX != 0
	CMPQ BX, $0           // if BX == 0 { no trailing element: go sum the lanes }
	JE   l1_end

l1_tail: // Exactly one trailing element at index AX.
	PXOR X0, X0 // reset X0, X1 to break dependencies
	PXOR X1, X1

	MOVSD  (SI)(AX*8), X0 // X0 = t[i]
	MOVSD  (DI)(AX*8), X1 // X1 = s[i]
	MOVAPD X0, X2         // X2 = copy of t[i]
	SUBSD  X1, X0         // X0 = t[i] - s[i]
	SUBSD  X2, X1         // X1 = s[i] - t[i]
	MAXSD  X1, X0         // X0 = max( t[i] - s[i], s[i] - t[i] )
	ADDSD  X0, X3         // norm[0] += X0

l1_end: // Horizontal add of the two accumulator lanes.
	MOVAPS X3, X2
	SHUFPD $1, X2, X2     // X2[0] = X3[1]
	ADDSD  X3, X2         // X2[0] = X3[1] + X3[0]
	MOVSD  X2, ret+48(FP) // return X2[0]
	RET