github.com/spotify/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/runtime/memmove_amd64.s (about)

     1  // Derived from Inferno's libkern/memmove-386.s (adapted for amd64)
     2  // http://code.google.com/p/inferno-os/source/browse/libkern/memmove-386.s
     3  //
     4  //         Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
     6  //         Portions Copyright 2009 The Go Authors. All rights reserved.
     7  //
     8  // Permission is hereby granted, free of charge, to any person obtaining a copy
     9  // of this software and associated documentation files (the "Software"), to deal
    10  // in the Software without restriction, including without limitation the rights
    11  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    12  // copies of the Software, and to permit persons to whom the Software is
    13  // furnished to do so, subject to the following conditions:
    14  //
    15  // The above copyright notice and this permission notice shall be included in
    16  // all copies or substantial portions of the Software.
    17  //
    18  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    19  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    20  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    21  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    22  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    23  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    24  // THE SOFTWARE.
    25  
    26  #include "../../cmd/ld/textflag.h"
    27  
    28  // void runtime·memmove(void*, void*, uintptr)
        //
        // Copies n bytes from fr to to and tolerates overlapping ranges.
        // Sizes of at most 256 bytes are read completely into registers before
        // anything is stored, so copy direction does not matter for them.
        // Larger sizes use REP MOVSQ for the whole quadwords and then finish the
        // remaining 0-7 bytes through the small-size code; the bulk copy runs
        // backwards when the source starts at or below the destination and
        // extends past the destination's start.
        //
        // Register use: DI = destination, SI = source, BX = bytes still to copy,
        // CX = REP count / scratch, AX and X0-X15 = data being moved.
    29  TEXT runtime·memmove(SB), NOSPLIT, $0-24
    30  
    31  	MOVQ	to+0(FP), DI	// DI = destination pointer
    32  	MOVQ	fr+8(FP), SI	// SI = source pointer
    33  	MOVQ	n+16(FP), BX	// BX = number of bytes to copy
    34  
    35  	// REP instructions have a high startup cost, so we handle small sizes
    36  	// with some straightline code.  The REP MOVSQ instruction is really fast
    37  	// for large sizes.  The cutover is approximately 1K.  We implement up to
    38  	// 256 because that is the maximum SSE register load (loading all data
    39  	// into registers lets us ignore copy direction).
    40  tail:	// dispatch on BX; also re-entered after the REP MOVSQ paths with BX = n & 7
    41  	TESTQ	BX, BX
    42  	JEQ	move_0	// nothing (left) to copy
    43  	CMPQ	BX, $2
    44  	JBE	move_1or2
    45  	CMPQ	BX, $4
    46  	JBE	move_3or4
    47  	CMPQ	BX, $8
    48  	JBE	move_5through8
    49  	CMPQ	BX, $16
    50  	JBE	move_9through16
    51  	CMPQ	BX, $32
    52  	JBE	move_17through32
    53  	CMPQ	BX, $64
    54  	JBE	move_33through64
    55  	CMPQ	BX, $128
    56  	JBE	move_65through128
    57  	CMPQ	BX, $256
    58  	JBE	move_129through256	// unsigned compares: only sizes above 256 fall through
    59  	// TODO: use branch table and BSR to make this just a single dispatch
    60  
    61  /*
    62   * check and set for backwards
    63   */
    64  	CMPQ	SI, DI
    65  	JLS	back	// source at or below destination (unsigned): go check for overlap
    66  
    67  /*
    68   * forward copy loop
    69   */
    70  forward:
    71  	MOVQ	BX, CX
    72  	SHRQ	$3, CX	// CX = number of whole 8-byte words
    73  	ANDQ	$7, BX	// BX = leftover bytes (0-7), finished by the tail code
    74  
    75  	REP;	MOVSQ	// copy CX quadwords, advancing SI and DI (direction flag expected clear here)
    76  	JMP	tail
    77  
    78  back:
    79  /*
    80   * check overlap
    81   */
    82  	MOVQ	SI, CX
    83  	ADDQ	BX, CX	// CX = end of source (SI + n)
    84  	CMPQ	CX, DI
    85  	JLS	forward	// source ends at or before destination start: no overlap, copy forward
    86  	
    87  /*
    88   * whole thing backwards has
    89   * adjusted addresses
    90   */
    91  	ADDQ	BX, DI	// DI = one past the end of the destination
    92  	ADDQ	BX, SI	// SI = one past the end of the source
    93  	STD	// set direction flag: string instructions now decrement SI/DI
    94  
    95  /*
    96   * copy
    97   */
    98  	MOVQ	BX, CX
    99  	SHRQ	$3, CX	// CX = number of whole 8-byte words
   100  	ANDQ	$7, BX	// BX = leftover bytes (0-7) at the start of the buffers
   101  
   102  	SUBQ	$8, DI	// step back so DI/SI address the last quadword of each buffer
   103  	SUBQ	$8, SI
   104  	REP;	MOVSQ	// copy CX quadwords from the end downward
   105  
   106  	CLD	// clear the direction flag again before leaving this path
   107  	ADDQ	$8, DI	// undo the -8 bias: DI/SI are now BX bytes past the buffer starts
   108  	ADDQ	$8, SI
   109  	SUBQ	BX, DI	// move back to the buffer starts, where the BX uncopied bytes are
   110  	SUBQ	BX, SI
   111  	JMP	tail
   112  
   113  move_1or2:	// load first and last byte (the same byte when BX == 1), then store both
   114  	MOVB	(SI), AX
   115  	MOVB	-1(SI)(BX*1), CX
   116  	MOVB	AX, (DI)
   117  	MOVB	CX, -1(DI)(BX*1)
   118  move_0:
   119  	RET
   120  move_3or4:	// first and last 2 bytes; the two pieces overlap when BX == 3
   121  	MOVW	(SI), AX
   122  	MOVW	-2(SI)(BX*1), CX
   123  	MOVW	AX, (DI)
   124  	MOVW	CX, -2(DI)(BX*1)
   125  	RET
   126  move_5through8:	// first and last 4 bytes; the two pieces overlap when BX < 8
   127  	MOVL	(SI), AX
   128  	MOVL	-4(SI)(BX*1), CX
   129  	MOVL	AX, (DI)
   130  	MOVL	CX, -4(DI)(BX*1)
   131  	RET
   132  move_9through16:	// first and last 8 bytes; the two pieces overlap when BX < 16
   133  	MOVQ	(SI), AX
   134  	MOVQ	-8(SI)(BX*1), CX
   135  	MOVQ	AX, (DI)
   136  	MOVQ	CX, -8(DI)(BX*1)
   137  	RET
   138  move_17through32:	// first and last 16 bytes via unaligned SSE moves; all loads precede stores
   139  	MOVOU	(SI), X0
   140  	MOVOU	-16(SI)(BX*1), X1
   141  	MOVOU	X0, (DI)
   142  	MOVOU	X1, -16(DI)(BX*1)
   143  	RET
   144  move_33through64:	// first 32 bytes (X0-X1) and last 32 bytes (X2-X3)
   145  	MOVOU	(SI), X0
   146  	MOVOU	16(SI), X1
   147  	MOVOU	-32(SI)(BX*1), X2
   148  	MOVOU	-16(SI)(BX*1), X3
   149  	MOVOU	X0, (DI)
   150  	MOVOU	X1, 16(DI)
   151  	MOVOU	X2, -32(DI)(BX*1)
   152  	MOVOU	X3, -16(DI)(BX*1)
   153  	RET
   154  move_65through128:	// first 64 bytes (X0-X3) and last 64 bytes (X4-X7)
   155  	MOVOU	(SI), X0
   156  	MOVOU	16(SI), X1
   157  	MOVOU	32(SI), X2
   158  	MOVOU	48(SI), X3
   159  	MOVOU	-64(SI)(BX*1), X4
   160  	MOVOU	-48(SI)(BX*1), X5
   161  	MOVOU	-32(SI)(BX*1), X6
   162  	MOVOU	-16(SI)(BX*1), X7
   163  	MOVOU	X0, (DI)
   164  	MOVOU	X1, 16(DI)
   165  	MOVOU	X2, 32(DI)
   166  	MOVOU	X3, 48(DI)
   167  	MOVOU	X4, -64(DI)(BX*1)
   168  	MOVOU	X5, -48(DI)(BX*1)
   169  	MOVOU	X6, -32(DI)(BX*1)
   170  	MOVOU	X7, -16(DI)(BX*1)
   171  	RET
   172  move_129through256:	// first 128 bytes (X0-X7) and last 128 bytes (X8-X15)
   173  	MOVOU	(SI), X0
   174  	MOVOU	16(SI), X1
   175  	MOVOU	32(SI), X2
   176  	MOVOU	48(SI), X3
   177  	MOVOU	64(SI), X4
   178  	MOVOU	80(SI), X5
   179  	MOVOU	96(SI), X6
   180  	MOVOU	112(SI), X7
   181  	MOVOU	-128(SI)(BX*1), X8
   182  	MOVOU	-112(SI)(BX*1), X9
   183  	MOVOU	-96(SI)(BX*1), X10
   184  	MOVOU	-80(SI)(BX*1), X11
   185  	MOVOU	-64(SI)(BX*1), X12
   186  	MOVOU	-48(SI)(BX*1), X13
   187  	MOVOU	-32(SI)(BX*1), X14
   188  	MOVOU	-16(SI)(BX*1), X15
   189  	MOVOU	X0, (DI)
   190  	MOVOU	X1, 16(DI)
   191  	MOVOU	X2, 32(DI)
   192  	MOVOU	X3, 48(DI)
   193  	MOVOU	X4, 64(DI)
   194  	MOVOU	X5, 80(DI)
   195  	MOVOU	X6, 96(DI)
   196  	MOVOU	X7, 112(DI)
   197  	MOVOU	X8, -128(DI)(BX*1)
   198  	MOVOU	X9, -112(DI)(BX*1)
   199  	MOVOU	X10, -96(DI)(BX*1)
   200  	MOVOU	X11, -80(DI)(BX*1)
   201  	MOVOU	X12, -64(DI)(BX*1)
   202  	MOVOU	X13, -48(DI)(BX*1)
   203  	MOVOU	X14, -32(DI)(BX*1)
   204  	MOVOU	X15, -16(DI)(BX*1)
   205  	RET