github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/runtime/memmove_arm.s (about)

     1  // Inferno's libkern/memmove-arm.s
     2  // https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/memmove-arm.s
     3  //
     4  //         Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
     5  //         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
     6  //         Portions Copyright 2009 The Go Authors. All rights reserved.
     7  //
     8  // Permission is hereby granted, free of charge, to any person obtaining a copy
     9  // of this software and associated documentation files (the "Software"), to deal
    10  // in the Software without restriction, including without limitation the rights
    11  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    12  // copies of the Software, and to permit persons to whom the Software is
    13  // furnished to do so, subject to the following conditions:
    14  //
    15  // The above copyright notice and this permission notice shall be included in
    16  // all copies or substantial portions of the Software.
    17  //
    18  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    19  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    20  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    21  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    22  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    23  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    24  // THE SOFTWARE.
    25  
    26  #include "textflag.h"
    27  
    28  // TE or TS are spilled to the stack during bulk register moves.
    29  #define TS	R0	/* destination start pointer; loaded from the to argument */
    30  #define TE	R8	/* destination end pointer, computed as TS + N */
    31  
    32  // Warning: the linker will use R11 to synthesize certain instructions. Please
    33  // take care and double check with objdump.
    34  #define FROM	R11	/* source pointer; moves as bytes are consumed */
    35  #define N	R12	/* byte count; last read before TMP is first written */
    36  #define TMP	R12				/* scratch and loop limit; N and TMP don't overlap */
    37  #define TMP1	R5	/* data register for the word-at-a-time loops; same register as RSHIFT */
    38  
    39  #define RSHIFT	R5	/* unaligned paths: right shift count in bits (8, 16 or 24) */
    40  #define LSHIFT	R6	/* unaligned paths: left shift count in bits (24, 16 or 8) */
    41  #define OFFSET	R7	/* unaligned paths: byte correction applied to FROM after the 16-byte loop */
    42  
    43  #define BR0	R0					/* backward read word 0; shared with TS */
    44  #define BW0	R1	/* backward write word 0; same register as BR1 */
    45  #define BR1	R1	/* backward read word 1 */
    46  #define BW1	R2	/* backward write word 1; same register as BR2 */
    47  #define BR2	R2	/* backward read word 2 */
    48  #define BW2	R3	/* backward write word 2; same register as BR3 */
    49  #define BR3	R3	/* backward read word 3 */
    50  #define BW3	R4	/* backward write word 3; not shared with any BRn */
    51  
    52  #define FW0	R1	/* forward write word 0; not shared with any FRn */
    53  #define FR0	R2	/* forward read word 0 */
    54  #define FW1	R2	/* forward write word 1; same register as FR0 */
    55  #define FR1	R3	/* forward read word 1 */
    56  #define FW2	R3	/* forward write word 2; same register as FR1 */
    57  #define FR2	R4	/* forward read word 2 */
    58  #define FW3	R4	/* forward write word 3; same register as FR2 */
    59  #define FR3	R8					/* forward read word 3; shared with TE */
    60  
    61  // func memmove(to, from unsafe.Pointer, n uintptr)
        //
        // Copies n bytes from from to to. Arguments are read from the stack at
        // to+0(FP), from+4(FP) and n+8(FP). When to <= from (unsigned) the bytes
        // are copied in ascending address order (_forward); otherwise they are
        // copied in descending order starting at the end of both buffers (_back),
        // so an overlapping source is read before it is overwritten.
        //
        // Each direction follows the same plan:
        //   1. If n >= 4, copy single bytes until the destination is 4-byte aligned.
        //   2. If the source is then also 4-byte aligned, copy 32-byte chunks with
        //      multi-register moves, then whole words, then trailing bytes.
        //   3. Otherwise (_bunaligned / _funaligned) read aligned source words and
        //      merge neighbouring words with shifts to build aligned destination
        //      words, 16 bytes per iteration, then finish with single bytes.
        //
        // The 4-byte frame holds TS (backward paths) or TE (forward paths) while
        // the multi-register moves reuse that register.
    62  TEXT runtime·memmove(SB), NOSPLIT, $4-12
    63  _memmove:
    64  	MOVW	to+0(FP), TS	/* TS = destination start */
    65  	MOVW	from+4(FP), FROM	/* FROM = source start */
    66  	MOVW	n+8(FP), N	/* N = byte count */
    67  
    68  	ADD	N, TS, TE	/* to end pointer */
    69  
    70  	CMP	FROM, TS	/* compare TS against FROM */
    71  	BLS	_forward	/* TS <= FROM (unsigned): ascending copy */
    72  
    73  _back:	/* TS > FROM: descending copy from the buffer ends */
    74  	ADD	N, FROM		/* from end pointer */
    75  	CMP	$4, N		/* need at least 4 bytes to copy */
    76  	BLT	_b1tail	/* fewer than 4 bytes: byte loop only */
    77  
    78  _b4align:				/* align destination on 4 */
    79  	AND.S	$3, TE, TMP	/* TMP = TE & 3, sets flags */
    80  	BEQ	_b4aligned	/* destination end is word aligned */
    81  
    82  	MOVBU.W	-1(FROM), TMP	/* pre-indexed: FROM -= 1, then load the byte */
    83  	MOVBU.W	TMP, -1(TE)	/* pre-indexed: TE -= 1, then store the byte */
    84  	B	_b4align
    85  
    86  _b4aligned:				/* is source now aligned? */
    87  	AND.S	$3, FROM, TMP	/* TMP = FROM & 3, sets flags */
    88  	BNE	_bunaligned	/* source misaligned by TMP (1..3) bytes */
    89  
    90  	ADD	$31, TS, TMP	/* do 32-byte chunks if possible */
    91  	MOVW	TS, savedts-4(SP)	/* spill TS: R0 is overwritten by the R0-R7 move below */
    92  _b32loop:	/* runs while at least 32 bytes remain (TE > TS + 31) */
    93  	CMP	TMP, TE
    94  	BLS	_b4tail
    95  
    96  	MOVM.DB.W (FROM), [R0-R7]	/* load 8 words below FROM, FROM -= 32 */
    97  	MOVM.DB.W [R0-R7], (TE)	/* store 8 words below TE, TE -= 32 */
    98  	B	_b32loop
    99  
   100  _b4tail:				/* do remaining words if possible */
   101  	MOVW	savedts-4(SP), TS	/* reload the spilled TS */
   102  	ADD	$3, TS, TMP	/* word loop limit */
   103  _b4loop:	/* runs while at least 4 bytes remain (TE > TS + 3) */
   104  	CMP	TMP, TE
   105  	BLS	_b1tail
   106  
   107  	MOVW.W	-4(FROM), TMP1	/* pre-indexed: FROM -= 4, then load the word */
   108  	MOVW.W	TMP1, -4(TE)	/* pre-indexed: TE -= 4, then store the word */
   109  	B	_b4loop
   110  
   111  _b1tail:				/* remaining bytes */
   112  	CMP	TE, TS
   113  	BEQ	_return	/* TE has reached TS: everything copied */
   114  
   115  	MOVBU.W	-1(FROM), TMP	/* pre-indexed: FROM -= 1, then load the byte */
   116  	MOVBU.W	TMP, -1(TE)	/* pre-indexed: TE -= 1, then store the byte */
   117  	B	_b1tail
   118  
   119  _forward:	/* TS <= FROM: ascending copy */
   120  	CMP	$4, N		/* need at least 4 bytes to copy */
   121  	BLT	_f1tail	/* fewer than 4 bytes: byte loop only */
   122  
   123  _f4align:				/* align destination on 4 */
   124  	AND.S	$3, TS, TMP	/* TMP = TS & 3, sets flags */
   125  	BEQ	_f4aligned	/* destination start is word aligned */
   126  
   127  	MOVBU.P	1(FROM), TMP	/* implicit write back: load the byte, then FROM += 1 */
   128  	MOVBU.P	TMP, 1(TS)	/* implicit write back: store the byte, then TS += 1 */
   129  	B	_f4align
   130  
   131  _f4aligned:				/* is source now aligned? */
   132  	AND.S	$3, FROM, TMP	/* TMP = FROM & 3, sets flags */
   133  	BNE	_funaligned	/* source misaligned by TMP (1..3) bytes */
   134  
   135  	SUB	$31, TE, TMP	/* do 32-byte chunks if possible */
   136  	MOVW	TE, savedte-4(SP)	/* spill TE: R8 is overwritten by the R1-R8 move below */
   137  _f32loop:	/* runs while at least 32 bytes remain (TS < TE - 31) */
   138  	CMP	TMP, TS
   139  	BHS	_f4tail
   140  
   141  	MOVM.IA.W (FROM), [R1-R8]	/* load 8 words at FROM, FROM += 32 */
   142  	MOVM.IA.W [R1-R8], (TS)	/* store 8 words at TS, TS += 32 */
   143  	B	_f32loop
   144  
   145  _f4tail:
   146  	MOVW	savedte-4(SP), TE	/* reload the spilled TE */
   147  	SUB	$3, TE, TMP	/* do remaining words if possible */
   148  _f4loop:	/* runs while at least 4 bytes remain (TS < TE - 3) */
   149  	CMP	TMP, TS
   150  	BHS	_f1tail
   151  
   152  	MOVW.P	4(FROM), TMP1	/* implicit write back: load the word, then FROM += 4 */
   153  	MOVW.P	TMP1, 4(TS)	/* implicit write back: store the word, then TS += 4 */
   154  	B	_f4loop
   155  
   156  _f1tail:	/* remaining bytes */
   157  	CMP	TS, TE
   158  	BEQ	_return	/* TS has reached TE: everything copied */
   159  
   160  	MOVBU.P	1(FROM), TMP	/* implicit write back: load the byte, then FROM += 1 */
   161  	MOVBU.P	TMP, 1(TS)	/* implicit write back: store the byte, then TS += 1 */
   162  	B	_f1tail
   163  
   164  _return:
   165  	MOVW	to+0(FP), R0	/* reload to into R0; NOTE(review): presumably for callers that expect the destination in R0 - confirm */
   166  	RET
   167  
   168  _bunaligned:	/* backward copy, destination aligned, source misaligned by TMP bytes */
   169  	CMP	$2, TMP		/* is TMP < 2 ? */
   170  
   171  	MOVW.LT	$8, RSHIFT		/* TMP == 1: (R(n)<<24)|(R(n-1)>>8) */
   172  	MOVW.LT	$24, LSHIFT
   173  	MOVW.LT	$1, OFFSET
   174  
   175  	MOVW.EQ	$16, RSHIFT		/* TMP == 2: (R(n)<<16)|(R(n-1)>>16) */
   176  	MOVW.EQ	$16, LSHIFT
   177  	MOVW.EQ	$2, OFFSET
   178  
   179  	MOVW.GT	$24, RSHIFT		/* TMP == 3: (R(n)<<8)|(R(n-1)>>24) */
   180  	MOVW.GT	$8, LSHIFT
   181  	MOVW.GT	$3, OFFSET
   182  
   183  	ADD	$16, TS, TMP	/* do 16-byte chunks if possible */
   184  	CMP	TMP, TE
   185  	BLS	_b1tail	/* 16 bytes or fewer remain: byte loop only */
   186  
   187  	BIC	$3, FROM		/* align source */
   188  	MOVW	TS, savedts-4(SP)	/* spill TS: R0 is reused as BR0 */
   189  	MOVW	(FROM), BR0	/* prime first block register */
   190  
   191  _bu16loop:	/* runs while more than 16 bytes remain (TE > TS + 16) */
   192  	CMP	TMP, TE
   193  	BLS	_bu1tail
   194  
   195  	MOVW	BR0<<LSHIFT, BW3	/* upper part of the top output word, taken before BR0 is reloaded */
   196  	MOVM.DB.W (FROM), [BR0-BR3]	/* load 4 aligned words below FROM, FROM -= 16 */
   197  	ORR	BR3>>RSHIFT, BW3	/* lower part comes from the next word down */
   198  
   199  	MOVW	BR3<<LSHIFT, BW2	/* BW2 overwrites BR3 (same register) after its last read */
   200  	ORR	BR2>>RSHIFT, BW2
   201  
   202  	MOVW	BR2<<LSHIFT, BW1	/* BW1 overwrites BR2 */
   203  	ORR	BR1>>RSHIFT, BW1
   204  
   205  	MOVW	BR1<<LSHIFT, BW0	/* BW0 overwrites BR1 */
   206  	ORR	BR0>>RSHIFT, BW0	/* BR0 stays live as the carry word for the next iteration */
   207  
   208  	MOVM.DB.W [BW0-BW3], (TE)	/* store 4 merged words below TE, TE -= 16 */
   209  	B	_bu16loop
   210  
   211  _bu1tail:
   212  	MOVW	savedts-4(SP), TS	/* reload the spilled TS */
   213  	ADD	OFFSET, FROM	/* put back the misalignment cleared by BIC so FROM is the true source position */
   214  	B	_b1tail
   215  
   216  _funaligned:	/* forward copy, destination aligned, source misaligned by TMP bytes */
   217  	CMP	$2, TMP	/* is TMP < 2 ? */
   218  
   219  	MOVW.LT	$8, RSHIFT		/* TMP == 1: (R(n+1)<<24)|(R(n)>>8) */
   220  	MOVW.LT	$24, LSHIFT
   221  	MOVW.LT	$3, OFFSET
   222  
   223  	MOVW.EQ	$16, RSHIFT		/* TMP == 2: (R(n+1)<<16)|(R(n)>>16) */
   224  	MOVW.EQ	$16, LSHIFT
   225  	MOVW.EQ	$2, OFFSET
   226  
   227  	MOVW.GT	$24, RSHIFT		/* TMP == 3: (R(n+1)<<8)|(R(n)>>24) */
   228  	MOVW.GT	$8, LSHIFT
   229  	MOVW.GT	$1, OFFSET
   230  
   231  	SUB	$16, TE, TMP	/* do 16-byte chunks if possible */
   232  	CMP	TMP, TS
   233  	BHS	_f1tail	/* 16 bytes or fewer remain: byte loop only */
   234  
   235  	BIC	$3, FROM		/* align source */
   236  	MOVW	TE, savedte-4(SP)	/* spill TE: R8 is reused as FR3 */
   237  	MOVW.P	4(FROM), FR3	/* prime last block register, implicit write back */
   238  
   239  _fu16loop:	/* runs while more than 16 bytes remain (TS < TE - 16) */
   240  	CMP	TMP, TS
   241  	BHS	_fu1tail
   242  
   243  	MOVW	FR3>>RSHIFT, FW0	/* lower part of the first output word, taken before FR3 is reloaded */
   244  	MOVM.IA.W (FROM), [FR0,FR1,FR2,FR3]	/* load 4 aligned words at FROM, FROM += 16 */
   245  	ORR	FR0<<LSHIFT, FW0	/* upper part comes from the next word up */
   246  
   247  	MOVW	FR0>>RSHIFT, FW1	/* FW1 overwrites FR0 (same register) after its last read */
   248  	ORR	FR1<<LSHIFT, FW1
   249  
   250  	MOVW	FR1>>RSHIFT, FW2	/* FW2 overwrites FR1 */
   251  	ORR	FR2<<LSHIFT, FW2
   252  
   253  	MOVW	FR2>>RSHIFT, FW3	/* FW3 overwrites FR2 */
   254  	ORR	FR3<<LSHIFT, FW3	/* FR3 stays live as the carry word for the next iteration */
   255  
   256  	MOVM.IA.W [FW0,FW1,FW2,FW3], (TS)	/* store 4 merged words at TS, TS += 16 */
   257  	B	_fu16loop
   258  
   259  _fu1tail:
   260  	MOVW	savedte-4(SP), TE	/* reload the spilled TE */
   261  	SUB	OFFSET, FROM	/* step back over the unused bytes of the carry word so FROM is the true source position */
   262  	B	_f1tail