github.com/aloncn/graphics-go@v0.0.1/src/runtime/memmove_arm.s

github.com/aloncn/graphics-go@v0.0.1/src/runtime/memmove_arm.s (about)

     1  // Inferno's libkern/memmove-arm.s
     2  // http://code.google.com/p/inferno-os/source/browse/libkern/memmove-arm.s
     3  //
     4  //         Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
     6  //         Portions Copyright 2009 The Go Authors. All rights reserved.
     7  //
     8  // Permission is hereby granted, free of charge, to any person obtaining a copy
     9  // of this software and associated documentation files (the "Software"), to deal
    10  // in the Software without restriction, including without limitation the rights
    11  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    12  // copies of the Software, and to permit persons to whom the Software is
    13  // furnished to do so, subject to the following conditions:
    14  //
    15  // The above copyright notice and this permission notice shall be included in
    16  // all copies or substantial portions of the Software.
    17  //
    18  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    19  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    20  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    21  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    22  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    23  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    24  // THE SOFTWARE.
    25  
    26  #include "textflag.h"
    27  
    28  // TE or TS are spilled to the stack during bulk register moves.
        // TS is the destination start pointer (advanced by the forward copy) and TE
        // is the destination end pointer (lowered by the backward copy). R0 belongs
        // to the backward block-move register sets and R8 to the forward ones, so
        // the pointer that acts as the fixed loop bound is stored in the frame slot
        // while those loops run and reloaded afterwards.
    29  #define TS	R0
    30  #define TE	R8
    31  
    32  // Warning: the linker will use R11 to synthesize certain instructions. Please
    33  // take care and double check with objdump.
        // FROM is the source pointer. N is the byte count. N is only read during
        // setup and the initial length checks, after which R12 is reused as the
        // scratch register TMP.
    34  #define FROM	R11
    35  #define N	R12
    36  #define TMP	R12				/* N and TMP don't overlap */
    37  #define TMP1	R5				/* word scratch, used only in the aligned 4-byte loops */
    38  
        // Parameters of the unaligned-source paths. RSHIFT and LSHIFT are the two
        // complementary bit shifts (they add up to 32 in every case the code sets)
        // used to merge two aligned source words into one destination word. OFFSET
        // is the byte correction applied to FROM once the 16-byte loop is done,
        // because FROM is rounded down to a word boundary for that loop.
        // RSHIFT shares R5 with TMP1: the aligned word loops and the unaligned
        // paths are separate code paths.
    39  #define RSHIFT	R5
    40  #define LSHIFT	R6
    41  #define OFFSET	R7
    42  
        // Backward unaligned loop: BR0-BR3 receive four source words and BW0-BW3
        // hold the four merged words that are stored. BWn is the same register as
        // BR(n+1) for n = 0..2, so a merged word replaces a source word only at
        // that source word's final use.
    43  #define BR0	R0					/* shared with TS */
    44  #define BW0	R1
    45  #define BR1	R1
    46  #define BW1	R2
    47  #define BR2	R2
    48  #define BW2	R3
    49  #define BR3	R3
    50  #define BW3	R4
    51  
        // Forward unaligned loop: FR0-FR3 receive four source words and FW0-FW3
        // hold the four merged words that are stored. FW(n+1) is the same register
        // as FRn for n = 0..2.
    52  #define FW0	R1
    53  #define FR0	R2
    54  #define FW1	R2
    55  #define FR1	R3
    56  #define FW2	R3
    57  #define FR2	R4
    58  #define FW3	R4
    59  #define FR3	R8					/* shared with TE */
    60  
    61  TEXT runtime·memmove(SB), NOSPLIT, $4-12
        // Copies n bytes from from to to. The arguments are read from the caller
        // frame at to+0(FP), from+4(FP) and n+8(FP). The 4-byte local frame is one
        // spill slot, addressed as savedts-4(SP) or savedte-4(SP) depending on the
        // copy direction.
        //
        // Direction: when to <= from (unsigned) the copy runs forward from the
        // start, otherwise it runs backward from the end, so the bytes of an
        // overlapping source are read before they are overwritten.
        //
        // Both directions have the same stages: byte copies until the destination
        // is word aligned, then either 32-byte block moves plus a word loop (source
        // also word aligned) or a 16-byte shift-and-merge loop (source unaligned),
        // and finally a byte loop for whatever is left.
        //
        // Reading aid: CMP a, b followed by a conditional branch takes the branch
        // when b <cond> a. BLS and BHS are unsigned tests, BLT is a signed test.
        // NOTE(review): because BLT is signed, a count with the top bit set also
        // takes the byte-loop path.
    62  _memmove:
    63  	MOVW	to+0(FP), TS
    64  	MOVW	from+4(FP), FROM
    65  	MOVW	n+8(FP), N
    66  
    67  	ADD	N, TS, TE	/* to end pointer */
    68  
    69  	CMP	FROM, TS
    70  	BLS	_forward	/* to <= from: copy forward */
    71  
        // Backward copy (to > from). FROM and TE point one past the last byte and
        // are lowered by pre-indexed write-back until TE meets TS.
    72  _back:
    73  	ADD	N, FROM		/* from end pointer */
    74  	CMP	$4, N		/* need at least 4 bytes to copy */
    75  	BLT	_b1tail
    76  
    77  _b4align:				/* align destination on 4 */
    78  	AND.S	$3, TE, TMP
    79  	BEQ	_b4aligned
    80  
    81  	MOVBU.W	-1(FROM), TMP	/* pre-indexed */
    82  	MOVBU.W	TMP, -1(TE)	/* pre-indexed */
    83  	B	_b4align
    84  
    85  _b4aligned:				/* is source now aligned? */
    86  	AND.S	$3, FROM, TMP	/* TMP = FROM & 3, read again at _bunaligned */
    87  	BNE	_bunaligned
    88  
    89  	ADD	$31, TS, TMP	/* do 32-byte chunks if possible */
    90  	MOVW	TS, savedts-4(SP)	/* R0 (TS) is overwritten by the block load below */
    91  _b32loop:
    92  	CMP	TMP, TE
    93  	BLS	_b4tail		/* TE <= TS+31: fewer than 32 bytes left */
    94  
        	// Load the 8 words below FROM and store them below TE. The DB.W form
        	// lowers each pointer by 32.
    95  	MOVM.DB.W (FROM), [R0-R7]
    96  	MOVM.DB.W [R0-R7], (TE)
    97  	B	_b32loop
    98  
    99  _b4tail:				/* do remaining words if possible */
   100  	MOVW	savedts-4(SP), TS	/* restore TS after the block moves */
   101  	ADD	$3, TS, TMP
   102  _b4loop:
   103  	CMP	TMP, TE
   104  	BLS	_b1tail		/* TE <= TS+3: fewer than 4 bytes left */
   105  
   106  	MOVW.W	-4(FROM), TMP1	/* pre-indexed */
   107  	MOVW.W	TMP1, -4(TE)	/* pre-indexed */
   108  	B	_b4loop
   109  
   110  _b1tail:				/* remaining bytes */
   111  	CMP	TE, TS
   112  	BEQ	_return		/* TE has reached TS: everything is copied */
   113  
   114  	MOVBU.W	-1(FROM), TMP	/* pre-indexed */
   115  	MOVBU.W	TMP, -1(TE)	/* pre-indexed */
   116  	B	_b1tail
   117  
        // Forward copy (to <= from). FROM and TS are advanced by post-indexed
        // write-back until TS meets TE.
   118  _forward:
   119  	CMP	$4, N		/* need at least 4 bytes to copy */
   120  	BLT	_f1tail
   121  
   122  _f4align:				/* align destination on 4 */
   123  	AND.S	$3, TS, TMP
   124  	BEQ	_f4aligned
   125  
   126  	MOVBU.P	1(FROM), TMP	/* implicit write back */
   127  	MOVBU.P	TMP, 1(TS)	/* implicit write back */
   128  	B	_f4align
   129  
   130  _f4aligned:				/* is source now aligned? */
   131  	AND.S	$3, FROM, TMP	/* TMP = FROM & 3, read again at _funaligned */
   132  	BNE	_funaligned
   133  
   134  	SUB	$31, TE, TMP	/* do 32-byte chunks if possible */
   135  	MOVW	TE, savedte-4(SP)	/* R8 (TE) is overwritten by the block load below */
   136  _f32loop:
   137  	CMP	TMP, TS
   138  	BHS	_f4tail		/* TS >= TE-31: fewer than 32 bytes left */
   139  
        	// Load 8 words at FROM and store them at TS. The IA.W form advances
        	// each pointer by 32.
   140  	MOVM.IA.W (FROM), [R1-R8] 
   141  	MOVM.IA.W [R1-R8], (TS)
   142  	B	_f32loop
   143  
   144  _f4tail:
   145  	MOVW	savedte-4(SP), TE	/* restore TE after the block moves */
   146  	SUB	$3, TE, TMP	/* do remaining words if possible */
   147  _f4loop:
   148  	CMP	TMP, TS
   149  	BHS	_f1tail		/* TS >= TE-3: fewer than 4 bytes left */
   150  
   151  	MOVW.P	4(FROM), TMP1	/* implicit write back */
   152  	MOVW.P	TMP1, 4(TS)	/* implicit write back */
   153  	B	_f4loop
   154  
   155  _f1tail:
   156  	CMP	TS, TE
   157  	BEQ	_return		/* TS has reached TE: everything is copied */
   158  
   159  	MOVBU.P	1(FROM), TMP	/* implicit write back */
   160  	MOVBU.P	TMP, 1(TS)	/* implicit write back */
   161  	B	_f1tail
   162  
        // Common exit for both directions. R0 is reloaded with the original to
        // argument, since R0 (TS) may have been advanced or reused as a data
        // register by now.
        // NOTE(review): the code shown never writes a result to the frame.
        // Presumably R0 is set to mirror the C memmove return value. Confirm
        // whether any caller relies on it.
   163  _return:
   164  	MOVW	to+0(FP), R0
   165  	RET
   166  
        // Backward copy with a word-aligned destination and an unaligned source.
        // TMP still holds FROM & 3 (1, 2 or 3) from _b4aligned. The three
        // conditional groups below select the shift pair and the byte correction
        // for that misalignment. Exactly one group executes.
   167  _bunaligned:
   168  	CMP	$2, TMP		/* is TMP < 2 ? */
   169  
   170  	MOVW.LT	$8, RSHIFT		/* (R(n)<<24)|(R(n-1)>>8) */
   171  	MOVW.LT	$24, LSHIFT
   172  	MOVW.LT	$1, OFFSET
   173  
   174  	MOVW.EQ	$16, RSHIFT		/* (R(n)<<16)|(R(n-1)>>16) */
   175  	MOVW.EQ	$16, LSHIFT
   176  	MOVW.EQ	$2, OFFSET
   177  
   178  	MOVW.GT	$24, RSHIFT		/* (R(n)<<8)|(R(n-1)>>24) */
   179  	MOVW.GT	$8, LSHIFT
   180  	MOVW.GT	$3, OFFSET
   181  
   182  	ADD	$16, TS, TMP	/* do 16-byte chunks if possible */
   183  	CMP	TMP, TE
   184  	BLS	_b1tail		/* TE <= TS+16: copy bytewise, FROM is still byte-exact here */
   185  
   186  	BIC	$3, FROM		/* align source */
   187  	MOVW	TS, savedts-4(SP)	/* R0 (TS) doubles as BR0 in the loop */
   188  	MOVW	(FROM), BR0	/* prime first block register */
   189  
        // Each round merges the word carried in BR0 with four newly loaded words
        // into four destination words. The statement order matters: BW3 takes its
        // upper part from BR0 before the block load replaces BR0, and each later
        // BWn overwrites the BR register it aliases only at that register's last
        // use. BR0 ends the round holding the lowest loaded word, which is carried
        // into the next round.
   190  _bu16loop:
   191  	CMP	TMP, TE
   192  	BLS	_bu1tail	/* TE <= TS+16: leave the merge loop */
   193  
   194  	MOVW	BR0<<LSHIFT, BW3
   195  	MOVM.DB.W (FROM), [BR0-BR3]
   196  	ORR	BR3>>RSHIFT, BW3
   197  
   198  	MOVW	BR3<<LSHIFT, BW2
   199  	ORR	BR2>>RSHIFT, BW2
   200  
   201  	MOVW	BR2<<LSHIFT, BW1
   202  	ORR	BR1>>RSHIFT, BW1
   203  
   204  	MOVW	BR1<<LSHIFT, BW0
   205  	ORR	BR0>>RSHIFT, BW0
   206  
   207  	MOVM.DB.W [BW0-BW3], (TE)
   208  	B	_bu16loop
   209  
   210  _bu1tail:
   211  	MOVW	savedts-4(SP), TS	/* restore TS, R0 was used as BR0 */
   212  	ADD	OFFSET, FROM	/* undo the rounding done by BIC: FROM is byte-exact again */
   213  	B	_b1tail
   214  
        // Forward copy with a word-aligned destination and an unaligned source.
        // TMP still holds FROM & 3 (1, 2 or 3) from _f4aligned. Exactly one of the
        // three conditional groups below executes.
   215  _funaligned:
   216  	CMP	$2, TMP
   217  
   218  	MOVW.LT	$8, RSHIFT		/* (R(n+1)<<24)|(R(n)>>8) */
   219  	MOVW.LT	$24, LSHIFT
   220  	MOVW.LT	$3, OFFSET
   221  
   222  	MOVW.EQ	$16, RSHIFT		/* (R(n+1)<<16)|(R(n)>>16) */
   223  	MOVW.EQ	$16, LSHIFT
   224  	MOVW.EQ	$2, OFFSET
   225  
   226  	MOVW.GT	$24, RSHIFT		/* (R(n+1)<<8)|(R(n)>>24) */
   227  	MOVW.GT	$8, LSHIFT
   228  	MOVW.GT	$1, OFFSET
   229  
   230  	SUB	$16, TE, TMP	/* do 16-byte chunks if possible */
   231  	CMP	TMP, TS
   232  	BHS	_f1tail		/* TS >= TE-16: copy bytewise, FROM is still byte-exact here */
   233  
   234  	BIC	$3, FROM		/* align source */
   235  	MOVW	TE, savedte-4(SP)	/* R8 (TE) doubles as FR3 in the loop */
   236  	MOVW.P	4(FROM), FR3	/* prime last block register, implicit write back */
   237  
        // Mirror image of the backward merge loop. FW0 takes its lower part from
        // FR3 before the block load replaces FR3, and each later FWn overwrites
        // the FR register it aliases only at that register's last use. FR3 ends
        // the round holding the highest loaded word, which is carried into the
        // next round.
   238  _fu16loop:
   239  	CMP	TMP, TS
   240  	BHS	_fu1tail	/* TS >= TE-16: leave the merge loop */
   241  
   242  	MOVW	FR3>>RSHIFT, FW0
   243  	MOVM.IA.W (FROM), [FR0,FR1,FR2,FR3]
   244  	ORR	FR0<<LSHIFT, FW0
   245  
   246  	MOVW	FR0>>RSHIFT, FW1
   247  	ORR	FR1<<LSHIFT, FW1
   248  
   249  	MOVW	FR1>>RSHIFT, FW2
   250  	ORR	FR2<<LSHIFT, FW2
   251  
   252  	MOVW	FR2>>RSHIFT, FW3
   253  	ORR	FR3<<LSHIFT, FW3
   254  
   255  	MOVM.IA.W [FW0,FW1,FW2,FW3], (TS)
   256  	B	_fu16loop
   257  
   258  _fu1tail:
   259  	MOVW	savedte-4(SP), TE	/* restore TE, R8 was used as FR3 */
   260  	SUB	OFFSET, FROM	/* FROM ran ahead (BIC rounding plus the priming load): make it byte-exact */
   261  	B	_f1tail