github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/pkg/runtime/memmove_arm.s (about)

     1  // Inferno's libkern/memmove-arm.s
     2  // http://code.google.com/p/inferno-os/source/browse/libkern/memmove-arm.s
     3  //
     4  //         Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
     6  //         Portions Copyright 2009 The Go Authors. All rights reserved.
     7  //
     8  // Permission is hereby granted, free of charge, to any person obtaining a copy
     9  // of this software and associated documentation files (the "Software"), to deal
    10  // in the Software without restriction, including without limitation the rights
    11  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    12  // copies of the Software, and to permit persons to whom the Software is
    13  // furnished to do so, subject to the following conditions:
    14  //
    15  // The above copyright notice and this permission notice shall be included in
    16  // all copies or substantial portions of the Software.
    17  //
    18  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    19  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    20  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    21  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    22  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    23  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    24  // THE SOFTWARE.
    25  
    26  #include "../../cmd/ld/textflag.h"
    27  
    28  // TE or TS are spilled to the stack during bulk register moves.
    29  TS = 0					/* destination start pointer; advances during forward copies */
    30  TE = 8					/* destination end pointer (to + n); moves down during backward copies */
    31  
    32  // Warning: the linker will use R11 to synthesize certain instructions. Please
    33  // take care and double check with objdump.
    34  FROM = 11				/* source pointer */
    35  N = 12					/* byte count; only read while setting up the copy */
    36  TMP = 12				/* scratch; same register as N, but their live ranges don't overlap */
    37  TMP1 = 5				/* word scratch for the aligned 4-byte loops */
    38  // Shift counts and pointer fix-up used only by the unaligned-source loops (RSHIFT is the same register as TMP1).
    39  RSHIFT = 5				/* right-shift count, in bits */
    40  LSHIFT = 6				/* left-shift count, in bits */
    41  OFFSET = 7				/* bytes added to (backward) or subtracted from (forward) FROM after the unaligned loop */
    42  // Backward unaligned loop: BRn are the source words read, BWn the merged words written. Each BWn shares a register with BRn+1.
    43  BR0 = 0					/* shared with TS */
    44  BW0 = 1
    45  BR1 = 1
    46  BW1 = 2
    47  BR2 = 2
    48  BW2 = 3
    49  BR3 = 3
    50  BW3 = 4
    51  // Forward unaligned loop: FRn are the source words read, FWn the merged words written. FW1..FW3 share registers with FR0..FR2.
    52  FW0 = 1
    53  FR0 = 2
    54  FW1 = 2
    55  FR1 = 3
    56  FW2 = 3
    57  FR2 = 4
    58  FW3 = 4
    59  FR3 = 8					/* shared with TE */
    60  
    61  TEXT runtime·memmove(SB), NOSPLIT, $4-12	/* memmove(to, from, n): copies n bytes; the 4-byte frame holds the spilled TS or TE */
    62  _memmove:
    63  	MOVW	to+0(FP), R(TS)
    64  	MOVW	from+4(FP), R(FROM)
    65  	MOVW	n+8(FP), R(N)
    66  
    67  	ADD	R(N), R(TS), R(TE)	/* to end pointer */
    68  
    69  	CMP	R(FROM), R(TS)
    70  	BLS	_forward		/* to <= from (unsigned): copy forward, otherwise fall through and copy backward */
    71  
    72  _back:					/* backward copy: FROM and TE walk down from the ends */
    73  	ADD	R(N), R(FROM)		/* from end pointer */
    74  	CMP	$4, R(N)		/* need at least 4 bytes to copy */
    75  	BLT	_b1tail			/* fewer than 4 bytes: byte loop only */
    76  
    77  _b4align:				/* align destination on 4 */
    78  	AND.S	$3, R(TE), R(TMP)	/* TMP = TE & 3, flags set; this is the first write to R12, so N is dead from here */
    79  	BEQ	_b4aligned
    80  
    81  	MOVBU.W	-1(R(FROM)), R(TMP)	/* pre-indexed */
    82  	MOVBU.W	R(TMP), -1(R(TE))	/* pre-indexed */
    83  	B	_b4align
    84  
    85  _b4aligned:				/* is source now aligned? */
    86  	AND.S	$3, R(FROM), R(TMP)	/* TMP = FROM & 3; a nonzero value is consumed by _bunaligned */
    87  	BNE	_bunaligned
    88  
    89  	ADD	$31, R(TS), R(TMP)	/* do 32-byte chunks if possible */
    90  	MOVW	R(TS), savedts-4(SP)	/* spill TS: R0 is overwritten by the R0-R7 block moves below */
    91  _b32loop:
    92  	CMP	R(TMP), R(TE)
    93  	BLS	_b4tail			/* TE <= TS+31: fewer than 32 bytes remain */
    94  
    95  	MOVM.DB.W (R(FROM)), [R0-R7]	/* load the 8 words below FROM; FROM -= 32 */
    96  	MOVM.DB.W [R0-R7], (R(TE))	/* store them below TE; TE -= 32 */
    97  	B	_b32loop
    98  
    99  _b4tail:				/* do remaining words if possible */
   100  	MOVW	savedts-4(SP), R(TS)	/* restore TS clobbered by the block moves */
   101  	ADD	$3, R(TS), R(TMP)
   102  _b4loop:
   103  	CMP	R(TMP), R(TE)
   104  	BLS	_b1tail			/* TE <= TS+3: fewer than 4 bytes remain */
   105  
   106  	MOVW.W	-4(R(FROM)), R(TMP1)	/* pre-indexed */
   107  	MOVW.W	R(TMP1), -4(R(TE))	/* pre-indexed */
   108  	B	_b4loop
   109  
   110  _b1tail:				/* remaining bytes */
   111  	CMP	R(TE), R(TS)
   112  	BEQ	_return			/* TE has reached TS: nothing left to copy */
   113  
   114  	MOVBU.W	-1(R(FROM)), R(TMP)	/* pre-indexed */
   115  	MOVBU.W	R(TMP), -1(R(TE))	/* pre-indexed */
   116  	B	_b1tail
   117  
   118  _forward:				/* forward copy: FROM and TS walk up from the starts */
   119  	CMP	$4, R(N)		/* need at least 4 bytes to copy */
   120  	BLT	_f1tail			/* fewer than 4 bytes: byte loop only */
   121  
   122  _f4align:				/* align destination on 4 */
   123  	AND.S	$3, R(TS), R(TMP)	/* TMP = TS & 3, flags set; this is the first write to R12, so N is dead from here */
   124  	BEQ	_f4aligned
   125  
   126  	MOVBU.P	1(R(FROM)), R(TMP)	/* implicit write back */
   127  	MOVBU.P	R(TMP), 1(R(TS))	/* implicit write back */
   128  	B	_f4align
   129  
   130  _f4aligned:				/* is source now aligned? */
   131  	AND.S	$3, R(FROM), R(TMP)	/* TMP = FROM & 3; a nonzero value is consumed by _funaligned */
   132  	BNE	_funaligned
   133  
   134  	SUB	$31, R(TE), R(TMP)	/* do 32-byte chunks if possible */
   135  	MOVW	R(TE), savedte-4(SP)	/* spill TE: R8 is overwritten by the R1-R8 block moves below */
   136  _f32loop:
   137  	CMP	R(TMP), R(TS)
   138  	BHS	_f4tail			/* TS >= TE-31: fewer than 32 bytes remain */
   139  
   140  	MOVM.IA.W (R(FROM)), [R1-R8] 	/* load 8 words at FROM; FROM += 32 */
   141  	MOVM.IA.W [R1-R8], (R(TS))	/* store them at TS; TS += 32 */
   142  	B	_f32loop
   143  
   144  _f4tail:
   145  	MOVW	savedte-4(SP), R(TE)	/* restore TE clobbered by the block moves */
   146  	SUB	$3, R(TE), R(TMP)	/* do remaining words if possible */
   147  _f4loop:
   148  	CMP	R(TMP), R(TS)
   149  	BHS	_f1tail			/* TS >= TE-3: fewer than 4 bytes remain */
   150  
   151  	MOVW.P	4(R(FROM)), R(TMP1)	/* implicit write back */
   152  	MOVW.P	R(TMP1), 4(R(TS))	/* implicit write back */
   153  	B	_f4loop
   154  
   155  _f1tail:				/* remaining bytes */
   156  	CMP	R(TS), R(TE)
   157  	BEQ	_return			/* TS has reached TE: nothing left to copy */
   158  
   159  	MOVBU.P	1(R(FROM)), R(TMP)	/* implicit write back */
   160  	MOVBU.P	R(TMP), 1(R(TS))	/* implicit write back */
   161  	B	_f1tail
   162  
   163  _return:
   164  	MOVW	to+0(FP), R0		/* R0 = the to argument again (R0 is TS, which the forward paths advance) */
   165  	RET
   166  
   167  _bunaligned:				/* backward copy, destination word-aligned, source not */
   168  	CMP	$2, R(TMP)		/* TMP = FROM & 3 from _b4aligned, i.e. 1, 2 or 3; the flags select one group below */
   169  
   170  	MOVW.LT	$8, R(RSHIFT)		/* (R(n)<<24)|(R(n-1)>>8) */
   171  	MOVW.LT	$24, R(LSHIFT)
   172  	MOVW.LT	$1, R(OFFSET)
   173  
   174  	MOVW.EQ	$16, R(RSHIFT)		/* (R(n)<<16)|(R(n-1)>>16) */
   175  	MOVW.EQ	$16, R(LSHIFT)
   176  	MOVW.EQ	$2, R(OFFSET)
   177  
   178  	MOVW.GT	$24, R(RSHIFT)		/* (R(n)<<8)|(R(n-1)>>24) */
   179  	MOVW.GT	$8, R(LSHIFT)
   180  	MOVW.GT	$3, R(OFFSET)
   181  
   182  	ADD	$16, R(TS), R(TMP)	/* do 16-byte chunks if possible */
   183  	CMP	R(TMP), R(TE)
   184  	BLS	_b1tail			/* TE <= TS+16: use the byte loop; FROM has not been rounded down yet */
   185  
   186  	BIC	$3, R(FROM)		/* align source */
   187  	MOVW	R(TS), savedts-4(SP)	/* spill TS: R0 doubles as BR0 */
   188  	MOVW	(R(FROM)), R(BR0)	/* prime first block register */
   189  
   190  _bu16loop:				/* each pass merges 5 source words (old BR0 + 4 new) into 4 destination words */
   191  	CMP	R(TMP), R(TE)
   192  	BLS	_bu1tail		/* TE <= TS+16: leave the rest to the byte loop */
   193  
   194  	MOVW	R(BR0)<<R(LSHIFT), R(BW3)	/* start BW3 from the word kept from the previous pass */
   195  	MOVM.DB.W (R(FROM)), [R(BR0)-R(BR3)]	/* load the 4 words below FROM; FROM -= 16 */
   196  	ORR	R(BR3)>>R(RSHIFT), R(BW3)
   197  
   198  	MOVW	R(BR3)<<R(LSHIFT), R(BW2)	/* BW2 is BR3's register: BR3 was last read on the line above */
   199  	ORR	R(BR2)>>R(RSHIFT), R(BW2)
   200  
   201  	MOVW	R(BR2)<<R(LSHIFT), R(BW1)	/* BW1 is BR2's register */
   202  	ORR	R(BR1)>>R(RSHIFT), R(BW1)
   203  
   204  	MOVW	R(BR1)<<R(LSHIFT), R(BW0)	/* BW0 is BR1's register; BR0 stays intact for the next pass */
   205  	ORR	R(BR0)>>R(RSHIFT), R(BW0)
   206  
   207  	MOVM.DB.W [R(BW0)-R(BW3)], (R(TE))	/* store 4 merged words below TE; TE -= 16 */
   208  	B	_bu16loop
   209  
   210  _bu1tail:
   211  	MOVW	savedts-4(SP), R(TS)	/* restore TS clobbered via BR0 */
   212  	ADD	R(OFFSET), R(FROM)	/* add back the low bits cleared by BIC so FROM is a byte pointer again */
   213  	B	_b1tail
   214  
   215  _funaligned:				/* forward copy, destination word-aligned, source not */
   216  	CMP	$2, R(TMP)		/* TMP = FROM & 3 from _f4aligned, i.e. 1, 2 or 3; the flags select one group below */
   217  
   218  	MOVW.LT	$8, R(RSHIFT)		/* (R(n+1)<<24)|(R(n)>>8) */
   219  	MOVW.LT	$24, R(LSHIFT)
   220  	MOVW.LT	$3, R(OFFSET)
   221  
   222  	MOVW.EQ	$16, R(RSHIFT)		/* (R(n+1)<<16)|(R(n)>>16) */
   223  	MOVW.EQ	$16, R(LSHIFT)
   224  	MOVW.EQ	$2, R(OFFSET)
   225  
   226  	MOVW.GT	$24, R(RSHIFT)		/* (R(n+1)<<8)|(R(n)>>24) */
   227  	MOVW.GT	$8, R(LSHIFT)
   228  	MOVW.GT	$1, R(OFFSET)
   229  
   230  	SUB	$16, R(TE), R(TMP)	/* do 16-byte chunks if possible */
   231  	CMP	R(TMP), R(TS)
   232  	BHS	_f1tail			/* TS >= TE-16: use the byte loop; FROM has not been rounded down yet */
   233  
   234  	BIC	$3, R(FROM)		/* align source */
   235  	MOVW	R(TE), savedte-4(SP)	/* spill TE: R8 doubles as FR3 */
   236  	MOVW.P	4(R(FROM)), R(FR3)	/* prime last block register, implicit write back */
   237  
   238  _fu16loop:				/* each pass merges 5 source words (old FR3 + 4 new) into 4 destination words */
   239  	CMP	R(TMP), R(TS)
   240  	BHS	_fu1tail		/* TS >= TE-16: leave the rest to the byte loop */
   241  
   242  	MOVW	R(FR3)>>R(RSHIFT), R(FW0)	/* start FW0 from the word kept from the previous pass */
   243  	MOVM.IA.W (R(FROM)), [R(FR0),R(FR1),R(FR2),R(FR3)]	/* load 4 words at FROM; FROM += 16 */
   244  	ORR	R(FR0)<<R(LSHIFT), R(FW0)
   245  
   246  	MOVW	R(FR0)>>R(RSHIFT), R(FW1)	/* FW1 is FR0's register: FR0 was last read on the line above */
   247  	ORR	R(FR1)<<R(LSHIFT), R(FW1)
   248  
   249  	MOVW	R(FR1)>>R(RSHIFT), R(FW2)	/* FW2 is FR1's register */
   250  	ORR	R(FR2)<<R(LSHIFT), R(FW2)
   251  
   252  	MOVW	R(FR2)>>R(RSHIFT), R(FW3)	/* FW3 is FR2's register; FR3 stays intact for the next pass */
   253  	ORR	R(FR3)<<R(LSHIFT), R(FW3)
   254  
   255  	MOVM.IA.W [R(FW0),R(FW1),R(FW2),R(FW3)], (R(TS))	/* store 4 merged words at TS; TS += 16 */
   256  	B	_fu16loop
   257  
   258  _fu1tail:
   259  	MOVW	savedte-4(SP), R(TE)	/* restore TE clobbered via FR3 */
   260  	SUB	R(OFFSET), R(FROM)	/* FROM is one word past the primed word; OFFSET = 4 - (FROM & 3) steps back to the next uncopied byte */
   261  	B	_f1tail