github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/runtime/memmove_arm.s

github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/runtime/memmove_arm.s (about)

     1  // Inferno's libkern/memmove-arm.s
     2  // http://code.google.com/p/inferno-os/source/browse/libkern/memmove-arm.s
     3  //
     4  //         Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
     6  //         Portions Copyright 2009 The Go Authors. All rights reserved.
     7  //
     8  // Permission is hereby granted, free of charge, to any person obtaining a copy
     9  // of this software and associated documentation files (the "Software"), to deal
    10  // in the Software without restriction, including without limitation the rights
    11  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    12  // copies of the Software, and to permit persons to whom the Software is
    13  // furnished to do so, subject to the following conditions:
    14  //
    15  // The above copyright notice and this permission notice shall be included in
    16  // all copies or substantial portions of the Software.
    17  //
    18  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    19  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    20  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    21  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    22  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    23  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    24  // THE SOFTWARE.
    25  
    26  // TS or TE is spilled to the stack while a bulk register move overwrites its register.
    27  TS = 0					/* register 0: destination start pointer */
    28  TE = 8					/* register 8: destination end pointer (to + n) */
    29  
    30  // Warning: the linker will use R11 to synthesize certain instructions. Please
    31  // take care and double check with objdump.
    32  FROM = 11				/* register 11: source pointer */
    33  N = 12					/* register 12: byte count; read only before the copy loops start */
    34  TMP = 12				/* same register as N: N is not read again once TMP is first written */
    35  TMP1 = 5				/* register 5: word temporary in the aligned word loops (_b4loop, _f4loop) */
    36  
    37  RSHIFT = 5				/* right-shift amount in the unaligned loops; same register as TMP1, which those paths do not use */
    38  LSHIFT = 6				/* left-shift amount in the unaligned loops */
    39  OFFSET = 7				/* correction applied to FROM after an unaligned loop (added backward, subtracted forward) */
    40  
    41  BR0 = 0					/* shared with TS */
    42  BW0 = 1					/* BRn: words loaded, BWn: words stored by the backward unaligned loop */
    43  BR1 = 1					/* each BWn uses the same register as BR(n+1) for n < 3 */
    44  BW1 = 2
    45  BR2 = 2
    46  BW2 = 3
    47  BR3 = 3
    48  BW3 = 4
    49  
    50  FW0 = 1					/* FRn: words loaded, FWn: words stored by the forward unaligned loop */
    51  FR0 = 2					/* each FW(n+1) uses the same register as FRn for n < 3 */
    52  FW1 = 2
    53  FR1 = 3
    54  FW2 = 3
    55  FR2 = 4
    56  FW3 = 4
    57  FR3 = 8					/* shared with TE */
    58  
    59  TEXT runtime·memmove(SB), 7, $4	/* memmove(to, from, n): copy n bytes; $4 = one-word frame used as the TS/TE spill slot */
    60  _memmove:				/* arguments are read from to+0(FP), from+4(FP), n+8(FP) */
    61  	MOVW	to+0(FP), R(TS)		/* TS = destination start */
    62  	MOVW	from+4(FP), R(FROM)	/* FROM = source start */
    63  	MOVW	n+8(FP), R(N)		/* N = byte count */
    64  
    65  	ADD	R(N), R(TS), R(TE)	/* TE = to + n, the destination end pointer */
    66  
    67  	CMP	R(FROM), R(TS)		/* to <= from (unsigned): copy forward; otherwise fall into the backward copy */
    68  	BLS	_forward
    69  
    70  _back:					/* backward copy: both pointers start at the ends and move down */
    71  	ADD	R(N), R(FROM)		/* FROM = from + n, the source end pointer */
    72  	CMP	$4, R(N)		/* need at least 4 bytes to copy; BLT is a signed comparison */
    73  	BLT	_b1tail
    74  
    75  _b4align:				/* copy single bytes downward until TE is 4-byte aligned */
    76  	AND.S	$3, R(TE), R(TMP)	/* Z set when TE & 3 == 0; overwrites N (same register as TMP) */
    77  	BEQ	_b4aligned
    78  
    79  	MOVBU.W	-1(R(FROM)), R(TMP)	/* pre-indexed: FROM -= 1, then load the byte */
    80  	MOVBU.W	R(TMP), -1(R(TE))	/* pre-indexed: TE -= 1, then store the byte */
    81  	B	_b4align
    82  
    83  _b4aligned:				/* destination end is aligned; is source now aligned? */
    84  	AND.S	$3, R(FROM), R(TMP)	/* TMP = FROM & 3; a non-zero value is consumed by _bunaligned */
    85  	BNE	_bunaligned
    86  
    87  	ADD	$31, R(TS), R(TMP)	/* TMP = TS + 31, loop bound for 32-byte chunks */
    88  	MOVW	R(TS), savedts+4(SP)	/* spill TS: its register is in the R0-R7 block used below */
    89  _b32loop:
    90  	CMP	R(TMP), R(TE)		/* exit when TE <= TS + 31, i.e. fewer than 32 bytes remain */
    91  	BLS	_b4tail
    92  
    93  	MOVM.DB.W (R(FROM)), [R0-R7]	/* load the 8 words below FROM; FROM -= 32 */
    94  	MOVM.DB.W [R0-R7], (R(TE))	/* store them below TE; TE -= 32 */
    95  	B	_b32loop
    96  
    97  _b4tail:				/* do remaining words if possible */
    98  	MOVW	savedts+4(SP), R(TS)	/* reload the spilled TS */
    99  	ADD	$3, R(TS), R(TMP)	/* TMP = TS + 3, loop bound for single words */
   100  _b4loop:
   101  	CMP	R(TMP), R(TE)		/* exit when TE <= TS + 3, i.e. fewer than 4 bytes remain */
   102  	BLS	_b1tail
   103  
   104  	MOVW.W	-4(R(FROM)), R(TMP1)	/* pre-indexed: FROM -= 4, then load the word */
   105  	MOVW.W	R(TMP1), -4(R(TE))	/* pre-indexed: TE -= 4, then store the word */
   106  	B	_b4loop
   107  
   108  _b1tail:				/* remaining bytes, one at a time, downward */
   109  	CMP	R(TE), R(TS)		/* finished once TE has come down to TS */
   110  	BEQ	_return
   111  
   112  	MOVBU.W	-1(R(FROM)), R(TMP)	/* pre-indexed: FROM -= 1, then load the byte */
   113  	MOVBU.W	R(TMP), -1(R(TE))	/* pre-indexed: TE -= 1, then store the byte */
   114  	B	_b1tail
   115  
   116  _forward:				/* forward copy: TS and FROM move up toward TE */
   117  	CMP	$4, R(N)		/* need at least 4 bytes to copy; BLT is a signed comparison */
   118  	BLT	_f1tail
   119  
   120  _f4align:				/* copy single bytes upward until TS is 4-byte aligned */
   121  	AND.S	$3, R(TS), R(TMP)	/* Z set when TS & 3 == 0; overwrites N (same register as TMP) */
   122  	BEQ	_f4aligned
   123  
   124  	MOVBU.P	1(R(FROM)), R(TMP)	/* post-indexed: load the byte, then FROM += 1 */
   125  	MOVBU.P	R(TMP), 1(R(TS))	/* post-indexed: store the byte, then TS += 1 */
   126  	B	_f4align
   127  
   128  _f4aligned:				/* destination is aligned; is source now aligned? */
   129  	AND.S	$3, R(FROM), R(TMP)	/* TMP = FROM & 3; a non-zero value is consumed by _funaligned */
   130  	BNE	_funaligned
   131  
   132  	SUB	$31, R(TE), R(TMP)	/* TMP = TE - 31, loop bound for 32-byte chunks */
   133  	MOVW	R(TE), savedte+4(SP)	/* spill TE: its register is in the R1-R8 block used below */
   134  _f32loop:
   135  	CMP	R(TMP), R(TS)		/* exit when TS >= TE - 31, i.e. fewer than 32 bytes remain */
   136  	BHS	_f4tail
   137  
   138  	MOVM.IA.W (R(FROM)), [R1-R8] 	/* load 8 words at FROM; FROM += 32 */
   139  	MOVM.IA.W [R1-R8], (R(TS))	/* store them at TS; TS += 32 */
   140  	B	_f32loop
   141  
   142  _f4tail:
   143  	MOVW	savedte+4(SP), R(TE)	/* reload the spilled TE */
   144  	SUB	$3, R(TE), R(TMP)	/* do remaining words if possible: TMP = TE - 3 */
   145  _f4loop:
   146  	CMP	R(TMP), R(TS)		/* exit when TS >= TE - 3, i.e. fewer than 4 bytes remain */
   147  	BHS	_f1tail
   148  
   149  	MOVW.P	4(R(FROM)), R(TMP1)	/* post-indexed: load the word, then FROM += 4 */
   150  	MOVW.P	R(TMP1), 4(R(TS))	/* post-indexed: store the word, then TS += 4 */
   151  	B	_f4loop
   152  
   153  _f1tail:				/* remaining bytes, one at a time, upward */
   154  	CMP	R(TS), R(TE)		/* finished once TS has reached TE */
   155  	BEQ	_return
   156  
   157  	MOVBU.P	1(R(FROM)), R(TMP)	/* post-indexed: load the byte, then FROM += 1 */
   158  	MOVBU.P	R(TMP), 1(R(TS))	/* post-indexed: store the byte, then TS += 1 */
   159  	B	_f1tail
   160  
   161  _return:
   162  	MOVW	to+0(FP), R0		/* reload the original `to` argument into R0 (presumably the return value; confirm against callers) */
   163  	RET
   164  
   165  _bunaligned:				/* backward copy, destination aligned, source end misaligned by TMP = FROM & 3 */
   166  	CMP	$2, R(TMP)		/* is R(TMP) < 2 ? The flags select exactly one of the three groups below */
   167  
   168  	MOVW.LT	$8, R(RSHIFT)		/* (R(n)<<24)|(R(n-1)>>8) */
   169  	MOVW.LT	$24, R(LSHIFT)
   170  	MOVW.LT	$1, R(OFFSET)		/* misalignment 1 */
   171  
   172  	MOVW.EQ	$16, R(RSHIFT)		/* (R(n)<<16)|(R(n-1)>>16) */
   173  	MOVW.EQ	$16, R(LSHIFT)
   174  	MOVW.EQ	$2, R(OFFSET)		/* misalignment 2 */
   175  
   176  	MOVW.GT	$24, R(RSHIFT)		/* (R(n)<<8)|(R(n-1)>>24) */
   177  	MOVW.GT	$8, R(LSHIFT)
   178  	MOVW.GT	$3, R(OFFSET)		/* misalignment 3 */
   179  
   180  	ADD	$16, R(TS), R(TMP)	/* do 16-byte chunks if possible: TMP = TS + 16 */
   181  	CMP	R(TMP), R(TE)
   182  	BLS	_b1tail			/* TE <= TS + 16: byte loop only; TS and FROM are still unmodified here */
   183  
   184  	BIC	$3, R(FROM)		/* align source: round FROM down by the misalignment (== OFFSET) */
   185  	MOVW	R(TS), savedts+4(SP)	/* spill TS: BR0 is the same register */
   186  	MOVW	(R(FROM)), R(BR0)	/* prime first block register */
   187  
   188  _bu16loop:				/* each pass builds 4 aligned output words from 5 source words */
   189  	CMP	R(TMP), R(TE)		/* exit when TE <= TS + 16 */
   190  	BLS	_bu1tail
   191  
   192  	MOVW	R(BR0)<<R(LSHIFT), R(BW3)	/* upper part of the top output word, from the word left over from the previous pass */
   193  	MOVM.DB.W (R(FROM)), [R(BR0)-R(BR3)]	/* load the 4 words below FROM; FROM -= 16 */
   194  	ORR	R(BR3)>>R(RSHIFT), R(BW3)	/* lower part, from the highest newly loaded word */
   195  
   196  	MOVW	R(BR3)<<R(LSHIFT), R(BW2)	/* BW2 overwrites BR3's register, so BR3 must already be consumed above */
   197  	ORR	R(BR2)>>R(RSHIFT), R(BW2)
   198  
   199  	MOVW	R(BR2)<<R(LSHIFT), R(BW1)	/* BW1 overwrites BR2's register */
   200  	ORR	R(BR1)>>R(RSHIFT), R(BW1)
   201  
   202  	MOVW	R(BR1)<<R(LSHIFT), R(BW0)	/* BW0 overwrites BR1's register; BR0 stays live for the next pass */
   203  	ORR	R(BR0)>>R(RSHIFT), R(BW0)
   204  
   205  	MOVM.DB.W [R(BW0)-R(BW3)], (R(TE))	/* store the 4 words below TE; TE -= 16 */
   206  	B	_bu16loop
   207  
   208  _bu1tail:
   209  	MOVW	savedts+4(SP), R(TS)	/* reload the spilled TS */
   210  	ADD	R(OFFSET), R(FROM)	/* add back the bytes removed by BIC so FROM is a byte-exact source position again */
   211  	B	_b1tail
   212  
   213  _funaligned:				/* forward copy, destination aligned, source misaligned by TMP = FROM & 3 */
   214  	CMP	$2, R(TMP)		/* the flags select exactly one of the three groups below */
   215  
   216  	MOVW.LT	$8, R(RSHIFT)		/* (R(n+1)<<24)|(R(n)>>8) */
   217  	MOVW.LT	$24, R(LSHIFT)
   218  	MOVW.LT	$3, R(OFFSET)		/* misalignment 1: OFFSET = 4 - 1 */
   219  
   220  	MOVW.EQ	$16, R(RSHIFT)		/* (R(n+1)<<16)|(R(n)>>16) */
   221  	MOVW.EQ	$16, R(LSHIFT)
   222  	MOVW.EQ	$2, R(OFFSET)		/* misalignment 2: OFFSET = 4 - 2 */
   223  
   224  	MOVW.GT	$24, R(RSHIFT)		/* (R(n+1)<<8)|(R(n)>>24) */
   225  	MOVW.GT	$8, R(LSHIFT)
   226  	MOVW.GT	$1, R(OFFSET)		/* misalignment 3: OFFSET = 4 - 3 */
   227  
   228  	SUB	$16, R(TE), R(TMP)	/* do 16-byte chunks if possible: TMP = TE - 16 */
   229  	CMP	R(TMP), R(TS)
   230  	BHS	_f1tail			/* TS >= TE - 16: byte loop only; TE and FROM are still unmodified here */
   231  
   232  	BIC	$3, R(FROM)		/* align source: round FROM down to a word boundary */
   233  	MOVW	R(TE), savedte+4(SP)	/* spill TE: FR3 is the same register */
   234  	MOVW.P	4(R(FROM)), R(FR3)	/* prime last block register, implicit write back (FROM += 4) */
   235  
   236  _fu16loop:				/* each pass builds 4 aligned output words from 5 source words */
   237  	CMP	R(TMP), R(TS)		/* exit when TS >= TE - 16 */
   238  	BHS	_fu1tail
   239  
   240  	MOVW	R(FR3)>>R(RSHIFT), R(FW0)	/* lower part of the first output word, from the word left over from the previous pass */
   241  	MOVM.IA.W (R(FROM)), [R(FR0),R(FR1),R(FR2),R(FR3)]	/* load 4 words at FROM; FROM += 16 */
   242  	ORR	R(FR0)<<R(LSHIFT), R(FW0)	/* upper part, from the first newly loaded word */
   243  
   244  	MOVW	R(FR0)>>R(RSHIFT), R(FW1)	/* FW1 overwrites FR0's register, so FR0 must already be consumed above */
   245  	ORR	R(FR1)<<R(LSHIFT), R(FW1)
   246  
   247  	MOVW	R(FR1)>>R(RSHIFT), R(FW2)	/* FW2 overwrites FR1's register */
   248  	ORR	R(FR2)<<R(LSHIFT), R(FW2)
   249  
   250  	MOVW	R(FR2)>>R(RSHIFT), R(FW3)	/* FW3 overwrites FR2's register; FR3 stays live for the next pass */
   251  	ORR	R(FR3)<<R(LSHIFT), R(FW3)
   252  
   253  	MOVM.IA.W [R(FW0),R(FW1),R(FW2),R(FW3)], (R(TS))	/* store 4 words at TS; TS += 16 */
   254  	B	_fu16loop
   255  
   256  _fu1tail:
   257  	MOVW	savedte+4(SP), R(TE)	/* reload the spilled TE */
   258  	SUB	R(OFFSET), R(FROM)	/* undo the BIC rounding and the 4-byte priming advance so FROM is byte-exact again */
   259  	B	_f1tail