github.com/huandu/go@v0.0.0-20151114150818-04e615e41150/src/cmd/compile/internal/amd64/cgen.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package amd64
     6  
     7  import (
     8  	"cmd/compile/internal/gc"
     9  	"cmd/internal/obj"
    10  	"cmd/internal/obj/x86"
    11  )
    12  
    13  func blockcopy(n, ns *gc.Node, osrc, odst, w int64) {
    14  	var noddi gc.Node
    15  	gc.Nodreg(&noddi, gc.Types[gc.Tptr], x86.REG_DI)
    16  	var nodsi gc.Node
    17  	gc.Nodreg(&nodsi, gc.Types[gc.Tptr], x86.REG_SI)
    18  
    19  	var nodl gc.Node
    20  	var nodr gc.Node
    21  	if n.Ullman >= ns.Ullman {
    22  		gc.Agenr(n, &nodr, &nodsi)
    23  		if ns.Op == gc.ONAME {
    24  			gc.Gvardef(ns)
    25  		}
    26  		gc.Agenr(ns, &nodl, &noddi)
    27  	} else {
    28  		if ns.Op == gc.ONAME {
    29  			gc.Gvardef(ns)
    30  		}
    31  		gc.Agenr(ns, &nodl, &noddi)
    32  		gc.Agenr(n, &nodr, &nodsi)
    33  	}
    34  
    35  	if nodl.Reg != x86.REG_DI {
    36  		gmove(&nodl, &noddi)
    37  	}
    38  	if nodr.Reg != x86.REG_SI {
    39  		gmove(&nodr, &nodsi)
    40  	}
    41  	gc.Regfree(&nodl)
    42  	gc.Regfree(&nodr)
    43  
    44  	c := w % 8 // bytes
    45  	q := w / 8 // quads
    46  
    47  	var oldcx gc.Node
    48  	var cx gc.Node
    49  	savex(x86.REG_CX, &cx, &oldcx, nil, gc.Types[gc.TINT64])
    50  
    51  	// if we are copying forward on the stack and
    52  	// the src and dst overlap, then reverse direction
    53  	if osrc < odst && odst < osrc+w {
    54  		// reverse direction
    55  		gins(x86.ASTD, nil, nil) // set direction flag
    56  		if c > 0 {
    57  			gconreg(addptr, w-1, x86.REG_SI)
    58  			gconreg(addptr, w-1, x86.REG_DI)
    59  
    60  			gconreg(movptr, c, x86.REG_CX)
    61  			gins(x86.AREP, nil, nil)   // repeat
    62  			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)-
    63  		}
    64  
    65  		if q > 0 {
    66  			if c > 0 {
    67  				gconreg(addptr, -7, x86.REG_SI)
    68  				gconreg(addptr, -7, x86.REG_DI)
    69  			} else {
    70  				gconreg(addptr, w-8, x86.REG_SI)
    71  				gconreg(addptr, w-8, x86.REG_DI)
    72  			}
    73  
    74  			gconreg(movptr, q, x86.REG_CX)
    75  			gins(x86.AREP, nil, nil)   // repeat
    76  			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)-,*(DI)-
    77  		}
    78  
    79  		// we leave with the flag clear
    80  		gins(x86.ACLD, nil, nil)
    81  	} else {
    82  		// normal direction
    83  		if q > 128 || (gc.Nacl && q >= 4) {
    84  			gconreg(movptr, q, x86.REG_CX)
    85  			gins(x86.AREP, nil, nil)   // repeat
    86  			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
    87  		} else if q >= 4 {
    88  			p := gins(obj.ADUFFCOPY, nil, nil)
    89  			p.To.Type = obj.TYPE_ADDR
    90  			p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
    91  
    92  			// 14 and 128 = magic constants: see ../../runtime/asm_amd64.s
    93  			p.To.Offset = 14 * (128 - q)
    94  		} else if !gc.Nacl && c == 0 {
    95  			// We don't need the MOVSQ side-effect of updating SI and DI,
    96  			// and issuing a sequence of MOVQs directly is faster.
    97  			nodsi.Op = gc.OINDREG
    98  
    99  			noddi.Op = gc.OINDREG
   100  			for q > 0 {
   101  				gmove(&nodsi, &cx) // MOVQ x+(SI),CX
   102  				gmove(&cx, &noddi) // MOVQ CX,x+(DI)
   103  				nodsi.Xoffset += 8
   104  				noddi.Xoffset += 8
   105  				q--
   106  			}
   107  		} else {
   108  			for q > 0 {
   109  				gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
   110  				q--
   111  			}
   112  		}
   113  
   114  		// copy the remaining c bytes
   115  		if w < 4 || c <= 1 || (odst < osrc && osrc < odst+w) {
   116  			for c > 0 {
   117  				gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+
   118  				c--
   119  			}
   120  		} else if w < 8 || c <= 4 {
   121  			nodsi.Op = gc.OINDREG
   122  			noddi.Op = gc.OINDREG
   123  			cx.Type = gc.Types[gc.TINT32]
   124  			nodsi.Type = gc.Types[gc.TINT32]
   125  			noddi.Type = gc.Types[gc.TINT32]
   126  			if c > 4 {
   127  				nodsi.Xoffset = 0
   128  				noddi.Xoffset = 0
   129  				gmove(&nodsi, &cx)
   130  				gmove(&cx, &noddi)
   131  			}
   132  
   133  			nodsi.Xoffset = c - 4
   134  			noddi.Xoffset = c - 4
   135  			gmove(&nodsi, &cx)
   136  			gmove(&cx, &noddi)
   137  		} else {
   138  			nodsi.Op = gc.OINDREG
   139  			noddi.Op = gc.OINDREG
   140  			cx.Type = gc.Types[gc.TINT64]
   141  			nodsi.Type = gc.Types[gc.TINT64]
   142  			noddi.Type = gc.Types[gc.TINT64]
   143  			nodsi.Xoffset = c - 8
   144  			noddi.Xoffset = c - 8
   145  			gmove(&nodsi, &cx)
   146  			gmove(&cx, &noddi)
   147  		}
   148  	}
   149  
   150  	restx(&cx, &oldcx)
   151  }