github.com/q45/go@v0.0.0-20151101211701-a4fb8c13db3f/src/cmd/compile/internal/amd64/cgen.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package amd64
     6  
     7  import (
     8  	"cmd/compile/internal/gc"
     9  	"cmd/internal/obj"
    10  	"cmd/internal/obj/x86"
    11  )
    12  
    13  func blockcopy(n, ns *gc.Node, osrc, odst, w int64) {
    14  	var noddi gc.Node
    15  	gc.Nodreg(&noddi, gc.Types[gc.Tptr], x86.REG_DI)
    16  	var nodsi gc.Node
    17  	gc.Nodreg(&nodsi, gc.Types[gc.Tptr], x86.REG_SI)
    18  
    19  	var nodl gc.Node
    20  	var nodr gc.Node
    21  	if n.Ullman >= ns.Ullman {
    22  		gc.Agenr(n, &nodr, &nodsi)
    23  		if ns.Op == gc.ONAME {
    24  			gc.Gvardef(ns)
    25  		}
    26  		gc.Agenr(ns, &nodl, &noddi)
    27  	} else {
    28  		if ns.Op == gc.ONAME {
    29  			gc.Gvardef(ns)
    30  		}
    31  		gc.Agenr(ns, &nodl, &noddi)
    32  		gc.Agenr(n, &nodr, &nodsi)
    33  	}
    34  
    35  	if nodl.Reg != x86.REG_DI {
    36  		gmove(&nodl, &noddi)
    37  	}
    38  	if nodr.Reg != x86.REG_SI {
    39  		gmove(&nodr, &nodsi)
    40  	}
    41  	gc.Regfree(&nodl)
    42  	gc.Regfree(&nodr)
    43  
    44  	c := w % 8 // bytes
    45  	q := w / 8 // quads
    46  
    47  	var oldcx gc.Node
    48  	var cx gc.Node
    49  	savex(x86.REG_CX, &cx, &oldcx, nil, gc.Types[gc.TINT64])
    50  
    51  	// if we are copying forward on the stack and
    52  	// the src and dst overlap, then reverse direction
    53  	if osrc < odst && odst < osrc+w {
    54  		// reverse direction
    55  		gins(x86.ASTD, nil, nil) // set direction flag
    56  		if c > 0 {
    57  			gconreg(addptr, w-1, x86.REG_SI)
    58  			gconreg(addptr, w-1, x86.REG_DI)
    59  
    60  			gconreg(movptr, c, x86.REG_CX)
    61  			gins(x86.AREP, nil, nil)   // repeat
    62  			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)-
    63  		}
    64  
    65  		if q > 0 {
    66  			if c > 0 {
    67  				gconreg(addptr, -7, x86.REG_SI)
    68  				gconreg(addptr, -7, x86.REG_DI)
    69  			} else {
    70  				gconreg(addptr, w-8, x86.REG_SI)
    71  				gconreg(addptr, w-8, x86.REG_DI)
    72  			}
    73  
    74  			gconreg(movptr, q, x86.REG_CX)
    75  			gins(x86.AREP, nil, nil)   // repeat
    76  			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)-,*(DI)-
    77  		}
    78  
    79  		// we leave with the flag clear
    80  		gins(x86.ACLD, nil, nil)
    81  	} else {
    82  		// normal direction
    83  		if q > 128 || (gc.Nacl && q >= 4) || (obj.Getgoos() == "plan9" && q >= 4) {
    84  			gconreg(movptr, q, x86.REG_CX)
    85  			gins(x86.AREP, nil, nil)   // repeat
    86  			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
    87  		} else if q >= 4 {
    88  			var oldx0 gc.Node
    89  			var x0 gc.Node
    90  			savex(x86.REG_X0, &x0, &oldx0, nil, gc.Types[gc.TFLOAT64])
    91  
    92  			p := gins(obj.ADUFFCOPY, nil, nil)
    93  			p.To.Type = obj.TYPE_ADDR
    94  			p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
    95  
    96  			// 64 blocks taking 14 bytes each
    97  			// see ../../../../runtime/mkduff.go
    98  			p.To.Offset = 14 * (64 - q/2)
    99  			restx(&x0, &oldx0)
   100  
   101  			if q%2 != 0 {
   102  				gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
   103  			}
   104  		} else if !gc.Nacl && c == 0 {
   105  			// We don't need the MOVSQ side-effect of updating SI and DI,
   106  			// and issuing a sequence of MOVQs directly is faster.
   107  			nodsi.Op = gc.OINDREG
   108  
   109  			noddi.Op = gc.OINDREG
   110  			for q > 0 {
   111  				gmove(&nodsi, &cx) // MOVQ x+(SI),CX
   112  				gmove(&cx, &noddi) // MOVQ CX,x+(DI)
   113  				nodsi.Xoffset += 8
   114  				noddi.Xoffset += 8
   115  				q--
   116  			}
   117  		} else {
   118  			for q > 0 {
   119  				gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
   120  				q--
   121  			}
   122  		}
   123  
   124  		// copy the remaining c bytes
   125  		if w < 4 || c <= 1 || (odst < osrc && osrc < odst+w) {
   126  			for c > 0 {
   127  				gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+
   128  				c--
   129  			}
   130  		} else if w < 8 || c <= 4 {
   131  			nodsi.Op = gc.OINDREG
   132  			noddi.Op = gc.OINDREG
   133  			cx.Type = gc.Types[gc.TINT32]
   134  			nodsi.Type = gc.Types[gc.TINT32]
   135  			noddi.Type = gc.Types[gc.TINT32]
   136  			if c > 4 {
   137  				nodsi.Xoffset = 0
   138  				noddi.Xoffset = 0
   139  				gmove(&nodsi, &cx)
   140  				gmove(&cx, &noddi)
   141  			}
   142  
   143  			nodsi.Xoffset = c - 4
   144  			noddi.Xoffset = c - 4
   145  			gmove(&nodsi, &cx)
   146  			gmove(&cx, &noddi)
   147  		} else {
   148  			nodsi.Op = gc.OINDREG
   149  			noddi.Op = gc.OINDREG
   150  			cx.Type = gc.Types[gc.TINT64]
   151  			nodsi.Type = gc.Types[gc.TINT64]
   152  			noddi.Type = gc.Types[gc.TINT64]
   153  			nodsi.Xoffset = c - 8
   154  			noddi.Xoffset = c - 8
   155  			gmove(&nodsi, &cx)
   156  			gmove(&cx, &noddi)
   157  		}
   158  	}
   159  
   160  	restx(&cx, &oldcx)
   161  }