github.com/mh-cbon/go@v0.0.0-20160603070303-9e112a3fe4c0/src/cmd/compile/internal/amd64/cgen.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package amd64 6 7 import ( 8 "cmd/compile/internal/gc" 9 "cmd/internal/obj" 10 "cmd/internal/obj/x86" 11 ) 12 13 func blockcopy(n, ns *gc.Node, osrc, odst, w int64) { 14 var noddi gc.Node 15 gc.Nodreg(&noddi, gc.Types[gc.Tptr], x86.REG_DI) 16 var nodsi gc.Node 17 gc.Nodreg(&nodsi, gc.Types[gc.Tptr], x86.REG_SI) 18 19 var nodl gc.Node 20 var nodr gc.Node 21 if n.Ullman >= ns.Ullman { 22 gc.Agenr(n, &nodr, &nodsi) 23 if ns.Op == gc.ONAME { 24 gc.Gvardef(ns) 25 } 26 gc.Agenr(ns, &nodl, &noddi) 27 } else { 28 if ns.Op == gc.ONAME { 29 gc.Gvardef(ns) 30 } 31 gc.Agenr(ns, &nodl, &noddi) 32 gc.Agenr(n, &nodr, &nodsi) 33 } 34 35 if nodl.Reg != x86.REG_DI { 36 gmove(&nodl, &noddi) 37 } 38 if nodr.Reg != x86.REG_SI { 39 gmove(&nodr, &nodsi) 40 } 41 gc.Regfree(&nodl) 42 gc.Regfree(&nodr) 43 44 c := w % 8 // bytes 45 q := w / 8 // quads 46 47 var oldcx gc.Node 48 var cx gc.Node 49 savex(x86.REG_CX, &cx, &oldcx, nil, gc.Types[gc.TINT64]) 50 51 // if we are copying forward on the stack and 52 // the src and dst overlap, then reverse direction 53 if osrc < odst && odst < osrc+w { 54 // reverse direction 55 gins(x86.ASTD, nil, nil) // set direction flag 56 if c > 0 { 57 gconreg(addptr, w-1, x86.REG_SI) 58 gconreg(addptr, w-1, x86.REG_DI) 59 60 gconreg(movptr, c, x86.REG_CX) 61 gins(x86.AREP, nil, nil) // repeat 62 gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)- 63 } 64 65 if q > 0 { 66 if c > 0 { 67 gconreg(addptr, -7, x86.REG_SI) 68 gconreg(addptr, -7, x86.REG_DI) 69 } else { 70 gconreg(addptr, w-8, x86.REG_SI) 71 gconreg(addptr, w-8, x86.REG_DI) 72 } 73 74 gconreg(movptr, q, x86.REG_CX) 75 gins(x86.AREP, nil, nil) // repeat 76 gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)-,*(DI)- 77 } 78 79 // we leave with the flag clear 80 gins(x86.ACLD, nil, nil) 81 } else { 82 // normal direction 83 if q > 128 || (gc.Nacl && q >= 4) || (obj.Getgoos() == "plan9" && q >= 4) { 84 gconreg(movptr, q, x86.REG_CX) 85 gins(x86.AREP, nil, nil) // repeat 86 gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+ 87 } else if q >= 4 { 88 var oldx0 gc.Node 89 var x0 gc.Node 90 savex(x86.REG_X0, &x0, &oldx0, nil, gc.Types[gc.TFLOAT64]) 91 92 p := gins(obj.ADUFFCOPY, nil, nil) 93 p.To.Type = obj.TYPE_ADDR 94 p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg)) 95 96 // 64 blocks taking 14 bytes each 97 // see ../../../../runtime/mkduff.go 98 p.To.Offset = 14 * (64 - q/2) 99 restx(&x0, &oldx0) 100 101 if q%2 != 0 { 102 gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+ 103 } 104 } else if !gc.Nacl && c == 0 { 105 // We don't need the MOVSQ side-effect of updating SI and DI, 106 // and issuing a sequence of MOVQs directly is faster. 107 nodsi.Op = gc.OINDREG 108 109 noddi.Op = gc.OINDREG 110 for q > 0 { 111 gmove(&nodsi, &cx) // MOVQ x+(SI),CX 112 gmove(&cx, &noddi) // MOVQ CX,x+(DI) 113 nodsi.Xoffset += 8 114 noddi.Xoffset += 8 115 q-- 116 } 117 } else { 118 for q > 0 { 119 gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+ 120 q-- 121 } 122 } 123 124 // copy the remaining c bytes 125 if w < 4 || c <= 1 || (odst < osrc && osrc < odst+w) { 126 for c > 0 { 127 gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+ 128 c-- 129 } 130 } else if w < 8 || c <= 4 { 131 nodsi.Op = gc.OINDREG 132 noddi.Op = gc.OINDREG 133 cx.Type = gc.Types[gc.TINT32] 134 nodsi.Type = gc.Types[gc.TINT32] 135 noddi.Type = gc.Types[gc.TINT32] 136 if c > 4 { 137 nodsi.Xoffset = 0 138 noddi.Xoffset = 0 139 gmove(&nodsi, &cx) 140 gmove(&cx, &noddi) 141 } 142 143 nodsi.Xoffset = c - 4 144 noddi.Xoffset = c - 4 145 gmove(&nodsi, &cx) 146 gmove(&cx, &noddi) 147 } else { 148 nodsi.Op = gc.OINDREG 149 noddi.Op = gc.OINDREG 150 cx.Type = gc.Types[gc.TINT64] 151 nodsi.Type = gc.Types[gc.TINT64] 152 noddi.Type = gc.Types[gc.TINT64] 153 nodsi.Xoffset = c - 8 154 noddi.Xoffset = c - 8 155 gmove(&nodsi, &cx) 156 gmove(&cx, &noddi) 157 } 158 } 159 160 restx(&cx, &oldcx) 161 }