github.com/cloudwego/frugal@v0.1.15/internal/atm/ssa/pass_fusion_amd64.go

/*
 * Copyright 2022 ByteDance Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ssa

import (
    `github.com/cloudwego/frugal/internal/atm/abi`
    `github.com/cloudwego/frugal/internal/cpu`
    `github.com/cloudwego/frugal/internal/rt`
)

// Fusion fuses simple instructions into more complex ones to reduce the instruction count.
type Fusion struct{}

// flagsafe reports whether ins is in bb and every instruction after it in the
// block is known to preserve the CPU flags, so a later flag-consuming
// terminator may still rely on the flags that ins produced.
func (Fusion) flagsafe(bb *BasicBlock, ins IrNode) bool {
    i := -1
    p := IrNode(nil)

    /* find the instruction */
    for i, p = range bb.Ins {
        if p == ins {
            break
        }
    }

    /* not found, the instruction is in another basic block,
     * so we cannot guarantee that the flags are preserved */
    if p != ins {
        return false
    }

    /* check the instructions after it; only the instructions listed below are
     * known to preserve flags, all other instructions are assumed to clobber them */
    for _, p = range bb.Ins[i + 1:] {
        switch p.(type) {
            case *IrAMD64_INT          : break
            case *IrAMD64_LEA          : break
            case *IrAMD64_BSWAP        : break
            case *IrAMD64_MOVSLQ       : break
            case *IrAMD64_MOV_abs      : break
            case *IrAMD64_MOV_ptr      : break
            case *IrAMD64_MOV_reg      : break
            case *IrAMD64_MOV_load     : break
            case *IrAMD64_MOV_store_r  : break
            case *IrAMD64_MOV_store_i  : break
            case *IrAMD64_MOV_load_be  : break
            case *IrAMD64_MOV_store_be : break
            case *IrAMD64_CALL_gcwb    : break
            default                    : return false
        }
    }

    /* everything checked fine */
    return true
}

// Apply runs the fusion pass over cfg until no more rewrites fire, applying
// TDCE and Reorder after each round.
func (self Fusion) Apply(cfg *CFG) {
    done := false
    defs := make(map[Reg]IrNode)

    /* leaq    {mem}, %r1       ; op {disp}(%r1), %r2             --> op {disp}{mem}, %r2
     * leaq    {off1}(%r0), %r2 ; op {off2}(%r1,%r2), %r3         --> op {off1+off2}(%r1,%r0), %r3
     * leaq    {off1}(%r0), %r1 ; op {off2}(%r1,%r2,{scale}), %r3 --> op {off1+off2}(%r0,%r2,{scale}), %r3
     * addsub  $imm, %r1, %r2   ; op {disp}(%r3,%r2), %r4         --> op {disp+imm}(%r3,%r1), %r4
     * movabsq $imm, %r1        ; op {disp}(%r0,%r1,{scale}), %r2 --> op {disp+imm*scale}(%r0), %r2 */
    fusemem := func(m *Mem) {
        if m.I == Rz {
            if ins, ok := defs[m.M].(*IrAMD64_LEA); ok {
                if x := int64(m.D) + int64(ins.M.D); isi32(x) {
                    m.M = ins.M.M
                    m.I = ins.M.I
                    m.S = ins.M.S
                    m.D = int32(x)
                    done = false
                }
            }
        } else {
            if ins, ok := defs[m.M].(*IrAMD64_LEA); ok && ins.M.I == Rz {
                if x := int64(m.D) + int64(ins.M.D); isi32(x) {
                    m.M = ins.M.M
                    m.D = int32(x)
                    done = false
                }
            } else if ins, ok := defs[m.I].(*IrAMD64_LEA); ok && m.S == 1 && ins.M.I == Rz {
                if x := int64(m.D) + int64(ins.M.D); isi32(x) {
                    m.I = ins.M.M
                    m.D = int32(x)
                    done = false
                }
            } else if ins, ok := defs[m.I].(*IrAMD64_BinOp_ri); ok && m.S == 1 && ins.Op.IsAdditive() {
                if x := int64(m.D) + int64(ins.Y * ins.Op.ScaleFactor()); isi32(x) {
                    m.I = ins.X
                    m.D = int32(x)
                    done = false
                }
            } else if ins, ok := defs[m.I].(*IrAMD64_MOV_abs); ok {
                if x := int64(m.D) + ins.V * int64(m.S); isi32(x) {
                    m.I = Rz
                    m.D = int32(x)
                    done = false
                }
            }
        }
    }
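
    /* As a concrete illustration of the first pattern above (register names
     * arbitrary), provided the combined displacement still fits in a signed
     * 32-bit immediate (the isi32 guards):
     *
     *     leaq 8(%rax), %rbx
     *     movq 16(%rbx), %rcx
     *
     * fuses into
     *
     *     movq 24(%rax), %rcx
     */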

    /* retry until no more modifications */
    for !done {
        done = true
        rt.MapClear(defs)

        /* pseudo-definitions for the zero registers */
        defs[Rz] = &IrAMD64_MOV_abs { R: Rz, V: 0 }
        defs[Pn] = &IrAMD64_MOV_ptr { R: Pn, P: nil }

        /* check every block */
        for _, bb := range cfg.PostOrder().Reversed() {
            var r *Reg
            var ok bool

            /* mark all the definitions in Phi nodes */
            for _, v := range bb.Phi {
                for _, r = range v.Definitions() {
                    if _, ok = defs[*r]; !ok {
                        defs[*r] = v
                    } else if r.Kind() != K_zero {
                        panic("register redefined: " + r.String())
                    }
                }
            }

            /* scan all the instructions */
            for i, v := range bb.Ins {
                var m IrAMD64_MemOp
                var d IrDefinitions

                /* fuse memory addresses in instructions */
                if m, ok = v.(IrAMD64_MemOp); ok {
                    fusemem(m.MemOp())
                }

                /* fuse instructions if possible */
                switch p := v.(type) {
                    default: {
                        break
                    }

                    /* movx {mem}, %r0; bswapx %r0, %r1 --> movbex {mem}, %r1 */
                    case *IrAMD64_BSWAP: {
                        if ins, ok := defs[p.V].(*IrAMD64_MOV_load); ok && ins.N != 1 && cpu.HasMOVBE {
                            done = false
                            bb.Ins[i] = &IrAMD64_MOV_load_be { R: p.R, M: ins.M, N: ins.N }
                        }
                    }

                    /* movq   {i32}, %r0; movx %r0, {mem} --> movx {i32}, {mem}
                     * movq   {p32}, %r0; movx %r0, {mem} --> movx {p32}, {mem}
                     * bswapx %r0, %r1  ; movx %r1, {mem} --> movbex %r0, {mem} */
                    case *IrAMD64_MOV_store_r: {
                        if ins, ok := defs[p.R].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                            done = false
                            bb.Ins[i] = &IrAMD64_MOV_store_i { V: int32(ins.V), M: p.M, N: p.N }
                        } else if ins, ok := defs[p.R].(*IrAMD64_MOV_ptr); ok && isp32(ins.P) && p.N == abi.PtrSize {
                            done = false
                            bb.Ins[i] = &IrAMD64_MOV_store_p { P: ins.P, M: p.M }
                        } else if ins, ok := defs[p.R].(*IrAMD64_BSWAP); ok && p.N != 1 && cpu.HasMOVBE {
                            done = false
                            bb.Ins[i] = &IrAMD64_MOV_store_be { R: ins.V, M: p.M, N: p.N }
                        }
                    }

                    /* movq {i32}, %r0; binop %r0, %r1 --> binop {i32}, %r1
                     * movq {mem}, %r0; binop %r0, %r1 --> binop {mem}, %r1 */
                    case *IrAMD64_BinOp_rr: {
                        if ins, ok := defs[p.Y].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                            done = false
                            bb.Ins[i] = &IrAMD64_BinOp_ri { R: p.R, X: p.X, Y: int32(ins.V), Op: p.Op }
                        } else if ins, ok := defs[p.Y].(*IrAMD64_MOV_load); ok && ins.N == abi.PtrSize {
                            done = false
                            bb.Ins[i] = &IrAMD64_BinOp_rm { R: p.R, X: p.X, Y: ins.M, Op: p.Op }
                        }
                    }

                    /* movq {u8}, %r0; btsq %r0, %r1; setc %r2 --> btsq {u8}, %r1; setc %r2 */
                    case *IrAMD64_BTSQ_rr: {
                        if ins, ok := defs[p.Y].(*IrAMD64_MOV_abs); ok && isu8(ins.V) {
                            done = false
                            bb.Ins[i] = &IrAMD64_BTSQ_ri { T: p.T, S: p.S, X: p.X, Y: uint8(ins.V) }
                        }
                    }

                    /* movq {i32}, %r0; cmpq %r0, %r1 --> cmpq {i32}, %r1
                     * movq {ptr}, %p0; cmpq %p0, %p1 --> cmpq {ptr}, %p1
                     * movx {mem}, %r0; cmpq %r0, %r1 --> cmpx {mem}, %r1
                     * movq {i32}, %r1; cmpq %r0, %r1 --> cmpq %r0, {i32}
                     * movq {ptr}, %p1; cmpq %p0, %p1 --> cmpq %p0, {ptr}
                     * movx {mem}, %r1; cmpq %r0, %r1 --> cmpx %r0, {mem} */
                    case *IrAMD64_CMPQ_rr: {
                        if ins, ok := defs[p.X].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_ir { R: p.R, X: int32(ins.V), Y: p.Y, Op: p.Op }
                        } else if ins, ok := defs[p.X].(*IrAMD64_MOV_ptr); ok {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_pr { R: p.R, X: ins.P, Y: p.Y, Op: p.Op }
                        } else if ins, ok := defs[p.X].(*IrAMD64_MOV_load); ok && ins.N != 16 {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_mr { R: p.R, X: ins.M, Y: p.Y, Op: p.Op, N: ins.N }
                        } else if ins, ok := defs[p.Y].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_ri { R: p.R, X: p.X, Y: int32(ins.V), Op: p.Op }
                        } else if ins, ok := defs[p.Y].(*IrAMD64_MOV_ptr); ok {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_rp { R: p.R, X: p.X, Y: ins.P, Op: p.Op }
                        } else if ins, ok := defs[p.Y].(*IrAMD64_MOV_load); ok && ins.N != 16 {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_rm { R: p.R, X: p.X, Y: ins.M, Op: p.Op, N: ins.N }
                        }
                    }
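
                    /* For example (registers arbitrary), the third rewrite
                     * above folds the load into the compare:
                     *
                     *     movq 8(%rdi), %rax
                     *     cmpq %rax, %rbx
                     *     setl %cl
                     *
                     * becomes
                     *
                     *     cmpq 8(%rdi), %rbx
                     *     setl %cl
                     *
                     * (16-byte loads are excluded, hence the ins.N != 16 guards). */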

                    /* movq {i32}, %r0; cmpx %r0, {mem} --> cmpx {i32}, {mem}
                     * movq {p32}, %p0; cmpq %p0, {mem} --> cmpq {p32}, {mem} */
                    case *IrAMD64_CMPQ_rm: {
                        if ins, ok := defs[p.X].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_im { R: p.R, X: int32(ins.V), Y: p.Y, Op: p.Op, N: p.N }
                        } else if ins, ok := defs[p.X].(*IrAMD64_MOV_ptr); ok && isp32(ins.P) && p.N == abi.PtrSize {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_pm { R: p.R, X: ins.P, Y: p.Y, Op: p.Op }
                        }
                    }

                    /* movq {i32}, %r0; cmpx {mem}, %r0 --> cmpx {mem}, {i32}
                     * movq {p32}, %p0; cmpq {mem}, %p0 --> cmpq {mem}, {p32} */
                    case *IrAMD64_CMPQ_mr: {
                        if ins, ok := defs[p.Y].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_mi { R: p.R, X: p.X, Y: int32(ins.V), Op: p.Op, N: p.N }
                        } else if ins, ok := defs[p.Y].(*IrAMD64_MOV_ptr); ok && isp32(ins.P) && p.N == abi.PtrSize {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_mp { R: p.R, X: p.X, Y: ins.P, Op: p.Op }
                        }
                    }
                }

                /* mark all the definitions in instructions */
                if d, ok = v.(IrDefinitions); ok {
                    for _, r = range d.Definitions() {
                        if _, ok = defs[*r]; !ok {
                            defs[*r] = v
                        } else if r.Kind() != K_zero {
                            panic("register redefined: " + r.String())
                        }
                    }
                }
            }

            /* fuse memory operations in terminators */
            if m, ok := bb.Term.(IrAMD64_MemOp); ok {
                fusemem(m.MemOp())
            }

            /* fuse terminators if possible */
            switch p := bb.Term.(type) {
                default: {
                    break
                }

                /* movq {i32}, %r0; cmpq %r0, %r1; jcc {label} --> cmpq {i32}, %r1; jcc {label}
                 * movq {ptr}, %p0; cmpq %p0, %p1; jcc {label} --> cmpq {ptr}, %p1; jcc {label}
                 * movx {mem}, %r0; cmpq %r0, %r1; jcc {label} --> cmpx {mem}, %r1; jcc {label}
                 * movq {i32}, %r1; cmpq %r0, %r1; jcc {label} --> cmpq %r0, {i32}; jcc {label}
                 * movq {ptr}, %p1; cmpq %p0, %p1; jcc {label} --> cmpq %p0, {ptr}; jcc {label}
                 * movx {mem}, %r1; cmpq %r0, %r1; jcc {label} --> cmpx %r0, {mem}; jcc {label} */
                case *IrAMD64_Jcc_rr: {
                    if ins, ok := defs[p.X].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                        done = false
                        bb.Term = &IrAMD64_Jcc_ir { X: int32(ins.V), Y: p.Y, To: p.To, Ln: p.Ln, Op: p.Op }
                    } else if ins, ok := defs[p.X].(*IrAMD64_MOV_ptr); ok {
                        done = false
                        bb.Term = &IrAMD64_Jcc_pr { X: ins.P, Y: p.Y, To: p.To, Ln: p.Ln, Op: p.Op }
                    } else if ins, ok := defs[p.X].(*IrAMD64_MOV_load); ok {
                        done = false
                        bb.Term = &IrAMD64_Jcc_mr { X: ins.M, Y: p.Y, To: p.To, Ln: p.Ln, Op: p.Op, N: ins.N }
                    } else if ins, ok := defs[p.Y].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                        done = false
                        bb.Term = &IrAMD64_Jcc_ri { X: p.X, Y: int32(ins.V), To: p.To, Ln: p.Ln, Op: p.Op }
                    } else if ins, ok := defs[p.Y].(*IrAMD64_MOV_ptr); ok {
                        done = false
                        bb.Term = &IrAMD64_Jcc_rp { X: p.X, Y: ins.P, To: p.To, Ln: p.Ln, Op: p.Op }
                    } else if ins, ok := defs[p.Y].(*IrAMD64_MOV_load); ok {
                        done = false
                        bb.Term = &IrAMD64_Jcc_rm { X: p.X, Y: ins.M, To: p.To, Ln: p.Ln, Op: p.Op, N: ins.N }
                    }
                }
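
                /* Concretely (registers and label arbitrary), the first
                 * rewrite above turns
                 *
                 *     movq $100, %rax
                 *     cmpq %rax, %rbx
                 *     jl   {label}
                 *
                 * into
                 *
                 *     cmpq $100, %rbx
                 *     jl   {label}
                 *
                 * with the movq left for the TDCE pass below to collect. */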

                /* setcc %r0; cmpq %r0, $0; je {label} --> jncc {label} */
                case *IrAMD64_Jcc_ri: {
                    if p.Y == 0 && p.Op == IrAMD64_CmpEq {
                        if ins, ok := defs[p.X].(*IrAMD64_CMPQ_rr); ok {
                            done = false
                            bb.Term = &IrAMD64_Jcc_rr { X: ins.X, Y: ins.Y, To: p.To, Ln: p.Ln, Op: ins.Op.Negated() }
                        } else if ins, ok := defs[p.X].(*IrAMD64_CMPQ_rm); ok {
                            done = false
                            bb.Term = &IrAMD64_Jcc_rm { X: ins.X, Y: ins.Y, To: p.To, Ln: p.Ln, Op: ins.Op.Negated(), N: ins.N }
                        } else if ins, ok := defs[p.X].(*IrAMD64_CMPQ_mr); ok {
                            done = false
                            bb.Term = &IrAMD64_Jcc_mr { X: ins.X, Y: ins.Y, To: p.To, Ln: p.Ln, Op: ins.Op.Negated(), N: ins.N }
                        } else if ins, ok := defs[p.X].(*IrAMD64_BTSQ_rr); ok && p.X == ins.T && self.flagsafe(bb, ins) {
                            done = false
                            bb.Term, ins.T = &IrAMD64_JNC { To: p.To, Ln: p.Ln }, Rz
                        } else if ins, ok := defs[p.X].(*IrAMD64_BTSQ_ri); ok && p.X == ins.T && self.flagsafe(bb, ins) {
                            done = false
                            bb.Term, ins.T = &IrAMD64_JNC { To: p.To, Ln: p.Ln }, Rz
                        }
                    }
                }

                /* setcc %r0; cmpq $0, %r0; je {label} --> jncc {label} */
                case *IrAMD64_Jcc_ir: {
                    if p.X == 0 && p.Op == IrAMD64_CmpEq {
                        if ins, ok := defs[p.Y].(*IrAMD64_CMPQ_rr); ok {
                            done = false
                            bb.Term = &IrAMD64_Jcc_rr { X: ins.X, Y: ins.Y, To: p.To, Ln: p.Ln, Op: ins.Op.Negated() }
                        } else if ins, ok := defs[p.Y].(*IrAMD64_CMPQ_rm); ok {
                            done = false
                            bb.Term = &IrAMD64_Jcc_rm { X: ins.X, Y: ins.Y, To: p.To, Ln: p.Ln, Op: ins.Op.Negated(), N: ins.N }
                        } else if ins, ok := defs[p.Y].(*IrAMD64_CMPQ_mr); ok {
                            done = false
                            bb.Term = &IrAMD64_Jcc_mr { X: ins.X, Y: ins.Y, To: p.To, Ln: p.Ln, Op: ins.Op.Negated(), N: ins.N }
                        } else if ins, ok := defs[p.Y].(*IrAMD64_BTSQ_rr); ok && p.Y == ins.T && self.flagsafe(bb, ins) {
                            done = false
                            bb.Term, ins.T = &IrAMD64_JNC { To: p.To, Ln: p.Ln }, Rz
                        } else if ins, ok := defs[p.Y].(*IrAMD64_BTSQ_ri); ok && p.Y == ins.T && self.flagsafe(bb, ins) {
                            done = false
                            bb.Term, ins.T = &IrAMD64_JNC { To: p.To, Ln: p.Ln }, Rz
                        }
                    }
                }

                /* movq {i32}, %r0; cmpq %r0, {mem}; jcc {label} --> cmpq {i32}, {mem}; jcc {label}
                 * movq {p32}, %p0; cmpq %p0, {mem}; jcc {label} --> cmpq {p32}, {mem}; jcc {label} */
                case *IrAMD64_Jcc_rm: {
                    if ins, ok := defs[p.X].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                        done = false
                        bb.Term = &IrAMD64_Jcc_im { X: int32(ins.V), Y: p.Y, To: p.To, Ln: p.Ln, Op: p.Op, N: p.N }
                    } else if ins, ok := defs[p.X].(*IrAMD64_MOV_ptr); ok && isp32(ins.P) && p.N == abi.PtrSize {
                        done = false
                        bb.Term = &IrAMD64_Jcc_pm { X: ins.P, Y: p.Y, To: p.To, Ln: p.Ln, Op: p.Op }
                    }
                }
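
                /* The BTSQ branches in the two cases above work together with
                 * flagsafe: when the branch tests the setc output of a
                 * bit-test and no instruction in between clobbers the flags
                 * (the flagsafe check), e.g.
                 *
                 *     btsq $3, %rbx; setc %r0
                 *     cmpq %r0, $0 ; je {label}
                 *
                 * the whole test collapses to
                 *
                 *     btsq $3, %rbx
                 *     jnc  {label}
                 *
                 * with the setc destination reset to Rz so TDCE can drop it. */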

                /* movq {i32}, %r0; cmpq {mem}, %r0; jcc {label} --> cmpq {mem}, {i32}; jcc {label}
                 * movq {p32}, %p0; cmpq {mem}, %p0; jcc {label} --> cmpq {mem}, {p32}; jcc {label} */
                case *IrAMD64_Jcc_mr: {
                    if ins, ok := defs[p.Y].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                        done = false
                        bb.Term = &IrAMD64_Jcc_mi { X: p.X, Y: int32(ins.V), To: p.To, Ln: p.Ln, Op: p.Op, N: p.N }
                    } else if ins, ok := defs[p.Y].(*IrAMD64_MOV_ptr); ok && isp32(ins.P) && p.N == abi.PtrSize {
                        done = false
                        bb.Term = &IrAMD64_Jcc_mp { X: p.X, Y: ins.P, To: p.To, Ln: p.Ln, Op: p.Op }
                    }
                }
            }
        }

        /* perform TDCE & reorder after each round */
        new(TDCE).Apply(cfg)
        new(Reorder).Apply(cfg)
    }
}
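
/* Usage sketch (illustrative): like the TDCE and Reorder passes invoked above,
 * Fusion operates on a lowered CFG, e.g.
 *
 *     new(Fusion).Apply(cfg)
 *
 * It iterates to a fixed point internally, so callers apply it once at its
 * position in the pass pipeline rather than looping it themselves. */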