github.com/shogo82148/std@v1.22.1-0.20240327122250-4e474527810c/cmd/compile/internal/rangefunc/rewrite.go (about) 1 // Copyright 2023 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 /* 6 Package rangefunc rewrites range-over-func to code that doesn't use range-over-funcs. 7 Rewriting the construct in the front end, before noder, means the functions generated during 8 the rewrite are available in a noder-generated representation for inlining by the back end. 9 10 # Theory of Operation 11 12 The basic idea is to rewrite 13 14 for x := range f { 15 ... 16 } 17 18 into 19 20 f(func(x T) bool { 21 ... 22 }) 23 24 But it's not usually that easy. 25 26 # Range variables 27 28 For a range not using :=, the assigned variables cannot be function parameters 29 in the generated body function. Instead, we allocate fake parameters and 30 start the body with an assignment. For example: 31 32 for expr1, expr2 = range f { 33 ... 34 } 35 36 becomes 37 38 f(func(#p1 T1, #p2 T2) bool { 39 expr1, expr2 = #p1, #p2 40 ... 41 }) 42 43 (All the generated variables have a # at the start to signal that they 44 are internal variables when looking at the generated code in a 45 debugger. Because variables have all been resolved to the specific 46 objects they represent, there is no danger of using plain "p1" and 47 colliding with a Go variable named "p1"; the # is just nice to have, 48 not for correctness.) 49 50 It can also happen that there are fewer range variables than function 51 arguments, in which case we end up with something like 52 53 f(func(x T1, _ T2) bool { 54 ... 55 }) 56 57 or 58 59 f(func(#p1 T1, #p2 T2, _ T3) bool { 60 expr1, expr2 = #p1, #p2 61 ... 62 }) 63 64 # Return 65 66 If the body contains a "break", that break turns into "return false", 67 to tell f to stop. And if the body contains a "continue", that turns 68 into "return true", to tell f to proceed with the next value. 
69 Those are the easy cases. 70 71 If the body contains a return or a break/continue/goto L, then we need 72 to rewrite that into code that breaks out of the loop and then 73 triggers that control flow. In general we rewrite 74 75 for x := range f { 76 ... 77 } 78 79 into 80 81 { 82 var #next int 83 f(func(x T1) bool { 84 ... 85 return true 86 }) 87 ... check #next ... 88 } 89 90 The variable #next is an integer code that says what to do when f 91 returns. Each difficult statement sets #next and then returns false to 92 stop f. 93 94 A plain "return" rewrites to {#next = -1; return false}. 95 The return false breaks the loop. Then when f returns, the "check 96 #next" section includes 97 98 if #next == -1 { return } 99 100 which causes the return we want. 101 102 Return with arguments is more involved. We need somewhere to store the 103 arguments while we break out of f, so we add them to the var 104 declaration, like: 105 106 { 107 var ( 108 #next int 109 #r1 type1 110 #r2 type2 111 ) 112 f(func(x T1) bool { 113 ... 114 { 115 // return a, b 116 #r1, #r2 = a, b 117 #next = -2 118 return false 119 } 120 ... 121 return true 122 }) 123 if #next == -2 { return #r1, #r2 } 124 } 125 126 TODO: What about: 127 128 func f() (x bool) { 129 for range g(&x) { 130 return true 131 } 132 } 133 134 func g(p *bool) func(func() bool) { 135 return func(yield func() bool) { 136 yield() 137 // Is *p true or false here? 138 } 139 } 140 141 With this rewrite the "return true" is not visible after yield returns, 142 but maybe it should be? 143 144 # Checking 145 146 To permit checking that an iterator is well-behaved -- that is, that 147 it does not call the loop body again after it has returned false or 148 after the entire loop has exited (it might retain a copy of the body 149 function, or pass it to another goroutine) -- each generated loop has 150 its own #exitK flag that is checked before each iteration, and set both 151 at any early exit and after the iteration completes. 
152 153 For example: 154 155 for x := range f { 156 ... 157 if ... { break } 158 ... 159 } 160 161 becomes 162 163 { 164 var #exit1 bool 165 f(func(x T1) bool { 166 if #exit1 { runtime.panicrangeexit() } 167 ... 168 if ... { #exit1 = true ; return false } 169 ... 170 return true 171 }) 172 #exit1 = true 173 } 174 175 # Nested Loops 176 177 So far we've only considered a single loop. If a function contains a 178 sequence of loops, each can be translated individually. But loops can 179 be nested. It would work to translate the innermost loop and then 180 translate the loop around it, and so on, except that there'd be a lot 181 of rewriting of rewritten code and the overall traversals could end up 182 taking time quadratic in the depth of the nesting. To avoid all that, 183 we use a single rewriting pass that handles a top-most range-over-func 184 loop and all the range-over-func loops it contains at the same time. 185 186 If we need to return from inside a doubly-nested loop, the rewrites 187 above stay the same, but the check after the inner loop only says 188 189 if #next < 0 { return false } 190 191 to stop the outer loop so it can do the actual return. That is, 192 193 for range f { 194 for range g { 195 ... 196 return a, b 197 ... 198 } 199 } 200 201 becomes 202 203 { 204 var ( 205 #next int 206 #r1 type1 207 #r2 type2 208 ) 209 var #exit1 bool 210 f(func() bool { 211 if #exit1 { runtime.panicrangeexit() } 212 var #exit2 bool 213 g(func() bool { 214 if #exit2 { runtime.panicrangeexit() } 215 ... 216 { 217 // return a, b 218 #r1, #r2 = a, b 219 #next = -2 220 #exit1, #exit2 = true, true 221 return false 222 } 223 ... 224 return true 225 }) 226 #exit2 = true 227 if #next < 0 { 228 return false 229 } 230 return true 231 }) 232 #exit1 = true 233 if #next == -2 { 234 return #r1, #r2 235 } 236 } 237 238 Note that the #next < 0 after the inner loop handles both kinds of 239 return with a single check. 
240 241 # Labeled break/continue of range-over-func loops 242 243 For a labeled break or continue of an outer range-over-func, we 244 use positive #next values. Any such labeled break or continue 245 really means "do N breaks" or "do N breaks and 1 continue". 246 We encode that as perLoopStep*N or perLoopStep*N+1 respectively (the checks and examples below use a perLoopStep of 2). 247 248 Loops that might need to propagate a labeled break or continue 249 add one or both of these to the #next checks: 250 251 if #next >= 2 { 252 #next -= 2 253 return false 254 } 255 256 if #next == 1 { 257 #next = 0 258 return true 259 } 260 261 For example 262 263 F: for range f { 264 for range g { 265 for range h { 266 ... 267 break F 268 ... 269 ... 270 continue F 271 ... 272 } 273 } 274 ... 275 } 276 277 becomes 278 279 { 280 var #next int 281 var #exit1 bool 282 f(func() bool { 283 if #exit1 { runtime.panicrangeexit() } 284 var #exit2 bool 285 g(func() bool { 286 if #exit2 { runtime.panicrangeexit() } 287 var #exit3 bool 288 h(func() bool { 289 if #exit3 { runtime.panicrangeexit() } 290 ... 291 { 292 // break F 293 #next = 4 294 #exit1, #exit2, #exit3 = true, true, true 295 return false 296 } 297 ... 298 { 299 // continue F 300 #next = 3 301 #exit2, #exit3 = true, true 302 return false 303 } 304 ... 305 return true 306 }) 307 #exit3 = true 308 if #next >= 2 { 309 #next -= 2 310 return false 311 } 312 return true 313 }) 314 #exit2 = true 315 if #next >= 2 { 316 #next -= 2 317 return false 318 } 319 if #next == 1 { 320 #next = 0 321 return true 322 } 323 ... 324 return true 325 }) 326 #exit1 = true 327 } 328 329 Note that the post-h checks only consider a break, 330 since no generated code tries to continue g. 331 332 # Gotos and other labeled break/continue 333 334 The final control flow translations are goto and break/continue of a 335 non-range-over-func statement. In both cases, we may need to break out 336 of one or more range-over-func loops before we can do the actual 337 control flow statement. 
Each such break/continue/goto L statement is 338 assigned a unique negative #next value (below -2, since -1 and -2 are 339 for the two kinds of return). Then the post-checks for a given loop 340 test for the specific codes that refer to labels directly targetable 341 from that block. Otherwise, the generic 342 343 if #next < 0 { return false } 344 345 check handles stopping the next loop to get one step closer to the label. 346 347 For example 348 349 Top: print("start\n") 350 for range f { 351 for range g { 352 ... 353 for range h { 354 ... 355 goto Top 356 ... 357 } 358 } 359 } 360 361 becomes 362 363 Top: print("start\n") 364 { 365 var #next int 366 var #exit1 bool 367 f(func() bool { 368 if #exit1 { runtime.panicrangeexit() } 369 var #exit2 bool 370 g(func() bool { 371 if #exit2 { runtime.panicrangeexit() } 372 ... 373 var #exit3 bool 374 h(func() bool { 375 if #exit3 { runtime.panicrangeexit() } 376 ... 377 { 378 // goto Top 379 #next = -3 380 #exit1, #exit2, #exit3 = true, true, true 381 return false 382 } 383 ... 384 return true 385 }) 386 #exit3 = true 387 if #next < 0 { 388 return false 389 } 390 return true 391 }) 392 #exit2 = true 393 if #next < 0 { 394 return false 395 } 396 return true 397 }) 398 #exit1 = true 399 if #next == -3 { 400 #next = 0 401 goto Top 402 } 403 } 404 405 Labeled break/continue to non-range-over-funcs are handled the same 406 way as goto. 407 408 # Defers 409 410 The last wrinkle is handling defer statements. If we have 411 412 for range f { 413 defer print("A") 414 } 415 416 we cannot rewrite that into 417 418 f(func() { 419 defer print("A") 420 }) 421 422 because the deferred code will run at the end of the iteration, not 423 the end of the containing function. To fix that, the runtime provides 424 a special hook that lets us obtain a defer "token" representing the 425 outer function and then use it in a later defer to attach the deferred 426 code to that outer function. 
427 428 Normally, 429 430 defer print("A") 431 432 compiles to 433 434 runtime.deferproc(func() { print("A") }) 435 436 This changes in a range-over-func. For example: 437 438 for range f { 439 defer print("A") 440 } 441 442 compiles to 443 444 var #defers = runtime.deferrangefunc() 445 f(func() { 446 runtime.deferprocat(func() { print("A") }, #defers) 447 }) 448 449 For this rewriting phase, we insert the explicit initialization of 450 #defers and then attach the #defers variable to the CallStmt 451 representing the defer. That variable will be propagated to the 452 backend and will cause the backend to compile the defer using 453 deferprocat instead of an ordinary deferproc. 454 455 TODO: Could call runtime.deferrangefuncend after f. 456 */ 457 package rangefunc 458 459 import ( 460 "github.com/shogo82148/std/cmd/compile/internal/syntax" 461 "github.com/shogo82148/std/cmd/compile/internal/types2" 462 ) 463 464 // Rewrite rewrites all the range-over-funcs in the files. 465 func Rewrite(pkg *types2.Package, info *types2.Info, files []*syntax.File)