// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector.
//
// NOTE(review): this file is written in the Plan 9 C dialect used by the
// Go runtime (middle-dot identifiers, unnamed struct members); it is not
// ANSI C and will not build with a standard compiler.

#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "stack.h"
#include "mgc0.h"
#include "race.h"
#include "type.h"
#include "typekind.h"
#include "funcdata.h"
#include "../../cmd/ld/textflag.h"

enum {
	Debug = 0,
	DebugMark = 0,  // run second pass to check mark
	CollectStats = 0,
	ScanStackByFrames = 1,
	IgnorePreciseGC = 0,

	// Four bits per word (see #defines below).
	wordsPerBitmapWord = sizeof(void*)*8/4,
	bitShift = sizeof(void*)*8/4,

	// A proc hands off half of its work buffer once more than this
	// many objects are queued and another proc is waiting.
	handoffThreshold = 4,
	IntermediateBufferCapacity = 64,

	// Bits in type information
	PRECISE = 1,
	LOOP = 2,
	PC_BITS = PRECISE | LOOP,

	// Pointer map
	BitsPerPointer = 2,
	BitsNoPointer = 0,
	BitsPointer = 1,
	BitsIface = 2,
	BitsEface = 3,
};

// Bits in per-word bitmap.
// #defines because enum might not be able to hold the values.
//
// Each word in the bitmap describes wordsPerBitmapWord words
// of heap memory.  There are 4 bitmap bits dedicated to each heap word,
// so on a 64-bit system there is one bitmap word per 16 heap words.
// The bits in the word are packed together by type first, then by
// heap location, so each 64-bit bitmap word consists of, from top to bottom,
// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
// then the 16 bitNoScan/bitBlockBoundary bits, then the 16 bitAllocated bits.
// This layout makes it easier to iterate over the bits of a given type.
//
// The bitmap starts at mheap.arena_start and extends *backward* from
// there.  On a 64-bit system the off'th word in the arena is tracked by
// the off/16+1'th word before mheap.arena_start.  (On a 32-bit system,
// the only difference is that the divisor is 8.)
//
// To pull out the bits corresponding to a given pointer p, we use:
//
//	off = p - (uintptr*)mheap.arena_start;  // word offset
//	b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
//	shift = off % wordsPerBitmapWord
//	bits = *b >> shift;
//	/* then test bits & bitAllocated, bits & bitMarked, etc. */
//
#define bitAllocated		((uintptr)1<<(bitShift*0))
#define bitNoScan		((uintptr)1<<(bitShift*1))	/* when bitAllocated is set */
#define bitMarked		((uintptr)1<<(bitShift*2))	/* when bitAllocated is set */
#define bitSpecial		((uintptr)1<<(bitShift*3))	/* when bitAllocated is set - has finalizer or being profiled */
#define bitBlockBoundary	((uintptr)1<<(bitShift*1))	/* when bitAllocated is NOT set */

#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)

// Holding worldsema grants an M the right to try to stop the world.
// The procedure is:
//
//	runtime·semacquire(&runtime·worldsema);
//	m->gcing = 1;
//	runtime·stoptheworld();
//
//	... do stuff ...
//
//	m->gcing = 0;
//	runtime·semrelease(&runtime·worldsema);
//	runtime·starttheworld();
//
uint32 runtime·worldsema = 1;

// Obj describes one unit of work for the scanner: a byte range plus
// optional type information guiding a precise scan.
typedef struct Obj Obj;
struct Obj
{
	byte	*p;	// data pointer
	uintptr	n;	// size of data in bytes
	uintptr	ti;	// type info
};

// The size of Workbuf is N*PageSize.
typedef struct Workbuf Workbuf;
struct Workbuf
{
#define SIZE (2*PageSize-sizeof(LFNode)-sizeof(uintptr))
	LFNode	node; // must be first
	uintptr	nobj;
	Obj	obj[SIZE/sizeof(Obj) - 1];
	uint8	_padding[SIZE%sizeof(Obj) + sizeof(Obj)];
#undef SIZE
};

typedef struct Finalizer Finalizer;
struct Finalizer
{
	FuncVal	*fn;
	void	*arg;
	uintptr	nret;
	Type	*fint;
	PtrType	*ot;
};

typedef struct FinBlock FinBlock;
struct FinBlock
{
	FinBlock	*alllink;
	FinBlock	*next;
	int32	cnt;
	int32	cap;
	Finalizer	fin[1];
};

extern byte data[];
extern byte edata[];
extern byte bss[];
extern byte ebss[];

extern byte gcdata[];
extern byte gcbss[];

static G	*fing;
static FinBlock	*finq;		// list of finalizers that are to be executed
static FinBlock	*finc;		// cache of free blocks
static FinBlock	*allfin;	// list of all blocks
static Lock	finlock;
static int32	fingwait;

static void	runfinq(void);
static Workbuf*	getempty(Workbuf*);
static Workbuf*	getfull(Workbuf*);
static void	putempty(Workbuf*);
static Workbuf*	handoff(Workbuf*);
static void	gchelperstart(void);

// Shared state for one GC cycle; accessed by all helper procs.
static struct {
	uint64	full;  // lock-free list of full blocks
	uint64	empty; // lock-free list of empty blocks
	byte	pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
	uint32	nproc;
	volatile uint32	nwait;
	volatile uint32	ndone;
	volatile uint32	debugmarkdone;
	Note	alldone;
	ParFor	*markfor;
	ParFor	*sweepfor;

	Lock;	// embedded lock; protects chunk/nchunk allocation below
	byte	*chunk;
	uintptr	nchunk;

	Obj	*roots;
	uint32	nroot;
	uint32	rootcap;
} work;

enum {
	GC_DEFAULT_PTR = GC_NUM_INSTR,
	GC_CHAN,

	GC_NUM_INSTR2
};

// GC statistics, updated with atomic adds.
// Only meaningful (and only maintained) when CollectStats is enabled.
static struct {
	struct {
		uint64 sum;
		uint64 cnt;
	} ptr;
	uint64 nbytes;
	struct {
		uint64 sum;
		uint64 cnt;
		uint64 notype;
		uint64 typelookup;
	} obj;
	uint64 rescan;
	uint64 rescanbytes;
	uint64 instr[GC_NUM_INSTR2];
	uint64 putempty;
	uint64 getfull;
	struct {
		uint64 foundbit;
		uint64 foundword;
		uint64 foundspan;
	} flushptrbuf;
	struct {
		uint64 foundbit;
		uint64 foundword;
		uint64 foundspan;
	} markonly;
} gcstats;

// markonly marks an object. It returns true if the object
// has been marked by this function, false otherwise.
// This function doesn't append the object to any buffer.
static bool
markonly(void *obj)
{
	byte *p;
	uintptr *bitp, bits, shift, x, xbits, off, j;
	MSpan *s;
	PageID k;

	// Words outside the arena cannot be pointers.
	if(obj < runtime·mheap.arena_start || obj >= runtime·mheap.arena_used)
		return false;

	// obj may be a pointer to a live object.
	// Try to find the beginning of the object.

	// Round down to word boundary.
	obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));

	// Find bits for this word.
	off = (uintptr*)obj - (uintptr*)runtime·mheap.arena_start;
	bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;
	xbits = *bitp;
	bits = xbits >> shift;

	// Pointing at the beginning of a block?
	if((bits & (bitAllocated|bitBlockBoundary)) != 0) {
		if(CollectStats)
			runtime·xadd64(&gcstats.markonly.foundbit, 1);
		goto found;
	}

	// Pointing just past the beginning?
	// Scan backward a little to find a block boundary.
	for(j=shift; j-->0; ) {
		if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
			shift = j;
			bits = xbits>>shift;
			if(CollectStats)
				runtime·xadd64(&gcstats.markonly.foundword, 1);
			goto found;
		}
	}

	// Otherwise consult span table to find beginning.
	// (Manually inlined copy of MHeap_LookupMaybe.)
	k = (uintptr)obj>>PageShift;
	x = k;
	if(sizeof(void*) == 8)
		x -= (uintptr)runtime·mheap.arena_start>>PageShift;
	s = runtime·mheap.spans[x];
	if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse)
		return false;
	p = (byte*)((uintptr)s->start<<PageShift);
	if(s->sizeclass == 0) {
		obj = p;
	} else {
		uintptr size = s->elemsize;
		int32 i = ((byte*)obj - p)/size;
		obj = p+i*size;
	}

	// Now that we know the object header, reload bits.
	off = (uintptr*)obj - (uintptr*)runtime·mheap.arena_start;
	bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;
	xbits = *bitp;
	bits = xbits >> shift;
	if(CollectStats)
		runtime·xadd64(&gcstats.markonly.foundspan, 1);

found:
	// Now we have bits, bitp, and shift correct for
	// obj pointing at the base of the object.
	// Only care about allocated and not marked.
	if((bits & (bitAllocated|bitMarked)) != bitAllocated)
		return false;
	if(work.nproc == 1)
		*bitp |= bitMarked<<shift;
	else {
		// Multi-proc: set the mark bit with a CAS loop; another proc
		// may be updating other bits in the same bitmap word.
		for(;;) {
			x = *bitp;
			if(x & (bitMarked<<shift))
				return false;
			if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
				break;
		}
	}

	// The object is now marked
	return true;
}

// PtrTarget is a structure used by intermediate buffers.
// The intermediate buffers hold GC data before it
// is moved/flushed to the work buffer (Workbuf).
// The size of an intermediate buffer is very small,
// such as 32 or 64 elements.
typedef struct PtrTarget PtrTarget;
struct PtrTarget
{
	void *p;
	uintptr ti;
};

// BufferList is the per-proc set of intermediate buffers;
// one entry per GC helper proc, indexed by m->helpgc.
typedef struct BufferList BufferList;
struct BufferList
{
	PtrTarget ptrtarget[IntermediateBufferCapacity];
	Obj obj[IntermediateBufferCapacity];
	uint32 busy;
	byte pad[CacheLineSize];
};
#pragma dataflag NOPTR
static BufferList bufferList[MaxGcproc];

static Type *itabtype;

static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj);

// flushptrbuf moves data from the PtrTarget buffer to the work buffer.
// The PtrTarget buffer contains blocks irrespective of whether the blocks have been marked or scanned,
// while the work buffer contains blocks which have been marked
// and are prepared to be scanned by the garbage collector.
//
// _wp, _wbuf, _nobj are input/output parameters and are specifying the work buffer.
//
// A simplified drawing explaining how the todo-list moves from a structure to another:
//
//	scanblock
//  (find pointers)
//    Obj ------> PtrTarget (pointer targets)
//     ↑          |
//     |          |
//     `----------'
//     flushptrbuf
//  (find block start, mark and enqueue)
static void
flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
{
	byte *p, *arena_start, *obj;
	uintptr size, *bitp, bits, shift, j, x, xbits, off, nobj, ti, n;
	MSpan *s;
	PageID k;
	Obj *wp;
	Workbuf *wbuf;
	PtrTarget *ptrbuf_end;

	arena_start = runtime·mheap.arena_start;

	wp = *_wp;
	wbuf = *_wbuf;
	nobj = *_nobj;

	ptrbuf_end = *ptrbufpos;
	n = ptrbuf_end - ptrbuf;
	*ptrbufpos = ptrbuf;	// reset the intermediate buffer to empty

	if(CollectStats) {
		runtime·xadd64(&gcstats.ptr.sum, n);
		runtime·xadd64(&gcstats.ptr.cnt, 1);
	}

	// If buffer is nearly full, get a new one.
	if(wbuf == nil || nobj+n >= nelem(wbuf->obj)) {
		if(wbuf != nil)
			wbuf->nobj = nobj;
		wbuf = getempty(wbuf);
		wp = wbuf->obj;
		nobj = 0;

		if(n >= nelem(wbuf->obj))
			runtime·throw("ptrbuf has to be smaller than WorkBuf");
	}

	// TODO(atom): This block is a branch of an if-then-else statement.
	// The single-threaded branch may be added in a next CL.
	{
		// Multi-threaded version.

		while(ptrbuf < ptrbuf_end) {
			obj = ptrbuf->p;
			ti = ptrbuf->ti;
			ptrbuf++;

			// obj belongs to interval [mheap.arena_start, mheap.arena_used).
			if(Debug > 1) {
				if(obj < runtime·mheap.arena_start || obj >= runtime·mheap.arena_used)
					runtime·throw("object is outside of mheap");
			}

			// obj may be a pointer to a live object.
			// Try to find the beginning of the object.

			// Round down to word boundary.
			if(((uintptr)obj & ((uintptr)PtrSize-1)) != 0) {
				obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
				ti = 0;	// misaligned pointer: type info no longer applies
			}

			// Find bits for this word.
			off = (uintptr*)obj - (uintptr*)arena_start;
			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
			shift = off % wordsPerBitmapWord;
			xbits = *bitp;
			bits = xbits >> shift;

			// Pointing at the beginning of a block?
			if((bits & (bitAllocated|bitBlockBoundary)) != 0) {
				if(CollectStats)
					runtime·xadd64(&gcstats.flushptrbuf.foundbit, 1);
				goto found;
			}

			ti = 0;	// interior pointer: type info applied to the base only

			// Pointing just past the beginning?
			// Scan backward a little to find a block boundary.
			for(j=shift; j-->0; ) {
				if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
					obj = (byte*)obj - (shift-j)*PtrSize;
					shift = j;
					bits = xbits>>shift;
					if(CollectStats)
						runtime·xadd64(&gcstats.flushptrbuf.foundword, 1);
					goto found;
				}
			}

			// Otherwise consult span table to find beginning.
			// (Manually inlined copy of MHeap_LookupMaybe.)
			k = (uintptr)obj>>PageShift;
			x = k;
			if(sizeof(void*) == 8)
				x -= (uintptr)arena_start>>PageShift;
			s = runtime·mheap.spans[x];
			if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse)
				continue;
			p = (byte*)((uintptr)s->start<<PageShift);
			if(s->sizeclass == 0) {
				obj = p;
			} else {
				size = s->elemsize;
				int32 i = ((byte*)obj - p)/size;
				obj = p+i*size;
			}

			// Now that we know the object header, reload bits.
			off = (uintptr*)obj - (uintptr*)arena_start;
			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
			shift = off % wordsPerBitmapWord;
			xbits = *bitp;
			bits = xbits >> shift;
			if(CollectStats)
				runtime·xadd64(&gcstats.flushptrbuf.foundspan, 1);

		found:
			// Now we have bits, bitp, and shift correct for
			// obj pointing at the base of the object.
			// Only care about allocated and not marked.
			if((bits & (bitAllocated|bitMarked)) != bitAllocated)
				continue;
			if(work.nproc == 1)
				*bitp |= bitMarked<<shift;
			else {
				// CAS loop: concurrent helpers may update the same bitmap word.
				for(;;) {
					x = *bitp;
					if(x & (bitMarked<<shift))
						goto continue_obj;
					if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
						break;
				}
			}

			// If object has no pointers, don't need to scan further.
			if((bits & bitNoScan) != 0)
				continue;

			// Ask span about size class.
			// (Manually inlined copy of MHeap_Lookup.)
			x = (uintptr)obj >> PageShift;
			if(sizeof(void*) == 8)
				x -= (uintptr)arena_start>>PageShift;
			s = runtime·mheap.spans[x];

			PREFETCH(obj);

			*wp = (Obj){obj, s->elemsize, ti};
			wp++;
			nobj++;
		continue_obj:;
		}

		// If another proc wants a pointer, give it some.
		if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
			wbuf->nobj = nobj;
			wbuf = handoff(wbuf);
			nobj = wbuf->nobj;
			wp = wbuf->obj + nobj;
		}
	}

	*_wp = wp;
	*_wbuf = wbuf;
	*_nobj = nobj;
}

// flushobjbuf moves whole Obj entries from the intermediate obj buffer
// to the work buffer.  _wp, _wbuf, _nobj are input/output parameters
// specifying the work buffer.
static void
flushobjbuf(Obj *objbuf, Obj **objbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
{
	uintptr nobj, off;
	Obj *wp, obj;
	Workbuf *wbuf;
	Obj *objbuf_end;

	wp = *_wp;
	wbuf = *_wbuf;
	nobj = *_nobj;

	objbuf_end = *objbufpos;
	*objbufpos = objbuf;	// reset the intermediate buffer to empty

	while(objbuf < objbuf_end) {
		obj = *objbuf++;

		// Align obj.b to a word boundary.
		off = (uintptr)obj.p & (PtrSize-1);
		if(off != 0) {
			obj.p += PtrSize - off;
			obj.n -= PtrSize - off;
			obj.ti = 0;
		}

		if(obj.p == nil || obj.n == 0)
			continue;

		// If buffer is full, get a new one.
		if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
			if(wbuf != nil)
				wbuf->nobj = nobj;
			wbuf = getempty(wbuf);
			wp = wbuf->obj;
			nobj = 0;
		}

		*wp = obj;
		wp++;
		nobj++;
	}

	// If another proc wants a pointer, give it some.
	if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
		wbuf->nobj = nobj;
		wbuf = handoff(wbuf);
		nobj = wbuf->nobj;
		wp = wbuf->obj + nobj;
	}

	*_wp = wp;
	*_wbuf = wbuf;
	*_nobj = nobj;
}

// Program that scans the whole block and treats every block element as a potential pointer
static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR};

// Hchan program
static uintptr chanProg[2] = {0, GC_CHAN};

// Local variables of a program fragment or loop
typedef struct Frame Frame;
struct Frame {
	uintptr count, elemsize, b;
	uintptr *loop_or_ret;
};

// Sanity check for the derived type info objti.
static void
checkptr(void *obj, uintptr objti)
{
	uintptr *pc1, *pc2, type, tisize, i, j, x;
	byte *objstart;
	Type *t;
	MSpan *s;

	if(!Debug)
		runtime·throw("checkptr is debug only");

	if(obj < runtime·mheap.arena_start || obj >= runtime·mheap.arena_used)
		return;
	type = runtime·gettype(obj);
	t = (Type*)(type & ~(uintptr)(PtrSize-1));
	if(t == nil)
		return;
	x = (uintptr)obj >> PageShift;
	if(sizeof(void*) == 8)
		x -= (uintptr)(runtime·mheap.arena_start)>>PageShift;
	s = runtime·mheap.spans[x];
	objstart = (byte*)((uintptr)s->start<<PageShift);
	if(s->sizeclass != 0) {
		i = ((byte*)obj - objstart)/s->elemsize;
		objstart += i*s->elemsize;
	}
	tisize = *(uintptr*)objti;
	// Sanity check for object size: it should fit into the memory block.
	if((byte*)obj + tisize > objstart + s->elemsize) {
		runtime·printf("object of type '%S' at %p/%p does not fit in block %p/%p\n",
			*t->string, obj, tisize, objstart, s->elemsize);
		runtime·throw("invalid gc type info");
	}
	if(obj != objstart)
		return;
	// If obj points to the beginning of the memory block,
	// check type info as well.
	if(t->string == nil ||
		// Gob allocates unsafe pointers for indirection.
		(runtime·strcmp(t->string->str, (byte*)"unsafe.Pointer") &&
		// Runtime and gc think differently about closures.
		runtime·strstr(t->string->str, (byte*)"struct { F uintptr") != t->string->str)) {
		pc1 = (uintptr*)objti;
		pc2 = (uintptr*)t->gc;
		// A simple best-effort check until first GC_END.
		for(j = 1; pc1[j] != GC_END && pc2[j] != GC_END; j++) {
			if(pc1[j] != pc2[j]) {
				runtime·printf("invalid gc type info for '%s' at %p, type info %p, block info %p\n",
					t->string ? (int8*)t->string->str : (int8*)"?", j, pc1[j], pc2[j]);
				runtime·throw("invalid gc type info");
			}
		}
	}
}

// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left. Instead of using an explicit recursion, it keeps
// a work list in the Workbuf* structures and loops in the main function
// body. Keeping an explicit work list is easier on the stack allocator and
// more efficient.
//
// wbuf: current work buffer
// wp:   storage for next queued pointer (write pointer)
// nobj: number of queued objects
static void
scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
{
	byte *b, *arena_start, *arena_used;
	uintptr n, i, end_b, elemsize, size, ti, objti, count, type;
	uintptr *pc, precise_type, nominal_size;
	uintptr *chan_ret, chancap;
	void *obj;
	Type *t;
	Slice *sliceptr;
	Frame *stack_ptr, stack_top, stack[GC_STACK_CAPACITY+4];
	BufferList *scanbuffers;
	PtrTarget *ptrbuf, *ptrbuf_end, *ptrbufpos;
	Obj *objbuf, *objbuf_end, *objbufpos;
	Eface *eface;
	Iface *iface;
	Hchan *chan;
	ChanType *chantype;

	if(sizeof(Workbuf) % PageSize != 0)
		runtime·throw("scanblock: size of Workbuf is suboptimal");

	// Memory arena parameters.
	arena_start = runtime·mheap.arena_start;
	arena_used = runtime·mheap.arena_used;

	stack_ptr = stack+nelem(stack)-1;

	precise_type = false;
	nominal_size = 0;

	// Allocate ptrbuf
	{
		scanbuffers = &bufferList[m->helpgc];
		ptrbuf = &scanbuffers->ptrtarget[0];
		ptrbuf_end = &scanbuffers->ptrtarget[0] + nelem(scanbuffers->ptrtarget);
		objbuf = &scanbuffers->obj[0];
		objbuf_end = &scanbuffers->obj[0] + nelem(scanbuffers->obj);
	}

	ptrbufpos = ptrbuf;
	objbufpos = objbuf;

	// (Silence the compiler)
	chan = nil;
	chantype = nil;
	chan_ret = nil;

	goto next_block;

	for(;;) {
		// Each iteration scans the block b of length n, queueing pointers in
		// the work buffer.
		if(Debug > 1) {
			runtime·printf("scanblock %p %D\n", b, (int64)n);
		}

		if(CollectStats) {
			runtime·xadd64(&gcstats.nbytes, n);
			runtime·xadd64(&gcstats.obj.sum, nobj);
			runtime·xadd64(&gcstats.obj.cnt, 1);
		}

		if(ti != 0) {
			// Caller supplied type info; low bits carry PRECISE/LOOP flags,
			// remaining bits point at the GC program.
			pc = (uintptr*)(ti & ~(uintptr)PC_BITS);
			precise_type = (ti & PRECISE);
			stack_top.elemsize = pc[0];
			if(!precise_type)
				nominal_size = pc[0];
			if(ti & LOOP) {
				stack_top.count = 0;	// 0 means an infinite number of iterations
				stack_top.loop_or_ret = pc+1;
			} else {
				stack_top.count = 1;
			}
			if(Debug) {
				// Simple sanity check for provided type info ti:
				// The declared size of the object must be not larger than the actual size
				// (it can be smaller due to inferior pointers).
				// It's difficult to make a comprehensive check due to inferior pointers,
				// reflection, gob, etc.
				if(pc[0] > n) {
					runtime·printf("invalid gc type info: type info size %p, block size %p\n", pc[0], n);
					runtime·throw("invalid gc type info");
				}
			}
		} else if(UseSpanType) {
			// No caller type info; try to derive it from the span.
			if(CollectStats)
				runtime·xadd64(&gcstats.obj.notype, 1);

			type = runtime·gettype(b);
			if(type != 0) {
				if(CollectStats)
					runtime·xadd64(&gcstats.obj.typelookup, 1);

				t = (Type*)(type & ~(uintptr)(PtrSize-1));
				switch(type & (PtrSize-1)) {
				case TypeInfo_SingleObject:
					pc = (uintptr*)t->gc;
					precise_type = true;  // type information about 'b' is precise
					stack_top.count = 1;
					stack_top.elemsize = pc[0];
					break;
				case TypeInfo_Array:
					pc = (uintptr*)t->gc;
					if(pc[0] == 0)
						goto next_block;
					precise_type = true;  // type information about 'b' is precise
					stack_top.count = 0;  // 0 means an infinite number of iterations
					stack_top.elemsize = pc[0];
					stack_top.loop_or_ret = pc+1;
					break;
				case TypeInfo_Chan:
					chan = (Hchan*)b;
					chantype = (ChanType*)t;
					chan_ret = nil;
					pc = chanProg;
					break;
				default:
					runtime·throw("scanblock: invalid type");
					return;
				}
			} else {
				pc = defaultProg;
			}
		} else {
			pc = defaultProg;
		}

		if(IgnorePreciseGC)
			pc = defaultProg;

		pc++;
		stack_top.b = (uintptr)b;

		end_b = (uintptr)b + n - PtrSize;

		// Interpret the GC program for this block.
		for(;;) {
			if(CollectStats)
				runtime·xadd64(&gcstats.instr[pc[0]], 1);

			obj = nil;
			objti = 0;
			switch(pc[0]) {
			case GC_PTR:
				obj = *(void**)(stack_top.b + pc[1]);
				objti = pc[2];
				pc += 3;
				if(Debug)
					checkptr(obj, objti);
				break;

			case GC_SLICE:
				sliceptr = (Slice*)(stack_top.b + pc[1]);
				if(sliceptr->cap != 0) {
					obj = sliceptr->array;
					// Can't use slice element type for scanning,
					// because if it points to an array embedded
					// in the beginning of a struct,
					// we will scan the whole struct as the slice.
					// So just obtain type info from heap.
				}
				pc += 3;
				break;

			case GC_APTR:
				obj = *(void**)(stack_top.b + pc[1]);
				pc += 2;
				break;

			case GC_STRING:
				obj = *(void**)(stack_top.b + pc[1]);
				markonly(obj);	// string bytes contain no pointers; mark, don't scan
				pc += 2;
				continue;

			case GC_EFACE:
				eface = (Eface*)(stack_top.b + pc[1]);
				pc += 2;
				if(eface->type == nil)
					continue;

				// eface->type
				t = eface->type;
				if((void*)t >= arena_start && (void*)t < arena_used) {
					*ptrbufpos++ = (PtrTarget){t, 0};
					if(ptrbufpos == ptrbuf_end)
						flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
				}

				// eface->data
				if(eface->data >= arena_start && eface->data < arena_used) {
					if(t->size <= sizeof(void*)) {
						// Data stored directly in the interface word.
						if((t->kind & KindNoPointers))
							continue;

						obj = eface->data;
						if((t->kind & ~KindNoPointers) == KindPtr)
							objti = (uintptr)((PtrType*)t)->elem->gc;
					} else {
						obj = eface->data;
						objti = (uintptr)t->gc;
					}
				}
				break;

			case GC_IFACE:
				iface = (Iface*)(stack_top.b + pc[1]);
				pc += 2;
				if(iface->tab == nil)
					continue;

				// iface->tab
				if((void*)iface->tab >= arena_start && (void*)iface->tab < arena_used) {
					*ptrbufpos++ = (PtrTarget){iface->tab, (uintptr)itabtype->gc};
					if(ptrbufpos == ptrbuf_end)
						flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
				}

				// iface->data
				if(iface->data >= arena_start && iface->data < arena_used) {
					t = iface->tab->type;
					if(t->size <= sizeof(void*)) {
						// Data stored directly in the interface word.
						if((t->kind & KindNoPointers))
							continue;

						obj = iface->data;
						if((t->kind & ~KindNoPointers) == KindPtr)
							objti = (uintptr)((PtrType*)t)->elem->gc;
					} else {
						obj = iface->data;
						objti = (uintptr)t->gc;
					}
				}
				break;

			case GC_DEFAULT_PTR:
				// Conservative scan: treat every word as a potential pointer.
				while(stack_top.b <= end_b) {
					obj = *(byte**)stack_top.b;
					stack_top.b += PtrSize;
					if(obj >= arena_start && obj < arena_used) {
						*ptrbufpos++ = (PtrTarget){obj, 0};
						if(ptrbufpos == ptrbuf_end)
							flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
					}
				}
				goto next_block;

			case GC_END:
				if(--stack_top.count != 0) {
					// Next iteration of a loop if possible.
					stack_top.b += stack_top.elemsize;
					if(stack_top.b + stack_top.elemsize <= end_b+PtrSize) {
						pc = stack_top.loop_or_ret;
						continue;
					}
					i = stack_top.b;
				} else {
					// Stack pop if possible.
					if(stack_ptr+1 < stack+nelem(stack)) {
						pc = stack_top.loop_or_ret;
						stack_top = *(++stack_ptr);
						continue;
					}
					i = (uintptr)b + nominal_size;
				}
				if(!precise_type) {
					// Quickly scan [b+i,b+n) for possible pointers.
					for(; i<=end_b; i+=PtrSize) {
						if(*(byte**)i != nil) {
							// Found a value that may be a pointer.
							// Do a rescan of the entire block.
							enqueue((Obj){b, n, 0}, &wbuf, &wp, &nobj);
							if(CollectStats) {
								runtime·xadd64(&gcstats.rescan, 1);
								runtime·xadd64(&gcstats.rescanbytes, n);
							}
							break;
						}
					}
				}
				goto next_block;

			case GC_ARRAY_START:
				i = stack_top.b + pc[1];
				count = pc[2];
				elemsize = pc[3];
				pc += 4;

				// Stack push.
				*stack_ptr-- = stack_top;
				stack_top = (Frame){count, elemsize, i, pc};
				continue;

			case GC_ARRAY_NEXT:
				if(--stack_top.count != 0) {
					stack_top.b += stack_top.elemsize;
					pc = stack_top.loop_or_ret;
				} else {
					// Stack pop.
					stack_top = *(++stack_ptr);
					pc += 1;
				}
				continue;

			case GC_CALL:
				// Stack push.
				*stack_ptr-- = stack_top;
				stack_top = (Frame){1, 0, stack_top.b + pc[1], pc+3 /*return address*/};
				pc = (uintptr*)((byte*)pc + *(int32*)(pc+2));  // target of the CALL instruction
				continue;

			case GC_REGION:
				obj = (void*)(stack_top.b + pc[1]);
				size = pc[2];
				objti = pc[3];
				pc += 4;

				*objbufpos++ = (Obj){obj, size, objti};
				if(objbufpos == objbuf_end)
					flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
				continue;

			case GC_CHAN_PTR:
				chan = *(Hchan**)(stack_top.b + pc[1]);
				if(chan == nil) {
					pc += 3;
					continue;
				}
				if(markonly(chan)) {
					chantype = (ChanType*)pc[2];
					if(!(chantype->elem->kind & KindNoPointers)) {
						// Start chanProg.
						chan_ret = pc+3;
						pc = chanProg+1;
						continue;
					}
				}
				pc += 3;
				continue;

			case GC_CHAN:
				// There are no heap pointers in struct Hchan,
				// so we can ignore the leading sizeof(Hchan) bytes.
				if(!(chantype->elem->kind & KindNoPointers)) {
					// Channel's buffer follows Hchan immediately in memory.
					// Size of buffer (cap(c)) is second int in the chan struct.
					chancap = ((uintgo*)chan)[1];
					if(chancap > 0) {
						// TODO(atom): split into two chunks so that only the
						// in-use part of the circular buffer is scanned.
						// (Channel routines zero the unused part, so the current
						// code does not lead to leaks, it's just a little inefficient.)
						*objbufpos++ = (Obj){(byte*)chan+runtime·Hchansize, chancap*chantype->elem->size,
							(uintptr)chantype->elem->gc | PRECISE | LOOP};
						if(objbufpos == objbuf_end)
							flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
					}
				}
				if(chan_ret == nil)
					goto next_block;
				pc = chan_ret;
				continue;

			default:
				runtime·throw("scanblock: invalid GC instruction");
				return;
			}

			if(obj >= arena_start && obj < arena_used) {
				*ptrbufpos++ = (PtrTarget){obj, objti};
				if(ptrbufpos == ptrbuf_end)
					flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
			}
		}

	next_block:
		// Done scanning [b, b+n).  Prepare for the next iteration of
		// the loop by setting b, n, ti to the parameters for the next block.

		if(nobj == 0) {
			flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
			flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);

			if(nobj == 0) {
				if(!keepworking) {
					if(wbuf)
						putempty(wbuf);
					goto endscan;
				}
				// Emptied our buffer: refill.
				wbuf = getfull(wbuf);
				if(wbuf == nil)
					goto endscan;
				nobj = wbuf->nobj;
				wp = wbuf->obj + wbuf->nobj;
			}
		}

		// Fetch b from the work buffer.
		--wp;
		b = wp->p;
		n = wp->n;
		ti = wp->ti;
		nobj--;
	}

endscan:;
}

// debug_scanblock is the debug copy of scanblock.
// it is simpler, slower, single-threaded, recursive,
// and uses bitSpecial as the mark bit.
static void
debug_scanblock(byte *b, uintptr n)
{
	byte *obj, *p;
	void **vp;
	uintptr size, *bitp, bits, shift, i, xbits, off;
	MSpan *s;

	if(!DebugMark)
		runtime·throw("debug_scanblock without DebugMark");

	if((intptr)n < 0) {
		runtime·printf("debug_scanblock %p %D\n", b, (int64)n);
		runtime·throw("debug_scanblock");
	}

	// Align b to a word boundary.
	off = (uintptr)b & (PtrSize-1);
	if(off != 0) {
		b += PtrSize - off;
		n -= PtrSize - off;
	}

	vp = (void**)b;
	n /= PtrSize;
	for(i=0; i<n; i++) {
		obj = (byte*)vp[i];

		// Words outside the arena cannot be pointers.
		if((byte*)obj < runtime·mheap.arena_start || (byte*)obj >= runtime·mheap.arena_used)
			continue;

		// Round down to word boundary.
		obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));

		// Consult span table to find beginning.
		s = runtime·MHeap_LookupMaybe(&runtime·mheap, obj);
		if(s == nil)
			continue;

		p = (byte*)((uintptr)s->start<<PageShift);
		size = s->elemsize;
		if(s->sizeclass == 0) {
			obj = p;
		} else {
			int32 i = ((byte*)obj - p)/size;
			obj = p+i*size;
		}

		// Now that we know the object header, reload bits.
		off = (uintptr*)obj - (uintptr*)runtime·mheap.arena_start;
		bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		xbits = *bitp;
		bits = xbits >> shift;

		// Now we have bits, bitp, and shift correct for
		// obj pointing at the base of the object.
		// If not allocated or already marked, done.
		if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0)  // NOTE: bitSpecial not bitMarked
			continue;
		*bitp |= bitSpecial<<shift;
		if(!(bits & bitMarked))
			runtime·printf("found unmarked block %p in %p\n", obj, vp+i);

		// If object has no pointers, don't need to scan further.
		if((bits & bitNoScan) != 0)
			continue;

		debug_scanblock(obj, size);
	}
}

// Append obj to the work buffer.
// _wbuf, _wp, _nobj are input/output parameters and are specifying the work buffer.
static void
enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj)
{
	uintptr nobj, off;
	Obj *wp;
	Workbuf *wbuf;

	if(Debug > 1)
		runtime·printf("append obj(%p %D %p)\n", obj.p, (int64)obj.n, obj.ti);

	// Align obj.b to a word boundary.
	off = (uintptr)obj.p & (PtrSize-1);
	if(off != 0) {
		obj.p += PtrSize - off;
		obj.n -= PtrSize - off;
		obj.ti = 0;	// type info no longer lines up after realignment
	}

	if(obj.p == nil || obj.n == 0)
		return;

	// Load work buffer state
	wp = *_wp;
	wbuf = *_wbuf;
	nobj = *_nobj;

	// If another proc wants a pointer, give it some.
	if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
		wbuf->nobj = nobj;
		wbuf = handoff(wbuf);
		nobj = wbuf->nobj;
		wp = wbuf->obj + nobj;
	}

	// If buffer is full, get a new one.
	if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
		if(wbuf != nil)
			wbuf->nobj = nobj;
		wbuf = getempty(wbuf);
		wp = wbuf->obj;
		nobj = 0;
	}

	*wp = obj;
	wp++;
	nobj++;

	// Save work buffer state
	*_wp = wp;
	*_wbuf = wbuf;
	*_nobj = nobj;
}

// Parallel-for body: scan the i'th GC root.
static void
markroot(ParFor *desc, uint32 i)
{
	Obj *wp;
	Workbuf *wbuf;
	uintptr nobj;

	USED(&desc);
	wp = nil;
	wbuf = nil;
	nobj = 0;
	enqueue(work.roots[i], &wbuf, &wp, &nobj);
	scanblock(wbuf, wp, nobj, false);
}

// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
static Workbuf*
getempty(Workbuf *b)
{
	// The old buffer (if any) is handed to other workers via work.full.
	if(b != nil)
		runtime·lfstackpush(&work.full, &b->node);
	b = (Workbuf*)runtime·lfstackpop(&work.empty);
	if(b == nil) {
		// Need to allocate.
		runtime·lock(&work);
		if(work.nchunk < sizeof *b) {
			// Carve buffers out of 1MB chunks obtained from the OS.
			work.nchunk = 1<<20;
			work.chunk = runtime·SysAlloc(work.nchunk);
			if(work.chunk == nil)
				runtime·throw("runtime: cannot allocate memory");
		}
		b = (Workbuf*)work.chunk;
		work.chunk += sizeof *b;
		work.nchunk -= sizeof *b;
		runtime·unlock(&work);
	}
	b->nobj = 0;
	return b;
}

// Return buffer b to the work.empty free list.
static void
putempty(Workbuf *b)
{
	if(CollectStats)
		runtime·xadd64(&gcstats.putempty, 1);

	runtime·lfstackpush(&work.empty, &b->node);
}

// Get a full work buffer off the work.full list, or return nil.
static Workbuf*
getfull(Workbuf *b)
{
	int32 i;

	if(CollectStats)
		runtime·xadd64(&gcstats.getfull, 1);

	if(b != nil)
		runtime·lfstackpush(&work.empty, &b->node);
	b = (Workbuf*)runtime·lfstackpop(&work.full);
	if(b != nil || work.nproc == 1)
		return b;

	// Nothing available: advertise ourselves as waiting and spin,
	// backing off from processor yields to OS yields to sleeps.
	runtime·xadd(&work.nwait, +1);
	for(i=0;; i++) {
		if(work.full != 0) {
			runtime·xadd(&work.nwait, -1);
			b = (Workbuf*)runtime·lfstackpop(&work.full);
			if(b != nil)
				return b;
			runtime·xadd(&work.nwait, +1);
		}
		// All procs waiting means there is no work left anywhere.
		if(work.nwait == work.nproc)
			return nil;
		if(i < 10) {
			m->gcstats.nprocyield++;
			runtime·procyield(20);
		} else if(i < 20) {
			m->gcstats.nosyield++;
			runtime·osyield();
		} else {
			m->gcstats.nsleep++;
			runtime·usleep(100);
		}
	}
}

// Split buffer b in half, publishing one half for other workers to steal.
// Returns the half this worker keeps.
static Workbuf*
handoff(Workbuf *b)
{
	int32 n;
	Workbuf *b1;

	// Make new buffer with half of b's pointers.
	b1 = getempty(nil);
	n = b->nobj/2;
	b->nobj -= n;
	b1->nobj = n;
	runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
	m->gcstats.nhandoff++;
	m->gcstats.nhandoffcnt += n;

	// Put b on full list - let first half of b get stolen.
	runtime·lfstackpush(&work.full, &b->node);
	return b1;
}

// Append obj to the global root list, doubling the backing array as needed.
static void
addroot(Obj obj)
{
	uint32 cap;
	Obj *new;

	if(work.nroot >= work.rootcap) {
		cap = PageSize/sizeof(Obj);
		if(cap < 2*work.rootcap)
			cap = 2*work.rootcap;
		new = (Obj*)runtime·SysAlloc(cap*sizeof(Obj));
		if(new == nil)
			runtime·throw("runtime: cannot allocate memory");
		if(work.roots != nil) {
			runtime·memmove(new, work.roots, work.rootcap*sizeof(Obj));
			runtime·SysFree(work.roots, work.rootcap*sizeof(Obj));
		}
		work.roots = new;
		work.rootcap = cap;
	}
	work.roots[work.nroot] = obj;
	work.nroot++;
}

extern byte pclntab[]; // base for f->ptrsoff

typedef struct BitVector BitVector;
struct BitVector
{
	int32 n;	// number of bits
	uint32 data[];
};

// Scans an interface data value when the interface type indicates
// that it is a pointer.
static void
scaninterfacedata(uintptr bits, byte *scanp, bool afterprologue)
{
	Itab *tab;
	Type *type;

	if(afterprologue) {
		if(bits == BitsIface) {
			tab = *(Itab**)scanp;
			// Small pointer-free values are stored inline in the
			// interface word; nothing to scan then.
			if(tab->type->size <= sizeof(void*) && (tab->type->kind & KindNoPointers))
				return;
		} else { // bits == BitsEface
			type = *(Type**)scanp;
			if(type->size <= sizeof(void*) && (type->kind & KindNoPointers))
				return;
		}
	}
	// Add the data word (second word of the interface) as a root.
	addroot((Obj){scanp+PtrSize, PtrSize, 0});
}

// Starting from scanp, scans words corresponding to set bits.
static void
scanbitvector(byte *scanp, BitVector *bv, bool afterprologue)
{
	uintptr word, bits;
	uint32 *wordp;
	int32 i, remptrs;

	// Walk the bitmap 32 bits (16 two-bit entries) at a time.
	wordp = bv->data;
	for(remptrs = bv->n; remptrs > 0; remptrs -= 32) {
		word = *wordp++;
		if(remptrs < 32)
			i = remptrs;
		else
			i = 32;
		i /= BitsPerPointer;
		for(; i > 0; i--) {
			bits = word & 3;
			// NOTE: the else binds to the inner if — non-pointer
			// entries and nil words are skipped entirely.
			if(bits != BitsNoPointer && *(void**)scanp != nil)
				if(bits == BitsPointer)
					addroot((Obj){scanp, PtrSize, 0});
				else
					scaninterfacedata(bits, scanp, afterprologue);
			word >>= BitsPerPointer;
			scanp += PtrSize;
		}
	}
}

// Scan a stack frame: local variables and function arguments/results.
static void
addframeroots(Stkframe *frame, void*)
{
	Func *f;
	BitVector *args, *locals;
	uintptr size;
	bool afterprologue;

	f = frame->fn;

	// Scan local variables if stack frame has been allocated.
	// Use pointer information if known.
	afterprologue = (frame->varp > (byte*)frame->sp);
	if(afterprologue) {
		locals = runtime·funcdata(f, FUNCDATA_GCLocals);
		if(locals == nil) {
			// No locals information, scan everything.
			size = frame->varp - (byte*)frame->sp;
			addroot((Obj){frame->varp - size, size, 0});
		} else if(locals->n < 0) {
			// Locals size information, scan just the
			// locals.
			size = -locals->n;
			addroot((Obj){frame->varp - size, size, 0});
		} else if(locals->n > 0) {
			// Locals bitmap information, scan just the
			// pointers in locals.
			size = (locals->n*PtrSize) / BitsPerPointer;
			scanbitvector(frame->varp - size, locals, afterprologue);
		}
	}

	// Scan arguments.
	// Use pointer information if known.
	args = runtime·funcdata(f, FUNCDATA_GCArgs);
	if(args != nil && args->n > 0)
		scanbitvector(frame->argp, args, false);
	else
		addroot((Obj){frame->argp, frame->arglen, 0});
}

// Add roots for goroutine gp's stack, either frame-by-frame
// (ScanStackByFrames) or conservatively segment-by-segment.
static void
addstackroots(G *gp)
{
	M *mp;
	int32 n;
	Stktop *stk;
	uintptr sp, guard, pc, lr;
	void *base;
	uintptr size;

	stk = (Stktop*)gp->stackbase;
	guard = gp->stackguard;

	if(gp == g)
		runtime·throw("can't scan our own stack");
	if((mp = gp->m) != nil && mp->helpgc)
		runtime·throw("can't scan gchelper stack");
	if(gp->syscallstack != (uintptr)nil) {
		// Scanning another goroutine that is about to enter or might
		// have just exited a system call. It may be executing code such
		// as schedlock and may have needed to start a new stack segment.
		// Use the stack segment and stack pointer at the time of
		// the system call instead, since that won't change underfoot.
		sp = gp->syscallsp;
		pc = gp->syscallpc;
		lr = 0;
		stk = (Stktop*)gp->syscallstack;
		guard = gp->syscallguard;
	} else {
		// Scanning another goroutine's stack.
		// The goroutine is usually asleep (the world is stopped).
		sp = gp->sched.sp;
		pc = gp->sched.pc;
		lr = gp->sched.lr;

		// For function about to start, context argument is a root too.
		if(gp->sched.ctxt != 0 && runtime·mlookup(gp->sched.ctxt, &base, &size, nil))
			addroot((Obj){base, size, 0});
	}
	if(ScanStackByFrames) {
		USED(stk);
		USED(guard);
		runtime·gentraceback(pc, sp, lr, gp, 0, nil, 0x7fffffff, addframeroots, nil, false);
	} else {
		USED(pc);
		n = 0;
		// Walk the chain of stack segments, adding each in-use
		// region [sp, stk) conservatively.
		while(stk) {
			if(sp < guard-StackGuard || (uintptr)stk < sp) {
				runtime·printf("scanstack inconsistent: g%D#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk);
				runtime·throw("scanstack");
			}
			addroot((Obj){(byte*)sp, (uintptr)stk - sp, (uintptr)defaultProg | PRECISE | LOOP});
			sp = stk->gobuf.sp;
			guard = stk->stackguard;
			stk = (Stktop*)stk->stackbase;
			n++;
		}
	}
}

// Callback for runtime·walkfintab: add the block v (which has a
// finalizer registered) as a root.
static void
addfinroots(void *v)
{
	uintptr size;
	void *base;

	size = 0;
	if(!runtime·mlookup(v, &base, &size, nil) || !runtime·blockspecial(base))
		runtime·throw("mark - finalizer inconsistency");

	// do not mark the finalizer block itself. just mark the things it points at.
	addroot((Obj){base, size, 0});
}

// Build the complete root set: data/bss segments, span type info,
// goroutine stacks, and finalizer tables.
static void
addroots(void)
{
	G *gp;
	FinBlock *fb;
	MSpan *s, **allspans;
	uint32 spanidx;

	work.nroot = 0;

	// data & bss
	// TODO(atom): load balancing
	addroot((Obj){data, edata - data, (uintptr)gcdata});
	addroot((Obj){bss, ebss - bss, (uintptr)gcbss});

	// MSpan.types
	allspans = runtime·mheap.allspans;
	for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
		s = allspans[spanidx];
		if(s->state == MSpanInUse) {
			// The garbage collector ignores type pointers stored in MSpan.types:
			//  - Compiler-generated types are stored outside of heap.
			//  - The reflect package has runtime-generated types cached in its data structures.
			//    The garbage collector relies on finding the references via that cache.
			switch(s->types.compression) {
			case MTypes_Empty:
			case MTypes_Single:
				break;
			case MTypes_Words:
			case MTypes_Bytes:
				markonly((byte*)s->types.data);
				break;
			}
		}
	}

	// stacks
	for(gp=runtime·allg; gp!=nil; gp=gp->alllink) {
		switch(gp->status){
		default:
			runtime·printf("unexpected G.status %d\n", gp->status);
			runtime·throw("mark - bad status");
		case Gdead:
			break;
		case Grunning:
			runtime·throw("mark - world not stopped");
		case Grunnable:
		case Gsyscall:
		case Gwaiting:
			addstackroots(gp);
			break;
		}
	}

	runtime·walkfintab(addfinroots);

	// Queued-but-not-yet-run finalizers keep their arguments alive.
	for(fb=allfin; fb; fb=fb->alllink)
		addroot((Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
}

// Handle an unmarked block carrying the special bit: if it has a
// finalizer, queue the finalizer and report true so the sweep keeps
// the block alive; otherwise clear its profile/special state and
// report false so the block is freed.
static bool
handlespecial(byte *p, uintptr size)
{
	FuncVal *fn;
	uintptr nret;
	PtrType *ot;
	Type *fint;
	FinBlock *block;
	Finalizer *f;

	if(!runtime·getfinalizer(p, true, &fn, &nret, &fint, &ot)) {
		runtime·setblockspecial(p, false);
		runtime·MProf_Free(p, size);
		return false;
	}

	runtime·lock(&finlock);
	if(finq == nil || finq->cnt == finq->cap) {
		// Current queue block is full: take a block from the
		// free list (finc), allocating a fresh page if empty.
		if(finc == nil) {
			finc = runtime·persistentalloc(PageSize, 0);
			finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
			finc->alllink = allfin;
			allfin = finc;
		}
		block = finc;
		finc = block->next;
		block->next = finq;
		finq = block;
	}
	f = &finq->fin[finq->cnt];
	finq->cnt++;
	f->fn = fn;
	f->nret = nret;
	f->fint = fint;
	f->ot = ot;
	f->arg = p;
	runtime·unlock(&finlock);
	return true;
}

// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
static void
sweepspan(ParFor *desc, uint32 idx)
{
	int32 cl, n, npages;
	uintptr size;
	byte *p;
	MCache *c;
	byte *arena_start;
	MLink head, *end;
	int32 nfree;
	byte *type_data;
	byte compression;
	uintptr type_data_inc;
	MSpan *s;

	USED(&desc);
	s = runtime·mheap.allspans[idx];
	if(s->state != MSpanInUse)
		return;
	arena_start = runtime·mheap.arena_start;
	p = (byte*)(s->start << PageShift);
	cl = s->sizeclass;
	size = s->elemsize;
	if(cl == 0) {
		// Size class 0 is a large span holding exactly one object.
		n = 1;
	} else {
		// Chunk full of small blocks.
		npages = runtime·class_to_allocnpages[cl];
		n = (npages << PageShift) / size;
	}
	nfree = 0;
	end = &head;	// head/end accumulate the freed-object list
	c = m->mcache;

	type_data = (byte*)s->types.data;
	type_data_inc = sizeof(uintptr);
	compression = s->types.compression;
	switch(compression) {
	case MTypes_Bytes:
		type_data += 8*sizeof(uintptr);
		type_data_inc = 1;
		break;
	}

	// Sweep through n objects of given size starting at p.
	// This thread owns the span now, so it can manipulate
	// the block bitmap without atomic operations.
	for(; n > 0; n--, p += size, type_data+=type_data_inc) {
		uintptr off, *bitp, shift, bits;

		off = (uintptr*)p - (uintptr*)arena_start;
		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		bits = *bitp>>shift;

		if((bits & bitAllocated) == 0)
			continue;

		if((bits & bitMarked) != 0) {
			// Object survives: clear the mark for the next GC round.
			if(DebugMark) {
				if(!(bits & bitSpecial))
					runtime·printf("found spurious mark on %p\n", p);
				*bitp &= ~(bitSpecial<<shift);
			}
			*bitp &= ~(bitMarked<<shift);
			continue;
		}

		// Special means it has a finalizer or is being profiled.
		// In DebugMark mode, the bit has been coopted so
		// we have to assume all blocks are special.
		if(DebugMark || (bits & bitSpecial) != 0) {
			if(handlespecial(p, size))
				continue;
		}

		// Mark freed; restore block boundary bit.
		*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);

		if(cl == 0) {
			// Free large span.
			runtime·unmarkspan(p, 1<<PageShift);
			*(uintptr*)p = (uintptr)0xdeaddeaddeaddeadll;	// needs zeroing
			runtime·MHeap_Free(&runtime·mheap, s, 1);
			c->local_nlargefree++;
			c->local_largefree += size;
		} else {
			// Free small object.
			switch(compression) {
			case MTypes_Words:
				*(uintptr*)type_data = 0;
				break;
			case MTypes_Bytes:
				*(byte*)type_data = 0;
				break;
			}
			if(size > sizeof(uintptr))
				((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll;	// mark as "needs to be zeroed"

			end->next = (MLink*)p;
			end = (MLink*)p;
			nfree++;
		}
	}

	// Hand the whole batch of freed small objects back to MCentral.
	if(nfree) {
		c->local_nsmallfree[cl] += nfree;
		c->local_cachealloc -= nfree * size;
		runtime·MCentral_FreeSpan(&runtime·mheap.central[cl], s, nfree, head.next, end);
	}
}

// Print the objects in span idx together with their allocated/special
// bitmap state, eight words per output line (debugging aid).
static void
dumpspan(uint32 idx)
{
	int32 sizeclass, n, npages, i, column;
	uintptr size;
	byte *p;
	byte *arena_start;
	MSpan *s;
	bool allocated, special;

	s = runtime·mheap.allspans[idx];
	if(s->state != MSpanInUse)
		return;
	arena_start = runtime·mheap.arena_start;
	p = (byte*)(s->start << PageShift);
	sizeclass = s->sizeclass;
	size = s->elemsize;
	if(sizeclass == 0) {
		n = 1;
	} else {
		npages = runtime·class_to_allocnpages[sizeclass];
		n = (npages << PageShift) / size;
	}

	runtime·printf("%p .. %p:\n", p, p+n*size);
	column = 0;
	for(; n>0; n--, p+=size) {
		uintptr off, *bitp, shift, bits;

		off = (uintptr*)p - (uintptr*)arena_start;
		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		bits = *bitp>>shift;

		allocated = ((bits & bitAllocated) != 0);
		special = ((bits & bitSpecial) != 0);

		for(i=0; i<size; i+=sizeof(void*)) {
			if(column == 0) {
				runtime·printf("\t");
			}
			if(i == 0) {
				// Object start: ( = allocated, [ = free, @ = special.
				runtime·printf(allocated ? "(" : "[");
				runtime·printf(special ? "@" : "");
				runtime·printf("%p: ", p+i);
			} else {
				runtime·printf(" ");
			}

			runtime·printf("%p", *(void**)(p+i));

			if(i+sizeof(void*) >= size) {
				runtime·printf(allocated ? ") " : "] ");
			}

			column++;
			if(column == 8) {
				runtime·printf("\n");
				column = 0;
			}
		}
	}
	runtime·printf("\n");
}

// A debugging function to dump the contents of memory
void
runtime·memorydump(void)
{
	uint32 spanidx;

	for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
		dumpspan(spanidx);
	}
}

// Body run by helper Ms during a parallel collection.
void
runtime·gchelper(void)
{
	gchelperstart();

	// parallel mark for over gc roots
	runtime·parfordo(work.markfor);

	// help other threads scan secondary blocks
	scanblock(nil, nil, 0, true);

	if(DebugMark) {
		// wait while the main thread executes mark(debug_scanblock)
		while(runtime·atomicload(&work.debugmarkdone) == 0)
			runtime·usleep(10);
	}

	runtime·parfordo(work.sweepfor);
	bufferList[m->helpgc].busy = 0;
	// Last helper to finish wakes the coordinator.
	if(runtime·xadd(&work.ndone, +1) == work.nproc-1)
		runtime·notewakeup(&work.alldone);
}

#define GcpercentUnknown (-2)

// Initialized from $GOGC. GOGC=off means no gc.
//
// Next gc is after we've allocated an extra amount of
// memory proportional to the amount already in use.
// If gcpercent=100 and we're using 4M, we'll gc again
// when we get to 8M. This keeps the gc cost in linear
// proportion to the allocation cost. Adjusting gcpercent
// just changes the linear constant (and also the amount of
// extra memory used).
static int32 gcpercent = GcpercentUnknown;

// Flush each P's cached allocator statistics into the global mstats.
static void
cachestats(void)
{
	MCache *c;
	P *p, **pp;

	for(pp=runtime·allp; p=*pp; pp++) {
		c = p->mcache;
		if(c==nil)
			continue;
		runtime·purgecachedstats(c);
	}
}

// Recompute the derived fields of mstats from per-M, per-P, and
// per-span state. If stats is non-nil, also aggregate (and reset)
// every M's GCStats into *stats.
static void
updatememstats(GCStats *stats)
{
	M *mp;
	MSpan *s;
	MCache *c;
	P *p, **pp;
	int32 i;
	uint64 stacks_inuse, smallfree;
	uint64 *src, *dst;

	if(stats)
		runtime·memclr((byte*)stats, sizeof(*stats));
	stacks_inuse = 0;
	for(mp=runtime·allm; mp; mp=mp->alllink) {
		stacks_inuse += mp->stackinuse*FixedStack;
		if(stats) {
			// Sum GCStats field-by-field as an array of uint64.
			src = (uint64*)&mp->gcstats;
			dst = (uint64*)stats;
			for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
				dst[i] += src[i];
			runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
		}
	}
	mstats.stacks_inuse = stacks_inuse;

	// Calculate memory allocator stats.
	// During program execution we only count number of frees and amount of freed memory.
	// Current number of alive object in the heap and amount of alive heap memory
	// are calculated by scanning all spans.
	// Total number of mallocs is calculated as number of frees plus number of alive objects.
	// Similarly, total amount of allocated memory is calculated as amount of freed memory
	// plus amount of alive heap memory.
	mstats.alloc = 0;
	mstats.total_alloc = 0;
	mstats.nmalloc = 0;
	mstats.nfree = 0;
	for(i = 0; i < nelem(mstats.by_size); i++) {
		mstats.by_size[i].nmalloc = 0;
		mstats.by_size[i].nfree = 0;
	}

	// Flush MCache's to MCentral.
	for(pp=runtime·allp; p=*pp; pp++) {
		c = p->mcache;
		if(c==nil)
			continue;
		runtime·MCache_ReleaseAll(c);
	}

	// Aggregate local stats.
	cachestats();

	// Scan all spans and count number of alive objects.
	for(i = 0; i < runtime·mheap.nspan; i++) {
		s = runtime·mheap.allspans[i];
		if(s->state != MSpanInUse)
			continue;
		if(s->sizeclass == 0) {
			mstats.nmalloc++;
			mstats.alloc += s->elemsize;
		} else {
			mstats.nmalloc += s->ref;
			mstats.by_size[s->sizeclass].nmalloc += s->ref;
			mstats.alloc += s->ref*s->elemsize;
		}
	}

	// Aggregate by size class.
	smallfree = 0;
	mstats.nfree = runtime·mheap.nlargefree;
	for(i = 0; i < nelem(mstats.by_size); i++) {
		mstats.nfree += runtime·mheap.nsmallfree[i];
		mstats.by_size[i].nfree = runtime·mheap.nsmallfree[i];
		mstats.by_size[i].nmalloc += runtime·mheap.nsmallfree[i];
		smallfree += runtime·mheap.nsmallfree[i] * runtime·class_to_size[i];
	}
	mstats.nmalloc += mstats.nfree;

	// Calculate derived stats.
	mstats.total_alloc = mstats.alloc + runtime·mheap.largefree + smallfree;
	mstats.heap_alloc = mstats.alloc;
	mstats.heap_objects = mstats.nmalloc - mstats.nfree;
}

// Structure of arguments passed to function gc().
// This allows the arguments to be passed via runtime·mcall.
struct gc_args
{
	int64 start_time; // start time of GC in ns (just before stoptheworld)
};

static void gc(struct gc_args *args);
static void mgc(G *gp);

// Parse $GOGC: unset/empty means 100, "off" disables GC (-1).
static int32
readgogc(void)
{
	byte *p;

	p = runtime·getenv("GOGC");
	if(p == nil || p[0] == '\0')
		return 100;
	if(runtime·strcmp(p, (byte*)"off") == 0)
		return -1;
	return runtime·atoi(p);
}

static FuncVal runfinqv = {runfinq};

void
runtime·gc(int32 force)
{
	struct gc_args a;
	int32 i;

	// The atomic operations are not atomic if the uint64s
	// are not aligned on uint64 boundaries. This has been
	// a problem in the past.
	if((((uintptr)&work.empty) & 7) != 0)
		runtime·throw("runtime: gc work buffer is misaligned");
	if((((uintptr)&work.full) & 7) != 0)
		runtime·throw("runtime: gc work buffer is misaligned");

	// The gc is turned off (via enablegc) until
	// the bootstrap has completed.
	// Also, malloc gets called in the guts
	// of a number of libraries that might be
	// holding locks. To avoid priority inversion
	// problems, don't bother trying to run gc
	// while holding a lock. The next mallocgc
	// without a lock will do the gc instead.
	if(!mstats.enablegc || g == m->g0 || m->locks > 0 || runtime·panicking)
		return;

	if(gcpercent == GcpercentUnknown) {	// first time through
		// Double-checked under the heap lock.
		runtime·lock(&runtime·mheap);
		if(gcpercent == GcpercentUnknown)
			gcpercent = readgogc();
		runtime·unlock(&runtime·mheap);
	}
	if(gcpercent < 0)
		return;

	runtime·semacquire(&runtime·worldsema, false);
	if(!force && mstats.heap_alloc < mstats.next_gc) {
		// typically threads which lost the race to grab
		// worldsema exit here when gc is done.
		runtime·semrelease(&runtime·worldsema);
		return;
	}

	// Ok, we're doing it! Stop everybody else
	a.start_time = runtime·nanotime();
	m->gcing = 1;
	runtime·stoptheworld();

	// Run gc on the g0 stack. We do this so that the g stack
	// we're currently running on will no longer change. Cuts
	// the root set down a bit (g0 stacks are not scanned, and
	// we don't need to scan gc's internal state). Also an
	// enabler for copyable stacks.
	for(i = 0; i < (runtime·debug.gctrace > 1 ? 2 : 1); i++) {
		// switch to g0, call gc(&a), then switch back
		g->param = &a;
		g->status = Gwaiting;
		g->waitreason = "garbage collection";
		runtime·mcall(mgc);
		// record a new start time in case we're going around again
		a.start_time = runtime·nanotime();
	}

	// all done
	m->gcing = 0;
	m->locks++;
	runtime·semrelease(&runtime·worldsema);
	runtime·starttheworld();
	m->locks--;

	// now that gc is done, kick off finalizer thread if needed
	if(finq != nil) {
		runtime·lock(&finlock);
		// kick off or wake up goroutine to run queued finalizers
		if(fing == nil)
			fing = runtime·newproc1(&runfinqv, nil, 0, 0, runtime·gc);
		else if(fingwait) {
			fingwait = 0;
			runtime·ready(fing);
		}
		runtime·unlock(&finlock);
	}
	// give the queued finalizers, if any, a chance to run
	runtime·gosched();
}

// mgc runs on the g0 stack (invoked via runtime·mcall from
// runtime·gc) and resumes the requesting goroutine when done.
static void
mgc(G *gp)
{
	gc(gp->param);
	gp->param = nil;
	gp->status = Grunning;
	runtime·gogo(&gp->sched);
}

// gc performs one stop-the-world collection: root setup, parallel
// mark, parallel sweep, statistics, and optional gctrace output.
// Runs on the g0 stack with the world already stopped.
static void
gc(struct gc_args *args)
{
	int64 t0, t1, t2, t3, t4;
	uint64 heap0, heap1, obj0, obj1, ninstr;
	GCStats stats;
	M *mp;
	uint32 i;
	Eface eface;

	t0 = args->start_time;

	if(CollectStats)
		runtime·memclr((byte*)&gcstats, sizeof(gcstats));

	for(mp=runtime·allm; mp; mp=mp->alllink)
		runtime·settype_flush(mp);

	heap0 = 0;
	obj0 = 0;
	if(runtime·debug.gctrace) {
		updatememstats(nil);
		heap0 = mstats.heap_alloc;
		obj0 = mstats.nmalloc - mstats.nfree;
	}

	m->locks++;	// disable gc during mallocs in parforalloc
	if(work.markfor == nil)
		work.markfor = runtime·parforalloc(MaxGcproc);
	if(work.sweepfor == nil)
		work.sweepfor = runtime·parforalloc(MaxGcproc);
	m->locks--;

	if(itabtype == nil) {
		// get C pointer to the Go type "itab"
		runtime·gc_itab_ptr(&eface);
		itabtype = ((PtrType*)eface.type)->elem;
	}

	work.nwait = 0;
	work.ndone = 0;
	work.debugmarkdone = 0;
	work.nproc = runtime·gcprocs();
	addroots();
	runtime·parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot);
	runtime·parforsetup(work.sweepfor, work.nproc, runtime·mheap.nspan, nil, true, sweepspan);
	if(work.nproc > 1) {
		runtime·noteclear(&work.alldone);
		runtime·helpgc(work.nproc);
	}

	t1 = runtime·nanotime();

	// Mark phase (this M participates alongside any helpers).
	gchelperstart();
	runtime·parfordo(work.markfor);
	scanblock(nil, nil, 0, true);

	if(DebugMark) {
		// Verify the parallel mark with the single-threaded scanner.
		for(i=0; i<work.nroot; i++)
			debug_scanblock(work.roots[i].p, work.roots[i].n);
		runtime·atomicstore(&work.debugmarkdone, 1);
	}
	t2 = runtime·nanotime();

	// Sweep phase.
	runtime·parfordo(work.sweepfor);
	bufferList[m->helpgc].busy = 0;
	t3 = runtime·nanotime();

	if(work.nproc > 1)
		runtime·notesleep(&work.alldone);

	cachestats();
	// Next collection triggers after gcpercent% additional growth.
	mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;

	t4 = runtime·nanotime();
	mstats.last_gc = t4;
	mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
	mstats.pause_total_ns += t4 - t0;
	mstats.numgc++;
	if(mstats.debuggc)
		runtime·printf("pause %D\n", t4-t0);

	if(runtime·debug.gctrace) {
		updatememstats(&stats);
		heap1 = mstats.heap_alloc;
		obj1 = mstats.nmalloc - mstats.nfree;

		stats.nprocyield += work.sweepfor->nprocyield;
		stats.nosyield += work.sweepfor->nosyield;
		stats.nsleep += work.sweepfor->nsleep;

		runtime·printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
				" %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
			mstats.numgc, work.nproc, (t2-t1)/1000000, (t3-t2)/1000000, (t1-t0+t4-t3)/1000000,
			heap0>>20, heap1>>20, obj0, obj1,
			mstats.nmalloc, mstats.nfree,
			stats.nhandoff, stats.nhandoffcnt,
			work.sweepfor->nsteal, work.sweepfor->nstealcnt,
			stats.nprocyield, stats.nosyield, stats.nsleep);
		if(CollectStats) {
			runtime·printf("scan: %D bytes, %D objects, %D untyped, %D types from MSpan\n",
				gcstats.nbytes, gcstats.obj.cnt, gcstats.obj.notype, gcstats.obj.typelookup);
			if(gcstats.ptr.cnt != 0)
				runtime·printf("avg ptrbufsize: %D (%D/%D)\n",
					gcstats.ptr.sum/gcstats.ptr.cnt, gcstats.ptr.sum, gcstats.ptr.cnt);
			if(gcstats.obj.cnt != 0)
				runtime·printf("avg nobj: %D (%D/%D)\n",
					gcstats.obj.sum/gcstats.obj.cnt, gcstats.obj.sum, gcstats.obj.cnt);
			runtime·printf("rescans: %D, %D bytes\n", gcstats.rescan, gcstats.rescanbytes);

			runtime·printf("instruction counts:\n");
			ninstr = 0;
			for(i=0; i<nelem(gcstats.instr); i++) {
				runtime·printf("\t%d:\t%D\n", i, gcstats.instr[i]);
				ninstr += gcstats.instr[i];
			}
			runtime·printf("\ttotal:\t%D\n", ninstr);

			runtime·printf("putempty: %D, getfull: %D\n", gcstats.putempty, gcstats.getfull);

			runtime·printf("markonly base lookup: bit %D word %D span %D\n", gcstats.markonly.foundbit, gcstats.markonly.foundword, gcstats.markonly.foundspan);
			runtime·printf("flushptrbuf base lookup: bit %D word %D span %D\n", gcstats.flushptrbuf.foundbit, gcstats.flushptrbuf.foundword, gcstats.flushptrbuf.foundspan);
		}
	}

	runtime·MProf_GC();
}

void
runtime·ReadMemStats(MStats *stats)
{
	// Have to acquire worldsema to stop the world,
	// because stoptheworld can only be used by
	// one goroutine at a time, and there might be
	// a pending garbage collection already calling it.
	runtime·semacquire(&runtime·worldsema, false);
	m->gcing = 1;
	runtime·stoptheworld();
	updatememstats(nil);
	*stats = mstats;
	m->gcing = 0;
	m->locks++;
	runtime·semrelease(&runtime·worldsema);
	runtime·starttheworld();
	m->locks--;
}

void
runtime∕debug·readGCStats(Slice *pauses)
{
	uint64 *p;
	uint32 i, n;

	// Calling code in runtime/debug should make the slice large enough.
	if(pauses->cap < nelem(mstats.pause_ns)+3)
		runtime·throw("runtime: short slice passed to readGCStats");

	// Pass back: pauses, last gc (absolute time), number of gc, total pause ns.
	p = (uint64*)pauses->array;
	runtime·lock(&runtime·mheap);
	n = mstats.numgc;
	if(n > nelem(mstats.pause_ns))
		n = nelem(mstats.pause_ns);

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%nelem(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	for(i=0; i<n; i++)
		p[i] = mstats.pause_ns[(mstats.numgc-1-i)%nelem(mstats.pause_ns)];

	p[n] = mstats.last_gc;
	p[n+1] = mstats.numgc;
	p[n+2] = mstats.pause_total_ns;
	runtime·unlock(&runtime·mheap);
	pauses->len = n+3;
}

// Set gcpercent to in (clamping negatives to -1, i.e. "off") and
// return the previous value via out (FLUSH publishes the Go result).
void
runtime∕debug·setGCPercent(intgo in, intgo out)
{
	runtime·lock(&runtime·mheap);
	if(gcpercent == GcpercentUnknown)
		gcpercent = readgogc();
	out = gcpercent;
	if(in < 0)
		in = -1;
	gcpercent = in;
	runtime·unlock(&runtime·mheap);
	FLUSH(&out);
}

// Sanity checks and busy-marking performed by every GC worker
// (including the coordinating M) before touching shared GC state.
static void
gchelperstart(void)
{
	if(m->helpgc < 0 || m->helpgc >= MaxGcproc)
		runtime·throw("gchelperstart: bad m->helpgc");
	if(runtime·xchg(&bufferList[m->helpgc].busy, 1))
		runtime·throw("gchelperstart: already busy");
	if(g != m->g0)
		runtime·throw("gchelper not running on g0 stack");
}

// runfinq is the body of the dedicated finalizer goroutine: it sleeps
// until GC queues finalizers, then invokes each one via reflect·call.
static void
runfinq(void)
{
	Finalizer *f;
	FinBlock *fb, *next;
	byte *frame;
	uint32 framesz, framecap, i;
	Eface *ef, ef1;

	frame = nil;
	framecap = 0;
	for(;;) {
		runtime·lock(&finlock);
		fb = finq;
		finq = nil;
		if(fb == nil) {
			// Nothing queued: block until handlespecial queues more.
			fingwait = 1;
			runtime·park(runtime·unlock, &finlock, "finalizer wait");
			continue;
		}
		runtime·unlock(&finlock);
		if(raceenabled)
			runtime·racefingo();
		for(; fb; fb=next) {
			next = fb->next;
			for(i=0; i<fb->cnt; i++) {
				f = &fb->fin[i];
				// Frame holds the argument (worst case an Eface)
				// plus space for the finalizer's return values.
				framesz = sizeof(Eface) + f->nret;
				if(framecap < framesz) {
					runtime·free(frame);
					// The frame does not contain pointers interesting for GC,
					// all not yet finalized objects are stored in finc.
					// If we do not mark it as FlagNoScan,
					// the last finalized object is not collected.
					frame = runtime·mallocgc(framesz, 0, FlagNoScan|FlagNoInvokeGC);
					framecap = framesz;
				}
				if(f->fint == nil)
					runtime·throw("missing type in runfinq");
				if(f->fint->kind == KindPtr) {
					// direct use of pointer
					*(void**)frame = f->arg;
				} else if(((InterfaceType*)f->fint)->mhdr.len == 0) {
					// convert to empty interface
					ef = (Eface*)frame;
					ef->type = f->ot;
					ef->data = f->arg;
				} else {
					// convert to interface with methods, via empty interface.
					ef1.type = f->ot;
					ef1.data = f->arg;
					if(!runtime·ifaceE2I2((InterfaceType*)f->fint, ef1, (Iface*)frame))
						runtime·throw("invalid type conversion in runfinq");
				}
				reflect·call(f->fn, frame, framesz);
				f->fn = nil;
				f->arg = nil;
				f->ot = nil;
			}
			// Recycle the emptied block onto the free list.
			fb->cnt = 0;
			fb->next = finc;
			finc = fb;
		}
		runtime·gc(1);	// trigger another gc to clean up the finalized objects, if possible
	}
}

// mark the block at v of size n as allocated.
// If noscan is true, mark it as not needing scanning.
2323 void 2324 runtime·markallocated(void *v, uintptr n, bool noscan) 2325 { 2326 uintptr *b, obits, bits, off, shift; 2327 2328 if(0) 2329 runtime·printf("markallocated %p+%p\n", v, n); 2330 2331 if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) 2332 runtime·throw("markallocated: bad pointer"); 2333 2334 off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset 2335 b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; 2336 shift = off % wordsPerBitmapWord; 2337 2338 for(;;) { 2339 obits = *b; 2340 bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift); 2341 if(noscan) 2342 bits |= bitNoScan<<shift; 2343 if(runtime·gomaxprocs == 1) { 2344 *b = bits; 2345 break; 2346 } else { 2347 // more than one goroutine is potentially running: use atomic op 2348 if(runtime·casp((void**)b, (void*)obits, (void*)bits)) 2349 break; 2350 } 2351 } 2352 } 2353 2354 // mark the block at v of size n as freed. 2355 void 2356 runtime·markfreed(void *v, uintptr n) 2357 { 2358 uintptr *b, obits, bits, off, shift; 2359 2360 if(0) 2361 runtime·printf("markallocated %p+%p\n", v, n); 2362 2363 if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) 2364 runtime·throw("markallocated: bad pointer"); 2365 2366 off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset 2367 b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; 2368 shift = off % wordsPerBitmapWord; 2369 2370 for(;;) { 2371 obits = *b; 2372 bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); 2373 if(runtime·gomaxprocs == 1) { 2374 *b = bits; 2375 break; 2376 } else { 2377 // more than one goroutine is potentially running: use atomic op 2378 if(runtime·casp((void**)b, (void*)obits, (void*)bits)) 2379 break; 2380 } 2381 } 2382 } 2383 2384 // check that the block at v of size n is marked freed. 
2385 void 2386 runtime·checkfreed(void *v, uintptr n) 2387 { 2388 uintptr *b, bits, off, shift; 2389 2390 if(!runtime·checking) 2391 return; 2392 2393 if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) 2394 return; // not allocated, so okay 2395 2396 off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset 2397 b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; 2398 shift = off % wordsPerBitmapWord; 2399 2400 bits = *b>>shift; 2401 if((bits & bitAllocated) != 0) { 2402 runtime·printf("checkfreed %p+%p: off=%p have=%p\n", 2403 v, n, off, bits & bitMask); 2404 runtime·throw("checkfreed: not freed"); 2405 } 2406 } 2407 2408 // mark the span of memory at v as having n blocks of the given size. 2409 // if leftover is true, there is left over space at the end of the span. 2410 void 2411 runtime·markspan(void *v, uintptr size, uintptr n, bool leftover) 2412 { 2413 uintptr *b, off, shift; 2414 byte *p; 2415 2416 if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) 2417 runtime·throw("markspan: bad pointer"); 2418 2419 p = v; 2420 if(leftover) // mark a boundary just past end of last block too 2421 n++; 2422 for(; n-- > 0; p += size) { 2423 // Okay to use non-atomic ops here, because we control 2424 // the entire span, and each bitmap word has bits for only 2425 // one span, so no other goroutines are changing these 2426 // bitmap words. 2427 off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start; // word offset 2428 b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; 2429 shift = off % wordsPerBitmapWord; 2430 *b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); 2431 } 2432 } 2433 2434 // unmark the span of memory at v of length n bytes. 
2435 void 2436 runtime·unmarkspan(void *v, uintptr n) 2437 { 2438 uintptr *p, *b, off; 2439 2440 if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) 2441 runtime·throw("markspan: bad pointer"); 2442 2443 p = v; 2444 off = p - (uintptr*)runtime·mheap.arena_start; // word offset 2445 if(off % wordsPerBitmapWord != 0) 2446 runtime·throw("markspan: unaligned pointer"); 2447 b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; 2448 n /= PtrSize; 2449 if(n%wordsPerBitmapWord != 0) 2450 runtime·throw("unmarkspan: unaligned length"); 2451 // Okay to use non-atomic ops here, because we control 2452 // the entire span, and each bitmap word has bits for only 2453 // one span, so no other goroutines are changing these 2454 // bitmap words. 2455 n /= wordsPerBitmapWord; 2456 while(n-- > 0) 2457 *b-- = 0; 2458 } 2459 2460 bool 2461 runtime·blockspecial(void *v) 2462 { 2463 uintptr *b, off, shift; 2464 2465 if(DebugMark) 2466 return true; 2467 2468 off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; 2469 b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; 2470 shift = off % wordsPerBitmapWord; 2471 2472 return (*b & (bitSpecial<<shift)) != 0; 2473 } 2474 2475 void 2476 runtime·setblockspecial(void *v, bool s) 2477 { 2478 uintptr *b, off, shift, bits, obits; 2479 2480 if(DebugMark) 2481 return; 2482 2483 off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; 2484 b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; 2485 shift = off % wordsPerBitmapWord; 2486 2487 for(;;) { 2488 obits = *b; 2489 if(s) 2490 bits = obits | (bitSpecial<<shift); 2491 else 2492 bits = obits & ~(bitSpecial<<shift); 2493 if(runtime·gomaxprocs == 1) { 2494 *b = bits; 2495 break; 2496 } else { 2497 // more than one goroutine is potentially running: use atomic op 2498 if(runtime·casp((void**)b, (void*)obits, (void*)bits)) 2499 break; 2500 } 2501 } 2502 } 2503 2504 void 2505 runtime·MHeap_MapBits(MHeap *h) 2506 { 
2507 // Caller has added extra mappings to the arena. 2508 // Add extra mappings of bitmap words as needed. 2509 // We allocate extra bitmap pieces in chunks of bitmapChunk. 2510 enum { 2511 bitmapChunk = 8192 2512 }; 2513 uintptr n; 2514 2515 n = (h->arena_used - h->arena_start) / wordsPerBitmapWord; 2516 n = ROUND(n, bitmapChunk); 2517 if(h->bitmap_mapped >= n) 2518 return; 2519 2520 runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped); 2521 h->bitmap_mapped = n; 2522 }