github.com/spotify/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/runtime/mgc0.c

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector.

#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "stack.h"
#include "mgc0.h"
#include "race.h"
#include "type.h"
#include "typekind.h"
#include "funcdata.h"
#include "../../cmd/ld/textflag.h"

enum {
	Debug = 0,
	DebugMark = 0,  // run second pass to check mark
	CollectStats = 0,
	ScanStackByFrames = 0,
	IgnorePreciseGC = 0,

	// Four bits per word (see #defines below).
	wordsPerBitmapWord = sizeof(void*)*8/4,
	bitShift = sizeof(void*)*8/4,

	handoffThreshold = 4,
	IntermediateBufferCapacity = 64,

	// Bits in type information
	PRECISE = 1,
	LOOP = 2,
	PC_BITS = PRECISE | LOOP,

	// Pointer map
	BitsPerPointer = 2,
	BitsNoPointer = 0,
	BitsPointer = 1,
	BitsIface = 2,
	BitsEface = 3,
};

// Bits in per-word bitmap.
// #defines because enum might not be able to hold the values.
//
// Each word in the bitmap describes wordsPerBitmapWord words
// of heap memory.  There are 4 bitmap bits dedicated to each heap word,
// so on a 64-bit system there is one bitmap word per 16 heap words.
// The bits in the word are packed together by type first, then by
// heap location, so each 64-bit bitmap word consists of, from top to bottom,
// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
// then the 16 bitNoScan/bitBlockBoundary bits, then the 16 bitAllocated bits.
// This layout makes it easier to iterate over the bits of a given type.
//
// The bitmap starts at mheap.arena_start and extends *backward* from
// there.  On a 64-bit system the off'th word in the arena is tracked by
// the off/16+1'th word before mheap.arena_start.  (On a 32-bit system,
// the only difference is that the divisor is 8.)
//
// To pull out the bits corresponding to a given pointer p, we use:
//
//	off = p - (uintptr*)mheap.arena_start;  // word offset
//	b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
//	shift = off % wordsPerBitmapWord
//	bits = *b >> shift;
//	/* then test bits & bitAllocated, bits & bitMarked, etc. */
//
#define bitAllocated		((uintptr)1<<(bitShift*0))
#define bitNoScan		((uintptr)1<<(bitShift*1))	/* when bitAllocated is set */
#define bitMarked		((uintptr)1<<(bitShift*2))	/* when bitAllocated is set */
#define bitSpecial		((uintptr)1<<(bitShift*3))	/* when bitAllocated is set - has finalizer or being profiled */
#define bitBlockBoundary	((uintptr)1<<(bitShift*1))	/* when bitAllocated is NOT set */

#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)

// Holding worldsema grants an M the right to try to stop the world.
// The procedure is:
//
//	runtime·semacquire(&runtime·worldsema);
//	m->gcing = 1;
//	runtime·stoptheworld();
//
//	... do stuff ...
//
//	m->gcing = 0;
//	runtime·semrelease(&runtime·worldsema);
//	runtime·starttheworld();
//
uint32	runtime·worldsema = 1;

typedef struct Obj Obj;
struct Obj
{
	byte	*p;	// data pointer
	uintptr	n;	// size of data in bytes
	uintptr	ti;	// type info
};

// The size of Workbuf is N*PageSize.
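// SIZE below is chosen so that the struct works out to exactly two pages:
// the node header and nobj take sizeof(LFNode)+sizeof(uintptr) bytes, and
// the obj array plus _padding together account for exactly SIZE bytes.
// scanblock checks that sizeof(Workbuf) is a multiple of PageSize.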
typedef struct Workbuf Workbuf;
struct Workbuf
{
#define SIZE	(2*PageSize-sizeof(LFNode)-sizeof(uintptr))
	LFNode	node; // must be first
	uintptr	nobj;
	Obj	obj[SIZE/sizeof(Obj) - 1];
	uint8	_padding[SIZE%sizeof(Obj) + sizeof(Obj)];
#undef SIZE
};

typedef struct Finalizer Finalizer;
struct Finalizer
{
	FuncVal	*fn;
	void	*arg;
	uintptr	nret;
	Type	*fint;
	PtrType	*ot;
};

typedef struct FinBlock FinBlock;
struct FinBlock
{
	FinBlock	*alllink;
	FinBlock	*next;
	int32	cnt;
	int32	cap;
	Finalizer	fin[1];
};

extern byte data[];
extern byte edata[];
extern byte bss[];
extern byte ebss[];

extern byte gcdata[];
extern byte gcbss[];

static G	*fing;
static FinBlock	*finq;		// list of finalizers that are to be executed
static FinBlock	*finc;		// cache of free blocks
static FinBlock	*allfin;	// list of all blocks
static Lock	finlock;
static int32	fingwait;

static void	runfinq(void);
static Workbuf*	getempty(Workbuf*);
static Workbuf*	getfull(Workbuf*);
static void	putempty(Workbuf*);
static Workbuf*	handoff(Workbuf*);
static void	gchelperstart(void);

static struct {
	uint64	full;  // lock-free list of full blocks
	uint64	empty; // lock-free list of empty blocks
	byte	pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
	uint32	nproc;
	volatile uint32	nwait;
	volatile uint32	ndone;
	volatile uint32	debugmarkdone;
	Note	alldone;
	ParFor	*markfor;
	ParFor	*sweepfor;

	Lock;
	byte	*chunk;
	uintptr	nchunk;

	Obj	*roots;
	uint32	nroot;
	uint32	rootcap;
} work;

enum {
	GC_DEFAULT_PTR = GC_NUM_INSTR,
	GC_CHAN,

	GC_NUM_INSTR2
};

static struct {
	struct {
		uint64 sum;
		uint64 cnt;
	} ptr;
	uint64 nbytes;
	struct {
		uint64 sum;
		uint64 cnt;
		uint64 notype;
		uint64 typelookup;
	} obj;
	uint64 rescan;
	uint64 rescanbytes;
	uint64 instr[GC_NUM_INSTR2];
	uint64 putempty;
	uint64 getfull;
	struct {
		uint64 foundbit;
		uint64 foundword;
		uint64 foundspan;
	} flushptrbuf;
	struct {
		uint64 foundbit;
		uint64 foundword;
		uint64 foundspan;
	} markonly;
} gcstats;

// markonly marks an object. It returns true if the object
// has been marked by this function, false otherwise.
// This function doesn't append the object to any buffer.
static bool
markonly(void *obj)
{
	byte *p;
	uintptr *bitp, bits, shift, x, xbits, off, j;
	MSpan *s;
	PageID k;

	// Words outside the arena cannot be pointers.
	if(obj < runtime·mheap.arena_start || obj >= runtime·mheap.arena_used)
		return false;

	// obj may be a pointer to a live object.
	// Try to find the beginning of the object.

	// Round down to word boundary.
	obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));

	// Find bits for this word.
	off = (uintptr*)obj - (uintptr*)runtime·mheap.arena_start;
	bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;
	xbits = *bitp;
	bits = xbits >> shift;

	// Pointing at the beginning of a block?
	if((bits & (bitAllocated|bitBlockBoundary)) != 0) {
		if(CollectStats)
			runtime·xadd64(&gcstats.markonly.foundbit, 1);
		goto found;
	}

	// Pointing just past the beginning?
	// Scan backward a little to find a block boundary.
	for(j=shift; j-->0; ) {
		if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
			shift = j;
			bits = xbits>>shift;
			if(CollectStats)
				runtime·xadd64(&gcstats.markonly.foundword, 1);
			goto found;
		}
	}

	// Otherwise consult span table to find beginning.
	// (Manually inlined copy of MHeap_LookupMaybe.)
	k = (uintptr)obj>>PageShift;
	x = k;
	if(sizeof(void*) == 8)
		x -= (uintptr)runtime·mheap.arena_start>>PageShift;
	s = runtime·mheap.spans[x];
	if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse)
		return false;
	p = (byte*)((uintptr)s->start<<PageShift);
	if(s->sizeclass == 0) {
		obj = p;
	} else {
		uintptr size = s->elemsize;
		int32 i = ((byte*)obj - p)/size;
		obj = p+i*size;
	}

	// Now that we know the object header, reload bits.
	off = (uintptr*)obj - (uintptr*)runtime·mheap.arena_start;
	bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;
	xbits = *bitp;
	bits = xbits >> shift;
	if(CollectStats)
		runtime·xadd64(&gcstats.markonly.foundspan, 1);

found:
	// Now we have bits, bitp, and shift correct for
	// obj pointing at the base of the object.
	// Only care about allocated and not marked.
	if((bits & (bitAllocated|bitMarked)) != bitAllocated)
		return false;
	if(work.nproc == 1)
		*bitp |= bitMarked<<shift;
	else {
		for(;;) {
			x = *bitp;
			if(x & (bitMarked<<shift))
				return false;
			if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
				break;
		}
	}

	// The object is now marked
	return true;
}

// PtrTarget is a structure used by intermediate buffers.
// The intermediate buffers hold GC data before it
// is moved/flushed to the work buffer (Workbuf).
// The size of an intermediate buffer is very small,
// such as 32 or 64 elements.
typedef struct PtrTarget PtrTarget;
struct PtrTarget
{
	void *p;
	uintptr ti;
};

typedef struct BufferList BufferList;
struct BufferList
{
	PtrTarget ptrtarget[IntermediateBufferCapacity];
	Obj obj[IntermediateBufferCapacity];
	uint32 busy;
	byte pad[CacheLineSize];
};
#pragma dataflag NOPTR
static BufferList bufferList[MaxGcproc];

static Type *itabtype;

static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj);

// flushptrbuf moves data from the PtrTarget buffer to the work buffer.
// The PtrTarget buffer contains blocks irrespective of whether the blocks have been marked or scanned,
// while the work buffer contains blocks which have been marked
// and are prepared to be scanned by the garbage collector.
//
// _wp, _wbuf, _nobj are input/output parameters and specify the work buffer.
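// The same triple is threaded through flushobjbuf and enqueue below, so the
// write pointer, current buffer, and object count stay consistent across
// buffer refills and handoffs.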
//
// A simplified drawing explaining how the todo-list moves from a structure to another:
//
//	scanblock
//  (find pointers)
//    Obj ------> PtrTarget (pointer targets)
//     ↑          |
//     |          |
//     `----------'
//     flushptrbuf
//  (find block start, mark and enqueue)
static void
flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
{
	byte *p, *arena_start, *obj;
	uintptr size, *bitp, bits, shift, j, x, xbits, off, nobj, ti, n;
	MSpan *s;
	PageID k;
	Obj *wp;
	Workbuf *wbuf;
	PtrTarget *ptrbuf_end;

	arena_start = runtime·mheap.arena_start;

	wp = *_wp;
	wbuf = *_wbuf;
	nobj = *_nobj;

	ptrbuf_end = *ptrbufpos;
	n = ptrbuf_end - ptrbuf;
	*ptrbufpos = ptrbuf;

	if(CollectStats) {
		runtime·xadd64(&gcstats.ptr.sum, n);
		runtime·xadd64(&gcstats.ptr.cnt, 1);
	}

	// If buffer is nearly full, get a new one.
	if(wbuf == nil || nobj+n >= nelem(wbuf->obj)) {
		if(wbuf != nil)
			wbuf->nobj = nobj;
		wbuf = getempty(wbuf);
		wp = wbuf->obj;
		nobj = 0;

		if(n >= nelem(wbuf->obj))
			runtime·throw("ptrbuf has to be smaller than WorkBuf");
	}

	// TODO(atom): This block is a branch of an if-then-else statement.
	// The single-threaded branch may be added in a next CL.
	{
		// Multi-threaded version.

		while(ptrbuf < ptrbuf_end) {
			obj = ptrbuf->p;
			ti = ptrbuf->ti;
			ptrbuf++;

			// obj belongs to interval [mheap.arena_start, mheap.arena_used).
			if(Debug > 1) {
				if(obj < runtime·mheap.arena_start || obj >= runtime·mheap.arena_used)
					runtime·throw("object is outside of mheap");
			}

			// obj may be a pointer to a live object.
			// Try to find the beginning of the object.

			// Round down to word boundary.
			if(((uintptr)obj & ((uintptr)PtrSize-1)) != 0) {
				obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
				ti = 0;
			}

			// Find bits for this word.
			off = (uintptr*)obj - (uintptr*)arena_start;
			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
			shift = off % wordsPerBitmapWord;
			xbits = *bitp;
			bits = xbits >> shift;

			// Pointing at the beginning of a block?
			if((bits & (bitAllocated|bitBlockBoundary)) != 0) {
				if(CollectStats)
					runtime·xadd64(&gcstats.flushptrbuf.foundbit, 1);
				goto found;
			}

			ti = 0;

			// Pointing just past the beginning?
			// Scan backward a little to find a block boundary.
			for(j=shift; j-->0; ) {
				if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
					obj = (byte*)obj - (shift-j)*PtrSize;
					shift = j;
					bits = xbits>>shift;
					if(CollectStats)
						runtime·xadd64(&gcstats.flushptrbuf.foundword, 1);
					goto found;
				}
			}

			// Otherwise consult span table to find beginning.
			// (Manually inlined copy of MHeap_LookupMaybe.)
			k = (uintptr)obj>>PageShift;
			x = k;
			if(sizeof(void*) == 8)
				x -= (uintptr)arena_start>>PageShift;
			s = runtime·mheap.spans[x];
			if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse)
				continue;
			p = (byte*)((uintptr)s->start<<PageShift);
			if(s->sizeclass == 0) {
				obj = p;
			} else {
				size = s->elemsize;
				int32 i = ((byte*)obj - p)/size;
				obj = p+i*size;
			}

			// Now that we know the object header, reload bits.
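			// (off, bitp and shift are recomputed exactly as in markonly,
			// this time for the object base rather than the interior pointer.)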
			off = (uintptr*)obj - (uintptr*)arena_start;
			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
			shift = off % wordsPerBitmapWord;
			xbits = *bitp;
			bits = xbits >> shift;
			if(CollectStats)
				runtime·xadd64(&gcstats.flushptrbuf.foundspan, 1);

		found:
			// Now we have bits, bitp, and shift correct for
			// obj pointing at the base of the object.
			// Only care about allocated and not marked.
			if((bits & (bitAllocated|bitMarked)) != bitAllocated)
				continue;
			if(work.nproc == 1)
				*bitp |= bitMarked<<shift;
			else {
				for(;;) {
					x = *bitp;
					if(x & (bitMarked<<shift))
						goto continue_obj;
					if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
						break;
				}
			}

			// If object has no pointers, don't need to scan further.
			if((bits & bitNoScan) != 0)
				continue;

			// Ask span about size class.
			// (Manually inlined copy of MHeap_Lookup.)
			x = (uintptr)obj >> PageShift;
			if(sizeof(void*) == 8)
				x -= (uintptr)arena_start>>PageShift;
			s = runtime·mheap.spans[x];

			PREFETCH(obj);

			*wp = (Obj){obj, s->elemsize, ti};
			wp++;
			nobj++;
		continue_obj:;
		}

		// If another proc wants a pointer, give it some.
		if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
			wbuf->nobj = nobj;
			wbuf = handoff(wbuf);
			nobj = wbuf->nobj;
			wp = wbuf->obj + nobj;
		}
	}

	*_wp = wp;
	*_wbuf = wbuf;
	*_nobj = nobj;
}

static void
flushobjbuf(Obj *objbuf, Obj **objbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
{
	uintptr nobj, off;
	Obj *wp, obj;
	Workbuf *wbuf;
	Obj *objbuf_end;

	wp = *_wp;
	wbuf = *_wbuf;
	nobj = *_nobj;

	objbuf_end = *objbufpos;
	*objbufpos = objbuf;

	while(objbuf < objbuf_end) {
		obj = *objbuf++;

		// Align obj.p to a word boundary.
		off = (uintptr)obj.p & (PtrSize-1);
		if(off != 0) {
			obj.p += PtrSize - off;
			obj.n -= PtrSize - off;
			obj.ti = 0;
		}

		if(obj.p == nil || obj.n == 0)
			continue;

		// If buffer is full, get a new one.
		if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
			if(wbuf != nil)
				wbuf->nobj = nobj;
			wbuf = getempty(wbuf);
			wp = wbuf->obj;
			nobj = 0;
		}

		*wp = obj;
		wp++;
		nobj++;
	}

	// If another proc wants a pointer, give it some.
	if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
		wbuf->nobj = nobj;
		wbuf = handoff(wbuf);
		nobj = wbuf->nobj;
		wp = wbuf->obj + nobj;
	}

	*_wp = wp;
	*_wbuf = wbuf;
	*_nobj = nobj;
}

// Program that scans the whole block and treats every block element as a potential pointer
static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR};

// Hchan program
static uintptr chanProg[2] = {0, GC_CHAN};

// Local variables of a program fragment or loop
typedef struct Frame Frame;
struct Frame {
	uintptr count, elemsize, b;
	uintptr *loop_or_ret;
};

// Sanity check for the derived type info objti.
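// checkptr looks up the type recorded for the block containing obj and
// compares its GC program against the derived objti program, word by word,
// up to the first GC_END.  It runs only when Debug is set.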
static void
checkptr(void *obj, uintptr objti)
{
	uintptr *pc1, *pc2, type, tisize, i, j, x;
	byte *objstart;
	Type *t;
	MSpan *s;

	if(!Debug)
		runtime·throw("checkptr is debug only");

	if(obj < runtime·mheap.arena_start || obj >= runtime·mheap.arena_used)
		return;
	type = runtime·gettype(obj);
	t = (Type*)(type & ~(uintptr)(PtrSize-1));
	if(t == nil)
		return;
	x = (uintptr)obj >> PageShift;
	if(sizeof(void*) == 8)
		x -= (uintptr)(runtime·mheap.arena_start)>>PageShift;
	s = runtime·mheap.spans[x];
	objstart = (byte*)((uintptr)s->start<<PageShift);
	if(s->sizeclass != 0) {
		i = ((byte*)obj - objstart)/s->elemsize;
		objstart += i*s->elemsize;
	}
	tisize = *(uintptr*)objti;
	// Sanity check for object size: it should fit into the memory block.
	if((byte*)obj + tisize > objstart + s->elemsize) {
		runtime·printf("object of type '%S' at %p/%p does not fit in block %p/%p\n",
			*t->string, obj, tisize, objstart, s->elemsize);
		runtime·throw("invalid gc type info");
	}
	if(obj != objstart)
		return;
	// If obj points to the beginning of the memory block,
	// check type info as well.
	if(t->string == nil ||
		// Gob allocates unsafe pointers for indirection.
		(runtime·strcmp(t->string->str, (byte*)"unsafe.Pointer") &&
		// Runtime and gc think differently about closures.
		runtime·strstr(t->string->str, (byte*)"struct { F uintptr") != t->string->str)) {
		pc1 = (uintptr*)objti;
		pc2 = (uintptr*)t->gc;
		// A simple best-effort check until first GC_END.
		for(j = 1; pc1[j] != GC_END && pc2[j] != GC_END; j++) {
			if(pc1[j] != pc2[j]) {
				runtime·printf("invalid gc type info for '%s' at %p, type info %p, block info %p\n",
					t->string ? (int8*)t->string->str : (int8*)"?", j, pc1[j], pc2[j]);
				runtime·throw("invalid gc type info");
			}
		}
	}
}

// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left. Instead of using an explicit recursion, it keeps
// a work list in the Workbuf* structures and loops in the main function
// body. Keeping an explicit work list is easier on the stack allocator and
// more efficient.
//
// wbuf: current work buffer
// wp:   storage for next queued pointer (write pointer)
// nobj: number of queued objects
static void
scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
{
	byte *b, *arena_start, *arena_used;
	uintptr n, i, end_b, elemsize, size, ti, objti, count, type;
	uintptr *pc, precise_type, nominal_size;
	uintptr *chan_ret, chancap;
	void *obj;
	Type *t;
	Slice *sliceptr;
	Frame *stack_ptr, stack_top, stack[GC_STACK_CAPACITY+4];
	BufferList *scanbuffers;
	PtrTarget *ptrbuf, *ptrbuf_end, *ptrbufpos;
	Obj *objbuf, *objbuf_end, *objbufpos;
	Eface *eface;
	Iface *iface;
	Hchan *chan;
	ChanType *chantype;

	if(sizeof(Workbuf) % PageSize != 0)
		runtime·throw("scanblock: size of Workbuf is suboptimal");

	// Memory arena parameters.
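	// Candidate pointers outside [arena_start, arena_used) are ignored below.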
	arena_start = runtime·mheap.arena_start;
	arena_used = runtime·mheap.arena_used;

	stack_ptr = stack+nelem(stack)-1;

	precise_type = false;
	nominal_size = 0;

	// Allocate ptrbuf
	{
		scanbuffers = &bufferList[m->helpgc];
		ptrbuf = &scanbuffers->ptrtarget[0];
		ptrbuf_end = &scanbuffers->ptrtarget[0] + nelem(scanbuffers->ptrtarget);
		objbuf = &scanbuffers->obj[0];
		objbuf_end = &scanbuffers->obj[0] + nelem(scanbuffers->obj);
	}

	ptrbufpos = ptrbuf;
	objbufpos = objbuf;

	// (Silence the compiler)
	chan = nil;
	chantype = nil;
	chan_ret = nil;

	goto next_block;

	for(;;) {
		// Each iteration scans the block b of length n, queueing pointers in
		// the work buffer.
		if(Debug > 1) {
			runtime·printf("scanblock %p %D\n", b, (int64)n);
		}

		if(CollectStats) {
			runtime·xadd64(&gcstats.nbytes, n);
			runtime·xadd64(&gcstats.obj.sum, nobj);
			runtime·xadd64(&gcstats.obj.cnt, 1);
		}

		if(ti != 0) {
			pc = (uintptr*)(ti & ~(uintptr)PC_BITS);
			precise_type = (ti & PRECISE);
			stack_top.elemsize = pc[0];
			if(!precise_type)
				nominal_size = pc[0];
			if(ti & LOOP) {
				stack_top.count = 0;	// 0 means an infinite number of iterations
				stack_top.loop_or_ret = pc+1;
			} else {
				stack_top.count = 1;
			}
			if(Debug) {
				// Simple sanity check for provided type info ti:
				// The declared size of the object must be not larger than the actual size
				// (it can be smaller due to inferior pointers).
				// It's difficult to make a comprehensive check due to inferior pointers,
				// reflection, gob, etc.
				if(pc[0] > n) {
					runtime·printf("invalid gc type info: type info size %p, block size %p\n", pc[0], n);
					runtime·throw("invalid gc type info");
				}
			}
		} else if(UseSpanType) {
			if(CollectStats)
				runtime·xadd64(&gcstats.obj.notype, 1);

			type = runtime·gettype(b);
			if(type != 0) {
				if(CollectStats)
					runtime·xadd64(&gcstats.obj.typelookup, 1);

				t = (Type*)(type & ~(uintptr)(PtrSize-1));
				switch(type & (PtrSize-1)) {
				case TypeInfo_SingleObject:
					pc = (uintptr*)t->gc;
					precise_type = true;  // type information about 'b' is precise
					stack_top.count = 1;
					stack_top.elemsize = pc[0];
					break;
				case TypeInfo_Array:
					pc = (uintptr*)t->gc;
					if(pc[0] == 0)
						goto next_block;
					precise_type = true;  // type information about 'b' is precise
					stack_top.count = 0;  // 0 means an infinite number of iterations
					stack_top.elemsize = pc[0];
					stack_top.loop_or_ret = pc+1;
					break;
				case TypeInfo_Chan:
					chan = (Hchan*)b;
					chantype = (ChanType*)t;
					chan_ret = nil;
					pc = chanProg;
					break;
				default:
					runtime·throw("scanblock: invalid type");
					return;
				}
			} else {
				pc = defaultProg;
			}
		} else {
			pc = defaultProg;
		}

		if(IgnorePreciseGC)
			pc = defaultProg;

		pc++;
		stack_top.b = (uintptr)b;

		end_b = (uintptr)b + n - PtrSize;

		for(;;) {
			if(CollectStats)
				runtime·xadd64(&gcstats.instr[pc[0]], 1);

			obj = nil;
			objti = 0;
			switch(pc[0]) {
			case GC_PTR:
				obj = *(void**)(stack_top.b + pc[1]);
				objti = pc[2];
				pc += 3;
				if(Debug)
					checkptr(obj, objti);
				break;

			case GC_SLICE:
				sliceptr = (Slice*)(stack_top.b + pc[1]);
				if(sliceptr->cap != 0) {
					obj = sliceptr->array;
					// Can't use slice element type for scanning,
					// because if it points to an array embedded
					// in the beginning of a struct,
					// we will scan the whole struct as the slice.
					// So just obtain type info from heap.
				}
				pc += 3;
				break;

			case GC_APTR:
				obj = *(void**)(stack_top.b + pc[1]);
				pc += 2;
				break;

			case GC_STRING:
				obj = *(void**)(stack_top.b + pc[1]);
				markonly(obj);
				pc += 2;
				continue;

			case GC_EFACE:
				eface = (Eface*)(stack_top.b + pc[1]);
				pc += 2;
				if(eface->type == nil)
					continue;

				// eface->type
				t = eface->type;
				if((void*)t >= arena_start && (void*)t < arena_used) {
					*ptrbufpos++ = (PtrTarget){t, 0};
					if(ptrbufpos == ptrbuf_end)
						flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
				}

				// eface->data
				if(eface->data >= arena_start && eface->data < arena_used) {
					if(t->size <= sizeof(void*)) {
						if((t->kind & KindNoPointers))
							continue;

						obj = eface->data;
						if((t->kind & ~KindNoPointers) == KindPtr)
							objti = (uintptr)((PtrType*)t)->elem->gc;
					} else {
						obj = eface->data;
						objti = (uintptr)t->gc;
					}
				}
				break;

			case GC_IFACE:
				iface = (Iface*)(stack_top.b + pc[1]);
				pc += 2;
				if(iface->tab == nil)
					continue;

				// iface->tab
				if((void*)iface->tab >= arena_start && (void*)iface->tab < arena_used) {
					*ptrbufpos++ = (PtrTarget){iface->tab, (uintptr)itabtype->gc};
					if(ptrbufpos == ptrbuf_end)
						flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
				}

				// iface->data
				if(iface->data >= arena_start && iface->data < arena_used) {
					t = iface->tab->type;
					if(t->size <= sizeof(void*)) {
						if((t->kind & KindNoPointers))
							continue;

						obj = iface->data;
						if((t->kind & ~KindNoPointers) == KindPtr)
							objti = (uintptr)((PtrType*)t)->elem->gc;
					} else {
						obj = iface->data;
						objti = (uintptr)t->gc;
					}
				}
				break;

			case GC_DEFAULT_PTR:
				while(stack_top.b <= end_b) {
					obj = *(byte**)stack_top.b;
					stack_top.b += PtrSize;
					if(obj >= arena_start && obj < arena_used) {
						*ptrbufpos++ = (PtrTarget){obj, 0};
						if(ptrbufpos == ptrbuf_end)
							flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
					}
				}
				goto next_block;

			case GC_END:
				if(--stack_top.count != 0) {
					// Next iteration of a loop if possible.
					stack_top.b += stack_top.elemsize;
					if(stack_top.b + stack_top.elemsize <= end_b+PtrSize) {
						pc = stack_top.loop_or_ret;
						continue;
					}
					i = stack_top.b;
				} else {
					// Stack pop if possible.
					if(stack_ptr+1 < stack+nelem(stack)) {
						pc = stack_top.loop_or_ret;
						stack_top = *(++stack_ptr);
						continue;
					}
					i = (uintptr)b + nominal_size;
				}
				if(!precise_type) {
					// Quickly scan [b+i,b+n) for possible pointers.
					for(; i<=end_b; i+=PtrSize) {
						if(*(byte**)i != nil) {
							// Found a value that may be a pointer.
							// Do a rescan of the entire block.
							enqueue((Obj){b, n, 0}, &wbuf, &wp, &nobj);
							if(CollectStats) {
								runtime·xadd64(&gcstats.rescan, 1);
								runtime·xadd64(&gcstats.rescanbytes, n);
							}
							break;
						}
					}
				}
				goto next_block;

			case GC_ARRAY_START:
				i = stack_top.b + pc[1];
				count = pc[2];
				elemsize = pc[3];
				pc += 4;

				// Stack push.
				*stack_ptr-- = stack_top;
				stack_top = (Frame){count, elemsize, i, pc};
				continue;

			case GC_ARRAY_NEXT:
				if(--stack_top.count != 0) {
					stack_top.b += stack_top.elemsize;
					pc = stack_top.loop_or_ret;
				} else {
					// Stack pop.
					stack_top = *(++stack_ptr);
					pc += 1;
				}
				continue;

			case GC_CALL:
				// Stack push.
				*stack_ptr-- = stack_top;
				stack_top = (Frame){1, 0, stack_top.b + pc[1], pc+3 /*return address*/};
				pc = (uintptr*)((byte*)pc + *(int32*)(pc+2));  // target of the CALL instruction
				continue;

			case GC_REGION:
				obj = (void*)(stack_top.b + pc[1]);
				size = pc[2];
				objti = pc[3];
				pc += 4;

				*objbufpos++ = (Obj){obj, size, objti};
				if(objbufpos == objbuf_end)
					flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
				continue;

			case GC_CHAN_PTR:
				chan = *(Hchan**)(stack_top.b + pc[1]);
				if(chan == nil) {
					pc += 3;
					continue;
				}
				if(markonly(chan)) {
					chantype = (ChanType*)pc[2];
					if(!(chantype->elem->kind & KindNoPointers)) {
						// Start chanProg.
						chan_ret = pc+3;
						pc = chanProg+1;
						continue;
					}
				}
				pc += 3;
				continue;

			case GC_CHAN:
				// There are no heap pointers in struct Hchan,
				// so we can ignore the leading sizeof(Hchan) bytes.
				if(!(chantype->elem->kind & KindNoPointers)) {
					// Channel's buffer follows Hchan immediately in memory.
					// Size of buffer (cap(c)) is second int in the chan struct.
					chancap = ((uintgo*)chan)[1];
					if(chancap > 0) {
						// TODO(atom): split into two chunks so that only the
						// in-use part of the circular buffer is scanned.
						// (Channel routines zero the unused part, so the current
						// code does not lead to leaks, it's just a little inefficient.)
						*objbufpos++ = (Obj){(byte*)chan+runtime·Hchansize, chancap*chantype->elem->size,
							(uintptr)chantype->elem->gc | PRECISE | LOOP};
						if(objbufpos == objbuf_end)
							flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
					}
				}
				if(chan_ret == nil)
					goto next_block;
				pc = chan_ret;
				continue;

			default:
				runtime·throw("scanblock: invalid GC instruction");
				return;
			}

			if(obj >= arena_start && obj < arena_used) {
				*ptrbufpos++ = (PtrTarget){obj, objti};
				if(ptrbufpos == ptrbuf_end)
					flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
			}
		}

	next_block:
		// Done scanning [b, b+n).  Prepare for the next iteration of
		// the loop by setting b, n, ti to the parameters for the next block.

		if(nobj == 0) {
			flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
			flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);

			if(nobj == 0) {
				if(!keepworking) {
					if(wbuf)
						putempty(wbuf);
					goto endscan;
				}
				// Emptied our buffer: refill.
				wbuf = getfull(wbuf);
				if(wbuf == nil)
					goto endscan;
				nobj = wbuf->nobj;
				wp = wbuf->obj + wbuf->nobj;
			}
		}

		// Fetch b from the work buffer.
		--wp;
		b = wp->p;
		n = wp->n;
		ti = wp->ti;
		nobj--;
	}

endscan:;
}

// debug_scanblock is the debug copy of scanblock.
// It is simpler, slower, single-threaded, recursive,
// and uses bitSpecial as the mark bit.
static void
debug_scanblock(byte *b, uintptr n)
{
	byte *obj, *p;
	void **vp;
	uintptr size, *bitp, bits, shift, i, xbits, off;
	MSpan *s;

	if(!DebugMark)
		runtime·throw("debug_scanblock without DebugMark");

	if((intptr)n < 0) {
		runtime·printf("debug_scanblock %p %D\n", b, (int64)n);
		runtime·throw("debug_scanblock");
	}

	// Align b to a word boundary.
	off = (uintptr)b & (PtrSize-1);
	if(off != 0) {
		b += PtrSize - off;
		n -= PtrSize - off;
	}

	vp = (void**)b;
	n /= PtrSize;
	for(i=0; i<n; i++) {
		obj = (byte*)vp[i];

		// Words outside the arena cannot be pointers.
		if((byte*)obj < runtime·mheap.arena_start || (byte*)obj >= runtime·mheap.arena_used)
			continue;

		// Round down to word boundary.
		obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));

		// Consult span table to find beginning.
		s = runtime·MHeap_LookupMaybe(&runtime·mheap, obj);
		if(s == nil)
			continue;

		p = (byte*)((uintptr)s->start<<PageShift);
		size = s->elemsize;
		if(s->sizeclass == 0) {
			obj = p;
		} else {
			int32 i = ((byte*)obj - p)/size;
			obj = p+i*size;
		}

		// Now that we know the object header, reload bits.
		off = (uintptr*)obj - (uintptr*)runtime·mheap.arena_start;
		bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		xbits = *bitp;
		bits = xbits >> shift;

		// Now we have bits, bitp, and shift correct for
		// obj pointing at the base of the object.
		// If not allocated or already marked, done.
		if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0)  // NOTE: bitSpecial not bitMarked
			continue;
		*bitp |= bitSpecial<<shift;
		if(!(bits & bitMarked))
			runtime·printf("found unmarked block %p in %p\n", obj, vp+i);

		// If object has no pointers, don't need to scan further.
		if((bits & bitNoScan) != 0)
			continue;

		debug_scanblock(obj, size);
	}
}

// Append obj to the work buffer.
// _wbuf, _wp, _nobj are input/output parameters and specify the work buffer.
static void
enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj)
{
	uintptr nobj, off;
	Obj *wp;
	Workbuf *wbuf;

	if(Debug > 1)
		runtime·printf("append obj(%p %D %p)\n", obj.p, (int64)obj.n, obj.ti);

	// Align obj.p to a word boundary.
	off = (uintptr)obj.p & (PtrSize-1);
	if(off != 0) {
		obj.p += PtrSize - off;
		obj.n -= PtrSize - off;
		obj.ti = 0;
	}

	if(obj.p == nil || obj.n == 0)
		return;

	// Load work buffer state
	wp = *_wp;
	wbuf = *_wbuf;
	nobj = *_nobj;

	// If another proc wants a pointer, give it some.
	if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
		wbuf->nobj = nobj;
		wbuf = handoff(wbuf);
		nobj = wbuf->nobj;
		wp = wbuf->obj + nobj;
	}

	// If buffer is full, get a new one.
	if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
		if(wbuf != nil)
			wbuf->nobj = nobj;
		wbuf = getempty(wbuf);
		wp = wbuf->obj;
		nobj = 0;
	}

	*wp = obj;
	wp++;
	nobj++;

	// Save work buffer state
	*_wp = wp;
	*_wbuf = wbuf;
	*_nobj = nobj;
}

static void
markroot(ParFor *desc, uint32 i)
{
	Obj *wp;
	Workbuf *wbuf;
	uintptr nobj;

	USED(&desc);
	wp = nil;
	wbuf = nil;
	nobj = 0;
	enqueue(work.roots[i], &wbuf, &wp, &nobj);
	scanblock(wbuf, wp, nobj, false);
}

// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
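// The buffer passed in, if any, is full; it is pushed onto work.full
// so that other procs can steal from it.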
static Workbuf*
getempty(Workbuf *b)
{
	if(b != nil)
		runtime·lfstackpush(&work.full, &b->node);
	b = (Workbuf*)runtime·lfstackpop(&work.empty);
	if(b == nil) {
		// Need to allocate.
		runtime·lock(&work);
		if(work.nchunk < sizeof *b) {
			work.nchunk = 1<<20;
			work.chunk = runtime·SysAlloc(work.nchunk, &mstats.gc_sys);
			if(work.chunk == nil)
				runtime·throw("runtime: cannot allocate memory");
		}
		b = (Workbuf*)work.chunk;
		work.chunk += sizeof *b;
		work.nchunk -= sizeof *b;
		runtime·unlock(&work);
	}
	b->nobj = 0;
	return b;
}

static void
putempty(Workbuf *b)
{
	if(CollectStats)
		runtime·xadd64(&gcstats.putempty, 1);

	runtime·lfstackpush(&work.empty, &b->node);
}

// Get a full work buffer off the work.full list, or return nil.
static Workbuf*
getfull(Workbuf *b)
{
	int32 i;

	if(CollectStats)
		runtime·xadd64(&gcstats.getfull, 1);

	if(b != nil)
		runtime·lfstackpush(&work.empty, &b->node);
	b = (Workbuf*)runtime·lfstackpop(&work.full);
	if(b != nil || work.nproc == 1)
		return b;

	runtime·xadd(&work.nwait, +1);
	for(i=0;; i++) {
		if(work.full != 0) {
			runtime·xadd(&work.nwait, -1);
			b = (Workbuf*)runtime·lfstackpop(&work.full);
			if(b != nil)
				return b;
			runtime·xadd(&work.nwait, +1);
		}
		if(work.nwait == work.nproc)
			return nil;
		if(i < 10) {
			m->gcstats.nprocyield++;
			runtime·procyield(20);
		} else if(i < 20) {
			m->gcstats.nosyield++;
			runtime·osyield();
		} else {
			m->gcstats.nsleep++;
			runtime·usleep(100);
		}
	}
}

static Workbuf*
handoff(Workbuf *b)
{
	int32 n;
	Workbuf *b1;

	// Make new buffer with half of b's pointers.
	b1 = getempty(nil);
	n = b->nobj/2;
	b->nobj -= n;
	b1->nobj = n;
	runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
	m->gcstats.nhandoff++;
	m->gcstats.nhandoffcnt += n;

	// Put b on full list - let first half of b get stolen.
	runtime·lfstackpush(&work.full, &b->node);
	return b1;
}

static void
addroot(Obj obj)
{
	uint32 cap;
	Obj *new;

	if(work.nroot >= work.rootcap) {
		cap = PageSize/sizeof(Obj);
		if(cap < 2*work.rootcap)
			cap = 2*work.rootcap;
		new = (Obj*)runtime·SysAlloc(cap*sizeof(Obj), &mstats.gc_sys);
		if(new == nil)
			runtime·throw("runtime: cannot allocate memory");
		if(work.roots != nil) {
			runtime·memmove(new, work.roots, work.rootcap*sizeof(Obj));
			runtime·SysFree(work.roots, work.rootcap*sizeof(Obj), &mstats.gc_sys);
		}
		work.roots = new;
		work.rootcap = cap;
	}
	work.roots[work.nroot] = obj;
	work.nroot++;
}

extern byte pclntab[]; // base for f->ptrsoff

typedef struct BitVector BitVector;
struct BitVector
{
	int32 n;
	uint32 data[];
};

// Scans an interface data value when the interface type indicates
// that it is a pointer.
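// With precise stacks, an interface whose payload is stored inline in the
// data word (type size <= sizeof(void*)) and contains no pointers can be
// skipped entirely; otherwise the data word at scanp+PtrSize is added as a
// conservative root.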
static void
scaninterfacedata(uintptr bits, byte *scanp, bool afterprologue)
{
	Itab *tab;
	Type *type;

	if(runtime·precisestack && afterprologue) {
		if(bits == BitsIface) {
			tab = *(Itab**)scanp;
			if(tab->type->size <= sizeof(void*) && (tab->type->kind & KindNoPointers))
				return;
		} else { // bits == BitsEface
			type = *(Type**)scanp;
			if(type->size <= sizeof(void*) && (type->kind & KindNoPointers))
				return;
		}
	}
	addroot((Obj){scanp+PtrSize, PtrSize, 0});
}

// Starting from scanp, scans words corresponding to set bits.
static void
scanbitvector(byte *scanp, BitVector *bv, bool afterprologue)
{
	uintptr word, bits;
	uint32 *wordp;
	int32 i, remptrs;

	wordp = bv->data;
	for(remptrs = bv->n; remptrs > 0; remptrs -= 32) {
		word = *wordp++;
		if(remptrs < 32)
			i = remptrs;
		else
			i = 32;
		i /= BitsPerPointer;
		for(; i > 0; i--) {
			bits = word & 3;
			if(bits != BitsNoPointer && *(void**)scanp != nil)
				if(bits == BitsPointer)
					addroot((Obj){scanp, PtrSize, 0});
				else
					scaninterfacedata(bits, scanp, afterprologue);
			word >>= BitsPerPointer;
			scanp += PtrSize;
		}
	}
}

// Scan a stack frame: local variables and function arguments/results.
static void
addframeroots(Stkframe *frame, void*)
{
	Func *f;
	BitVector *args, *locals;
	uintptr size;
	bool afterprologue;

	f = frame->fn;

	// Scan local variables if stack frame has been allocated.
	// Use pointer information if known.
	afterprologue = (frame->varp > (byte*)frame->sp);
	if(afterprologue) {
		locals = runtime·funcdata(f, FUNCDATA_GCLocals);
		if(locals == nil) {
			// No locals information, scan everything.
			size = frame->varp - (byte*)frame->sp;
			addroot((Obj){frame->varp - size, size, 0});
		} else if(locals->n < 0) {
			// Locals size information, scan just the locals.
			size = -locals->n;
			addroot((Obj){frame->varp - size, size, 0});
		} else if(locals->n > 0) {
			// Locals bitmap information, scan just the pointers in locals.
			size = (locals->n*PtrSize) / BitsPerPointer;
			scanbitvector(frame->varp - size, locals, afterprologue);
		}
	}

	// Scan arguments.
	// Use pointer information if known.
	args = runtime·funcdata(f, FUNCDATA_GCArgs);
	if(args != nil && args->n > 0)
		scanbitvector(frame->argp, args, false);
	else
		addroot((Obj){frame->argp, frame->arglen, 0});
}

static void
addstackroots(G *gp)
{
	M *mp;
	int32 n;
	Stktop *stk;
	uintptr sp, guard, pc, lr;
	void *base;
	uintptr size;

	stk = (Stktop*)gp->stackbase;
	guard = gp->stackguard;

	if(gp == g)
		runtime·throw("can't scan our own stack");
	if((mp = gp->m) != nil && mp->helpgc)
		runtime·throw("can't scan gchelper stack");
	if(gp->syscallstack != (uintptr)nil) {
		// Scanning another goroutine that is about to enter or might
		// have just exited a system call. It may be executing code such
		// as schedlock and may have needed to start a new stack segment.
		// Use the stack segment and stack pointer at the time of
		// the system call instead, since that won't change underfoot.
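		// (syscallsp/syscallpc/syscallstack/syscallguard are saved on
		// entry to the system call, so they are stable here.)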
		sp = gp->syscallsp;
		pc = gp->syscallpc;
		lr = 0;
		stk = (Stktop*)gp->syscallstack;
		guard = gp->syscallguard;
	} else {
		// Scanning another goroutine's stack.
		// The goroutine is usually asleep (the world is stopped).
		sp = gp->sched.sp;
		pc = gp->sched.pc;
		lr = gp->sched.lr;

		// For function about to start, context argument is a root too.
		if(gp->sched.ctxt != 0 && runtime·mlookup(gp->sched.ctxt, &base, &size, nil))
			addroot((Obj){base, size, 0});
	}
	if(ScanStackByFrames) {
		USED(stk);
		USED(guard);
		runtime·gentraceback(pc, sp, lr, gp, 0, nil, 0x7fffffff, addframeroots, nil, false);
	} else {
		USED(lr);
		USED(pc);
		n = 0;
		while(stk) {
			if(sp < guard-StackGuard || (uintptr)stk < sp) {
				runtime·printf("scanstack inconsistent: g%D#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk);
				runtime·throw("scanstack");
			}
			addroot((Obj){(byte*)sp, (uintptr)stk - sp, (uintptr)defaultProg | PRECISE | LOOP});
			sp = stk->gobuf.sp;
			guard = stk->stackguard;
			stk = (Stktop*)stk->stackbase;
			n++;
		}
	}
}

static void
addfinroots(void *v)
{
	uintptr size;
	void *base;

	size = 0;
	if(!runtime·mlookup(v, &base, &size, nil) || !runtime·blockspecial(base))
		runtime·throw("mark - finalizer inconsistency");

	// do not mark the finalizer block itself. just mark the things it points at.
	addroot((Obj){base, size, 0});
}

static void
addroots(void)
{
	G *gp;
	FinBlock *fb;
	MSpan *s, **allspans;
	uint32 spanidx;

	work.nroot = 0;

	// data & bss
	// TODO(atom): load balancing
	addroot((Obj){data, edata - data, (uintptr)gcdata});
	addroot((Obj){bss, ebss - bss, (uintptr)gcbss});

	// MSpan.types
	allspans = runtime·mheap.allspans;
	for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
		s = allspans[spanidx];
		if(s->state == MSpanInUse) {
			// The garbage collector ignores type pointers stored in MSpan.types:
			//  - Compiler-generated types are stored outside of heap.
			//  - The reflect package has runtime-generated types cached in its data structures.
			//    The garbage collector relies on finding the references via that cache.
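			// Only the MTypes_Words and MTypes_Bytes slabs are heap-allocated
			// and must therefore be kept alive here.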
			switch(s->types.compression) {
			case MTypes_Empty:
			case MTypes_Single:
				break;
			case MTypes_Words:
			case MTypes_Bytes:
				markonly((byte*)s->types.data);
				break;
			}
		}
	}

	// stacks
	for(gp=runtime·allg; gp!=nil; gp=gp->alllink) {
		switch(gp->status){
		default:
			runtime·printf("unexpected G.status %d\n", gp->status);
			runtime·throw("mark - bad status");
		case Gdead:
			break;
		case Grunning:
			runtime·throw("mark - world not stopped");
		case Grunnable:
		case Gsyscall:
		case Gwaiting:
			addstackroots(gp);
			break;
		}
	}

	runtime·walkfintab(addfinroots);

	for(fb=allfin; fb; fb=fb->alllink)
		addroot((Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
}

static bool
handlespecial(byte *p, uintptr size)
{
	FuncVal *fn;
	uintptr nret;
	PtrType *ot;
	Type *fint;
	FinBlock *block;
	Finalizer *f;

	if(!runtime·getfinalizer(p, true, &fn, &nret, &fint, &ot)) {
		runtime·setblockspecial(p, false);
		runtime·MProf_Free(p, size);
		return false;
	}

	runtime·lock(&finlock);
	if(finq == nil || finq->cnt == finq->cap) {
		if(finc == nil) {
			finc = runtime·persistentalloc(PageSize, 0, &mstats.gc_sys);
			finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
			finc->alllink = allfin;
			allfin = finc;
		}
		block = finc;
		finc = block->next;
		block->next = finq;
		finq = block;
	}
	f = &finq->fin[finq->cnt];
	finq->cnt++;
	f->fn = fn;
	f->nret = nret;
	f->fint = fint;
	f->ot = ot;
	f->arg = p;
	runtime·unlock(&finlock);
	return true;
}

// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
static void
sweepspan(ParFor *desc, uint32 idx)
{
	int32 cl, n, npages;
	uintptr size;
	byte *p;
	MCache *c;
	byte *arena_start;
	MLink head, *end;
	int32 nfree;
	byte *type_data;
	byte compression;
	uintptr type_data_inc;
	MSpan *s;

	USED(&desc);
	s = runtime·mheap.allspans[idx];
	if(s->state != MSpanInUse)
		return;
	arena_start = runtime·mheap.arena_start;
	p = (byte*)(s->start << PageShift);
	cl = s->sizeclass;
	size = s->elemsize;
	if(cl == 0) {
		n = 1;
	} else {
		// Chunk full of small blocks.
		npages = runtime·class_to_allocnpages[cl];
		n = (npages << PageShift) / size;
	}
	nfree = 0;
	end = &head;
	c = m->mcache;

	type_data = (byte*)s->types.data;
	type_data_inc = sizeof(uintptr);
	compression = s->types.compression;
	switch(compression) {
	case MTypes_Bytes:
		type_data += 8*sizeof(uintptr);
		type_data_inc = 1;
		break;
	}

	// Sweep through n objects of given size starting at p.
	// This thread owns the span now, so it can manipulate
	// the block bitmap without atomic operations.
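	// (Sweeping is parallelized across spans, one sweepspan call per span,
	// never within a span.)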
	for(; n > 0; n--, p += size, type_data+=type_data_inc) {
		uintptr off, *bitp, shift, bits;

		off = (uintptr*)p - (uintptr*)arena_start;
		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		bits = *bitp>>shift;

		if((bits & bitAllocated) == 0)
			continue;

		if((bits & bitMarked) != 0) {
			if(DebugMark) {
				if(!(bits & bitSpecial))
					runtime·printf("found spurious mark on %p\n", p);
				*bitp &= ~(bitSpecial<<shift);
			}
			*bitp &= ~(bitMarked<<shift);
			continue;
		}

		// Special means it has a finalizer or is being profiled.
		// In DebugMark mode, the bit has been coopted so
		// we have to assume all blocks are special.
		if(DebugMark || (bits & bitSpecial) != 0) {
			if(handlespecial(p, size))
				continue;
		}

		// Mark freed; restore block boundary bit.
		*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);

		if(cl == 0) {
			// Free large span.
			runtime·unmarkspan(p, 1<<PageShift);
			*(uintptr*)p = (uintptr)0xdeaddeaddeaddeadll;	// needs zeroing
			runtime·MHeap_Free(&runtime·mheap, s, 1);
			c->local_nlargefree++;
			c->local_largefree += size;
		} else {
			// Free small object.
			switch(compression) {
			case MTypes_Words:
				*(uintptr*)type_data = 0;
				break;
			case MTypes_Bytes:
				*(byte*)type_data = 0;
				break;
			}
			if(size > sizeof(uintptr))
				((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll;	// mark as "needs to be zeroed"

			end->next = (MLink*)p;
			end = (MLink*)p;
			nfree++;
		}
	}

	if(nfree) {
		c->local_nsmallfree[cl] += nfree;
		c->local_cachealloc -= nfree * size;
		runtime·MCentral_FreeSpan(&runtime·mheap.central[cl], s, nfree, head.next, end);
	}
}

static void
dumpspan(uint32 idx)
{
	int32 sizeclass, n, npages, i, column;
	uintptr size;
	byte *p;
	byte *arena_start;
	MSpan *s;
	bool allocated, special;

	s = runtime·mheap.allspans[idx];
	if(s->state != MSpanInUse)
		return;
	arena_start = runtime·mheap.arena_start;
	p = (byte*)(s->start << PageShift);
	sizeclass = s->sizeclass;
	size = s->elemsize;
	if(sizeclass == 0) {
		n = 1;
	} else {
		npages = runtime·class_to_allocnpages[sizeclass];
		n = (npages << PageShift) / size;
	}

	runtime·printf("%p .. %p:\n", p, p+n*size);
	column = 0;
	for(; n>0; n--, p+=size) {
		uintptr off, *bitp, shift, bits;

		off = (uintptr*)p - (uintptr*)arena_start;
		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		bits = *bitp>>shift;

		allocated = ((bits & bitAllocated) != 0);
		special = ((bits & bitSpecial) != 0);

		for(i=0; i<size; i+=sizeof(void*)) {
			if(column == 0) {
				runtime·printf("\t");
			}
			if(i == 0) {
				runtime·printf(allocated ? "(" : "[");
				runtime·printf(special ? "@" : "");
				runtime·printf("%p: ", p+i);
			} else {
				runtime·printf(" ");
			}

			runtime·printf("%p", *(void**)(p+i));

			if(i+sizeof(void*) >= size) {
				runtime·printf(allocated ?
") " : "] "); 1775 } 1776 1777 column++; 1778 if(column == 8) { 1779 runtime·printf("\n"); 1780 column = 0; 1781 } 1782 } 1783 } 1784 runtime·printf("\n"); 1785 } 1786 1787 // A debugging function to dump the contents of memory 1788 void 1789 runtime·memorydump(void) 1790 { 1791 uint32 spanidx; 1792 1793 for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) { 1794 dumpspan(spanidx); 1795 } 1796 } 1797 1798 void 1799 runtime·gchelper(void) 1800 { 1801 int32 nproc; 1802 1803 gchelperstart(); 1804 1805 // parallel mark for over gc roots 1806 runtime·parfordo(work.markfor); 1807 1808 // help other threads scan secondary blocks 1809 scanblock(nil, nil, 0, true); 1810 1811 if(DebugMark) { 1812 // wait while the main thread executes mark(debug_scanblock) 1813 while(runtime·atomicload(&work.debugmarkdone) == 0) 1814 runtime·usleep(10); 1815 } 1816 1817 runtime·parfordo(work.sweepfor); 1818 bufferList[m->helpgc].busy = 0; 1819 nproc = work.nproc; // work.nproc can change right after we increment work.ndone 1820 if(runtime·xadd(&work.ndone, +1) == nproc-1) 1821 runtime·notewakeup(&work.alldone); 1822 } 1823 1824 #define GcpercentUnknown (-2) 1825 1826 // Initialized from $GOGC. GOGC=off means no gc. 1827 // 1828 // Next gc is after we've allocated an extra amount of 1829 // memory proportional to the amount already in use. 1830 // If gcpercent=100 and we're using 4M, we'll gc again 1831 // when we get to 8M. This keeps the gc cost in linear 1832 // proportion to the allocation cost. Adjusting gcpercent 1833 // just changes the linear constant (and also the amount of 1834 // extra memory used). 1835 static int32 gcpercent = GcpercentUnknown; 1836 1837 static void 1838 cachestats(void) 1839 { 1840 MCache *c; 1841 P *p, **pp; 1842 1843 for(pp=runtime·allp; p=*pp; pp++) { 1844 c = p->mcache; 1845 if(c==nil) 1846 continue; 1847 runtime·purgecachedstats(c); 1848 } 1849 } 1850 1851 static void 1852 updatememstats(GCStats *stats) 1853 { 1854 M *mp; 1855 MSpan *s; 1856 MCache *c; 1857 P *p, **pp; 1858 int32 i; 1859 uint64 stacks_inuse, smallfree; 1860 uint64 *src, *dst; 1861 1862 if(stats) 1863 runtime·memclr((byte*)stats, sizeof(*stats)); 1864 stacks_inuse = 0; 1865 for(mp=runtime·allm; mp; mp=mp->alllink) { 1866 stacks_inuse += mp->stackinuse*FixedStack; 1867 if(stats) { 1868 src = (uint64*)&mp->gcstats; 1869 dst = (uint64*)stats; 1870 for(i=0; i<sizeof(*stats)/sizeof(uint64); i++) 1871 dst[i] += src[i]; 1872 runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats)); 1873 } 1874 } 1875 mstats.stacks_inuse = stacks_inuse; 1876 mstats.mcache_inuse = runtime·mheap.cachealloc.inuse; 1877 mstats.mspan_inuse = runtime·mheap.spanalloc.inuse; 1878 mstats.sys = mstats.heap_sys + mstats.stacks_sys + mstats.mspan_sys + 1879 mstats.mcache_sys + mstats.buckhash_sys + mstats.gc_sys + mstats.other_sys; 1880 1881 // Calculate memory allocator stats. 1882 // During program execution we only count number of frees and amount of freed memory. 1883 // Current number of alive object in the heap and amount of alive heap memory 1884 // are calculated by scanning all spans. 1885 // Total number of mallocs is calculated as number of frees plus number of alive objects. 1886 // Similarly, total amount of allocated memory is calculated as amount of freed memory 1887 // plus amount of alive heap memory. 
	mstats.alloc = 0;
	mstats.total_alloc = 0;
	mstats.nmalloc = 0;
	mstats.nfree = 0;
	for(i = 0; i < nelem(mstats.by_size); i++) {
		mstats.by_size[i].nmalloc = 0;
		mstats.by_size[i].nfree = 0;
	}

	// Flush MCache's to MCentral.
	for(pp=runtime·allp; p=*pp; pp++) {
		c = p->mcache;
		if(c==nil)
			continue;
		runtime·MCache_ReleaseAll(c);
	}

	// Aggregate local stats.
	cachestats();

	// Scan all spans and count number of alive objects.
	for(i = 0; i < runtime·mheap.nspan; i++) {
		s = runtime·mheap.allspans[i];
		if(s->state != MSpanInUse)
			continue;
		if(s->sizeclass == 0) {
			mstats.nmalloc++;
			mstats.alloc += s->elemsize;
		} else {
			mstats.nmalloc += s->ref;
			mstats.by_size[s->sizeclass].nmalloc += s->ref;
			mstats.alloc += s->ref*s->elemsize;
		}
	}

	// Aggregate by size class.
	smallfree = 0;
	mstats.nfree = runtime·mheap.nlargefree;
	for(i = 0; i < nelem(mstats.by_size); i++) {
		mstats.nfree += runtime·mheap.nsmallfree[i];
		mstats.by_size[i].nfree = runtime·mheap.nsmallfree[i];
		mstats.by_size[i].nmalloc += runtime·mheap.nsmallfree[i];
		smallfree += runtime·mheap.nsmallfree[i] * runtime·class_to_size[i];
	}
	mstats.nmalloc += mstats.nfree;

	// Calculate derived stats.
	mstats.total_alloc = mstats.alloc + runtime·mheap.largefree + smallfree;
	mstats.heap_alloc = mstats.alloc;
	mstats.heap_objects = mstats.nmalloc - mstats.nfree;
}

// Structure of arguments passed to function gc().
// This allows the arguments to be passed via runtime·mcall.
struct gc_args
{
	int64 start_time; // start time of GC in ns (just before stoptheworld)
};

static void gc(struct gc_args *args);
static void mgc(G *gp);

static int32
readgogc(void)
{
	byte *p;

	p = runtime·getenv("GOGC");
	if(p == nil || p[0] == '\0')
		return 100;
	if(runtime·strcmp(p, (byte*)"off") == 0)
		return -1;
	return runtime·atoi(p);
}

static FuncVal runfinqv = {runfinq};

void
runtime·gc(int32 force)
{
	struct gc_args a;
	int32 i;

	// The atomic operations are not atomic if the uint64s
	// are not aligned on uint64 boundaries. This has been
	// a problem in the past.
	if((((uintptr)&work.empty) & 7) != 0)
		runtime·throw("runtime: gc work buffer is misaligned");
	if((((uintptr)&work.full) & 7) != 0)
		runtime·throw("runtime: gc work buffer is misaligned");

	// The gc is turned off (via enablegc) until
	// the bootstrap has completed.
	// Also, malloc gets called in the guts
	// of a number of libraries that might be
	// holding locks. To avoid priority inversion
	// problems, don't bother trying to run gc
	// while holding a lock. The next mallocgc
	// without a lock will do the gc instead.
	if(!mstats.enablegc || g == m->g0 || m->locks > 0 || runtime·panicking)
		return;

	if(gcpercent == GcpercentUnknown) {	// first time through
		runtime·lock(&runtime·mheap);
		if(gcpercent == GcpercentUnknown)
			gcpercent = readgogc();
		runtime·unlock(&runtime·mheap);
	}
	if(gcpercent < 0)
		return;

	runtime·semacquire(&runtime·worldsema, false);
	if(!force && mstats.heap_alloc < mstats.next_gc) {
		// typically threads which lost the race to grab
		// worldsema exit here when gc is done.
		runtime·semrelease(&runtime·worldsema);
		return;
	}

	// Ok, we're doing it!  Stop everybody else
	a.start_time = runtime·nanotime();
	m->gcing = 1;
	runtime·stoptheworld();

	// Run gc on the g0 stack.  We do this so that the g stack
	// we're currently running on will no longer change.  Cuts
	// the root set down a bit (g0 stacks are not scanned, and
	// we don't need to scan gc's internal state).  Also an
	// enabler for copyable stacks.
	for(i = 0; i < (runtime·debug.gctrace > 1 ? 2 : 1); i++) {
		// switch to g0, call gc(&a), then switch back
		g->param = &a;
		g->status = Gwaiting;
		g->waitreason = "garbage collection";
		runtime·mcall(mgc);
		// record a new start time in case we're going around again
		a.start_time = runtime·nanotime();
	}

	// all done
	m->gcing = 0;
	m->locks++;
	runtime·semrelease(&runtime·worldsema);
	runtime·starttheworld();
	m->locks--;

	// now that gc is done, kick off finalizer thread if needed
	if(finq != nil) {
		runtime·lock(&finlock);
		// kick off or wake up goroutine to run queued finalizers
		if(fing == nil)
			fing = runtime·newproc1(&runfinqv, nil, 0, 0, runtime·gc);
		else if(fingwait) {
			fingwait = 0;
			runtime·ready(fing);
		}
		runtime·unlock(&finlock);
	}
	// give the queued finalizers, if any, a chance to run
	runtime·gosched();
}

static void
mgc(G *gp)
{
	gc(gp->param);
	gp->param = nil;
	gp->status = Grunning;
	runtime·gogo(&gp->sched);
}

static void
gc(struct gc_args *args)
{
	int64 t0, t1, t2, t3, t4;
	uint64 heap0, heap1, obj0, obj1, ninstr;
	GCStats stats;
	M *mp;
	uint32 i;
	Eface eface;

	t0 = args->start_time;

	if(CollectStats)
		runtime·memclr((byte*)&gcstats, sizeof(gcstats));

	for(mp=runtime·allm; mp; mp=mp->alllink)
		runtime·settype_flush(mp);

	heap0 = 0;
	obj0 = 0;
	if(runtime·debug.gctrace) {
		updatememstats(nil);
		heap0 = mstats.heap_alloc;
		obj0 = mstats.nmalloc - mstats.nfree;
	}

	m->locks++;	// disable gc during mallocs in parforalloc
	if(work.markfor == nil)
		work.markfor = runtime·parforalloc(MaxGcproc);
	if(work.sweepfor == nil)
		work.sweepfor = runtime·parforalloc(MaxGcproc);
	m->locks--;

	if(itabtype == nil) {
		// get C pointer to the Go type "itab"
		runtime·gc_itab_ptr(&eface);
		itabtype = ((PtrType*)eface.type)->elem;
	}

	work.nwait = 0;
	work.ndone = 0;
	work.debugmarkdone = 0;
	work.nproc = runtime·gcprocs();
	addroots();
	runtime·parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot);
	runtime·parforsetup(work.sweepfor, work.nproc, runtime·mheap.nspan, nil, true, sweepspan);
	if(work.nproc > 1) {
		runtime·noteclear(&work.alldone);
		runtime·helpgc(work.nproc);
	}

	t1 = runtime·nanotime();

	gchelperstart();
	runtime·parfordo(work.markfor);
	scanblock(nil, nil, 0, true);

	if(DebugMark) {
		for(i=0; i<work.nroot; i++)
			debug_scanblock(work.roots[i].p, work.roots[i].n);
		runtime·atomicstore(&work.debugmarkdone, 1);
	}
	t2 = runtime·nanotime();

	runtime·parfordo(work.sweepfor);
	bufferList[m->helpgc].busy = 0;
	t3 = runtime·nanotime();

	if(work.nproc > 1)
		runtime·notesleep(&work.alldone);

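	// Mark and sweep are done; flush the per-P cached stats and
	// set the trigger for the next collection from gcpercent.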
	cachestats();
	mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;

	t4 = runtime·nanotime();
	mstats.last_gc = t4;
	mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
	mstats.pause_total_ns += t4 - t0;
	mstats.numgc++;
	if(mstats.debuggc)
		runtime·printf("pause %D\n", t4-t0);

	if(runtime·debug.gctrace) {
		updatememstats(&stats);
		heap1 = mstats.heap_alloc;
		obj1 = mstats.nmalloc - mstats.nfree;

		stats.nprocyield += work.sweepfor->nprocyield;
		stats.nosyield += work.sweepfor->nosyield;
		stats.nsleep += work.sweepfor->nsleep;

		runtime·printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
				" %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
			mstats.numgc, work.nproc, (t2-t1)/1000000, (t3-t2)/1000000, (t1-t0+t4-t3)/1000000,
			heap0>>20, heap1>>20, obj0, obj1,
			mstats.nmalloc, mstats.nfree,
			stats.nhandoff, stats.nhandoffcnt,
			work.sweepfor->nsteal, work.sweepfor->nstealcnt,
			stats.nprocyield, stats.nosyield, stats.nsleep);
		if(CollectStats) {
			runtime·printf("scan: %D bytes, %D objects, %D untyped, %D types from MSpan\n",
				gcstats.nbytes, gcstats.obj.cnt, gcstats.obj.notype, gcstats.obj.typelookup);
			if(gcstats.ptr.cnt != 0)
				runtime·printf("avg ptrbufsize: %D (%D/%D)\n",
					gcstats.ptr.sum/gcstats.ptr.cnt, gcstats.ptr.sum, gcstats.ptr.cnt);
			if(gcstats.obj.cnt != 0)
				runtime·printf("avg nobj: %D (%D/%D)\n",
					gcstats.obj.sum/gcstats.obj.cnt, gcstats.obj.sum, gcstats.obj.cnt);
			runtime·printf("rescans: %D, %D bytes\n", gcstats.rescan, gcstats.rescanbytes);

			runtime·printf("instruction counts:\n");
			ninstr = 0;
			for(i=0; i<nelem(gcstats.instr); i++) {
				runtime·printf("\t%d:\t%D\n", i, gcstats.instr[i]);
				ninstr += gcstats.instr[i];
			}
			runtime·printf("\ttotal:\t%D\n", ninstr);

			runtime·printf("putempty: %D, getfull: %D\n", gcstats.putempty, gcstats.getfull);

			runtime·printf("markonly base lookup: bit %D word %D span %D\n", gcstats.markonly.foundbit, gcstats.markonly.foundword, gcstats.markonly.foundspan);
			runtime·printf("flushptrbuf base lookup: bit %D word %D span %D\n", gcstats.flushptrbuf.foundbit, gcstats.flushptrbuf.foundword, gcstats.flushptrbuf.foundspan);
		}
	}

	runtime·MProf_GC();
}

void
runtime·ReadMemStats(MStats *stats)
{
	// Have to acquire worldsema to stop the world,
	// because stoptheworld can only be used by
	// one goroutine at a time, and there might be
	// a pending garbage collection already calling it.
	runtime·semacquire(&runtime·worldsema, false);
	m->gcing = 1;
	runtime·stoptheworld();
	updatememstats(nil);
	*stats = mstats;
	m->gcing = 0;
	m->locks++;
	runtime·semrelease(&runtime·worldsema);
	runtime·starttheworld();
	m->locks--;
}

void
runtime∕debug·readGCStats(Slice *pauses)
{
	uint64 *p;
	uint32 i, n;

	// Calling code in runtime/debug should make the slice large enough.
	if(pauses->cap < nelem(mstats.pause_ns)+3)
		runtime·throw("runtime: short slice passed to readGCStats");

	// Pass back: pauses, last gc (absolute time), number of gc, total pause ns.
	p = (uint64*)pauses->array;
	runtime·lock(&runtime·mheap);
	n = mstats.numgc;
	if(n > nelem(mstats.pause_ns))
		n = nelem(mstats.pause_ns);

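	// Worked example of the circular copy below, with assumed numbers:
	// if nelem(mstats.pause_ns) were 256 and numgc were 300, then
	// n == 256 and the loop stores pause_ns[299%256] == pause_ns[43]
	// in p[0], pause_ns[42] in p[1], and so on, wrapping backward
	// through the buffer.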
	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%nelem(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	for(i=0; i<n; i++)
		p[i] = mstats.pause_ns[(mstats.numgc-1-i)%nelem(mstats.pause_ns)];

	p[n] = mstats.last_gc;
	p[n+1] = mstats.numgc;
	p[n+2] = mstats.pause_total_ns;
	runtime·unlock(&runtime·mheap);
	pauses->len = n+3;
}

void
runtime∕debug·setGCPercent(intgo in, intgo out)
{
	runtime·lock(&runtime·mheap);
	if(gcpercent == GcpercentUnknown)
		gcpercent = readgogc();
	out = gcpercent;
	if(in < 0)
		in = -1;
	gcpercent = in;
	runtime·unlock(&runtime·mheap);
	FLUSH(&out);
}

static void
gchelperstart(void)
{
	if(m->helpgc < 0 || m->helpgc >= MaxGcproc)
		runtime·throw("gchelperstart: bad m->helpgc");
	if(runtime·xchg(&bufferList[m->helpgc].busy, 1))
		runtime·throw("gchelperstart: already busy");
	if(g != m->g0)
		runtime·throw("gchelper not running on g0 stack");
}

static void
runfinq(void)
{
	Finalizer *f;
	FinBlock *fb, *next;
	byte *frame;
	uint32 framesz, framecap, i;
	Eface *ef, ef1;

	frame = nil;
	framecap = 0;
	for(;;) {
		runtime·lock(&finlock);
		fb = finq;
		finq = nil;
		if(fb == nil) {
			fingwait = 1;
			runtime·park(runtime·unlock, &finlock, "finalizer wait");
			continue;
		}
		runtime·unlock(&finlock);
		if(raceenabled)
			runtime·racefingo();
		for(; fb; fb=next) {
			next = fb->next;
			for(i=0; i<fb->cnt; i++) {
				f = &fb->fin[i];
				framesz = sizeof(Eface) + f->nret;
				if(framecap < framesz) {
					runtime·free(frame);
					// The frame does not contain pointers interesting for GC,
					// all not yet finalized objects are stored in finq.
					// If we do not mark it as FlagNoScan,
					// the last finalized object is not collected.
					frame = runtime·mallocgc(framesz, 0, FlagNoScan|FlagNoInvokeGC);
					framecap = framesz;
				}
				if(f->fint == nil)
					runtime·throw("missing type in runfinq");
				if(f->fint->kind == KindPtr) {
					// direct use of pointer
					*(void**)frame = f->arg;
				} else if(((InterfaceType*)f->fint)->mhdr.len == 0) {
					// convert to empty interface
					ef = (Eface*)frame;
					ef->type = f->ot;
					ef->data = f->arg;
				} else {
					// convert to interface with methods, via empty interface.
					ef1.type = f->ot;
					ef1.data = f->arg;
					if(!runtime·ifaceE2I2((InterfaceType*)f->fint, ef1, (Iface*)frame))
						runtime·throw("invalid type conversion in runfinq");
				}
				reflect·call(f->fn, frame, framesz);
				f->fn = nil;
				f->arg = nil;
				f->ot = nil;
			}
			fb->cnt = 0;
			fb->next = finc;
			finc = fb;
		}
		runtime·gc(1);	// trigger another gc to clean up the finalized objects, if possible
	}
}

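// Each of the bitmap routines below begins by translating a heap
// pointer into a bitmap word and shift, using the backward-growing
// bitmap described at the top of this file. As a sketch, the shared
// computation looks like the hypothetical helper here; the name
// examplebitmaplookup is not part of this codebase, and the real
// functions inline these three lines rather than calling out to it.
static void
examplebitmaplookup(void *v, uintptr **b, uintptr *shift)
{
	uintptr off;

	// Word offset of v from the start of the arena.
	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
	// One bitmap word, located below arena_start, covers
	// wordsPerBitmapWord heap words.
	*b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
	// Index of this heap word's bits within the bitmap word; the four
	// flag bits for one heap word sit bitShift apart.
	*shift = off % wordsPerBitmapWord;
}
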
// mark the block at v of size n as allocated.
// If noscan is true, mark it as not needing scanning.
void
runtime·markallocated(void *v, uintptr n, bool noscan)
{
	uintptr *b, obits, bits, off, shift;

	if(0)
		runtime·printf("markallocated %p+%p\n", v, n);

	if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
		runtime·throw("markallocated: bad pointer");

	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;	// word offset
	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	for(;;) {
		obits = *b;
		bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
		if(noscan)
			bits |= bitNoScan<<shift;
		if(runtime·gomaxprocs == 1) {
			*b = bits;
			break;
		} else {
			// more than one goroutine is potentially running: use atomic op
			if(runtime·casp((void**)b, (void*)obits, (void*)bits))
				break;
		}
	}
}

// mark the block at v of size n as freed.
void
runtime·markfreed(void *v, uintptr n)
{
	uintptr *b, obits, bits, off, shift;

	if(0)
		runtime·printf("markfreed %p+%p\n", v, n);

	if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
		runtime·throw("markfreed: bad pointer");

	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;	// word offset
	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	for(;;) {
		obits = *b;
		bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
		if(runtime·gomaxprocs == 1) {
			*b = bits;
			break;
		} else {
			// more than one goroutine is potentially running: use atomic op
			if(runtime·casp((void**)b, (void*)obits, (void*)bits))
				break;
		}
	}
}

// check that the block at v of size n is marked freed.
void
runtime·checkfreed(void *v, uintptr n)
{
	uintptr *b, bits, off, shift;

	if(!runtime·checking)
		return;

	if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
		return;	// not allocated, so okay

	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;	// word offset
	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	bits = *b>>shift;
	if((bits & bitAllocated) != 0) {
		runtime·printf("checkfreed %p+%p: off=%p have=%p\n",
			v, n, off, bits & bitMask);
		runtime·throw("checkfreed: not freed");
	}
}

// mark the span of memory at v as having n blocks of the given size.
// if leftover is true, there is left over space at the end of the span.
void
runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
{
	uintptr *b, off, shift;
	byte *p;

	if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
		runtime·throw("markspan: bad pointer");

	p = v;
	if(leftover)	// mark a boundary just past end of last block too
		n++;
	for(; n-- > 0; p += size) {
		// Okay to use non-atomic ops here, because we control
		// the entire span, and each bitmap word has bits for only
		// one span, so no other goroutines are changing these
		// bitmap words.
		off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start;	// word offset
		b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
	}
}
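
// Worked example for markspan above, with assumed numbers: for a span
// at v holding n=4 blocks of size=32 bytes with leftover set, the loop
// runs five times and writes bitBlockBoundary at word offsets 0, 4, 8,
// 12, and 16 from v (32 bytes is 4 words on a 64-bit system, plus one
// boundary just past the last block), clearing the other flag bits for
// each of those words.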

// unmark the span of memory at v of length n bytes.
void
runtime·unmarkspan(void *v, uintptr n)
{
	uintptr *p, *b, off;

	if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
		runtime·throw("unmarkspan: bad pointer");

	p = v;
	off = p - (uintptr*)runtime·mheap.arena_start;	// word offset
	if(off % wordsPerBitmapWord != 0)
		runtime·throw("unmarkspan: unaligned pointer");
	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
	n /= PtrSize;
	if(n%wordsPerBitmapWord != 0)
		runtime·throw("unmarkspan: unaligned length");
	// Okay to use non-atomic ops here, because we control
	// the entire span, and each bitmap word has bits for only
	// one span, so no other goroutines are changing these
	// bitmap words.
	n /= wordsPerBitmapWord;
	while(n-- > 0)
		*b-- = 0;
}

bool
runtime·blockspecial(void *v)
{
	uintptr *b, off, shift;

	if(DebugMark)
		return true;

	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	return (*b & (bitSpecial<<shift)) != 0;
}

void
runtime·setblockspecial(void *v, bool s)
{
	uintptr *b, off, shift, bits, obits;

	if(DebugMark)
		return;

	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	for(;;) {
		obits = *b;
		if(s)
			bits = obits | (bitSpecial<<shift);
		else
			bits = obits & ~(bitSpecial<<shift);
		if(runtime·gomaxprocs == 1) {
			*b = bits;
			break;
		} else {
			// more than one goroutine is potentially running: use atomic op
			if(runtime·casp((void**)b, (void*)obits, (void*)bits))
				break;
		}
	}
}

void
runtime·MHeap_MapBits(MHeap *h)
{
	// Caller has added extra mappings to the arena.
	// Add extra mappings of bitmap words as needed.
	// We allocate extra bitmap pieces in chunks of bitmapChunk.
	enum {
		bitmapChunk = 8192
	};
	uintptr n;

	n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
	n = ROUND(n, bitmapChunk);
	if(h->bitmap_mapped >= n)
		return;

	runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped, &mstats.gc_sys);
	h->bitmap_mapped = n;
}
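
// Worked example for MHeap_MapBits above, with assumed numbers: on a
// 64-bit system wordsPerBitmapWord is 16, so an arena that has grown
// to 256 MB needs 256 MB/16 = 16 MB of bitmap. That size is rounded up
// to a multiple of bitmapChunk (8192 bytes) and mapped just below
// arena_start, extending backward as the arena grows.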