github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/runtime/mgc0.c

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector.

#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "stack.h"
#include "mgc0.h"
#include "race.h"
#include "type.h"
#include "typekind.h"
#include "hashmap.h"

enum {
	Debug = 0,
	DebugMark = 0,  // run second pass to check mark
	CollectStats = 0,
	ScanStackByFrames = 0,
	IgnorePreciseGC = 0,

	// Four bits per word (see #defines below).
	wordsPerBitmapWord = sizeof(void*)*8/4,
	bitShift = sizeof(void*)*8/4,

	handoffThreshold = 4,
	IntermediateBufferCapacity = 64,

	// Bits in type information
	PRECISE = 1,
	LOOP = 2,
	PC_BITS = PRECISE | LOOP,
};

// Bits in per-word bitmap.
// #defines because enum might not be able to hold the values.
//
// Each word in the bitmap describes wordsPerBitmapWord words
// of heap memory. There are 4 bitmap bits dedicated to each heap word,
// so on a 64-bit system there is one bitmap word per 16 heap words.
// The bits in the word are packed together by type first, then by
// heap location, so each 64-bit bitmap word consists of, from top to bottom,
// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
// then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
// This layout makes it easier to iterate over the bits of a given type.
//
// The bitmap starts at mheap.arena_start and extends *backward* from
// there. On a 64-bit system the off'th word in the arena is tracked by
// the off/16+1'th word before mheap.arena_start. (On a 32-bit system,
// the only difference is that the divisor is 8.)
//
// To pull out the bits corresponding to a given pointer p, we use:
//
//	off = p - (uintptr*)mheap.arena_start;  // word offset
//	b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
//	shift = off % wordsPerBitmapWord
//	bits = *b >> shift;
//	/* then test bits & bitAllocated, bits & bitMarked, etc. */
//
#define bitAllocated		((uintptr)1<<(bitShift*0))
#define bitNoPointers		((uintptr)1<<(bitShift*1))	/* when bitAllocated is set */
#define bitMarked		((uintptr)1<<(bitShift*2))	/* when bitAllocated is set */
#define bitSpecial		((uintptr)1<<(bitShift*3))	/* when bitAllocated is set - has finalizer or being profiled */
#define bitBlockBoundary	((uintptr)1<<(bitShift*1))	/* when bitAllocated is NOT set */

#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)

// Holding worldsema grants an M the right to try to stop the world.
// The procedure is:
//
//	runtime·semacquire(&runtime·worldsema);
//	m->gcing = 1;
//	runtime·stoptheworld();
//
//	... do stuff ...
//
//	m->gcing = 0;
//	runtime·semrelease(&runtime·worldsema);
//	runtime·starttheworld();
//
uint32 runtime·worldsema = 1;

static int32 gctrace;

typedef struct Obj Obj;
struct Obj
{
	byte	*p;	// data pointer
	uintptr	n;	// size of data in bytes
	uintptr	ti;	// type info
};

// The size of Workbuf is N*PageSize.
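// The _padding field below absorbs SIZE%sizeof(Obj) plus the one Obj slot
// deliberately left out of the obj array, so that (barring unusual struct
// padding) sizeof(Workbuf) comes out to exactly 2*PageSize, which scanblock
// later asserts. A rough worked example, assuming 4KB pages, a two-word
// LFNode and a three-word Obj on a 64-bit system:
//
//	SIZE       = 2*4096 - 16 - 8 = 8168
//	nelem(obj) = 8168/24 - 1     = 339
//	_padding   = 8168%24 + 24    = 32 bytes
//	total      = 16 + 8 + 339*24 + 32 = 8192 = 2*PageSize
//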
96 typedef struct Workbuf Workbuf; 97 struct Workbuf 98 { 99 #define SIZE (2*PageSize-sizeof(LFNode)-sizeof(uintptr)) 100 LFNode node; // must be first 101 uintptr nobj; 102 Obj obj[SIZE/sizeof(Obj) - 1]; 103 uint8 _padding[SIZE%sizeof(Obj) + sizeof(Obj)]; 104 #undef SIZE 105 }; 106 107 typedef struct Finalizer Finalizer; 108 struct Finalizer 109 { 110 FuncVal *fn; 111 void *arg; 112 uintptr nret; 113 }; 114 115 typedef struct FinBlock FinBlock; 116 struct FinBlock 117 { 118 FinBlock *alllink; 119 FinBlock *next; 120 int32 cnt; 121 int32 cap; 122 Finalizer fin[1]; 123 }; 124 125 extern byte data[]; 126 extern byte edata[]; 127 extern byte bss[]; 128 extern byte ebss[]; 129 130 extern byte gcdata[]; 131 extern byte gcbss[]; 132 133 static G *fing; 134 static FinBlock *finq; // list of finalizers that are to be executed 135 static FinBlock *finc; // cache of free blocks 136 static FinBlock *allfin; // list of all blocks 137 static Lock finlock; 138 static int32 fingwait; 139 140 static void runfinq(void); 141 static Workbuf* getempty(Workbuf*); 142 static Workbuf* getfull(Workbuf*); 143 static void putempty(Workbuf*); 144 static Workbuf* handoff(Workbuf*); 145 static void gchelperstart(void); 146 147 static struct { 148 uint64 full; // lock-free list of full blocks 149 uint64 empty; // lock-free list of empty blocks 150 byte pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait 151 uint32 nproc; 152 volatile uint32 nwait; 153 volatile uint32 ndone; 154 volatile uint32 debugmarkdone; 155 Note alldone; 156 ParFor *markfor; 157 ParFor *sweepfor; 158 159 Lock; 160 byte *chunk; 161 uintptr nchunk; 162 163 Obj *roots; 164 uint32 nroot; 165 uint32 rootcap; 166 } work; 167 168 enum { 169 GC_DEFAULT_PTR = GC_NUM_INSTR, 170 GC_MAP_NEXT, 171 GC_CHAN, 172 173 GC_NUM_INSTR2 174 }; 175 176 static struct { 177 struct { 178 uint64 sum; 179 uint64 cnt; 180 } ptr; 181 uint64 nbytes; 182 struct { 183 uint64 sum; 184 uint64 cnt; 185 uint64 notype; 186 uint64 typelookup; 187 } obj; 188 uint64 rescan; 189 uint64 rescanbytes; 190 uint64 instr[GC_NUM_INSTR2]; 191 uint64 putempty; 192 uint64 getfull; 193 } gcstats; 194 195 // markonly marks an object. It returns true if the object 196 // has been marked by this function, false otherwise. 197 // This function doesn't append the object to any buffer. 198 static bool 199 markonly(void *obj) 200 { 201 byte *p; 202 uintptr *bitp, bits, shift, x, xbits, off; 203 MSpan *s; 204 PageID k; 205 206 // Words outside the arena cannot be pointers. 207 if(obj < runtime·mheap->arena_start || obj >= runtime·mheap->arena_used) 208 return false; 209 210 // obj may be a pointer to a live object. 211 // Try to find the beginning of the object. 212 213 // Round down to word boundary. 214 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); 215 216 // Find bits for this word. 217 off = (uintptr*)obj - (uintptr*)runtime·mheap->arena_start; 218 bitp = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1; 219 shift = off % wordsPerBitmapWord; 220 xbits = *bitp; 221 bits = xbits >> shift; 222 223 // Pointing at the beginning of a block? 224 if((bits & (bitAllocated|bitBlockBoundary)) != 0) 225 goto found; 226 227 // Otherwise consult span table to find beginning. 228 // (Manually inlined copy of MHeap_LookupMaybe.) 
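	// The lookup below recovers the object base from an interior pointer:
	// the page number of obj (minus arena_start's page number on 64-bit
	// systems) indexes mheap->map to find the owning MSpan; if there is no
	// in-use span covering that page, obj is not a heap pointer. For a
	// small-object span the base is then found by integer division, e.g.
	// with elemsize = 48:
	//
	//	obj = p + 100  =>  i = 100/48 = 2  =>  base = p + 2*48 = p + 96
	//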
229 k = (uintptr)obj>>PageShift; 230 x = k; 231 if(sizeof(void*) == 8) 232 x -= (uintptr)runtime·mheap->arena_start>>PageShift; 233 s = runtime·mheap->map[x]; 234 if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse) 235 return false; 236 p = (byte*)((uintptr)s->start<<PageShift); 237 if(s->sizeclass == 0) { 238 obj = p; 239 } else { 240 if((byte*)obj >= (byte*)s->limit) 241 return false; 242 uintptr size = s->elemsize; 243 int32 i = ((byte*)obj - p)/size; 244 obj = p+i*size; 245 } 246 247 // Now that we know the object header, reload bits. 248 off = (uintptr*)obj - (uintptr*)runtime·mheap->arena_start; 249 bitp = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1; 250 shift = off % wordsPerBitmapWord; 251 xbits = *bitp; 252 bits = xbits >> shift; 253 254 found: 255 // Now we have bits, bitp, and shift correct for 256 // obj pointing at the base of the object. 257 // Only care about allocated and not marked. 258 if((bits & (bitAllocated|bitMarked)) != bitAllocated) 259 return false; 260 if(work.nproc == 1) 261 *bitp |= bitMarked<<shift; 262 else { 263 for(;;) { 264 x = *bitp; 265 if(x & (bitMarked<<shift)) 266 return false; 267 if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift)))) 268 break; 269 } 270 } 271 272 // The object is now marked 273 return true; 274 } 275 276 // PtrTarget is a structure used by intermediate buffers. 277 // The intermediate buffers hold GC data before it 278 // is moved/flushed to the work buffer (Workbuf). 279 // The size of an intermediate buffer is very small, 280 // such as 32 or 64 elements. 281 typedef struct PtrTarget PtrTarget; 282 struct PtrTarget 283 { 284 void *p; 285 uintptr ti; 286 }; 287 288 typedef struct BufferList BufferList; 289 struct BufferList 290 { 291 PtrTarget ptrtarget[IntermediateBufferCapacity]; 292 Obj obj[IntermediateBufferCapacity]; 293 uint32 busy; 294 byte pad[CacheLineSize]; 295 }; 296 #pragma dataflag 16 // no pointers 297 static BufferList bufferList[MaxGcproc]; 298 299 static Type *itabtype; 300 301 static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj); 302 303 // flushptrbuf moves data from the PtrTarget buffer to the work buffer. 304 // The PtrTarget buffer contains blocks irrespective of whether the blocks have been marked or scanned, 305 // while the work buffer contains blocks which have been marked 306 // and are prepared to be scanned by the garbage collector. 307 // 308 // _wp, _wbuf, _nobj are input/output parameters and are specifying the work buffer. 309 // 310 // A simplified drawing explaining how the todo-list moves from a structure to another: 311 // 312 // scanblock 313 // (find pointers) 314 // Obj ------> PtrTarget (pointer targets) 315 // ↑ | 316 // | | 317 // `----------' 318 // flushptrbuf 319 // (find block start, mark and enqueue) 320 static void 321 flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj) 322 { 323 byte *p, *arena_start, *obj; 324 uintptr size, *bitp, bits, shift, j, x, xbits, off, nobj, ti, n; 325 MSpan *s; 326 PageID k; 327 Obj *wp; 328 Workbuf *wbuf; 329 PtrTarget *ptrbuf_end; 330 331 arena_start = runtime·mheap->arena_start; 332 333 wp = *_wp; 334 wbuf = *_wbuf; 335 nobj = *_nobj; 336 337 ptrbuf_end = *ptrbufpos; 338 n = ptrbuf_end - ptrbuf; 339 *ptrbufpos = ptrbuf; 340 341 if(CollectStats) { 342 runtime·xadd64(&gcstats.ptr.sum, n); 343 runtime·xadd64(&gcstats.ptr.cnt, 1); 344 } 345 346 // If buffer is nearly full, get a new one. 
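	// A single flush must always fit into a freshly emptied Workbuf:
	// ptrbuf holds at most IntermediateBufferCapacity (64) entries, while
	// wbuf->obj has room for a few hundred (see the Workbuf definition),
	// which is what the "ptrbuf has to be smaller than WorkBuf" throw
	// below enforces.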
347 if(wbuf == nil || nobj+n >= nelem(wbuf->obj)) { 348 if(wbuf != nil) 349 wbuf->nobj = nobj; 350 wbuf = getempty(wbuf); 351 wp = wbuf->obj; 352 nobj = 0; 353 354 if(n >= nelem(wbuf->obj)) 355 runtime·throw("ptrbuf has to be smaller than WorkBuf"); 356 } 357 358 // TODO(atom): This block is a branch of an if-then-else statement. 359 // The single-threaded branch may be added in a next CL. 360 { 361 // Multi-threaded version. 362 363 while(ptrbuf < ptrbuf_end) { 364 obj = ptrbuf->p; 365 ti = ptrbuf->ti; 366 ptrbuf++; 367 368 // obj belongs to interval [mheap.arena_start, mheap.arena_used). 369 if(Debug > 1) { 370 if(obj < runtime·mheap->arena_start || obj >= runtime·mheap->arena_used) 371 runtime·throw("object is outside of mheap"); 372 } 373 374 // obj may be a pointer to a live object. 375 // Try to find the beginning of the object. 376 377 // Round down to word boundary. 378 if(((uintptr)obj & ((uintptr)PtrSize-1)) != 0) { 379 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); 380 ti = 0; 381 } 382 383 // Find bits for this word. 384 off = (uintptr*)obj - (uintptr*)arena_start; 385 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; 386 shift = off % wordsPerBitmapWord; 387 xbits = *bitp; 388 bits = xbits >> shift; 389 390 // Pointing at the beginning of a block? 391 if((bits & (bitAllocated|bitBlockBoundary)) != 0) 392 goto found; 393 394 ti = 0; 395 396 // Pointing just past the beginning? 397 // Scan backward a little to find a block boundary. 398 for(j=shift; j-->0; ) { 399 if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) { 400 obj = (byte*)obj - (shift-j)*PtrSize; 401 shift = j; 402 bits = xbits>>shift; 403 goto found; 404 } 405 } 406 407 // Otherwise consult span table to find beginning. 408 // (Manually inlined copy of MHeap_LookupMaybe.) 409 k = (uintptr)obj>>PageShift; 410 x = k; 411 if(sizeof(void*) == 8) 412 x -= (uintptr)arena_start>>PageShift; 413 s = runtime·mheap->map[x]; 414 if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse) 415 continue; 416 p = (byte*)((uintptr)s->start<<PageShift); 417 if(s->sizeclass == 0) { 418 obj = p; 419 } else { 420 if((byte*)obj >= (byte*)s->limit) 421 continue; 422 size = s->elemsize; 423 int32 i = ((byte*)obj - p)/size; 424 obj = p+i*size; 425 } 426 427 // Now that we know the object header, reload bits. 428 off = (uintptr*)obj - (uintptr*)arena_start; 429 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; 430 shift = off % wordsPerBitmapWord; 431 xbits = *bitp; 432 bits = xbits >> shift; 433 434 found: 435 // Now we have bits, bitp, and shift correct for 436 // obj pointing at the base of the object. 437 // Only care about allocated and not marked. 438 if((bits & (bitAllocated|bitMarked)) != bitAllocated) 439 continue; 440 if(work.nproc == 1) 441 *bitp |= bitMarked<<shift; 442 else { 443 for(;;) { 444 x = *bitp; 445 if(x & (bitMarked<<shift)) 446 goto continue_obj; 447 if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift)))) 448 break; 449 } 450 } 451 452 // If object has no pointers, don't need to scan further. 453 if((bits & bitNoPointers) != 0) 454 continue; 455 456 // Ask span about size class. 457 // (Manually inlined copy of MHeap_Lookup.) 458 x = (uintptr)obj >> PageShift; 459 if(sizeof(void*) == 8) 460 x -= (uintptr)arena_start>>PageShift; 461 s = runtime·mheap->map[x]; 462 463 PREFETCH(obj); 464 465 *wp = (Obj){obj, s->elemsize, ti}; 466 wp++; 467 nobj++; 468 continue_obj:; 469 } 470 471 // If another proc wants a pointer, give it some. 
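		// Handoff happens only when another worker is idle (work.nwait > 0),
		// more than handoffThreshold objects are queued here, and nothing is
		// already available on work.full. handoff() splits the current buffer
		// roughly in half: the first half stays in the old buffer, which is
		// pushed onto work.full for an idle worker to steal, and scanning
		// continues with a fresh buffer holding the second half.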
472 if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) { 473 wbuf->nobj = nobj; 474 wbuf = handoff(wbuf); 475 nobj = wbuf->nobj; 476 wp = wbuf->obj + nobj; 477 } 478 } 479 480 *_wp = wp; 481 *_wbuf = wbuf; 482 *_nobj = nobj; 483 } 484 485 static void 486 flushobjbuf(Obj *objbuf, Obj **objbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj) 487 { 488 uintptr nobj, off; 489 Obj *wp, obj; 490 Workbuf *wbuf; 491 Obj *objbuf_end; 492 493 wp = *_wp; 494 wbuf = *_wbuf; 495 nobj = *_nobj; 496 497 objbuf_end = *objbufpos; 498 *objbufpos = objbuf; 499 500 while(objbuf < objbuf_end) { 501 obj = *objbuf++; 502 503 // Align obj.b to a word boundary. 504 off = (uintptr)obj.p & (PtrSize-1); 505 if(off != 0) { 506 obj.p += PtrSize - off; 507 obj.n -= PtrSize - off; 508 obj.ti = 0; 509 } 510 511 if(obj.p == nil || obj.n == 0) 512 continue; 513 514 // If buffer is full, get a new one. 515 if(wbuf == nil || nobj >= nelem(wbuf->obj)) { 516 if(wbuf != nil) 517 wbuf->nobj = nobj; 518 wbuf = getempty(wbuf); 519 wp = wbuf->obj; 520 nobj = 0; 521 } 522 523 *wp = obj; 524 wp++; 525 nobj++; 526 } 527 528 // If another proc wants a pointer, give it some. 529 if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) { 530 wbuf->nobj = nobj; 531 wbuf = handoff(wbuf); 532 nobj = wbuf->nobj; 533 wp = wbuf->obj + nobj; 534 } 535 536 *_wp = wp; 537 *_wbuf = wbuf; 538 *_nobj = nobj; 539 } 540 541 // Program that scans the whole block and treats every block element as a potential pointer 542 static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR}; 543 544 // Hashmap iterator program 545 static uintptr mapProg[2] = {0, GC_MAP_NEXT}; 546 547 // Hchan program 548 static uintptr chanProg[2] = {0, GC_CHAN}; 549 550 // Local variables of a program fragment or loop 551 typedef struct Frame Frame; 552 struct Frame { 553 uintptr count, elemsize, b; 554 uintptr *loop_or_ret; 555 }; 556 557 // Sanity check for the derived type info objti. 558 static void 559 checkptr(void *obj, uintptr objti) 560 { 561 uintptr *pc1, *pc2, type, tisize, i, j, x; 562 byte *objstart; 563 Type *t; 564 MSpan *s; 565 566 if(!Debug) 567 runtime·throw("checkptr is debug only"); 568 569 if(obj < runtime·mheap->arena_start || obj >= runtime·mheap->arena_used) 570 return; 571 type = runtime·gettype(obj); 572 t = (Type*)(type & ~(uintptr)(PtrSize-1)); 573 if(t == nil) 574 return; 575 x = (uintptr)obj >> PageShift; 576 if(sizeof(void*) == 8) 577 x -= (uintptr)(runtime·mheap->arena_start)>>PageShift; 578 s = runtime·mheap->map[x]; 579 objstart = (byte*)((uintptr)s->start<<PageShift); 580 if(s->sizeclass != 0) { 581 i = ((byte*)obj - objstart)/s->elemsize; 582 objstart += i*s->elemsize; 583 } 584 tisize = *(uintptr*)objti; 585 // Sanity check for object size: it should fit into the memory block. 586 if((byte*)obj + tisize > objstart + s->elemsize) 587 runtime·throw("invalid gc type info"); 588 if(obj != objstart) 589 return; 590 // If obj points to the beginning of the memory block, 591 // check type info as well. 592 if(t->string == nil || 593 // Gob allocates unsafe pointers for indirection. 594 (runtime·strcmp(t->string->str, (byte*)"unsafe.Pointer") && 595 // Runtime and gc think differently about closures. 596 runtime·strstr(t->string->str, (byte*)"struct { F uintptr") != t->string->str)) { 597 pc1 = (uintptr*)objti; 598 pc2 = (uintptr*)t->gc; 599 // A simple best-effort check until first GC_END. 
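	// Both pc1 (the supplied type info) and pc2 (the compiler-generated
	// program at t->gc) use the same encoding: word 0 is the object size,
	// followed by opcodes and their inline operands until GC_END, as decoded
	// in scanblock below. A hypothetical program for
	// struct { byte *p; Slice s; } might look roughly like
	//
	//	{32, GC_PTR, 0, <ptr ti>, GC_SLICE, 8, <elem ti>, GC_END}
	//
	// so a word-by-word comparison of the two programs is a reasonable
	// consistency check.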
600 for(j = 1; pc1[j] != GC_END && pc2[j] != GC_END; j++) { 601 if(pc1[j] != pc2[j]) { 602 runtime·printf("invalid gc type info for '%s' at %p, type info %p, block info %p\n", 603 t->string ? (int8*)t->string->str : (int8*)"?", j, pc1[j], pc2[j]); 604 runtime·throw("invalid gc type info"); 605 } 606 } 607 } 608 } 609 610 // scanblock scans a block of n bytes starting at pointer b for references 611 // to other objects, scanning any it finds recursively until there are no 612 // unscanned objects left. Instead of using an explicit recursion, it keeps 613 // a work list in the Workbuf* structures and loops in the main function 614 // body. Keeping an explicit work list is easier on the stack allocator and 615 // more efficient. 616 // 617 // wbuf: current work buffer 618 // wp: storage for next queued pointer (write pointer) 619 // nobj: number of queued objects 620 static void 621 scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) 622 { 623 byte *b, *arena_start, *arena_used; 624 uintptr n, i, end_b, elemsize, size, ti, objti, count, type; 625 uintptr *pc, precise_type, nominal_size; 626 uintptr *map_ret, mapkey_size, mapval_size, mapkey_ti, mapval_ti, *chan_ret; 627 void *obj; 628 Type *t; 629 Slice *sliceptr; 630 Frame *stack_ptr, stack_top, stack[GC_STACK_CAPACITY+4]; 631 BufferList *scanbuffers; 632 PtrTarget *ptrbuf, *ptrbuf_end, *ptrbufpos; 633 Obj *objbuf, *objbuf_end, *objbufpos; 634 Eface *eface; 635 Iface *iface; 636 Hmap *hmap; 637 MapType *maptype; 638 bool mapkey_kind, mapval_kind; 639 struct hash_gciter map_iter; 640 struct hash_gciter_data d; 641 Hchan *chan; 642 ChanType *chantype; 643 644 if(sizeof(Workbuf) % PageSize != 0) 645 runtime·throw("scanblock: size of Workbuf is suboptimal"); 646 647 // Memory arena parameters. 648 arena_start = runtime·mheap->arena_start; 649 arena_used = runtime·mheap->arena_used; 650 651 stack_ptr = stack+nelem(stack)-1; 652 653 precise_type = false; 654 nominal_size = 0; 655 656 // Allocate ptrbuf 657 { 658 scanbuffers = &bufferList[m->helpgc]; 659 ptrbuf = &scanbuffers->ptrtarget[0]; 660 ptrbuf_end = &scanbuffers->ptrtarget[0] + nelem(scanbuffers->ptrtarget); 661 objbuf = &scanbuffers->obj[0]; 662 objbuf_end = &scanbuffers->obj[0] + nelem(scanbuffers->obj); 663 } 664 665 ptrbufpos = ptrbuf; 666 objbufpos = objbuf; 667 668 // (Silence the compiler) 669 map_ret = nil; 670 mapkey_size = mapval_size = 0; 671 mapkey_kind = mapval_kind = false; 672 mapkey_ti = mapval_ti = 0; 673 chan = nil; 674 chantype = nil; 675 chan_ret = nil; 676 677 goto next_block; 678 679 for(;;) { 680 // Each iteration scans the block b of length n, queueing pointers in 681 // the work buffer. 682 if(Debug > 1) { 683 runtime·printf("scanblock %p %D\n", b, (int64)n); 684 } 685 686 if(CollectStats) { 687 runtime·xadd64(&gcstats.nbytes, n); 688 runtime·xadd64(&gcstats.obj.sum, nobj); 689 runtime·xadd64(&gcstats.obj.cnt, 1); 690 } 691 692 if(ti != 0) { 693 pc = (uintptr*)(ti & ~(uintptr)PC_BITS); 694 precise_type = (ti & PRECISE); 695 stack_top.elemsize = pc[0]; 696 if(!precise_type) 697 nominal_size = pc[0]; 698 if(ti & LOOP) { 699 stack_top.count = 0; // 0 means an infinite number of iterations 700 stack_top.loop_or_ret = pc+1; 701 } else { 702 stack_top.count = 1; 703 } 704 if(Debug) { 705 // Simple sanity check for provided type info ti: 706 // The declared size of the object must be not larger than the actual size 707 // (it can be smaller due to inferior pointers). 
708 // It's difficult to make a comprehensive check due to inferior pointers, 709 // reflection, gob, etc. 710 if(pc[0] > n) { 711 runtime·printf("invalid gc type info: type info size %p, block size %p\n", pc[0], n); 712 runtime·throw("invalid gc type info"); 713 } 714 } 715 } else if(UseSpanType) { 716 if(CollectStats) 717 runtime·xadd64(&gcstats.obj.notype, 1); 718 719 type = runtime·gettype(b); 720 if(type != 0) { 721 if(CollectStats) 722 runtime·xadd64(&gcstats.obj.typelookup, 1); 723 724 t = (Type*)(type & ~(uintptr)(PtrSize-1)); 725 switch(type & (PtrSize-1)) { 726 case TypeInfo_SingleObject: 727 pc = (uintptr*)t->gc; 728 precise_type = true; // type information about 'b' is precise 729 stack_top.count = 1; 730 stack_top.elemsize = pc[0]; 731 break; 732 case TypeInfo_Array: 733 pc = (uintptr*)t->gc; 734 if(pc[0] == 0) 735 goto next_block; 736 precise_type = true; // type information about 'b' is precise 737 stack_top.count = 0; // 0 means an infinite number of iterations 738 stack_top.elemsize = pc[0]; 739 stack_top.loop_or_ret = pc+1; 740 break; 741 case TypeInfo_Map: 742 hmap = (Hmap*)b; 743 maptype = (MapType*)t; 744 if(hash_gciter_init(hmap, &map_iter)) { 745 mapkey_size = maptype->key->size; 746 mapkey_kind = maptype->key->kind; 747 mapkey_ti = (uintptr)maptype->key->gc | PRECISE; 748 mapval_size = maptype->elem->size; 749 mapval_kind = maptype->elem->kind; 750 mapval_ti = (uintptr)maptype->elem->gc | PRECISE; 751 752 map_ret = nil; 753 pc = mapProg; 754 } else { 755 goto next_block; 756 } 757 break; 758 case TypeInfo_Chan: 759 chan = (Hchan*)b; 760 chantype = (ChanType*)t; 761 chan_ret = nil; 762 pc = chanProg; 763 break; 764 default: 765 runtime·throw("scanblock: invalid type"); 766 return; 767 } 768 } else { 769 pc = defaultProg; 770 } 771 } else { 772 pc = defaultProg; 773 } 774 775 if(IgnorePreciseGC) 776 pc = defaultProg; 777 778 pc++; 779 stack_top.b = (uintptr)b; 780 781 end_b = (uintptr)b + n - PtrSize; 782 783 for(;;) { 784 if(CollectStats) 785 runtime·xadd64(&gcstats.instr[pc[0]], 1); 786 787 obj = nil; 788 objti = 0; 789 switch(pc[0]) { 790 case GC_PTR: 791 obj = *(void**)(stack_top.b + pc[1]); 792 objti = pc[2]; 793 pc += 3; 794 if(Debug) 795 checkptr(obj, objti); 796 break; 797 798 case GC_SLICE: 799 sliceptr = (Slice*)(stack_top.b + pc[1]); 800 if(sliceptr->cap != 0) { 801 obj = sliceptr->array; 802 objti = pc[2] | PRECISE | LOOP; 803 } 804 pc += 3; 805 break; 806 807 case GC_APTR: 808 obj = *(void**)(stack_top.b + pc[1]); 809 pc += 2; 810 break; 811 812 case GC_STRING: 813 obj = *(void**)(stack_top.b + pc[1]); 814 markonly(obj); 815 pc += 2; 816 continue; 817 818 case GC_EFACE: 819 eface = (Eface*)(stack_top.b + pc[1]); 820 pc += 2; 821 if(eface->type == nil) 822 continue; 823 824 // eface->type 825 t = eface->type; 826 if((void*)t >= arena_start && (void*)t < arena_used) { 827 *ptrbufpos++ = (PtrTarget){t, 0}; 828 if(ptrbufpos == ptrbuf_end) 829 flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj); 830 } 831 832 // eface->data 833 if(eface->data >= arena_start && eface->data < arena_used) { 834 if(t->size <= sizeof(void*)) { 835 if((t->kind & KindNoPointers)) 836 continue; 837 838 obj = eface->data; 839 if((t->kind & ~KindNoPointers) == KindPtr) 840 objti = (uintptr)((PtrType*)t)->elem->gc; 841 } else { 842 obj = eface->data; 843 objti = (uintptr)t->gc; 844 } 845 } 846 break; 847 848 case GC_IFACE: 849 iface = (Iface*)(stack_top.b + pc[1]); 850 pc += 2; 851 if(iface->tab == nil) 852 continue; 853 854 // iface->tab 855 if((void*)iface->tab >= arena_start && 
(void*)iface->tab < arena_used) { 856 *ptrbufpos++ = (PtrTarget){iface->tab, (uintptr)itabtype->gc}; 857 if(ptrbufpos == ptrbuf_end) 858 flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj); 859 } 860 861 // iface->data 862 if(iface->data >= arena_start && iface->data < arena_used) { 863 t = iface->tab->type; 864 if(t->size <= sizeof(void*)) { 865 if((t->kind & KindNoPointers)) 866 continue; 867 868 obj = iface->data; 869 if((t->kind & ~KindNoPointers) == KindPtr) 870 objti = (uintptr)((PtrType*)t)->elem->gc; 871 } else { 872 obj = iface->data; 873 objti = (uintptr)t->gc; 874 } 875 } 876 break; 877 878 case GC_DEFAULT_PTR: 879 while(stack_top.b <= end_b) { 880 obj = *(byte**)stack_top.b; 881 stack_top.b += PtrSize; 882 if(obj >= arena_start && obj < arena_used) { 883 *ptrbufpos++ = (PtrTarget){obj, 0}; 884 if(ptrbufpos == ptrbuf_end) 885 flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj); 886 } 887 } 888 goto next_block; 889 890 case GC_END: 891 if(--stack_top.count != 0) { 892 // Next iteration of a loop if possible. 893 stack_top.b += stack_top.elemsize; 894 if(stack_top.b + stack_top.elemsize <= end_b+PtrSize) { 895 pc = stack_top.loop_or_ret; 896 continue; 897 } 898 i = stack_top.b; 899 } else { 900 // Stack pop if possible. 901 if(stack_ptr+1 < stack+nelem(stack)) { 902 pc = stack_top.loop_or_ret; 903 stack_top = *(++stack_ptr); 904 continue; 905 } 906 i = (uintptr)b + nominal_size; 907 } 908 if(!precise_type) { 909 // Quickly scan [b+i,b+n) for possible pointers. 910 for(; i<=end_b; i+=PtrSize) { 911 if(*(byte**)i != nil) { 912 // Found a value that may be a pointer. 913 // Do a rescan of the entire block. 914 enqueue((Obj){b, n, 0}, &wbuf, &wp, &nobj); 915 if(CollectStats) { 916 runtime·xadd64(&gcstats.rescan, 1); 917 runtime·xadd64(&gcstats.rescanbytes, n); 918 } 919 break; 920 } 921 } 922 } 923 goto next_block; 924 925 case GC_ARRAY_START: 926 i = stack_top.b + pc[1]; 927 count = pc[2]; 928 elemsize = pc[3]; 929 pc += 4; 930 931 // Stack push. 932 *stack_ptr-- = stack_top; 933 stack_top = (Frame){count, elemsize, i, pc}; 934 continue; 935 936 case GC_ARRAY_NEXT: 937 if(--stack_top.count != 0) { 938 stack_top.b += stack_top.elemsize; 939 pc = stack_top.loop_or_ret; 940 } else { 941 // Stack pop. 942 stack_top = *(++stack_ptr); 943 pc += 1; 944 } 945 continue; 946 947 case GC_CALL: 948 // Stack push. 949 *stack_ptr-- = stack_top; 950 stack_top = (Frame){1, 0, stack_top.b + pc[1], pc+3 /*return address*/}; 951 pc = (uintptr*)((byte*)pc + *(int32*)(pc+2)); // target of the CALL instruction 952 continue; 953 954 case GC_MAP_PTR: 955 hmap = *(Hmap**)(stack_top.b + pc[1]); 956 if(hmap == nil) { 957 pc += 3; 958 continue; 959 } 960 if(markonly(hmap)) { 961 maptype = (MapType*)pc[2]; 962 if(hash_gciter_init(hmap, &map_iter)) { 963 mapkey_size = maptype->key->size; 964 mapkey_kind = maptype->key->kind; 965 mapkey_ti = (uintptr)maptype->key->gc | PRECISE; 966 mapval_size = maptype->elem->size; 967 mapval_kind = maptype->elem->kind; 968 mapval_ti = (uintptr)maptype->elem->gc | PRECISE; 969 970 // Start mapProg. 971 map_ret = pc+3; 972 pc = mapProg+1; 973 } else { 974 pc += 3; 975 } 976 } else { 977 pc += 3; 978 } 979 continue; 980 981 case GC_MAP_NEXT: 982 // Add all keys and values to buffers, mark all subtables. 983 while(hash_gciter_next(&map_iter, &d)) { 984 // buffers: reserve space for 2 objects. 
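				// Each map entry can contribute up to two items: a key and a
				// value. Data stored directly in the table is queued as an Obj
				// with its precise type info, while an indirect key or value
				// (d.indirectkey/d.indirectval) is a pointer-sized slot, so
				// only the pointer it holds is pushed as a PtrTarget.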
985 if(ptrbufpos+2 >= ptrbuf_end) 986 flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj); 987 if(objbufpos+2 >= objbuf_end) 988 flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj); 989 990 if(d.st != nil) 991 markonly(d.st); 992 993 if(d.key_data != nil) { 994 if(!(mapkey_kind & KindNoPointers) || d.indirectkey) { 995 if(!d.indirectkey) 996 *objbufpos++ = (Obj){d.key_data, mapkey_size, mapkey_ti}; 997 else { 998 if(Debug) { 999 obj = *(void**)d.key_data; 1000 if(!(arena_start <= obj && obj < arena_used)) 1001 runtime·throw("scanblock: inconsistent hashmap"); 1002 } 1003 *ptrbufpos++ = (PtrTarget){*(void**)d.key_data, mapkey_ti}; 1004 } 1005 } 1006 if(!(mapval_kind & KindNoPointers) || d.indirectval) { 1007 if(!d.indirectval) 1008 *objbufpos++ = (Obj){d.val_data, mapval_size, mapval_ti}; 1009 else { 1010 if(Debug) { 1011 obj = *(void**)d.val_data; 1012 if(!(arena_start <= obj && obj < arena_used)) 1013 runtime·throw("scanblock: inconsistent hashmap"); 1014 } 1015 *ptrbufpos++ = (PtrTarget){*(void**)d.val_data, mapval_ti}; 1016 } 1017 } 1018 } 1019 } 1020 if(map_ret == nil) 1021 goto next_block; 1022 pc = map_ret; 1023 continue; 1024 1025 case GC_REGION: 1026 obj = (void*)(stack_top.b + pc[1]); 1027 size = pc[2]; 1028 objti = pc[3]; 1029 pc += 4; 1030 1031 *objbufpos++ = (Obj){obj, size, objti}; 1032 if(objbufpos == objbuf_end) 1033 flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj); 1034 continue; 1035 1036 case GC_CHAN_PTR: 1037 // Similar to GC_MAP_PTR 1038 chan = *(Hchan**)(stack_top.b + pc[1]); 1039 if(chan == nil) { 1040 pc += 3; 1041 continue; 1042 } 1043 if(markonly(chan)) { 1044 chantype = (ChanType*)pc[2]; 1045 if(!(chantype->elem->kind & KindNoPointers)) { 1046 // Start chanProg. 1047 chan_ret = pc+3; 1048 pc = chanProg+1; 1049 continue; 1050 } 1051 } 1052 pc += 3; 1053 continue; 1054 1055 case GC_CHAN: 1056 // There are no heap pointers in struct Hchan, 1057 // so we can ignore the leading sizeof(Hchan) bytes. 1058 if(!(chantype->elem->kind & KindNoPointers)) { 1059 // Channel's buffer follows Hchan immediately in memory. 1060 // Size of buffer (cap(c)) is second int in the chan struct. 1061 n = ((uintgo*)chan)[1]; 1062 if(n > 0) { 1063 // TODO(atom): split into two chunks so that only the 1064 // in-use part of the circular buffer is scanned. 1065 // (Channel routines zero the unused part, so the current 1066 // code does not lead to leaks, it's just a little inefficient.) 1067 *objbufpos++ = (Obj){(byte*)chan+runtime·Hchansize, n*chantype->elem->size, 1068 (uintptr)chantype->elem->gc | PRECISE | LOOP}; 1069 if(objbufpos == objbuf_end) 1070 flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj); 1071 } 1072 } 1073 if(chan_ret == nil) 1074 goto next_block; 1075 pc = chan_ret; 1076 continue; 1077 1078 default: 1079 runtime·throw("scanblock: invalid GC instruction"); 1080 return; 1081 } 1082 1083 if(obj >= arena_start && obj < arena_used) { 1084 *ptrbufpos++ = (PtrTarget){obj, objti}; 1085 if(ptrbufpos == ptrbuf_end) 1086 flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj); 1087 } 1088 } 1089 1090 next_block: 1091 // Done scanning [b, b+n). Prepare for the next iteration of 1092 // the loop by setting b, n, ti to the parameters for the next block. 1093 1094 if(nobj == 0) { 1095 flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj); 1096 flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj); 1097 1098 if(nobj == 0) { 1099 if(!keepworking) { 1100 if(wbuf) 1101 putempty(wbuf); 1102 goto endscan; 1103 } 1104 // Emptied our buffer: refill. 
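			// getfull also acts as the termination barrier for parallel
			// marking: it advertises this worker as idle via work.nwait and
			// spins, yields, then sleeps waiting for work.full to be refilled.
			// Once work.nwait == work.nproc, every worker is idle at the same
			// time, so no gray objects remain and getfull returns nil, ending
			// the scan loop.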
1105 wbuf = getfull(wbuf); 1106 if(wbuf == nil) 1107 goto endscan; 1108 nobj = wbuf->nobj; 1109 wp = wbuf->obj + wbuf->nobj; 1110 } 1111 } 1112 1113 // Fetch b from the work buffer. 1114 --wp; 1115 b = wp->p; 1116 n = wp->n; 1117 ti = wp->ti; 1118 nobj--; 1119 } 1120 1121 endscan:; 1122 } 1123 1124 // debug_scanblock is the debug copy of scanblock. 1125 // it is simpler, slower, single-threaded, recursive, 1126 // and uses bitSpecial as the mark bit. 1127 static void 1128 debug_scanblock(byte *b, uintptr n) 1129 { 1130 byte *obj, *p; 1131 void **vp; 1132 uintptr size, *bitp, bits, shift, i, xbits, off; 1133 MSpan *s; 1134 1135 if(!DebugMark) 1136 runtime·throw("debug_scanblock without DebugMark"); 1137 1138 if((intptr)n < 0) { 1139 runtime·printf("debug_scanblock %p %D\n", b, (int64)n); 1140 runtime·throw("debug_scanblock"); 1141 } 1142 1143 // Align b to a word boundary. 1144 off = (uintptr)b & (PtrSize-1); 1145 if(off != 0) { 1146 b += PtrSize - off; 1147 n -= PtrSize - off; 1148 } 1149 1150 vp = (void**)b; 1151 n /= PtrSize; 1152 for(i=0; i<n; i++) { 1153 obj = (byte*)vp[i]; 1154 1155 // Words outside the arena cannot be pointers. 1156 if((byte*)obj < runtime·mheap->arena_start || (byte*)obj >= runtime·mheap->arena_used) 1157 continue; 1158 1159 // Round down to word boundary. 1160 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); 1161 1162 // Consult span table to find beginning. 1163 s = runtime·MHeap_LookupMaybe(runtime·mheap, obj); 1164 if(s == nil) 1165 continue; 1166 1167 p = (byte*)((uintptr)s->start<<PageShift); 1168 size = s->elemsize; 1169 if(s->sizeclass == 0) { 1170 obj = p; 1171 } else { 1172 if((byte*)obj >= (byte*)s->limit) 1173 continue; 1174 int32 i = ((byte*)obj - p)/size; 1175 obj = p+i*size; 1176 } 1177 1178 // Now that we know the object header, reload bits. 1179 off = (uintptr*)obj - (uintptr*)runtime·mheap->arena_start; 1180 bitp = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1; 1181 shift = off % wordsPerBitmapWord; 1182 xbits = *bitp; 1183 bits = xbits >> shift; 1184 1185 // Now we have bits, bitp, and shift correct for 1186 // obj pointing at the base of the object. 1187 // If not allocated or already marked, done. 1188 if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0) // NOTE: bitSpecial not bitMarked 1189 continue; 1190 *bitp |= bitSpecial<<shift; 1191 if(!(bits & bitMarked)) 1192 runtime·printf("found unmarked block %p in %p\n", obj, vp+i); 1193 1194 // If object has no pointers, don't need to scan further. 1195 if((bits & bitNoPointers) != 0) 1196 continue; 1197 1198 debug_scanblock(obj, size); 1199 } 1200 } 1201 1202 // Append obj to the work buffer. 1203 // _wbuf, _wp, _nobj are input/output parameters and are specifying the work buffer. 1204 static void 1205 enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj) 1206 { 1207 uintptr nobj, off; 1208 Obj *wp; 1209 Workbuf *wbuf; 1210 1211 if(Debug > 1) 1212 runtime·printf("append obj(%p %D %p)\n", obj.p, (int64)obj.n, obj.ti); 1213 1214 // Align obj.b to a word boundary. 1215 off = (uintptr)obj.p & (PtrSize-1); 1216 if(off != 0) { 1217 obj.p += PtrSize - off; 1218 obj.n -= PtrSize - off; 1219 obj.ti = 0; 1220 } 1221 1222 if(obj.p == nil || obj.n == 0) 1223 return; 1224 1225 // Load work buffer state 1226 wp = *_wp; 1227 wbuf = *_wbuf; 1228 nobj = *_nobj; 1229 1230 // If another proc wants a pointer, give it some. 
1231 if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) { 1232 wbuf->nobj = nobj; 1233 wbuf = handoff(wbuf); 1234 nobj = wbuf->nobj; 1235 wp = wbuf->obj + nobj; 1236 } 1237 1238 // If buffer is full, get a new one. 1239 if(wbuf == nil || nobj >= nelem(wbuf->obj)) { 1240 if(wbuf != nil) 1241 wbuf->nobj = nobj; 1242 wbuf = getempty(wbuf); 1243 wp = wbuf->obj; 1244 nobj = 0; 1245 } 1246 1247 *wp = obj; 1248 wp++; 1249 nobj++; 1250 1251 // Save work buffer state 1252 *_wp = wp; 1253 *_wbuf = wbuf; 1254 *_nobj = nobj; 1255 } 1256 1257 static void 1258 markroot(ParFor *desc, uint32 i) 1259 { 1260 Obj *wp; 1261 Workbuf *wbuf; 1262 uintptr nobj; 1263 1264 USED(&desc); 1265 wp = nil; 1266 wbuf = nil; 1267 nobj = 0; 1268 enqueue(work.roots[i], &wbuf, &wp, &nobj); 1269 scanblock(wbuf, wp, nobj, false); 1270 } 1271 1272 // Get an empty work buffer off the work.empty list, 1273 // allocating new buffers as needed. 1274 static Workbuf* 1275 getempty(Workbuf *b) 1276 { 1277 if(b != nil) 1278 runtime·lfstackpush(&work.full, &b->node); 1279 b = (Workbuf*)runtime·lfstackpop(&work.empty); 1280 if(b == nil) { 1281 // Need to allocate. 1282 runtime·lock(&work); 1283 if(work.nchunk < sizeof *b) { 1284 work.nchunk = 1<<20; 1285 work.chunk = runtime·SysAlloc(work.nchunk); 1286 if(work.chunk == nil) 1287 runtime·throw("runtime: cannot allocate memory"); 1288 } 1289 b = (Workbuf*)work.chunk; 1290 work.chunk += sizeof *b; 1291 work.nchunk -= sizeof *b; 1292 runtime·unlock(&work); 1293 } 1294 b->nobj = 0; 1295 return b; 1296 } 1297 1298 static void 1299 putempty(Workbuf *b) 1300 { 1301 if(CollectStats) 1302 runtime·xadd64(&gcstats.putempty, 1); 1303 1304 runtime·lfstackpush(&work.empty, &b->node); 1305 } 1306 1307 // Get a full work buffer off the work.full list, or return nil. 1308 static Workbuf* 1309 getfull(Workbuf *b) 1310 { 1311 int32 i; 1312 1313 if(CollectStats) 1314 runtime·xadd64(&gcstats.getfull, 1); 1315 1316 if(b != nil) 1317 runtime·lfstackpush(&work.empty, &b->node); 1318 b = (Workbuf*)runtime·lfstackpop(&work.full); 1319 if(b != nil || work.nproc == 1) 1320 return b; 1321 1322 runtime·xadd(&work.nwait, +1); 1323 for(i=0;; i++) { 1324 if(work.full != 0) { 1325 runtime·xadd(&work.nwait, -1); 1326 b = (Workbuf*)runtime·lfstackpop(&work.full); 1327 if(b != nil) 1328 return b; 1329 runtime·xadd(&work.nwait, +1); 1330 } 1331 if(work.nwait == work.nproc) 1332 return nil; 1333 if(i < 10) { 1334 m->gcstats.nprocyield++; 1335 runtime·procyield(20); 1336 } else if(i < 20) { 1337 m->gcstats.nosyield++; 1338 runtime·osyield(); 1339 } else { 1340 m->gcstats.nsleep++; 1341 runtime·usleep(100); 1342 } 1343 } 1344 } 1345 1346 static Workbuf* 1347 handoff(Workbuf *b) 1348 { 1349 int32 n; 1350 Workbuf *b1; 1351 1352 // Make new buffer with half of b's pointers. 1353 b1 = getempty(nil); 1354 n = b->nobj/2; 1355 b->nobj -= n; 1356 b1->nobj = n; 1357 runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]); 1358 m->gcstats.nhandoff++; 1359 m->gcstats.nhandoffcnt += n; 1360 1361 // Put b on full list - let first half of b get stolen. 
1362 runtime·lfstackpush(&work.full, &b->node); 1363 return b1; 1364 } 1365 1366 static void 1367 addroot(Obj obj) 1368 { 1369 uint32 cap; 1370 Obj *new; 1371 1372 if(work.nroot >= work.rootcap) { 1373 cap = PageSize/sizeof(Obj); 1374 if(cap < 2*work.rootcap) 1375 cap = 2*work.rootcap; 1376 new = (Obj*)runtime·SysAlloc(cap*sizeof(Obj)); 1377 if(new == nil) 1378 runtime·throw("runtime: cannot allocate memory"); 1379 if(work.roots != nil) { 1380 runtime·memmove(new, work.roots, work.rootcap*sizeof(Obj)); 1381 runtime·SysFree(work.roots, work.rootcap*sizeof(Obj)); 1382 } 1383 work.roots = new; 1384 work.rootcap = cap; 1385 } 1386 work.roots[work.nroot] = obj; 1387 work.nroot++; 1388 } 1389 1390 // Scan a stack frame. The doframe parameter is a signal that the previously 1391 // scanned activation has an unknown argument size. When *doframe is true the 1392 // current activation must have its entire frame scanned. Otherwise, only the 1393 // locals need to be scanned. 1394 static void 1395 addframeroots(Func *f, byte*, byte *sp, void *doframe) 1396 { 1397 uintptr outs; 1398 1399 if(thechar == '5') 1400 sp += sizeof(uintptr); 1401 if(f->locals == 0 || *(bool*)doframe == true) 1402 addroot((Obj){sp, f->frame - sizeof(uintptr), 0}); 1403 else if(f->locals > 0) { 1404 outs = f->frame - sizeof(uintptr) - f->locals; 1405 addroot((Obj){sp + outs, f->locals, 0}); 1406 } 1407 if(f->args > 0) 1408 addroot((Obj){sp + f->frame, f->args, 0}); 1409 *(bool*)doframe = (f->args == ArgsSizeUnknown); 1410 } 1411 1412 static void 1413 addstackroots(G *gp) 1414 { 1415 M *mp; 1416 int32 n; 1417 Stktop *stk; 1418 byte *sp, *guard, *pc; 1419 Func *f; 1420 bool doframe; 1421 1422 stk = (Stktop*)gp->stackbase; 1423 guard = (byte*)gp->stackguard; 1424 1425 if(gp == g) { 1426 // Scanning our own stack: start at &gp. 1427 sp = runtime·getcallersp(&gp); 1428 pc = runtime·getcallerpc(&gp); 1429 } else if((mp = gp->m) != nil && mp->helpgc) { 1430 // gchelper's stack is in active use and has no interesting pointers. 1431 return; 1432 } else if(gp->gcstack != (uintptr)nil) { 1433 // Scanning another goroutine that is about to enter or might 1434 // have just exited a system call. It may be executing code such 1435 // as schedlock and may have needed to start a new stack segment. 1436 // Use the stack segment and stack pointer at the time of 1437 // the system call instead, since that won't change underfoot. 1438 sp = (byte*)gp->gcsp; 1439 pc = gp->gcpc; 1440 stk = (Stktop*)gp->gcstack; 1441 guard = (byte*)gp->gcguard; 1442 } else { 1443 // Scanning another goroutine's stack. 1444 // The goroutine is usually asleep (the world is stopped). 1445 sp = (byte*)gp->sched.sp; 1446 pc = gp->sched.pc; 1447 if(ScanStackByFrames && pc == (byte*)runtime·goexit && gp->fnstart != nil) { 1448 // The goroutine has not started. However, its incoming 1449 // arguments are live at the top of the stack and must 1450 // be scanned. No other live values should be on the 1451 // stack. 
1452 f = runtime·findfunc((uintptr)gp->fnstart->fn); 1453 if(f->args > 0) { 1454 if(thechar == '5') 1455 sp += sizeof(uintptr); 1456 addroot((Obj){sp, f->args, 0}); 1457 } 1458 return; 1459 } 1460 } 1461 if (ScanStackByFrames) { 1462 doframe = false; 1463 runtime·gentraceback(pc, sp, nil, gp, 0, nil, 0x7fffffff, addframeroots, &doframe); 1464 } else { 1465 USED(pc); 1466 n = 0; 1467 while(stk) { 1468 if(sp < guard-StackGuard || (byte*)stk < sp) { 1469 runtime·printf("scanstack inconsistent: g%D#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk); 1470 runtime·throw("scanstack"); 1471 } 1472 addroot((Obj){sp, (byte*)stk - sp, (uintptr)defaultProg | PRECISE | LOOP}); 1473 sp = (byte*)stk->gobuf.sp; 1474 guard = stk->stackguard; 1475 stk = (Stktop*)stk->stackbase; 1476 n++; 1477 } 1478 } 1479 } 1480 1481 static void 1482 addfinroots(void *v) 1483 { 1484 uintptr size; 1485 void *base; 1486 1487 size = 0; 1488 if(!runtime·mlookup(v, &base, &size, nil) || !runtime·blockspecial(base)) 1489 runtime·throw("mark - finalizer inconsistency"); 1490 1491 // do not mark the finalizer block itself. just mark the things it points at. 1492 addroot((Obj){base, size, 0}); 1493 } 1494 1495 static void 1496 addroots(void) 1497 { 1498 G *gp; 1499 FinBlock *fb; 1500 MSpan *s, **allspans; 1501 uint32 spanidx; 1502 1503 work.nroot = 0; 1504 1505 // data & bss 1506 // TODO(atom): load balancing 1507 addroot((Obj){data, edata - data, (uintptr)gcdata}); 1508 addroot((Obj){bss, ebss - bss, (uintptr)gcbss}); 1509 1510 // MSpan.types 1511 allspans = runtime·mheap->allspans; 1512 for(spanidx=0; spanidx<runtime·mheap->nspan; spanidx++) { 1513 s = allspans[spanidx]; 1514 if(s->state == MSpanInUse) { 1515 // The garbage collector ignores type pointers stored in MSpan.types: 1516 // - Compiler-generated types are stored outside of heap. 1517 // - The reflect package has runtime-generated types cached in its data structures. 1518 // The garbage collector relies on finding the references via that cache. 
1519 switch(s->types.compression) { 1520 case MTypes_Empty: 1521 case MTypes_Single: 1522 break; 1523 case MTypes_Words: 1524 case MTypes_Bytes: 1525 markonly((byte*)s->types.data); 1526 break; 1527 } 1528 } 1529 } 1530 1531 // stacks 1532 for(gp=runtime·allg; gp!=nil; gp=gp->alllink) { 1533 switch(gp->status){ 1534 default: 1535 runtime·printf("unexpected G.status %d\n", gp->status); 1536 runtime·throw("mark - bad status"); 1537 case Gdead: 1538 break; 1539 case Grunning: 1540 if(gp != g) 1541 runtime·throw("mark - world not stopped"); 1542 addstackroots(gp); 1543 break; 1544 case Grunnable: 1545 case Gsyscall: 1546 case Gwaiting: 1547 addstackroots(gp); 1548 break; 1549 } 1550 } 1551 1552 runtime·walkfintab(addfinroots); 1553 1554 for(fb=allfin; fb; fb=fb->alllink) 1555 addroot((Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0}); 1556 } 1557 1558 static bool 1559 handlespecial(byte *p, uintptr size) 1560 { 1561 FuncVal *fn; 1562 uintptr nret; 1563 FinBlock *block; 1564 Finalizer *f; 1565 1566 if(!runtime·getfinalizer(p, true, &fn, &nret)) { 1567 runtime·setblockspecial(p, false); 1568 runtime·MProf_Free(p, size); 1569 return false; 1570 } 1571 1572 runtime·lock(&finlock); 1573 if(finq == nil || finq->cnt == finq->cap) { 1574 if(finc == nil) { 1575 finc = runtime·SysAlloc(PageSize); 1576 if(finc == nil) 1577 runtime·throw("runtime: cannot allocate memory"); 1578 finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1; 1579 finc->alllink = allfin; 1580 allfin = finc; 1581 } 1582 block = finc; 1583 finc = block->next; 1584 block->next = finq; 1585 finq = block; 1586 } 1587 f = &finq->fin[finq->cnt]; 1588 finq->cnt++; 1589 f->fn = fn; 1590 f->nret = nret; 1591 f->arg = p; 1592 runtime·unlock(&finlock); 1593 return true; 1594 } 1595 1596 // Sweep frees or collects finalizers for blocks not marked in the mark phase. 1597 // It clears the mark bits in preparation for the next GC round. 1598 static void 1599 sweepspan(ParFor *desc, uint32 idx) 1600 { 1601 int32 cl, n, npages; 1602 uintptr size; 1603 byte *p; 1604 MCache *c; 1605 byte *arena_start; 1606 MLink head, *end; 1607 int32 nfree; 1608 byte *type_data; 1609 byte compression; 1610 uintptr type_data_inc; 1611 MSpan *s; 1612 1613 USED(&desc); 1614 s = runtime·mheap->allspans[idx]; 1615 if(s->state != MSpanInUse) 1616 return; 1617 arena_start = runtime·mheap->arena_start; 1618 p = (byte*)(s->start << PageShift); 1619 cl = s->sizeclass; 1620 size = s->elemsize; 1621 if(cl == 0) { 1622 n = 1; 1623 } else { 1624 // Chunk full of small blocks. 1625 npages = runtime·class_to_allocnpages[cl]; 1626 n = (npages << PageShift) / size; 1627 } 1628 nfree = 0; 1629 end = &head; 1630 c = m->mcache; 1631 1632 type_data = (byte*)s->types.data; 1633 type_data_inc = sizeof(uintptr); 1634 compression = s->types.compression; 1635 switch(compression) { 1636 case MTypes_Bytes: 1637 type_data += 8*sizeof(uintptr); 1638 type_data_inc = 1; 1639 break; 1640 } 1641 1642 // Sweep through n objects of given size starting at p. 1643 // This thread owns the span now, so it can manipulate 1644 // the block bitmap without atomic operations. 
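	// For each object in the span the bits decide its fate:
	//	!bitAllocated          - slot is already free; skip it.
	//	bitMarked              - reachable; clear bitMarked (and, in DebugMark
	//	                         mode, the coopted bitSpecial) and keep it.
	//	bitSpecial, not marked - unreachable but has a finalizer or is being
	//	                         profiled; handlespecial may queue the
	//	                         finalizer, in which case the object survives
	//	                         this cycle.
	//	otherwise              - garbage; reset its bits to bitBlockBoundary
	//	                         and free it (the whole span for large
	//	                         objects, onto the local free list for small
	//	                         ones).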
1645 for(; n > 0; n--, p += size, type_data+=type_data_inc) { 1646 uintptr off, *bitp, shift, bits; 1647 1648 off = (uintptr*)p - (uintptr*)arena_start; 1649 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; 1650 shift = off % wordsPerBitmapWord; 1651 bits = *bitp>>shift; 1652 1653 if((bits & bitAllocated) == 0) 1654 continue; 1655 1656 if((bits & bitMarked) != 0) { 1657 if(DebugMark) { 1658 if(!(bits & bitSpecial)) 1659 runtime·printf("found spurious mark on %p\n", p); 1660 *bitp &= ~(bitSpecial<<shift); 1661 } 1662 *bitp &= ~(bitMarked<<shift); 1663 continue; 1664 } 1665 1666 // Special means it has a finalizer or is being profiled. 1667 // In DebugMark mode, the bit has been coopted so 1668 // we have to assume all blocks are special. 1669 if(DebugMark || (bits & bitSpecial) != 0) { 1670 if(handlespecial(p, size)) 1671 continue; 1672 } 1673 1674 // Mark freed; restore block boundary bit. 1675 *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); 1676 1677 if(cl == 0) { 1678 // Free large span. 1679 runtime·unmarkspan(p, 1<<PageShift); 1680 *(uintptr*)p = (uintptr)0xdeaddeaddeaddeadll; // needs zeroing 1681 runtime·MHeap_Free(runtime·mheap, s, 1); 1682 c->local_alloc -= size; 1683 c->local_nfree++; 1684 } else { 1685 // Free small object. 1686 switch(compression) { 1687 case MTypes_Words: 1688 *(uintptr*)type_data = 0; 1689 break; 1690 case MTypes_Bytes: 1691 *(byte*)type_data = 0; 1692 break; 1693 } 1694 if(size > sizeof(uintptr)) 1695 ((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll; // mark as "needs to be zeroed" 1696 1697 end->next = (MLink*)p; 1698 end = (MLink*)p; 1699 nfree++; 1700 } 1701 } 1702 1703 if(nfree) { 1704 c->local_by_size[cl].nfree += nfree; 1705 c->local_alloc -= size * nfree; 1706 c->local_nfree += nfree; 1707 c->local_cachealloc -= nfree * size; 1708 c->local_objects -= nfree; 1709 runtime·MCentral_FreeSpan(&runtime·mheap->central[cl], s, nfree, head.next, end); 1710 } 1711 } 1712 1713 static void 1714 dumpspan(uint32 idx) 1715 { 1716 int32 sizeclass, n, npages, i, column; 1717 uintptr size; 1718 byte *p; 1719 byte *arena_start; 1720 MSpan *s; 1721 bool allocated, special; 1722 1723 s = runtime·mheap->allspans[idx]; 1724 if(s->state != MSpanInUse) 1725 return; 1726 arena_start = runtime·mheap->arena_start; 1727 p = (byte*)(s->start << PageShift); 1728 sizeclass = s->sizeclass; 1729 size = s->elemsize; 1730 if(sizeclass == 0) { 1731 n = 1; 1732 } else { 1733 npages = runtime·class_to_allocnpages[sizeclass]; 1734 n = (npages << PageShift) / size; 1735 } 1736 1737 runtime·printf("%p .. %p:\n", p, p+n*size); 1738 column = 0; 1739 for(; n>0; n--, p+=size) { 1740 uintptr off, *bitp, shift, bits; 1741 1742 off = (uintptr*)p - (uintptr*)arena_start; 1743 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; 1744 shift = off % wordsPerBitmapWord; 1745 bits = *bitp>>shift; 1746 1747 allocated = ((bits & bitAllocated) != 0); 1748 special = ((bits & bitSpecial) != 0); 1749 1750 for(i=0; i<size; i+=sizeof(void*)) { 1751 if(column == 0) { 1752 runtime·printf("\t"); 1753 } 1754 if(i == 0) { 1755 runtime·printf(allocated ? "(" : "["); 1756 runtime·printf(special ? "@" : ""); 1757 runtime·printf("%p: ", p+i); 1758 } else { 1759 runtime·printf(" "); 1760 } 1761 1762 runtime·printf("%p", *(void**)(p+i)); 1763 1764 if(i+sizeof(void*) >= size) { 1765 runtime·printf(allocated ? 
") " : "] "); 1766 } 1767 1768 column++; 1769 if(column == 8) { 1770 runtime·printf("\n"); 1771 column = 0; 1772 } 1773 } 1774 } 1775 runtime·printf("\n"); 1776 } 1777 1778 // A debugging function to dump the contents of memory 1779 void 1780 runtime·memorydump(void) 1781 { 1782 uint32 spanidx; 1783 1784 for(spanidx=0; spanidx<runtime·mheap->nspan; spanidx++) { 1785 dumpspan(spanidx); 1786 } 1787 } 1788 1789 void 1790 runtime·gchelper(void) 1791 { 1792 gchelperstart(); 1793 1794 // parallel mark for over gc roots 1795 runtime·parfordo(work.markfor); 1796 1797 // help other threads scan secondary blocks 1798 scanblock(nil, nil, 0, true); 1799 1800 if(DebugMark) { 1801 // wait while the main thread executes mark(debug_scanblock) 1802 while(runtime·atomicload(&work.debugmarkdone) == 0) 1803 runtime·usleep(10); 1804 } 1805 1806 runtime·parfordo(work.sweepfor); 1807 bufferList[m->helpgc].busy = 0; 1808 if(runtime·xadd(&work.ndone, +1) == work.nproc-1) 1809 runtime·notewakeup(&work.alldone); 1810 } 1811 1812 #define GcpercentUnknown (-2) 1813 1814 // Initialized from $GOGC. GOGC=off means no gc. 1815 // 1816 // Next gc is after we've allocated an extra amount of 1817 // memory proportional to the amount already in use. 1818 // If gcpercent=100 and we're using 4M, we'll gc again 1819 // when we get to 8M. This keeps the gc cost in linear 1820 // proportion to the allocation cost. Adjusting gcpercent 1821 // just changes the linear constant (and also the amount of 1822 // extra memory used). 1823 static int32 gcpercent = GcpercentUnknown; 1824 1825 static void 1826 cachestats(GCStats *stats) 1827 { 1828 M *mp; 1829 MCache *c; 1830 P *p, **pp; 1831 int32 i; 1832 uint64 stacks_inuse; 1833 uint64 *src, *dst; 1834 1835 if(stats) 1836 runtime·memclr((byte*)stats, sizeof(*stats)); 1837 stacks_inuse = 0; 1838 for(mp=runtime·allm; mp; mp=mp->alllink) { 1839 stacks_inuse += mp->stackinuse*FixedStack; 1840 if(stats) { 1841 src = (uint64*)&mp->gcstats; 1842 dst = (uint64*)stats; 1843 for(i=0; i<sizeof(*stats)/sizeof(uint64); i++) 1844 dst[i] += src[i]; 1845 runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats)); 1846 } 1847 } 1848 for(pp=runtime·allp; p=*pp; pp++) { 1849 c = p->mcache; 1850 if(c==nil) 1851 continue; 1852 runtime·purgecachedstats(c); 1853 for(i=0; i<nelem(c->local_by_size); i++) { 1854 mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc; 1855 c->local_by_size[i].nmalloc = 0; 1856 mstats.by_size[i].nfree += c->local_by_size[i].nfree; 1857 c->local_by_size[i].nfree = 0; 1858 } 1859 } 1860 mstats.stacks_inuse = stacks_inuse; 1861 } 1862 1863 // Structure of arguments passed to function gc(). 1864 // This allows the arguments to be passed via reflect·call. 1865 struct gc_args 1866 { 1867 int32 force; 1868 }; 1869 1870 static void gc(struct gc_args *args); 1871 1872 static int32 1873 readgogc(void) 1874 { 1875 byte *p; 1876 1877 p = runtime·getenv("GOGC"); 1878 if(p == nil || p[0] == '\0') 1879 return 100; 1880 if(runtime·strcmp(p, (byte*)"off") == 0) 1881 return -1; 1882 return runtime·atoi(p); 1883 } 1884 1885 void 1886 runtime·gc(int32 force) 1887 { 1888 byte *p; 1889 struct gc_args a, *ap; 1890 FuncVal gcv; 1891 1892 // The atomic operations are not atomic if the uint64s 1893 // are not aligned on uint64 boundaries. This has been 1894 // a problem in the past. 
1895 if((((uintptr)&work.empty) & 7) != 0) 1896 runtime·throw("runtime: gc work buffer is misaligned"); 1897 if((((uintptr)&work.full) & 7) != 0) 1898 runtime·throw("runtime: gc work buffer is misaligned"); 1899 1900 // The gc is turned off (via enablegc) until 1901 // the bootstrap has completed. 1902 // Also, malloc gets called in the guts 1903 // of a number of libraries that might be 1904 // holding locks. To avoid priority inversion 1905 // problems, don't bother trying to run gc 1906 // while holding a lock. The next mallocgc 1907 // without a lock will do the gc instead. 1908 if(!mstats.enablegc || m->locks > 0 || runtime·panicking) 1909 return; 1910 1911 if(gcpercent == GcpercentUnknown) { // first time through 1912 gcpercent = readgogc(); 1913 1914 p = runtime·getenv("GOGCTRACE"); 1915 if(p != nil) 1916 gctrace = runtime·atoi(p); 1917 } 1918 if(gcpercent < 0) 1919 return; 1920 1921 // Run gc on a bigger stack to eliminate 1922 // a potentially large number of calls to runtime·morestack. 1923 a.force = force; 1924 ap = &a; 1925 m->moreframesize_minalloc = StackBig; 1926 gcv.fn = (void*)gc; 1927 reflect·call(&gcv, (byte*)&ap, sizeof(ap)); 1928 1929 if(gctrace > 1 && !force) { 1930 a.force = 1; 1931 gc(&a); 1932 } 1933 } 1934 1935 static FuncVal runfinqv = {runfinq}; 1936 1937 static void 1938 gc(struct gc_args *args) 1939 { 1940 int64 t0, t1, t2, t3, t4; 1941 uint64 heap0, heap1, obj0, obj1, ninstr; 1942 GCStats stats; 1943 M *mp; 1944 uint32 i; 1945 Eface eface; 1946 1947 runtime·semacquire(&runtime·worldsema); 1948 if(!args->force && mstats.heap_alloc < mstats.next_gc) { 1949 runtime·semrelease(&runtime·worldsema); 1950 return; 1951 } 1952 1953 t0 = runtime·nanotime(); 1954 1955 m->gcing = 1; 1956 runtime·stoptheworld(); 1957 1958 if(CollectStats) 1959 runtime·memclr((byte*)&gcstats, sizeof(gcstats)); 1960 1961 for(mp=runtime·allm; mp; mp=mp->alllink) 1962 runtime·settype_flush(mp, false); 1963 1964 heap0 = 0; 1965 obj0 = 0; 1966 if(gctrace) { 1967 cachestats(nil); 1968 heap0 = mstats.heap_alloc; 1969 obj0 = mstats.nmalloc - mstats.nfree; 1970 } 1971 1972 m->locks++; // disable gc during mallocs in parforalloc 1973 if(work.markfor == nil) 1974 work.markfor = runtime·parforalloc(MaxGcproc); 1975 if(work.sweepfor == nil) 1976 work.sweepfor = runtime·parforalloc(MaxGcproc); 1977 m->locks--; 1978 1979 if(itabtype == nil) { 1980 // get C pointer to the Go type "itab" 1981 runtime·gc_itab_ptr(&eface); 1982 itabtype = ((PtrType*)eface.type)->elem; 1983 } 1984 1985 work.nwait = 0; 1986 work.ndone = 0; 1987 work.debugmarkdone = 0; 1988 work.nproc = runtime·gcprocs(); 1989 addroots(); 1990 runtime·parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot); 1991 runtime·parforsetup(work.sweepfor, work.nproc, runtime·mheap->nspan, nil, true, sweepspan); 1992 if(work.nproc > 1) { 1993 runtime·noteclear(&work.alldone); 1994 runtime·helpgc(work.nproc); 1995 } 1996 1997 t1 = runtime·nanotime(); 1998 1999 gchelperstart(); 2000 runtime·parfordo(work.markfor); 2001 scanblock(nil, nil, 0, true); 2002 2003 if(DebugMark) { 2004 for(i=0; i<work.nroot; i++) 2005 debug_scanblock(work.roots[i].p, work.roots[i].n); 2006 runtime·atomicstore(&work.debugmarkdone, 1); 2007 } 2008 t2 = runtime·nanotime(); 2009 2010 runtime·parfordo(work.sweepfor); 2011 bufferList[m->helpgc].busy = 0; 2012 t3 = runtime·nanotime(); 2013 2014 if(work.nproc > 1) 2015 runtime·notesleep(&work.alldone); 2016 2017 cachestats(&stats); 2018 2019 stats.nprocyield += work.sweepfor->nprocyield; 2020 stats.nosyield += 
work.sweepfor->nosyield; 2021 stats.nsleep += work.sweepfor->nsleep; 2022 2023 mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; 2024 m->gcing = 0; 2025 2026 if(finq != nil) { 2027 m->locks++; // disable gc during the mallocs in newproc 2028 // kick off or wake up goroutine to run queued finalizers 2029 if(fing == nil) 2030 fing = runtime·newproc1(&runfinqv, nil, 0, 0, runtime·gc); 2031 else if(fingwait) { 2032 fingwait = 0; 2033 runtime·ready(fing); 2034 } 2035 m->locks--; 2036 } 2037 2038 heap1 = mstats.heap_alloc; 2039 obj1 = mstats.nmalloc - mstats.nfree; 2040 2041 t4 = runtime·nanotime(); 2042 mstats.last_gc = t4; 2043 mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0; 2044 mstats.pause_total_ns += t4 - t0; 2045 mstats.numgc++; 2046 if(mstats.debuggc) 2047 runtime·printf("pause %D\n", t4-t0); 2048 2049 if(gctrace) { 2050 runtime·printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects," 2051 " %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n", 2052 mstats.numgc, work.nproc, (t2-t1)/1000000, (t3-t2)/1000000, (t1-t0+t4-t3)/1000000, 2053 heap0>>20, heap1>>20, obj0, obj1, 2054 mstats.nmalloc, mstats.nfree, 2055 stats.nhandoff, stats.nhandoffcnt, 2056 work.sweepfor->nsteal, work.sweepfor->nstealcnt, 2057 stats.nprocyield, stats.nosyield, stats.nsleep); 2058 if(CollectStats) { 2059 runtime·printf("scan: %D bytes, %D objects, %D untyped, %D types from MSpan\n", 2060 gcstats.nbytes, gcstats.obj.cnt, gcstats.obj.notype, gcstats.obj.typelookup); 2061 if(gcstats.ptr.cnt != 0) 2062 runtime·printf("avg ptrbufsize: %D (%D/%D)\n", 2063 gcstats.ptr.sum/gcstats.ptr.cnt, gcstats.ptr.sum, gcstats.ptr.cnt); 2064 if(gcstats.obj.cnt != 0) 2065 runtime·printf("avg nobj: %D (%D/%D)\n", 2066 gcstats.obj.sum/gcstats.obj.cnt, gcstats.obj.sum, gcstats.obj.cnt); 2067 runtime·printf("rescans: %D, %D bytes\n", gcstats.rescan, gcstats.rescanbytes); 2068 2069 runtime·printf("instruction counts:\n"); 2070 ninstr = 0; 2071 for(i=0; i<nelem(gcstats.instr); i++) { 2072 runtime·printf("\t%d:\t%D\n", i, gcstats.instr[i]); 2073 ninstr += gcstats.instr[i]; 2074 } 2075 runtime·printf("\ttotal:\t%D\n", ninstr); 2076 2077 runtime·printf("putempty: %D, getfull: %D\n", gcstats.putempty, gcstats.getfull); 2078 } 2079 } 2080 2081 runtime·MProf_GC(); 2082 runtime·semrelease(&runtime·worldsema); 2083 runtime·starttheworld(); 2084 2085 // give the queued finalizers, if any, a chance to run 2086 if(finq != nil) 2087 runtime·gosched(); 2088 } 2089 2090 void 2091 runtime·ReadMemStats(MStats *stats) 2092 { 2093 // Have to acquire worldsema to stop the world, 2094 // because stoptheworld can only be used by 2095 // one goroutine at a time, and there might be 2096 // a pending garbage collection already calling it. 2097 runtime·semacquire(&runtime·worldsema); 2098 m->gcing = 1; 2099 runtime·stoptheworld(); 2100 cachestats(nil); 2101 *stats = mstats; 2102 m->gcing = 0; 2103 runtime·semrelease(&runtime·worldsema); 2104 runtime·starttheworld(); 2105 } 2106 2107 void 2108 runtime∕debug·readGCStats(Slice *pauses) 2109 { 2110 uint64 *p; 2111 uint32 i, n; 2112 2113 // Calling code in runtime/debug should make the slice large enough. 2114 if(pauses->cap < nelem(mstats.pause_ns)+3) 2115 runtime·throw("runtime: short slice passed to readGCStats"); 2116 2117 // Pass back: pauses, last gc (absolute time), number of gc, total pause ns. 
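	// For example, with the (historically) 256-entry pause_ns buffer and
	// numgc = 300, only the most recent 256 pauses are still available:
	// p[0] gets pause_ns[299%256], p[1] gets pause_ns[298%256], and so on,
	// followed by last_gc, numgc and pause_total_ns in p[256..258].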
void
runtime·ReadMemStats(MStats *stats)
{
	// Have to acquire worldsema to stop the world,
	// because stoptheworld can only be used by
	// one goroutine at a time, and there might be
	// a pending garbage collection already calling it.
	runtime·semacquire(&runtime·worldsema);
	m->gcing = 1;
	runtime·stoptheworld();
	cachestats(nil);
	*stats = mstats;
	m->gcing = 0;
	runtime·semrelease(&runtime·worldsema);
	runtime·starttheworld();
}

void
runtime∕debug·readGCStats(Slice *pauses)
{
	uint64 *p;
	uint32 i, n;

	// Calling code in runtime/debug should make the slice large enough.
	if(pauses->cap < nelem(mstats.pause_ns)+3)
		runtime·throw("runtime: short slice passed to readGCStats");

	// Pass back: pauses, last gc (absolute time), number of gc, total pause ns.
	p = (uint64*)pauses->array;
	runtime·lock(runtime·mheap);
	n = mstats.numgc;
	if(n > nelem(mstats.pause_ns))
		n = nelem(mstats.pause_ns);

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%nelem(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	for(i=0; i<n; i++)
		p[i] = mstats.pause_ns[(mstats.numgc-1-i)%nelem(mstats.pause_ns)];

	p[n] = mstats.last_gc;
	p[n+1] = mstats.numgc;
	p[n+2] = mstats.pause_total_ns;
	runtime·unlock(runtime·mheap);
	pauses->len = n+3;
}

void
runtime∕debug·setGCPercent(intgo in, intgo out)
{
	runtime·lock(runtime·mheap);
	if(gcpercent == GcpercentUnknown)
		gcpercent = readgogc();
	out = gcpercent;
	if(in < 0)
		in = -1;
	gcpercent = in;
	runtime·unlock(runtime·mheap);
	FLUSH(&out);
}

static void
gchelperstart(void)
{
	if(m->helpgc < 0 || m->helpgc >= MaxGcproc)
		runtime·throw("gchelperstart: bad m->helpgc");
	if(runtime·xchg(&bufferList[m->helpgc].busy, 1))
		runtime·throw("gchelperstart: already busy");
}

static void
runfinq(void)
{
	Finalizer *f;
	FinBlock *fb, *next;
	byte *frame;
	uint32 framesz, framecap, i;

	frame = nil;
	framecap = 0;
	for(;;) {
		// There's no need for a lock in this section
		// because it only conflicts with the garbage
		// collector, and the garbage collector only
		// runs when everyone else is stopped, and
		// runfinq only stops at the park() below or
		// during the calls in the for loop.
		fb = finq;
		finq = nil;
		if(fb == nil) {
			fingwait = 1;
			runtime·park(nil, nil, "finalizer wait");
			continue;
		}
		if(raceenabled)
			runtime·racefingo();
		for(; fb; fb=next) {
			next = fb->next;
			for(i=0; i<fb->cnt; i++) {
				f = &fb->fin[i];
				framesz = sizeof(uintptr) + f->nret;
				if(framecap < framesz) {
					runtime·free(frame);
					frame = runtime·mal(framesz);
					framecap = framesz;
				}
				*(void**)frame = f->arg;
				reflect·call(f->fn, frame, sizeof(uintptr) + f->nret);
				f->fn = nil;
				f->arg = nil;
			}
			fb->cnt = 0;
			fb->next = finc;
			finc = fb;
		}
		runtime·gc(1);	// trigger another gc to clean up the finalized objects, if possible
	}
}
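// Editorial sketch (not part of the original source): the frame handed to
// reflect·call in runfinq above is a small argument+result area built by
// hand. Its first word is the pointer being finalized and the remaining
// f->nret bytes receive the finalizer's (ignored) results. Assuming a
// 64-bit system, for a finalizer of type func(p *T) with no results:
//
//	f->nret == 0, framesz == 8:	[ f->arg ]
//
// and for a hypothetical func(p *T) int:
//
//	f->nret == 8, framesz == 16:	[ f->arg | int result, discarded ]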
// mark the block at v of size n as allocated.
// If noptr is true, mark it as having no pointers.
void
runtime·markallocated(void *v, uintptr n, bool noptr)
{
	uintptr *b, obits, bits, off, shift;

	if(0)
		runtime·printf("markallocated %p+%p\n", v, n);

	if((byte*)v+n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start)
		runtime·throw("markallocated: bad pointer");

	off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start;	// word offset
	b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	for(;;) {
		obits = *b;
		bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
		if(noptr)
			bits |= bitNoPointers<<shift;
		if(runtime·singleproc) {
			*b = bits;
			break;
		} else {
			// more than one goroutine is potentially running: use atomic op
			if(runtime·casp((void**)b, (void*)obits, (void*)bits))
				break;
		}
	}
}

// mark the block at v of size n as freed.
void
runtime·markfreed(void *v, uintptr n)
{
	uintptr *b, obits, bits, off, shift;

	if(0)
		runtime·printf("markfreed %p+%p\n", v, n);

	if((byte*)v+n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start)
		runtime·throw("markfreed: bad pointer");

	off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start;	// word offset
	b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	for(;;) {
		obits = *b;
		bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
		if(runtime·singleproc) {
			*b = bits;
			break;
		} else {
			// more than one goroutine is potentially running: use atomic op
			if(runtime·casp((void**)b, (void*)obits, (void*)bits))
				break;
		}
	}
}

// check that the block at v of size n is marked freed.
void
runtime·checkfreed(void *v, uintptr n)
{
	uintptr *b, bits, off, shift;

	if(!runtime·checking)
		return;

	if((byte*)v+n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start)
		return;	// not allocated, so okay

	off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start;	// word offset
	b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	bits = *b>>shift;
	if((bits & bitAllocated) != 0) {
		runtime·printf("checkfreed %p+%p: off=%p have=%p\n",
			v, n, off, bits & bitMask);
		runtime·throw("checkfreed: not freed");
	}
}
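// Worked example (editorial note, not part of the original source): the
// off/b/shift lookup repeated in the functions above and below is plain
// pointer arithmetic. On a 64-bit system wordsPerBitmapWord is 16, so for
// a hypothetical arena_start of 0xf800000000 and v = 0xf800000100:
//
//	off   = (0xf800000100 - 0xf800000000)/8 = 32	// word offset 32 into the arena
//	b     = (uintptr*)0xf800000000 - 32/16 - 1	// 3rd uintptr below arena_start
//	shift = 32 % 16 = 0
//
// so this word's bits are bitAllocated<<shift, bitNoPointers<<shift,
// bitMarked<<shift and bitSpecial<<shift, i.e. bit positions 0, 16, 32
// and 48 of *b.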
// mark the span of memory at v as having n blocks of the given size.
// if leftover is true, there is left over space at the end of the span.
void
runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
{
	uintptr *b, off, shift;
	byte *p;

	if((byte*)v+size*n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start)
		runtime·throw("markspan: bad pointer");

	p = v;
	if(leftover)	// mark a boundary just past end of last block too
		n++;
	for(; n-- > 0; p += size) {
		// Okay to use non-atomic ops here, because we control
		// the entire span, and each bitmap word has bits for only
		// one span, so no other goroutines are changing these
		// bitmap words.
		off = (uintptr*)p - (uintptr*)runtime·mheap->arena_start;	// word offset
		b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
	}
}

// unmark the span of memory at v of length n bytes.
void
runtime·unmarkspan(void *v, uintptr n)
{
	uintptr *p, *b, off;

	if((byte*)v+n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start)
		runtime·throw("unmarkspan: bad pointer");

	p = v;
	off = p - (uintptr*)runtime·mheap->arena_start;	// word offset
	if(off % wordsPerBitmapWord != 0)
		runtime·throw("unmarkspan: unaligned pointer");
	b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
	n /= PtrSize;
	if(n%wordsPerBitmapWord != 0)
		runtime·throw("unmarkspan: unaligned length");
	// Okay to use non-atomic ops here, because we control
	// the entire span, and each bitmap word has bits for only
	// one span, so no other goroutines are changing these
	// bitmap words.
	n /= wordsPerBitmapWord;
	while(n-- > 0)
		*b-- = 0;
}

bool
runtime·blockspecial(void *v)
{
	uintptr *b, off, shift;

	if(DebugMark)
		return true;

	off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start;
	b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	return (*b & (bitSpecial<<shift)) != 0;
}

void
runtime·setblockspecial(void *v, bool s)
{
	uintptr *b, off, shift, bits, obits;

	if(DebugMark)
		return;

	off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start;
	b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	for(;;) {
		obits = *b;
		if(s)
			bits = obits | (bitSpecial<<shift);
		else
			bits = obits & ~(bitSpecial<<shift);
		if(runtime·singleproc) {
			*b = bits;
			break;
		} else {
			// more than one goroutine is potentially running: use atomic op
			if(runtime·casp((void**)b, (void*)obits, (void*)bits))
				break;
		}
	}
}

void
runtime·MHeap_MapBits(MHeap *h)
{
	// Caller has added extra mappings to the arena.
	// Add extra mappings of bitmap words as needed.
	// We allocate extra bitmap pieces in chunks of bitmapChunk.
	enum {
		bitmapChunk = 8192
	};
	uintptr n;

	n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
	n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
	if(h->bitmap_mapped >= n)
		return;

	runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped);
	h->bitmap_mapped = n;
}
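// Worked example (editorial note, not part of the original source): in
// MHeap_MapBits above, n is the bitmap size in bytes, one sixteenth of the
// arena in use on a 64-bit system, and the masking rounds it up to a whole
// multiple of bitmapChunk so the mapping grows in 8192-byte steps. For
// example, with 1<<20 bytes of arena in use:
//
//	n = (1<<20)/16 = 65536			// already a multiple of 8192
//
// and after the arena grows by one 4096-byte page:
//
//	n = (1<<20 + 4096)/16 = 65792
//	n = (65792 + 8191) & ~8191 = 73728	// rounded up to the next chunk
//
// so SysMap maps another 73728-65536 = 8192 bytes at the low end of the
// bitmap, which grows downward from arena_start.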