github.com/llvm-mirror/llgo@v0.0.0-20190322182713-bf6f0a60fce1/third_party/gofrontend/libgo/runtime/mgc0.c

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector (GC).
//
// GC is:
// - mark&sweep
// - mostly precise (with the exception of some C-allocated objects, assembly frames/arguments, etc)
// - parallel (up to MaxGcproc threads)
// - partially concurrent (mark is stop-the-world, while sweep is concurrent)
// - non-moving/non-compacting
// - full (non-partial)
//
// GC rate.
// Next GC is after we've allocated an extra amount of memory proportional to
// the amount already in use. The proportion is controlled by the GOGC environment variable
// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
// (this mark is tracked in the next_gc variable). This keeps the GC cost in linear
// proportion to the allocation cost. Adjusting GOGC just changes the linear constant
// (and also the amount of extra memory used).
//
// Concurrent sweep.
// The sweep phase proceeds concurrently with normal program execution.
// The heap is swept span-by-span both lazily (when a goroutine needs another span)
// and concurrently in a background goroutine (this helps programs that are not CPU bound).
// However, at the end of the stop-the-world GC phase we don't know the size of the live heap,
// and so the next_gc calculation is tricky and happens as follows.
// At the end of the stop-the-world phase next_gc is conservatively set based on total
// heap size; all spans are marked as "needs sweeping".
// Whenever a span is swept, next_gc is decremented by GOGC*newly_freed_memory.
// The background sweeper goroutine simply sweeps spans one-by-one, bringing next_gc
// closer to the target value. However, this is not enough to avoid over-allocating memory.
// Consider that a goroutine wants to allocate a new span for a large object and
// there are no free swept spans, but there are small-object unswept spans.
// If the goroutine naively allocates a new span, it can surpass the yet-unknown
// target next_gc value. In order to prevent such cases: (1) when a goroutine needs
// to allocate a new small-object span, it sweeps small-object spans for the same
// object size until it frees at least one object; (2) when a goroutine needs to
// allocate a large-object span from the heap, it sweeps spans until it frees at least
// that many pages into the heap. Together these two measures ensure that we don't surpass
// the target next_gc value by a large margin. There is an exception: if a goroutine sweeps
// and frees two nonadjacent one-page spans to the heap, it will allocate a new two-page span,
// but there can still be other one-page unswept spans which could be combined into a two-page span.
// It's critical to ensure that no operations proceed on unswept spans (that would corrupt
// mark bits in the GC bitmap). During GC all mcaches are flushed into the central cache,
// so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
// When a goroutine explicitly frees an object or sets a finalizer, it ensures that
// the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish).
// The finalizer goroutine is kicked off only when all spans are swept.
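//
// A worked example of the pacing above, with illustrative numbers and the
// default GOGC=100: if the (not yet swept, hence conservatively large) heap
// is 4M at the end of the stop-the-world phase, next_gc is set to roughly
//
//	heap_in_use + heap_in_use*gcpercent/100	// ~8M
//
// and the per-span sweep credit described above then walks it back down as
// garbage is actually freed (see the next_gc adjustments in
// runtime_MSpan_Sweep below).
//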
51 // When the next GC starts, it sweeps all not-yet-swept spans (if any). 52 53 #include <unistd.h> 54 55 #include "runtime.h" 56 #include "arch.h" 57 #include "malloc.h" 58 #include "mgc0.h" 59 #include "chan.h" 60 #include "go-type.h" 61 62 // Map gccgo field names to gc field names. 63 // Slice aka __go_open_array. 64 #define array __values 65 #define cap __capacity 66 // Iface aka __go_interface 67 #define tab __methods 68 // Hmap aka __go_map 69 typedef struct __go_map Hmap; 70 // Type aka __go_type_descriptor 71 #define string __reflection 72 #define KindPtr GO_PTR 73 #define KindNoPointers GO_NO_POINTERS 74 #define kindMask GO_CODE_MASK 75 // PtrType aka __go_ptr_type 76 #define elem __element_type 77 78 #ifdef USING_SPLIT_STACK 79 80 extern void * __splitstack_find (void *, void *, size_t *, void **, void **, 81 void **); 82 83 extern void * __splitstack_find_context (void *context[10], size_t *, void **, 84 void **, void **); 85 86 #endif 87 88 enum { 89 Debug = 0, 90 CollectStats = 0, 91 ConcurrentSweep = 1, 92 93 WorkbufSize = 16*1024, 94 FinBlockSize = 4*1024, 95 96 handoffThreshold = 4, 97 IntermediateBufferCapacity = 64, 98 99 // Bits in type information 100 PRECISE = 1, 101 LOOP = 2, 102 PC_BITS = PRECISE | LOOP, 103 104 RootData = 0, 105 RootBss = 1, 106 RootFinalizers = 2, 107 RootSpanTypes = 3, 108 RootFlushCaches = 4, 109 RootCount = 5, 110 }; 111 112 #define GcpercentUnknown (-2) 113 114 // Initialized from $GOGC. GOGC=off means no gc. 115 static int32 gcpercent = GcpercentUnknown; 116 117 static FuncVal* poolcleanup; 118 119 void sync_runtime_registerPoolCleanup(FuncVal*) 120 __asm__ (GOSYM_PREFIX "sync.runtime_registerPoolCleanup"); 121 122 void 123 sync_runtime_registerPoolCleanup(FuncVal *f) 124 { 125 poolcleanup = f; 126 } 127 128 static void 129 clearpools(void) 130 { 131 P *p, **pp; 132 MCache *c; 133 134 // clear sync.Pool's 135 if(poolcleanup != nil) { 136 __builtin_call_with_static_chain(poolcleanup->fn(), 137 poolcleanup); 138 } 139 140 for(pp=runtime_allp; (p=*pp) != nil; pp++) { 141 // clear tinyalloc pool 142 c = p->mcache; 143 if(c != nil) { 144 c->tiny = nil; 145 c->tinysize = 0; 146 } 147 // clear defer pools 148 p->deferpool = nil; 149 } 150 } 151 152 // Holding worldsema grants an M the right to try to stop the world. 153 // The procedure is: 154 // 155 // runtime_semacquire(&runtime_worldsema); 156 // m->gcing = 1; 157 // runtime_stoptheworld(); 158 // 159 // ... do stuff ... 
160 // 161 // m->gcing = 0; 162 // runtime_semrelease(&runtime_worldsema); 163 // runtime_starttheworld(); 164 // 165 uint32 runtime_worldsema = 1; 166 167 typedef struct Workbuf Workbuf; 168 struct Workbuf 169 { 170 #define SIZE (WorkbufSize-sizeof(LFNode)-sizeof(uintptr)) 171 LFNode node; // must be first 172 uintptr nobj; 173 Obj obj[SIZE/sizeof(Obj) - 1]; 174 uint8 _padding[SIZE%sizeof(Obj) + sizeof(Obj)]; 175 #undef SIZE 176 }; 177 178 typedef struct Finalizer Finalizer; 179 struct Finalizer 180 { 181 FuncVal *fn; 182 void *arg; 183 const struct __go_func_type *ft; 184 const PtrType *ot; 185 }; 186 187 typedef struct FinBlock FinBlock; 188 struct FinBlock 189 { 190 FinBlock *alllink; 191 FinBlock *next; 192 int32 cnt; 193 int32 cap; 194 Finalizer fin[1]; 195 }; 196 197 static Lock finlock; // protects the following variables 198 static FinBlock *finq; // list of finalizers that are to be executed 199 static FinBlock *finc; // cache of free blocks 200 static FinBlock *allfin; // list of all blocks 201 bool runtime_fingwait; 202 bool runtime_fingwake; 203 204 static Lock gclock; 205 static G* fing; 206 207 static void runfinq(void*); 208 static void bgsweep(void*); 209 static Workbuf* getempty(Workbuf*); 210 static Workbuf* getfull(Workbuf*); 211 static void putempty(Workbuf*); 212 static Workbuf* handoff(Workbuf*); 213 static void gchelperstart(void); 214 static void flushallmcaches(void); 215 static void addstackroots(G *gp, Workbuf **wbufp); 216 217 static struct { 218 uint64 full; // lock-free list of full blocks 219 uint64 empty; // lock-free list of empty blocks 220 byte pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait 221 uint32 nproc; 222 int64 tstart; 223 volatile uint32 nwait; 224 volatile uint32 ndone; 225 Note alldone; 226 ParFor *markfor; 227 228 Lock lock; 229 byte *chunk; 230 uintptr nchunk; 231 } work __attribute__((aligned(8))); 232 233 enum { 234 GC_DEFAULT_PTR = GC_NUM_INSTR, 235 GC_CHAN, 236 237 GC_NUM_INSTR2 238 }; 239 240 static struct { 241 struct { 242 uint64 sum; 243 uint64 cnt; 244 } ptr; 245 uint64 nbytes; 246 struct { 247 uint64 sum; 248 uint64 cnt; 249 uint64 notype; 250 uint64 typelookup; 251 } obj; 252 uint64 rescan; 253 uint64 rescanbytes; 254 uint64 instr[GC_NUM_INSTR2]; 255 uint64 putempty; 256 uint64 getfull; 257 struct { 258 uint64 foundbit; 259 uint64 foundword; 260 uint64 foundspan; 261 } flushptrbuf; 262 struct { 263 uint64 foundbit; 264 uint64 foundword; 265 uint64 foundspan; 266 } markonly; 267 uint32 nbgsweep; 268 uint32 npausesweep; 269 } gcstats; 270 271 // markonly marks an object. It returns true if the object 272 // has been marked by this function, false otherwise. 273 // This function doesn't append the object to any buffer. 274 static bool 275 markonly(const void *obj) 276 { 277 byte *p; 278 uintptr *bitp, bits, shift, x, xbits, off, j; 279 MSpan *s; 280 PageID k; 281 282 // Words outside the arena cannot be pointers. 283 if((const byte*)obj < runtime_mheap.arena_start || (const byte*)obj >= runtime_mheap.arena_used) 284 return false; 285 286 // obj may be a pointer to a live object. 287 // Try to find the beginning of the object. 288 289 // Round down to word boundary. 290 obj = (const void*)((uintptr)obj & ~((uintptr)PtrSize-1)); 291 292 // Find bits for this word. 
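	// (Worked example with illustrative numbers, assuming the usual 64-bit
	// layout where wordsPerBitmapWord == 16, i.e. four bitmap bits per heap
	// word: for obj == arena_start + 0x140,
	//	off   = 0x140/8 = 40   heap words past arena_start,
	//	bitp  = (uintptr*)arena_start - 40/16 - 1 = arena_start minus 3 words,
	//	shift = 40%16 = 8,
	// so obj's flag bits live in the third bitmap word below arena_start and
	// are brought to the low bits by the ">> shift" below; the bitmap grows
	// down from arena_start while the heap grows up.)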
293 off = (const uintptr*)obj - (uintptr*)runtime_mheap.arena_start; 294 bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; 295 shift = off % wordsPerBitmapWord; 296 xbits = *bitp; 297 bits = xbits >> shift; 298 299 // Pointing at the beginning of a block? 300 if((bits & (bitAllocated|bitBlockBoundary)) != 0) { 301 if(CollectStats) 302 runtime_xadd64(&gcstats.markonly.foundbit, 1); 303 goto found; 304 } 305 306 // Pointing just past the beginning? 307 // Scan backward a little to find a block boundary. 308 for(j=shift; j-->0; ) { 309 if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) { 310 shift = j; 311 bits = xbits>>shift; 312 if(CollectStats) 313 runtime_xadd64(&gcstats.markonly.foundword, 1); 314 goto found; 315 } 316 } 317 318 // Otherwise consult span table to find beginning. 319 // (Manually inlined copy of MHeap_LookupMaybe.) 320 k = (uintptr)obj>>PageShift; 321 x = k; 322 x -= (uintptr)runtime_mheap.arena_start>>PageShift; 323 s = runtime_mheap.spans[x]; 324 if(s == nil || k < s->start || (const byte*)obj >= s->limit || s->state != MSpanInUse) 325 return false; 326 p = (byte*)((uintptr)s->start<<PageShift); 327 if(s->sizeclass == 0) { 328 obj = p; 329 } else { 330 uintptr size = s->elemsize; 331 int32 i = ((const byte*)obj - p)/size; 332 obj = p+i*size; 333 } 334 335 // Now that we know the object header, reload bits. 336 off = (const uintptr*)obj - (uintptr*)runtime_mheap.arena_start; 337 bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; 338 shift = off % wordsPerBitmapWord; 339 xbits = *bitp; 340 bits = xbits >> shift; 341 if(CollectStats) 342 runtime_xadd64(&gcstats.markonly.foundspan, 1); 343 344 found: 345 // Now we have bits, bitp, and shift correct for 346 // obj pointing at the base of the object. 347 // Only care about allocated and not marked. 348 if((bits & (bitAllocated|bitMarked)) != bitAllocated) 349 return false; 350 if(work.nproc == 1) 351 *bitp |= bitMarked<<shift; 352 else { 353 for(;;) { 354 x = *bitp; 355 if(x & (bitMarked<<shift)) 356 return false; 357 if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift)))) 358 break; 359 } 360 } 361 362 // The object is now marked 363 return true; 364 } 365 366 // PtrTarget is a structure used by intermediate buffers. 367 // The intermediate buffers hold GC data before it 368 // is moved/flushed to the work buffer (Workbuf). 369 // The size of an intermediate buffer is very small, 370 // such as 32 or 64 elements. 371 typedef struct PtrTarget PtrTarget; 372 struct PtrTarget 373 { 374 void *p; 375 uintptr ti; 376 }; 377 378 typedef struct Scanbuf Scanbuf; 379 struct Scanbuf 380 { 381 struct { 382 PtrTarget *begin; 383 PtrTarget *end; 384 PtrTarget *pos; 385 } ptr; 386 struct { 387 Obj *begin; 388 Obj *end; 389 Obj *pos; 390 } obj; 391 Workbuf *wbuf; 392 Obj *wp; 393 uintptr nobj; 394 }; 395 396 typedef struct BufferList BufferList; 397 struct BufferList 398 { 399 PtrTarget ptrtarget[IntermediateBufferCapacity]; 400 Obj obj[IntermediateBufferCapacity]; 401 uint32 busy; 402 byte pad[CacheLineSize]; 403 }; 404 static BufferList bufferList[MaxGcproc]; 405 406 static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj); 407 408 // flushptrbuf moves data from the PtrTarget buffer to the work buffer. 409 // The PtrTarget buffer contains blocks irrespective of whether the blocks have been marked or scanned, 410 // while the work buffer contains blocks which have been marked 411 // and are prepared to be scanned by the garbage collector. 
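//
// The producer side of this scheme appears throughout scanblock below: each
// discovered pointer is appended to the small per-proc buffer, and the flush
// happens only when that buffer fills, so the bitmap lookups and marking are
// done in batches rather than once per pointer:
//
//	*sbuf.ptr.pos++ = (PtrTarget){obj, objti};
//	if(sbuf.ptr.pos == sbuf.ptr.end)
//		flushptrbuf(&sbuf);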
412 // 413 // _wp, _wbuf, _nobj are input/output parameters and are specifying the work buffer. 414 // 415 // A simplified drawing explaining how the todo-list moves from a structure to another: 416 // 417 // scanblock 418 // (find pointers) 419 // Obj ------> PtrTarget (pointer targets) 420 // ↑ | 421 // | | 422 // `----------' 423 // flushptrbuf 424 // (find block start, mark and enqueue) 425 static void 426 flushptrbuf(Scanbuf *sbuf) 427 { 428 byte *p, *arena_start, *obj; 429 uintptr size, *bitp, bits, shift, j, x, xbits, off, nobj, ti, n; 430 MSpan *s; 431 PageID k; 432 Obj *wp; 433 Workbuf *wbuf; 434 PtrTarget *ptrbuf; 435 PtrTarget *ptrbuf_end; 436 437 arena_start = runtime_mheap.arena_start; 438 439 wp = sbuf->wp; 440 wbuf = sbuf->wbuf; 441 nobj = sbuf->nobj; 442 443 ptrbuf = sbuf->ptr.begin; 444 ptrbuf_end = sbuf->ptr.pos; 445 n = ptrbuf_end - sbuf->ptr.begin; 446 sbuf->ptr.pos = sbuf->ptr.begin; 447 448 if(CollectStats) { 449 runtime_xadd64(&gcstats.ptr.sum, n); 450 runtime_xadd64(&gcstats.ptr.cnt, 1); 451 } 452 453 // If buffer is nearly full, get a new one. 454 if(wbuf == nil || nobj+n >= nelem(wbuf->obj)) { 455 if(wbuf != nil) 456 wbuf->nobj = nobj; 457 wbuf = getempty(wbuf); 458 wp = wbuf->obj; 459 nobj = 0; 460 461 if(n >= nelem(wbuf->obj)) 462 runtime_throw("ptrbuf has to be smaller than WorkBuf"); 463 } 464 465 while(ptrbuf < ptrbuf_end) { 466 obj = ptrbuf->p; 467 ti = ptrbuf->ti; 468 ptrbuf++; 469 470 // obj belongs to interval [mheap.arena_start, mheap.arena_used). 471 if(Debug > 1) { 472 if(obj < runtime_mheap.arena_start || obj >= runtime_mheap.arena_used) 473 runtime_throw("object is outside of mheap"); 474 } 475 476 // obj may be a pointer to a live object. 477 // Try to find the beginning of the object. 478 479 // Round down to word boundary. 480 if(((uintptr)obj & ((uintptr)PtrSize-1)) != 0) { 481 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); 482 ti = 0; 483 } 484 485 // Find bits for this word. 486 off = (uintptr*)obj - (uintptr*)arena_start; 487 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; 488 shift = off % wordsPerBitmapWord; 489 xbits = *bitp; 490 bits = xbits >> shift; 491 492 // Pointing at the beginning of a block? 493 if((bits & (bitAllocated|bitBlockBoundary)) != 0) { 494 if(CollectStats) 495 runtime_xadd64(&gcstats.flushptrbuf.foundbit, 1); 496 goto found; 497 } 498 499 ti = 0; 500 501 // Pointing just past the beginning? 502 // Scan backward a little to find a block boundary. 503 for(j=shift; j-->0; ) { 504 if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) { 505 obj = (byte*)obj - (shift-j)*PtrSize; 506 shift = j; 507 bits = xbits>>shift; 508 if(CollectStats) 509 runtime_xadd64(&gcstats.flushptrbuf.foundword, 1); 510 goto found; 511 } 512 } 513 514 // Otherwise consult span table to find beginning. 515 // (Manually inlined copy of MHeap_LookupMaybe.) 516 k = (uintptr)obj>>PageShift; 517 x = k; 518 x -= (uintptr)arena_start>>PageShift; 519 s = runtime_mheap.spans[x]; 520 if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse) 521 continue; 522 p = (byte*)((uintptr)s->start<<PageShift); 523 if(s->sizeclass == 0) { 524 obj = p; 525 } else { 526 size = s->elemsize; 527 int32 i = ((byte*)obj - p)/size; 528 obj = p+i*size; 529 } 530 531 // Now that we know the object header, reload bits. 
532 off = (uintptr*)obj - (uintptr*)arena_start; 533 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; 534 shift = off % wordsPerBitmapWord; 535 xbits = *bitp; 536 bits = xbits >> shift; 537 if(CollectStats) 538 runtime_xadd64(&gcstats.flushptrbuf.foundspan, 1); 539 540 found: 541 // Now we have bits, bitp, and shift correct for 542 // obj pointing at the base of the object. 543 // Only care about allocated and not marked. 544 if((bits & (bitAllocated|bitMarked)) != bitAllocated) 545 continue; 546 if(work.nproc == 1) 547 *bitp |= bitMarked<<shift; 548 else { 549 for(;;) { 550 x = *bitp; 551 if(x & (bitMarked<<shift)) 552 goto continue_obj; 553 if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift)))) 554 break; 555 } 556 } 557 558 // If object has no pointers, don't need to scan further. 559 if((bits & bitScan) == 0) 560 continue; 561 562 // Ask span about size class. 563 // (Manually inlined copy of MHeap_Lookup.) 564 x = (uintptr)obj >> PageShift; 565 x -= (uintptr)arena_start>>PageShift; 566 s = runtime_mheap.spans[x]; 567 568 PREFETCH(obj); 569 570 *wp = (Obj){obj, s->elemsize, ti}; 571 wp++; 572 nobj++; 573 continue_obj:; 574 } 575 576 // If another proc wants a pointer, give it some. 577 if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) { 578 wbuf->nobj = nobj; 579 wbuf = handoff(wbuf); 580 nobj = wbuf->nobj; 581 wp = wbuf->obj + nobj; 582 } 583 584 sbuf->wp = wp; 585 sbuf->wbuf = wbuf; 586 sbuf->nobj = nobj; 587 } 588 589 static void 590 flushobjbuf(Scanbuf *sbuf) 591 { 592 uintptr nobj, off; 593 Obj *wp, obj; 594 Workbuf *wbuf; 595 Obj *objbuf; 596 Obj *objbuf_end; 597 598 wp = sbuf->wp; 599 wbuf = sbuf->wbuf; 600 nobj = sbuf->nobj; 601 602 objbuf = sbuf->obj.begin; 603 objbuf_end = sbuf->obj.pos; 604 sbuf->obj.pos = sbuf->obj.begin; 605 606 while(objbuf < objbuf_end) { 607 obj = *objbuf++; 608 609 // Align obj.b to a word boundary. 610 off = (uintptr)obj.p & (PtrSize-1); 611 if(off != 0) { 612 obj.p += PtrSize - off; 613 obj.n -= PtrSize - off; 614 obj.ti = 0; 615 } 616 617 if(obj.p == nil || obj.n == 0) 618 continue; 619 620 // If buffer is full, get a new one. 621 if(wbuf == nil || nobj >= nelem(wbuf->obj)) { 622 if(wbuf != nil) 623 wbuf->nobj = nobj; 624 wbuf = getempty(wbuf); 625 wp = wbuf->obj; 626 nobj = 0; 627 } 628 629 *wp = obj; 630 wp++; 631 nobj++; 632 } 633 634 // If another proc wants a pointer, give it some. 635 if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) { 636 wbuf->nobj = nobj; 637 wbuf = handoff(wbuf); 638 nobj = wbuf->nobj; 639 wp = wbuf->obj + nobj; 640 } 641 642 sbuf->wp = wp; 643 sbuf->wbuf = wbuf; 644 sbuf->nobj = nobj; 645 } 646 647 // Program that scans the whole block and treats every block element as a potential pointer 648 static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR}; 649 650 // Hchan program 651 static uintptr chanProg[2] = {0, GC_CHAN}; 652 653 // Local variables of a program fragment or loop 654 typedef struct Frame Frame; 655 struct Frame { 656 uintptr count, elemsize, b; 657 const uintptr *loop_or_ret; 658 }; 659 660 // Sanity check for the derived type info objti. 
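// (Background, as an illustrative sketch rather than a definition from this
// file: a type-info word points at a GC "program" that scanblock interprets
// below. pc[0] holds the element size and the instructions follow, so a
// hypothetical
//
//	struct T { struct T2 *p; uintptr len; void *q; };
//
// with a nested type-info word elem_ti describing T2 could be encoded on a
// 64-bit target roughly as
//
//	uintptr T_gc[] = {sizeof(struct T), GC_PTR, 0, elem_ti, GC_APTR, 16, GC_END};
//
// GC_PTR carries an offset plus a nested type-info word, GC_APTR carries only
// an offset, and GC_END either terminates the object or loops back over array
// elements. checkptr compares the program derived from objti against the one
// recorded for the block's actual type.)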
661 static void 662 checkptr(void *obj, uintptr objti) 663 { 664 uintptr *pc1, type, tisize, i, j, x; 665 const uintptr *pc2; 666 byte *objstart; 667 Type *t; 668 MSpan *s; 669 670 if(!Debug) 671 runtime_throw("checkptr is debug only"); 672 673 if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used) 674 return; 675 type = runtime_gettype(obj); 676 t = (Type*)(type & ~(uintptr)(PtrSize-1)); 677 if(t == nil) 678 return; 679 x = (uintptr)obj >> PageShift; 680 x -= (uintptr)(runtime_mheap.arena_start)>>PageShift; 681 s = runtime_mheap.spans[x]; 682 objstart = (byte*)((uintptr)s->start<<PageShift); 683 if(s->sizeclass != 0) { 684 i = ((byte*)obj - objstart)/s->elemsize; 685 objstart += i*s->elemsize; 686 } 687 tisize = *(uintptr*)objti; 688 // Sanity check for object size: it should fit into the memory block. 689 if((byte*)obj + tisize > objstart + s->elemsize) { 690 runtime_printf("object of type '%S' at %p/%p does not fit in block %p/%p\n", 691 *t->string, obj, tisize, objstart, s->elemsize); 692 runtime_throw("invalid gc type info"); 693 } 694 if(obj != objstart) 695 return; 696 // If obj points to the beginning of the memory block, 697 // check type info as well. 698 if(t->string == nil || 699 // Gob allocates unsafe pointers for indirection. 700 (runtime_strcmp((const char *)t->string->str, (const char*)"unsafe.Pointer") && 701 // Runtime and gc think differently about closures. 702 runtime_strstr((const char *)t->string->str, (const char*)"struct { F uintptr") != (const char *)t->string->str)) { 703 pc1 = (uintptr*)objti; 704 pc2 = (const uintptr*)t->__gc; 705 // A simple best-effort check until first GC_END. 706 for(j = 1; pc1[j] != GC_END && pc2[j] != GC_END; j++) { 707 if(pc1[j] != pc2[j]) { 708 runtime_printf("invalid gc type info for '%s', type info %p [%d]=%p, block info %p [%d]=%p\n", 709 t->string ? (const int8*)t->string->str : (const int8*)"?", pc1, (int32)j, pc1[j], pc2, (int32)j, pc2[j]); 710 runtime_throw("invalid gc type info"); 711 } 712 } 713 } 714 } 715 716 // scanblock scans a block of n bytes starting at pointer b for references 717 // to other objects, scanning any it finds recursively until there are no 718 // unscanned objects left. Instead of using an explicit recursion, it keeps 719 // a work list in the Workbuf* structures and loops in the main function 720 // body. Keeping an explicit work list is easier on the stack allocator and 721 // more efficient. 722 static void 723 scanblock(Workbuf *wbuf, bool keepworking) 724 { 725 byte *b, *arena_start, *arena_used; 726 uintptr n, i, end_b, elemsize, size, ti, objti, count, type, nobj; 727 uintptr precise_type, nominal_size; 728 const uintptr *pc, *chan_ret; 729 uintptr chancap; 730 void *obj; 731 const Type *t, *et; 732 Slice *sliceptr; 733 String *stringptr; 734 Frame *stack_ptr, stack_top, stack[GC_STACK_CAPACITY+4]; 735 BufferList *scanbuffers; 736 Scanbuf sbuf; 737 Eface *eface; 738 Iface *iface; 739 Hchan *chan; 740 const ChanType *chantype; 741 Obj *wp; 742 743 if(sizeof(Workbuf) % WorkbufSize != 0) 744 runtime_throw("scanblock: size of Workbuf is suboptimal"); 745 746 // Memory arena parameters. 
747 arena_start = runtime_mheap.arena_start; 748 arena_used = runtime_mheap.arena_used; 749 750 stack_ptr = stack+nelem(stack)-1; 751 752 precise_type = false; 753 nominal_size = 0; 754 755 if(wbuf) { 756 nobj = wbuf->nobj; 757 wp = &wbuf->obj[nobj]; 758 } else { 759 nobj = 0; 760 wp = nil; 761 } 762 763 // Initialize sbuf 764 scanbuffers = &bufferList[runtime_m()->helpgc]; 765 766 sbuf.ptr.begin = sbuf.ptr.pos = &scanbuffers->ptrtarget[0]; 767 sbuf.ptr.end = sbuf.ptr.begin + nelem(scanbuffers->ptrtarget); 768 769 sbuf.obj.begin = sbuf.obj.pos = &scanbuffers->obj[0]; 770 sbuf.obj.end = sbuf.obj.begin + nelem(scanbuffers->obj); 771 772 sbuf.wbuf = wbuf; 773 sbuf.wp = wp; 774 sbuf.nobj = nobj; 775 776 // (Silence the compiler) 777 chan = nil; 778 chantype = nil; 779 chan_ret = nil; 780 781 goto next_block; 782 783 for(;;) { 784 // Each iteration scans the block b of length n, queueing pointers in 785 // the work buffer. 786 787 if(CollectStats) { 788 runtime_xadd64(&gcstats.nbytes, n); 789 runtime_xadd64(&gcstats.obj.sum, sbuf.nobj); 790 runtime_xadd64(&gcstats.obj.cnt, 1); 791 } 792 793 if(ti != 0) { 794 if(Debug > 1) { 795 runtime_printf("scanblock %p %D ti %p\n", b, (int64)n, ti); 796 } 797 pc = (uintptr*)(ti & ~(uintptr)PC_BITS); 798 precise_type = (ti & PRECISE); 799 stack_top.elemsize = pc[0]; 800 if(!precise_type) 801 nominal_size = pc[0]; 802 if(ti & LOOP) { 803 stack_top.count = 0; // 0 means an infinite number of iterations 804 stack_top.loop_or_ret = pc+1; 805 } else { 806 stack_top.count = 1; 807 } 808 if(Debug) { 809 // Simple sanity check for provided type info ti: 810 // The declared size of the object must be not larger than the actual size 811 // (it can be smaller due to inferior pointers). 812 // It's difficult to make a comprehensive check due to inferior pointers, 813 // reflection, gob, etc. 
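				// (Illustrative example: a 24-byte type is allocated from the
				// 32-byte size class, so a block with n == s->elemsize == 32
				// legitimately arrives with pc[0] == 24; only pc[0] > n can
				// indicate corrupt type information.)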
814 if(pc[0] > n) { 815 runtime_printf("invalid gc type info: type info size %p, block size %p\n", pc[0], n); 816 runtime_throw("invalid gc type info"); 817 } 818 } 819 } else if(UseSpanType) { 820 if(CollectStats) 821 runtime_xadd64(&gcstats.obj.notype, 1); 822 823 type = runtime_gettype(b); 824 if(type != 0) { 825 if(CollectStats) 826 runtime_xadd64(&gcstats.obj.typelookup, 1); 827 828 t = (Type*)(type & ~(uintptr)(PtrSize-1)); 829 switch(type & (PtrSize-1)) { 830 case TypeInfo_SingleObject: 831 pc = (const uintptr*)t->__gc; 832 precise_type = true; // type information about 'b' is precise 833 stack_top.count = 1; 834 stack_top.elemsize = pc[0]; 835 break; 836 case TypeInfo_Array: 837 pc = (const uintptr*)t->__gc; 838 if(pc[0] == 0) 839 goto next_block; 840 precise_type = true; // type information about 'b' is precise 841 stack_top.count = 0; // 0 means an infinite number of iterations 842 stack_top.elemsize = pc[0]; 843 stack_top.loop_or_ret = pc+1; 844 break; 845 case TypeInfo_Chan: 846 chan = (Hchan*)b; 847 chantype = (const ChanType*)t; 848 chan_ret = nil; 849 pc = chanProg; 850 break; 851 default: 852 if(Debug > 1) 853 runtime_printf("scanblock %p %D type %p %S\n", b, (int64)n, type, *t->string); 854 runtime_throw("scanblock: invalid type"); 855 return; 856 } 857 if(Debug > 1) 858 runtime_printf("scanblock %p %D type %p %S pc=%p\n", b, (int64)n, type, *t->string, pc); 859 } else { 860 pc = defaultProg; 861 if(Debug > 1) 862 runtime_printf("scanblock %p %D unknown type\n", b, (int64)n); 863 } 864 } else { 865 pc = defaultProg; 866 if(Debug > 1) 867 runtime_printf("scanblock %p %D no span types\n", b, (int64)n); 868 } 869 870 if(IgnorePreciseGC) 871 pc = defaultProg; 872 873 pc++; 874 stack_top.b = (uintptr)b; 875 end_b = (uintptr)b + n - PtrSize; 876 877 for(;;) { 878 if(CollectStats) 879 runtime_xadd64(&gcstats.instr[pc[0]], 1); 880 881 obj = nil; 882 objti = 0; 883 switch(pc[0]) { 884 case GC_PTR: 885 obj = *(void**)(stack_top.b + pc[1]); 886 objti = pc[2]; 887 if(Debug > 2) 888 runtime_printf("gc_ptr @%p: %p ti=%p\n", stack_top.b+pc[1], obj, objti); 889 pc += 3; 890 if(Debug) 891 checkptr(obj, objti); 892 break; 893 894 case GC_SLICE: 895 sliceptr = (Slice*)(stack_top.b + pc[1]); 896 if(Debug > 2) 897 runtime_printf("gc_slice @%p: %p/%D/%D\n", sliceptr, sliceptr->array, (int64)sliceptr->__count, (int64)sliceptr->cap); 898 if(sliceptr->cap != 0) { 899 obj = sliceptr->array; 900 // Can't use slice element type for scanning, 901 // because if it points to an array embedded 902 // in the beginning of a struct, 903 // we will scan the whole struct as the slice. 904 // So just obtain type info from heap. 
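				// For example (hypothetical layout):
				//	struct S { T arr[4]; byte *extra; };
				// a []T created from &s.arr[0] shares its data pointer with
				// the enclosing struct, so typing that block as "[]T" would
				// scan s.extra and anything after it with T's layout.
				// Leaving objti == 0 here defers to the per-block type info
				// recorded in the span (see the UseSpanType path above).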
905 } 906 pc += 3; 907 break; 908 909 case GC_APTR: 910 obj = *(void**)(stack_top.b + pc[1]); 911 if(Debug > 2) 912 runtime_printf("gc_aptr @%p: %p\n", stack_top.b+pc[1], obj); 913 pc += 2; 914 break; 915 916 case GC_STRING: 917 stringptr = (String*)(stack_top.b + pc[1]); 918 if(Debug > 2) 919 runtime_printf("gc_string @%p: %p/%D\n", stack_top.b+pc[1], stringptr->str, (int64)stringptr->len); 920 if(stringptr->len != 0) 921 markonly(stringptr->str); 922 pc += 2; 923 continue; 924 925 case GC_EFACE: 926 eface = (Eface*)(stack_top.b + pc[1]); 927 pc += 2; 928 if(Debug > 2) 929 runtime_printf("gc_eface @%p: %p %p\n", stack_top.b+pc[1], eface->__type_descriptor, eface->__object); 930 if(eface->__type_descriptor == nil) 931 continue; 932 933 // eface->type 934 t = eface->__type_descriptor; 935 if((const byte*)t >= arena_start && (const byte*)t < arena_used) { 936 union { const Type *tc; Type *tr; } u; 937 u.tc = t; 938 *sbuf.ptr.pos++ = (PtrTarget){u.tr, 0}; 939 if(sbuf.ptr.pos == sbuf.ptr.end) 940 flushptrbuf(&sbuf); 941 } 942 943 // eface->__object 944 if((byte*)eface->__object >= arena_start && (byte*)eface->__object < arena_used) { 945 if(__go_is_pointer_type(t)) { 946 if((t->__code & KindNoPointers)) 947 continue; 948 949 obj = eface->__object; 950 if((t->__code & kindMask) == KindPtr) { 951 // Only use type information if it is a pointer-containing type. 952 // This matches the GC programs written by cmd/gc/reflect.c's 953 // dgcsym1 in case TPTR32/case TPTR64. See rationale there. 954 et = ((const PtrType*)t)->elem; 955 if(!(et->__code & KindNoPointers)) 956 objti = (uintptr)((const PtrType*)t)->elem->__gc; 957 } 958 } else { 959 obj = eface->__object; 960 objti = (uintptr)t->__gc; 961 } 962 } 963 break; 964 965 case GC_IFACE: 966 iface = (Iface*)(stack_top.b + pc[1]); 967 pc += 2; 968 if(Debug > 2) 969 runtime_printf("gc_iface @%p: %p/%p %p\n", stack_top.b+pc[1], iface->__methods[0], nil, iface->__object); 970 if(iface->tab == nil) 971 continue; 972 973 // iface->tab 974 if((byte*)iface->tab >= arena_start && (byte*)iface->tab < arena_used) { 975 *sbuf.ptr.pos++ = (PtrTarget){iface->tab, 0}; 976 if(sbuf.ptr.pos == sbuf.ptr.end) 977 flushptrbuf(&sbuf); 978 } 979 980 // iface->data 981 if((byte*)iface->__object >= arena_start && (byte*)iface->__object < arena_used) { 982 t = (const Type*)iface->tab[0]; 983 if(__go_is_pointer_type(t)) { 984 if((t->__code & KindNoPointers)) 985 continue; 986 987 obj = iface->__object; 988 if((t->__code & kindMask) == KindPtr) { 989 // Only use type information if it is a pointer-containing type. 990 // This matches the GC programs written by cmd/gc/reflect.c's 991 // dgcsym1 in case TPTR32/case TPTR64. See rationale there. 992 et = ((const PtrType*)t)->elem; 993 if(!(et->__code & KindNoPointers)) 994 objti = (uintptr)((const PtrType*)t)->elem->__gc; 995 } 996 } else { 997 obj = iface->__object; 998 objti = (uintptr)t->__gc; 999 } 1000 } 1001 break; 1002 1003 case GC_DEFAULT_PTR: 1004 while(stack_top.b <= end_b) { 1005 obj = *(byte**)stack_top.b; 1006 if(Debug > 2) 1007 runtime_printf("gc_default_ptr @%p: %p\n", stack_top.b, obj); 1008 stack_top.b += PtrSize; 1009 if((byte*)obj >= arena_start && (byte*)obj < arena_used) { 1010 *sbuf.ptr.pos++ = (PtrTarget){obj, 0}; 1011 if(sbuf.ptr.pos == sbuf.ptr.end) 1012 flushptrbuf(&sbuf); 1013 } 1014 } 1015 goto next_block; 1016 1017 case GC_END: 1018 if(--stack_top.count != 0) { 1019 // Next iteration of a loop if possible. 
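				// (Each pass advances b by one element and re-runs the loop
				// body only while the next element still fits in the block;
				// that bounds check is what terminates the open-ended loops
				// set up with count == 0 for arrays and LOOP-type info.
				// Illustrative numbers: elemsize 16 in a 64-byte block gives
				// exactly four passes.)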
1020 stack_top.b += stack_top.elemsize; 1021 if(stack_top.b + stack_top.elemsize <= end_b+PtrSize) { 1022 pc = stack_top.loop_or_ret; 1023 continue; 1024 } 1025 i = stack_top.b; 1026 } else { 1027 // Stack pop if possible. 1028 if(stack_ptr+1 < stack+nelem(stack)) { 1029 pc = stack_top.loop_or_ret; 1030 stack_top = *(++stack_ptr); 1031 continue; 1032 } 1033 i = (uintptr)b + nominal_size; 1034 } 1035 if(!precise_type) { 1036 // Quickly scan [b+i,b+n) for possible pointers. 1037 for(; i<=end_b; i+=PtrSize) { 1038 if(*(byte**)i != nil) { 1039 // Found a value that may be a pointer. 1040 // Do a rescan of the entire block. 1041 enqueue((Obj){b, n, 0}, &sbuf.wbuf, &sbuf.wp, &sbuf.nobj); 1042 if(CollectStats) { 1043 runtime_xadd64(&gcstats.rescan, 1); 1044 runtime_xadd64(&gcstats.rescanbytes, n); 1045 } 1046 break; 1047 } 1048 } 1049 } 1050 goto next_block; 1051 1052 case GC_ARRAY_START: 1053 i = stack_top.b + pc[1]; 1054 count = pc[2]; 1055 elemsize = pc[3]; 1056 pc += 4; 1057 1058 // Stack push. 1059 *stack_ptr-- = stack_top; 1060 stack_top = (Frame){count, elemsize, i, pc}; 1061 continue; 1062 1063 case GC_ARRAY_NEXT: 1064 if(--stack_top.count != 0) { 1065 stack_top.b += stack_top.elemsize; 1066 pc = stack_top.loop_or_ret; 1067 } else { 1068 // Stack pop. 1069 stack_top = *(++stack_ptr); 1070 pc += 1; 1071 } 1072 continue; 1073 1074 case GC_CALL: 1075 // Stack push. 1076 *stack_ptr-- = stack_top; 1077 stack_top = (Frame){1, 0, stack_top.b + pc[1], pc+3 /*return address*/}; 1078 pc = (const uintptr*)((const byte*)pc + *(const int32*)(pc+2)); // target of the CALL instruction 1079 continue; 1080 1081 case GC_REGION: 1082 obj = (void*)(stack_top.b + pc[1]); 1083 size = pc[2]; 1084 objti = pc[3]; 1085 pc += 4; 1086 1087 if(Debug > 2) 1088 runtime_printf("gc_region @%p: %D %p\n", stack_top.b+pc[1], (int64)size, objti); 1089 *sbuf.obj.pos++ = (Obj){obj, size, objti}; 1090 if(sbuf.obj.pos == sbuf.obj.end) 1091 flushobjbuf(&sbuf); 1092 continue; 1093 1094 case GC_CHAN_PTR: 1095 chan = *(Hchan**)(stack_top.b + pc[1]); 1096 if(Debug > 2 && chan != nil) 1097 runtime_printf("gc_chan_ptr @%p: %p/%D/%D %p\n", stack_top.b+pc[1], chan, (int64)chan->qcount, (int64)chan->dataqsiz, pc[2]); 1098 if(chan == nil) { 1099 pc += 3; 1100 continue; 1101 } 1102 if(markonly(chan)) { 1103 chantype = (ChanType*)pc[2]; 1104 if(!(chantype->elem->__code & KindNoPointers)) { 1105 // Start chanProg. 1106 chan_ret = pc+3; 1107 pc = chanProg+1; 1108 continue; 1109 } 1110 } 1111 pc += 3; 1112 continue; 1113 1114 case GC_CHAN: 1115 // There are no heap pointers in struct Hchan, 1116 // so we can ignore the leading sizeof(Hchan) bytes. 1117 if(!(chantype->elem->__code & KindNoPointers)) { 1118 // Channel's buffer follows Hchan immediately in memory. 1119 // Size of buffer (cap(c)) is second int in the chan struct. 1120 chancap = ((uintgo*)chan)[1]; 1121 if(chancap > 0) { 1122 // TODO(atom): split into two chunks so that only the 1123 // in-use part of the circular buffer is scanned. 1124 // (Channel routines zero the unused part, so the current 1125 // code does not lead to leaks, it's just a little inefficient.) 
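				// (Illustration: for a buffered channel of pointers such as
				// make(chan *T, 8), the Obj queued below covers the 8*PtrSize
				// bytes of element buffer starting at chan+runtime_Hchansize,
				// scanned with the element type's GC program in looping mode.)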
1126 *sbuf.obj.pos++ = (Obj){(byte*)chan+runtime_Hchansize, chancap*chantype->elem->__size, 1127 (uintptr)chantype->elem->__gc | PRECISE | LOOP}; 1128 if(sbuf.obj.pos == sbuf.obj.end) 1129 flushobjbuf(&sbuf); 1130 } 1131 } 1132 if(chan_ret == nil) 1133 goto next_block; 1134 pc = chan_ret; 1135 continue; 1136 1137 default: 1138 runtime_printf("runtime: invalid GC instruction %p at %p\n", pc[0], pc); 1139 runtime_throw("scanblock: invalid GC instruction"); 1140 return; 1141 } 1142 1143 if((byte*)obj >= arena_start && (byte*)obj < arena_used) { 1144 *sbuf.ptr.pos++ = (PtrTarget){obj, objti}; 1145 if(sbuf.ptr.pos == sbuf.ptr.end) 1146 flushptrbuf(&sbuf); 1147 } 1148 } 1149 1150 next_block: 1151 // Done scanning [b, b+n). Prepare for the next iteration of 1152 // the loop by setting b, n, ti to the parameters for the next block. 1153 1154 if(sbuf.nobj == 0) { 1155 flushptrbuf(&sbuf); 1156 flushobjbuf(&sbuf); 1157 1158 if(sbuf.nobj == 0) { 1159 if(!keepworking) { 1160 if(sbuf.wbuf) 1161 putempty(sbuf.wbuf); 1162 return; 1163 } 1164 // Emptied our buffer: refill. 1165 sbuf.wbuf = getfull(sbuf.wbuf); 1166 if(sbuf.wbuf == nil) 1167 return; 1168 sbuf.nobj = sbuf.wbuf->nobj; 1169 sbuf.wp = sbuf.wbuf->obj + sbuf.wbuf->nobj; 1170 } 1171 } 1172 1173 // Fetch b from the work buffer. 1174 --sbuf.wp; 1175 b = sbuf.wp->p; 1176 n = sbuf.wp->n; 1177 ti = sbuf.wp->ti; 1178 sbuf.nobj--; 1179 } 1180 } 1181 1182 static struct root_list* roots; 1183 1184 void 1185 __go_register_gc_roots (struct root_list* r) 1186 { 1187 // FIXME: This needs locking if multiple goroutines can call 1188 // dlopen simultaneously. 1189 r->next = roots; 1190 roots = r; 1191 } 1192 1193 // Append obj to the work buffer. 1194 // _wbuf, _wp, _nobj are input/output parameters and are specifying the work buffer. 1195 static void 1196 enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj) 1197 { 1198 uintptr nobj, off; 1199 Obj *wp; 1200 Workbuf *wbuf; 1201 1202 if(Debug > 1) 1203 runtime_printf("append obj(%p %D %p)\n", obj.p, (int64)obj.n, obj.ti); 1204 1205 // Align obj.b to a word boundary. 1206 off = (uintptr)obj.p & (PtrSize-1); 1207 if(off != 0) { 1208 obj.p += PtrSize - off; 1209 obj.n -= PtrSize - off; 1210 obj.ti = 0; 1211 } 1212 1213 if(obj.p == nil || obj.n == 0) 1214 return; 1215 1216 // Load work buffer state 1217 wp = *_wp; 1218 wbuf = *_wbuf; 1219 nobj = *_nobj; 1220 1221 // If another proc wants a pointer, give it some. 1222 if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) { 1223 wbuf->nobj = nobj; 1224 wbuf = handoff(wbuf); 1225 nobj = wbuf->nobj; 1226 wp = wbuf->obj + nobj; 1227 } 1228 1229 // If buffer is full, get a new one. 1230 if(wbuf == nil || nobj >= nelem(wbuf->obj)) { 1231 if(wbuf != nil) 1232 wbuf->nobj = nobj; 1233 wbuf = getempty(wbuf); 1234 wp = wbuf->obj; 1235 nobj = 0; 1236 } 1237 1238 *wp = obj; 1239 wp++; 1240 nobj++; 1241 1242 // Save work buffer state 1243 *_wp = wp; 1244 *_wbuf = wbuf; 1245 *_nobj = nobj; 1246 } 1247 1248 static void 1249 enqueue1(Workbuf **wbufp, Obj obj) 1250 { 1251 Workbuf *wbuf; 1252 1253 wbuf = *wbufp; 1254 if(wbuf->nobj >= nelem(wbuf->obj)) 1255 *wbufp = wbuf = getempty(wbuf); 1256 wbuf->obj[wbuf->nobj++] = obj; 1257 } 1258 1259 static void 1260 markroot(ParFor *desc, uint32 i) 1261 { 1262 Workbuf *wbuf; 1263 FinBlock *fb; 1264 MHeap *h; 1265 MSpan **allspans, *s; 1266 uint32 spanidx, sg; 1267 G *gp; 1268 void *p; 1269 1270 USED(&desc); 1271 wbuf = getempty(nil); 1272 // Note: if you add a case here, please also update heapdump.c:dumproots. 
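	// Root indices are partitioned so that work.markfor can hand out
	// independent pieces: 0..RootCount-1 select the fixed cases below
	// (data/bss, other globals, finalizers, span types, cache flush), and
	// every index at or above RootCount scans one goroutine stack, for
	// RootCount+runtime_allglen units of root work in total.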
1273 switch(i) { 1274 case RootData: 1275 // For gccgo this is both data and bss. 1276 { 1277 struct root_list *pl; 1278 1279 for(pl = roots; pl != nil; pl = pl->next) { 1280 struct root *pr = &pl->roots[0]; 1281 while(1) { 1282 void *decl = pr->decl; 1283 if(decl == nil) 1284 break; 1285 enqueue1(&wbuf, (Obj){decl, pr->size, 0}); 1286 pr++; 1287 } 1288 } 1289 } 1290 break; 1291 1292 case RootBss: 1293 // For gccgo we use this for all the other global roots. 1294 enqueue1(&wbuf, (Obj){(byte*)&runtime_m0, sizeof runtime_m0, 0}); 1295 enqueue1(&wbuf, (Obj){(byte*)&runtime_g0, sizeof runtime_g0, 0}); 1296 enqueue1(&wbuf, (Obj){(byte*)&runtime_allg, sizeof runtime_allg, 0}); 1297 enqueue1(&wbuf, (Obj){(byte*)&runtime_allm, sizeof runtime_allm, 0}); 1298 enqueue1(&wbuf, (Obj){(byte*)&runtime_allp, sizeof runtime_allp, 0}); 1299 enqueue1(&wbuf, (Obj){(byte*)&work, sizeof work, 0}); 1300 runtime_proc_scan(&wbuf, enqueue1); 1301 runtime_MProf_Mark(&wbuf, enqueue1); 1302 runtime_time_scan(&wbuf, enqueue1); 1303 runtime_netpoll_scan(&wbuf, enqueue1); 1304 break; 1305 1306 case RootFinalizers: 1307 for(fb=allfin; fb; fb=fb->alllink) 1308 enqueue1(&wbuf, (Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0}); 1309 break; 1310 1311 case RootSpanTypes: 1312 // mark span types and MSpan.specials (to walk spans only once) 1313 h = &runtime_mheap; 1314 sg = h->sweepgen; 1315 allspans = h->allspans; 1316 for(spanidx=0; spanidx<runtime_mheap.nspan; spanidx++) { 1317 Special *sp; 1318 SpecialFinalizer *spf; 1319 1320 s = allspans[spanidx]; 1321 if(s->sweepgen != sg) { 1322 runtime_printf("sweep %d %d\n", s->sweepgen, sg); 1323 runtime_throw("gc: unswept span"); 1324 } 1325 if(s->state != MSpanInUse) 1326 continue; 1327 // The garbage collector ignores type pointers stored in MSpan.types: 1328 // - Compiler-generated types are stored outside of heap. 1329 // - The reflect package has runtime-generated types cached in its data structures. 1330 // The garbage collector relies on finding the references via that cache. 1331 if(s->types.compression == MTypes_Words || s->types.compression == MTypes_Bytes) 1332 markonly((byte*)s->types.data); 1333 for(sp = s->specials; sp != nil; sp = sp->next) { 1334 if(sp->kind != KindSpecialFinalizer) 1335 continue; 1336 // don't mark finalized object, but scan it so we 1337 // retain everything it points to. 1338 spf = (SpecialFinalizer*)sp; 1339 // A finalizer can be set for an inner byte of an object, find object beginning. 
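			// (E.g., with illustrative numbers: elemsize 48 and a finalizer
			// registered at byte offset 100 into the span rounds down to
			// 100/48*48 = 96, the base of the third object, which is then
			// enqueued in full below.)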
1340 p = (void*)((s->start << PageShift) + spf->special.offset/s->elemsize*s->elemsize); 1341 enqueue1(&wbuf, (Obj){p, s->elemsize, 0}); 1342 enqueue1(&wbuf, (Obj){(void*)&spf->fn, PtrSize, 0}); 1343 enqueue1(&wbuf, (Obj){(void*)&spf->ft, PtrSize, 0}); 1344 enqueue1(&wbuf, (Obj){(void*)&spf->ot, PtrSize, 0}); 1345 } 1346 } 1347 break; 1348 1349 case RootFlushCaches: 1350 flushallmcaches(); 1351 break; 1352 1353 default: 1354 // the rest is scanning goroutine stacks 1355 if(i - RootCount >= runtime_allglen) 1356 runtime_throw("markroot: bad index"); 1357 gp = runtime_allg[i - RootCount]; 1358 // remember when we've first observed the G blocked 1359 // needed only to output in traceback 1360 if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince == 0) 1361 gp->waitsince = work.tstart; 1362 addstackroots(gp, &wbuf); 1363 break; 1364 1365 } 1366 1367 if(wbuf) 1368 scanblock(wbuf, false); 1369 } 1370 1371 static const FuncVal markroot_funcval = { (void *) markroot }; 1372 1373 // Get an empty work buffer off the work.empty list, 1374 // allocating new buffers as needed. 1375 static Workbuf* 1376 getempty(Workbuf *b) 1377 { 1378 if(b != nil) 1379 runtime_lfstackpush(&work.full, &b->node); 1380 b = (Workbuf*)runtime_lfstackpop(&work.empty); 1381 if(b == nil) { 1382 // Need to allocate. 1383 runtime_lock(&work.lock); 1384 if(work.nchunk < sizeof *b) { 1385 work.nchunk = 1<<20; 1386 work.chunk = runtime_SysAlloc(work.nchunk, &mstats.gc_sys); 1387 if(work.chunk == nil) 1388 runtime_throw("runtime: cannot allocate memory"); 1389 } 1390 b = (Workbuf*)work.chunk; 1391 work.chunk += sizeof *b; 1392 work.nchunk -= sizeof *b; 1393 runtime_unlock(&work.lock); 1394 } 1395 b->nobj = 0; 1396 return b; 1397 } 1398 1399 static void 1400 putempty(Workbuf *b) 1401 { 1402 if(CollectStats) 1403 runtime_xadd64(&gcstats.putempty, 1); 1404 1405 runtime_lfstackpush(&work.empty, &b->node); 1406 } 1407 1408 // Get a full work buffer off the work.full list, or return nil. 1409 static Workbuf* 1410 getfull(Workbuf *b) 1411 { 1412 M *m; 1413 int32 i; 1414 1415 if(CollectStats) 1416 runtime_xadd64(&gcstats.getfull, 1); 1417 1418 if(b != nil) 1419 runtime_lfstackpush(&work.empty, &b->node); 1420 b = (Workbuf*)runtime_lfstackpop(&work.full); 1421 if(b != nil || work.nproc == 1) 1422 return b; 1423 1424 m = runtime_m(); 1425 runtime_xadd(&work.nwait, +1); 1426 for(i=0;; i++) { 1427 if(work.full != 0) { 1428 runtime_xadd(&work.nwait, -1); 1429 b = (Workbuf*)runtime_lfstackpop(&work.full); 1430 if(b != nil) 1431 return b; 1432 runtime_xadd(&work.nwait, +1); 1433 } 1434 if(work.nwait == work.nproc) 1435 return nil; 1436 if(i < 10) { 1437 m->gcstats.nprocyield++; 1438 runtime_procyield(20); 1439 } else if(i < 20) { 1440 m->gcstats.nosyield++; 1441 runtime_osyield(); 1442 } else { 1443 m->gcstats.nsleep++; 1444 runtime_usleep(100); 1445 } 1446 } 1447 } 1448 1449 static Workbuf* 1450 handoff(Workbuf *b) 1451 { 1452 M *m; 1453 int32 n; 1454 Workbuf *b1; 1455 1456 m = runtime_m(); 1457 1458 // Make new buffer with half of b's pointers. 1459 b1 = getempty(nil); 1460 n = b->nobj/2; 1461 b->nobj -= n; 1462 b1->nobj = n; 1463 runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]); 1464 m->gcstats.nhandoff++; 1465 m->gcstats.nhandoffcnt += n; 1466 1467 // Put b on full list - let first half of b get stolen. 
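	// (Illustration: with b->nobj == 10, n == 5 objects are copied into b1,
	// b is published on work.full still holding the other 5 for an idle
	// helper to take via getfull, and the caller keeps scanning from b1.)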
1468 runtime_lfstackpush(&work.full, &b->node); 1469 return b1; 1470 } 1471 1472 static void 1473 addstackroots(G *gp, Workbuf **wbufp) 1474 { 1475 switch(gp->status){ 1476 default: 1477 runtime_printf("unexpected G.status %d (goroutine %p %D)\n", gp->status, gp, gp->goid); 1478 runtime_throw("mark - bad status"); 1479 case Gdead: 1480 return; 1481 case Grunning: 1482 runtime_throw("mark - world not stopped"); 1483 case Grunnable: 1484 case Gsyscall: 1485 case Gwaiting: 1486 break; 1487 } 1488 1489 #ifdef USING_SPLIT_STACK 1490 M *mp; 1491 void* sp; 1492 size_t spsize; 1493 void* next_segment; 1494 void* next_sp; 1495 void* initial_sp; 1496 1497 if(gp == runtime_g()) { 1498 // Scanning our own stack. 1499 sp = __splitstack_find(nil, nil, &spsize, &next_segment, 1500 &next_sp, &initial_sp); 1501 } else if((mp = gp->m) != nil && mp->helpgc) { 1502 // gchelper's stack is in active use and has no interesting pointers. 1503 return; 1504 } else { 1505 // Scanning another goroutine's stack. 1506 // The goroutine is usually asleep (the world is stopped). 1507 1508 // The exception is that if the goroutine is about to enter or might 1509 // have just exited a system call, it may be executing code such 1510 // as schedlock and may have needed to start a new stack segment. 1511 // Use the stack segment and stack pointer at the time of 1512 // the system call instead, since that won't change underfoot. 1513 if(gp->gcstack != nil) { 1514 sp = gp->gcstack; 1515 spsize = gp->gcstack_size; 1516 next_segment = gp->gcnext_segment; 1517 next_sp = gp->gcnext_sp; 1518 initial_sp = gp->gcinitial_sp; 1519 } else { 1520 sp = __splitstack_find_context(&gp->stack_context[0], 1521 &spsize, &next_segment, 1522 &next_sp, &initial_sp); 1523 } 1524 } 1525 if(sp != nil) { 1526 enqueue1(wbufp, (Obj){sp, spsize, 0}); 1527 while((sp = __splitstack_find(next_segment, next_sp, 1528 &spsize, &next_segment, 1529 &next_sp, &initial_sp)) != nil) 1530 enqueue1(wbufp, (Obj){sp, spsize, 0}); 1531 } 1532 #else 1533 M *mp; 1534 byte* bottom; 1535 byte* top; 1536 1537 if(gp == runtime_g()) { 1538 // Scanning our own stack. 1539 bottom = (byte*)&gp; 1540 } else if((mp = gp->m) != nil && mp->helpgc) { 1541 // gchelper's stack is in active use and has no interesting pointers. 1542 return; 1543 } else { 1544 // Scanning another goroutine's stack. 1545 // The goroutine is usually asleep (the world is stopped). 
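		// Use the stack pointer recorded for the goroutine (gcnext_sp);
		// the ordered enqueue below handles stacks growing in either
		// direction.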
1546 bottom = (byte*)gp->gcnext_sp; 1547 if(bottom == nil) 1548 return; 1549 } 1550 top = (byte*)gp->gcinitial_sp + gp->gcstack_size; 1551 if(top > bottom) 1552 enqueue1(wbufp, (Obj){bottom, top - bottom, 0}); 1553 else 1554 enqueue1(wbufp, (Obj){top, bottom - top, 0}); 1555 #endif 1556 } 1557 1558 void 1559 runtime_queuefinalizer(void *p, FuncVal *fn, const FuncType *ft, const PtrType *ot) 1560 { 1561 FinBlock *block; 1562 Finalizer *f; 1563 1564 runtime_lock(&finlock); 1565 if(finq == nil || finq->cnt == finq->cap) { 1566 if(finc == nil) { 1567 finc = runtime_persistentalloc(FinBlockSize, 0, &mstats.gc_sys); 1568 finc->cap = (FinBlockSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1; 1569 finc->alllink = allfin; 1570 allfin = finc; 1571 } 1572 block = finc; 1573 finc = block->next; 1574 block->next = finq; 1575 finq = block; 1576 } 1577 f = &finq->fin[finq->cnt]; 1578 finq->cnt++; 1579 f->fn = fn; 1580 f->ft = ft; 1581 f->ot = ot; 1582 f->arg = p; 1583 runtime_fingwake = true; 1584 runtime_unlock(&finlock); 1585 } 1586 1587 void 1588 runtime_iterate_finq(void (*callback)(FuncVal*, void*, const FuncType*, const PtrType*)) 1589 { 1590 FinBlock *fb; 1591 Finalizer *f; 1592 int32 i; 1593 1594 for(fb = allfin; fb; fb = fb->alllink) { 1595 for(i = 0; i < fb->cnt; i++) { 1596 f = &fb->fin[i]; 1597 callback(f->fn, f->arg, f->ft, f->ot); 1598 } 1599 } 1600 } 1601 1602 void 1603 runtime_MSpan_EnsureSwept(MSpan *s) 1604 { 1605 M *m = runtime_m(); 1606 G *g = runtime_g(); 1607 uint32 sg; 1608 1609 // Caller must disable preemption. 1610 // Otherwise when this function returns the span can become unswept again 1611 // (if GC is triggered on another goroutine). 1612 if(m->locks == 0 && m->mallocing == 0 && g != m->g0) 1613 runtime_throw("MSpan_EnsureSwept: m is not locked"); 1614 1615 sg = runtime_mheap.sweepgen; 1616 if(runtime_atomicload(&s->sweepgen) == sg) 1617 return; 1618 if(runtime_cas(&s->sweepgen, sg-2, sg-1)) { 1619 runtime_MSpan_Sweep(s); 1620 return; 1621 } 1622 // unfortunate condition, and we don't have efficient means to wait 1623 while(runtime_atomicload(&s->sweepgen) != sg) 1624 runtime_osyield(); 1625 } 1626 1627 // Sweep frees or collects finalizers for blocks not marked in the mark phase. 1628 // It clears the mark bits in preparation for the next GC round. 1629 // Returns true if the span was returned to heap. 1630 bool 1631 runtime_MSpan_Sweep(MSpan *s) 1632 { 1633 M *m; 1634 int32 cl, n, npages, nfree; 1635 uintptr size, off, *bitp, shift, bits; 1636 uint32 sweepgen; 1637 byte *p; 1638 MCache *c; 1639 byte *arena_start; 1640 MLink head, *end; 1641 byte *type_data; 1642 byte compression; 1643 uintptr type_data_inc; 1644 MLink *x; 1645 Special *special, **specialp, *y; 1646 bool res, sweepgenset; 1647 1648 m = runtime_m(); 1649 1650 // It's critical that we enter this function with preemption disabled, 1651 // GC must not start while we are in the middle of this function. 1652 if(m->locks == 0 && m->mallocing == 0 && runtime_g() != m->g0) 1653 runtime_throw("MSpan_Sweep: m is not locked"); 1654 sweepgen = runtime_mheap.sweepgen; 1655 if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) { 1656 runtime_printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n", 1657 s->state, s->sweepgen, sweepgen); 1658 runtime_throw("MSpan_Sweep: bad span state"); 1659 } 1660 arena_start = runtime_mheap.arena_start; 1661 cl = s->sizeclass; 1662 size = s->elemsize; 1663 if(cl == 0) { 1664 n = 1; 1665 } else { 1666 // Chunk full of small blocks. 
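		// (Illustrative numbers: a 48-byte size class carved from one 8K
		// page yields n = 8192/48 = 170 objects to examine below.)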
1667 npages = runtime_class_to_allocnpages[cl]; 1668 n = (npages << PageShift) / size; 1669 } 1670 res = false; 1671 nfree = 0; 1672 end = &head; 1673 c = m->mcache; 1674 sweepgenset = false; 1675 1676 // mark any free objects in this span so we don't collect them 1677 for(x = s->freelist; x != nil; x = x->next) { 1678 // This is markonly(x) but faster because we don't need 1679 // atomic access and we're guaranteed to be pointing at 1680 // the head of a valid object. 1681 off = (uintptr*)x - (uintptr*)runtime_mheap.arena_start; 1682 bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; 1683 shift = off % wordsPerBitmapWord; 1684 *bitp |= bitMarked<<shift; 1685 } 1686 1687 // Unlink & free special records for any objects we're about to free. 1688 specialp = &s->specials; 1689 special = *specialp; 1690 while(special != nil) { 1691 // A finalizer can be set for an inner byte of an object, find object beginning. 1692 p = (byte*)(s->start << PageShift) + special->offset/size*size; 1693 off = (uintptr*)p - (uintptr*)arena_start; 1694 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; 1695 shift = off % wordsPerBitmapWord; 1696 bits = *bitp>>shift; 1697 if((bits & (bitAllocated|bitMarked)) == bitAllocated) { 1698 // Find the exact byte for which the special was setup 1699 // (as opposed to object beginning). 1700 p = (byte*)(s->start << PageShift) + special->offset; 1701 // about to free object: splice out special record 1702 y = special; 1703 special = special->next; 1704 *specialp = special; 1705 if(!runtime_freespecial(y, p, size, false)) { 1706 // stop freeing of object if it has a finalizer 1707 *bitp |= bitMarked << shift; 1708 } 1709 } else { 1710 // object is still live: keep special record 1711 specialp = &special->next; 1712 special = *specialp; 1713 } 1714 } 1715 1716 type_data = (byte*)s->types.data; 1717 type_data_inc = sizeof(uintptr); 1718 compression = s->types.compression; 1719 switch(compression) { 1720 case MTypes_Bytes: 1721 type_data += 8*sizeof(uintptr); 1722 type_data_inc = 1; 1723 break; 1724 } 1725 1726 // Sweep through n objects of given size starting at p. 1727 // This thread owns the span now, so it can manipulate 1728 // the block bitmap without atomic operations. 1729 p = (byte*)(s->start << PageShift); 1730 for(; n > 0; n--, p += size, type_data+=type_data_inc) { 1731 off = (uintptr*)p - (uintptr*)arena_start; 1732 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; 1733 shift = off % wordsPerBitmapWord; 1734 bits = *bitp>>shift; 1735 1736 if((bits & bitAllocated) == 0) 1737 continue; 1738 1739 if((bits & bitMarked) != 0) { 1740 *bitp &= ~(bitMarked<<shift); 1741 continue; 1742 } 1743 1744 if(runtime_debug.allocfreetrace) 1745 runtime_tracefree(p, size); 1746 1747 // Clear mark and scan bits. 1748 *bitp &= ~((bitScan|bitMarked)<<shift); 1749 1750 if(cl == 0) { 1751 // Free large span. 1752 runtime_unmarkspan(p, 1<<PageShift); 1753 s->needzero = 1; 1754 // important to set sweepgen before returning it to heap 1755 runtime_atomicstore(&s->sweepgen, sweepgen); 1756 sweepgenset = true; 1757 // See note about SysFault vs SysFree in malloc.goc. 1758 if(runtime_debug.efence) 1759 runtime_SysFault(p, size); 1760 else 1761 runtime_MHeap_Free(&runtime_mheap, s, 1); 1762 c->local_nlargefree++; 1763 c->local_largefree += size; 1764 runtime_xadd64(&mstats.next_gc, -(uint64)(size * (gcpercent + 100)/100)); 1765 res = true; 1766 } else { 1767 // Free small object. 
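			// As in the large-object path above, the bytes freed here earn
			// back next_gc credit of freed*(gcpercent+100)/100 (applied in
			// bulk once the loop finishes), which is the sweep-time
			// accounting described in the comment at the top of this file.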
1768 switch(compression) { 1769 case MTypes_Words: 1770 *(uintptr*)type_data = 0; 1771 break; 1772 case MTypes_Bytes: 1773 *(byte*)type_data = 0; 1774 break; 1775 } 1776 if(size > 2*sizeof(uintptr)) 1777 ((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll; // mark as "needs to be zeroed" 1778 else if(size > sizeof(uintptr)) 1779 ((uintptr*)p)[1] = 0; 1780 1781 end->next = (MLink*)p; 1782 end = (MLink*)p; 1783 nfree++; 1784 } 1785 } 1786 1787 // We need to set s->sweepgen = h->sweepgen only when all blocks are swept, 1788 // because of the potential for a concurrent free/SetFinalizer. 1789 // But we need to set it before we make the span available for allocation 1790 // (return it to heap or mcentral), because allocation code assumes that a 1791 // span is already swept if available for allocation. 1792 1793 if(!sweepgenset && nfree == 0) { 1794 // The span must be in our exclusive ownership until we update sweepgen, 1795 // check for potential races. 1796 if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) { 1797 runtime_printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n", 1798 s->state, s->sweepgen, sweepgen); 1799 runtime_throw("MSpan_Sweep: bad span state after sweep"); 1800 } 1801 runtime_atomicstore(&s->sweepgen, sweepgen); 1802 } 1803 if(nfree > 0) { 1804 c->local_nsmallfree[cl] += nfree; 1805 c->local_cachealloc -= nfree * size; 1806 runtime_xadd64(&mstats.next_gc, -(uint64)(nfree * size * (gcpercent + 100)/100)); 1807 res = runtime_MCentral_FreeSpan(&runtime_mheap.central[cl].mcentral, s, nfree, head.next, end); 1808 //MCentral_FreeSpan updates sweepgen 1809 } 1810 return res; 1811 } 1812 1813 // State of background sweep. 1814 // Protected by gclock. 1815 static struct 1816 { 1817 G* g; 1818 bool parked; 1819 1820 MSpan** spans; 1821 uint32 nspan; 1822 uint32 spanidx; 1823 } sweep; 1824 1825 // background sweeping goroutine 1826 static void 1827 bgsweep(void* dummy __attribute__ ((unused))) 1828 { 1829 runtime_g()->issystem = 1; 1830 for(;;) { 1831 while(runtime_sweepone() != (uintptr)-1) { 1832 gcstats.nbgsweep++; 1833 runtime_gosched(); 1834 } 1835 runtime_lock(&gclock); 1836 if(!runtime_mheap.sweepdone) { 1837 // It's possible if GC has happened between sweepone has 1838 // returned -1 and gclock lock. 
1839 runtime_unlock(&gclock); 1840 continue; 1841 } 1842 sweep.parked = true; 1843 runtime_g()->isbackground = true; 1844 runtime_parkunlock(&gclock, "GC sweep wait"); 1845 runtime_g()->isbackground = false; 1846 } 1847 } 1848 1849 // sweeps one span 1850 // returns number of pages returned to heap, or -1 if there is nothing to sweep 1851 uintptr 1852 runtime_sweepone(void) 1853 { 1854 M *m = runtime_m(); 1855 MSpan *s; 1856 uint32 idx, sg; 1857 uintptr npages; 1858 1859 // increment locks to ensure that the goroutine is not preempted 1860 // in the middle of sweep thus leaving the span in an inconsistent state for next GC 1861 m->locks++; 1862 sg = runtime_mheap.sweepgen; 1863 for(;;) { 1864 idx = runtime_xadd(&sweep.spanidx, 1) - 1; 1865 if(idx >= sweep.nspan) { 1866 runtime_mheap.sweepdone = true; 1867 m->locks--; 1868 return (uintptr)-1; 1869 } 1870 s = sweep.spans[idx]; 1871 if(s->state != MSpanInUse) { 1872 s->sweepgen = sg; 1873 continue; 1874 } 1875 if(s->sweepgen != sg-2 || !runtime_cas(&s->sweepgen, sg-2, sg-1)) 1876 continue; 1877 if(s->incache) 1878 runtime_throw("sweep of incache span"); 1879 npages = s->npages; 1880 if(!runtime_MSpan_Sweep(s)) 1881 npages = 0; 1882 m->locks--; 1883 return npages; 1884 } 1885 } 1886 1887 static void 1888 dumpspan(uint32 idx) 1889 { 1890 int32 sizeclass, n, npages, i, column; 1891 uintptr size; 1892 byte *p; 1893 byte *arena_start; 1894 MSpan *s; 1895 bool allocated; 1896 1897 s = runtime_mheap.allspans[idx]; 1898 if(s->state != MSpanInUse) 1899 return; 1900 arena_start = runtime_mheap.arena_start; 1901 p = (byte*)(s->start << PageShift); 1902 sizeclass = s->sizeclass; 1903 size = s->elemsize; 1904 if(sizeclass == 0) { 1905 n = 1; 1906 } else { 1907 npages = runtime_class_to_allocnpages[sizeclass]; 1908 n = (npages << PageShift) / size; 1909 } 1910 1911 runtime_printf("%p .. %p:\n", p, p+n*size); 1912 column = 0; 1913 for(; n>0; n--, p+=size) { 1914 uintptr off, *bitp, shift, bits; 1915 1916 off = (uintptr*)p - (uintptr*)arena_start; 1917 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; 1918 shift = off % wordsPerBitmapWord; 1919 bits = *bitp>>shift; 1920 1921 allocated = ((bits & bitAllocated) != 0); 1922 1923 for(i=0; (uint32)i<size; i+=sizeof(void*)) { 1924 if(column == 0) { 1925 runtime_printf("\t"); 1926 } 1927 if(i == 0) { 1928 runtime_printf(allocated ? "(" : "["); 1929 runtime_printf("%p: ", p+i); 1930 } else { 1931 runtime_printf(" "); 1932 } 1933 1934 runtime_printf("%p", *(void**)(p+i)); 1935 1936 if(i+sizeof(void*) >= size) { 1937 runtime_printf(allocated ? 
") " : "] "); 1938 } 1939 1940 column++; 1941 if(column == 8) { 1942 runtime_printf("\n"); 1943 column = 0; 1944 } 1945 } 1946 } 1947 runtime_printf("\n"); 1948 } 1949 1950 // A debugging function to dump the contents of memory 1951 void 1952 runtime_memorydump(void) 1953 { 1954 uint32 spanidx; 1955 1956 for(spanidx=0; spanidx<runtime_mheap.nspan; spanidx++) { 1957 dumpspan(spanidx); 1958 } 1959 } 1960 1961 void 1962 runtime_gchelper(void) 1963 { 1964 uint32 nproc; 1965 1966 runtime_m()->traceback = 2; 1967 gchelperstart(); 1968 1969 // parallel mark for over gc roots 1970 runtime_parfordo(work.markfor); 1971 1972 // help other threads scan secondary blocks 1973 scanblock(nil, true); 1974 1975 bufferList[runtime_m()->helpgc].busy = 0; 1976 nproc = work.nproc; // work.nproc can change right after we increment work.ndone 1977 if(runtime_xadd(&work.ndone, +1) == nproc-1) 1978 runtime_notewakeup(&work.alldone); 1979 runtime_m()->traceback = 0; 1980 } 1981 1982 static void 1983 cachestats(void) 1984 { 1985 MCache *c; 1986 P *p, **pp; 1987 1988 for(pp=runtime_allp; (p=*pp) != nil; pp++) { 1989 c = p->mcache; 1990 if(c==nil) 1991 continue; 1992 runtime_purgecachedstats(c); 1993 } 1994 } 1995 1996 static void 1997 flushallmcaches(void) 1998 { 1999 P *p, **pp; 2000 MCache *c; 2001 2002 // Flush MCache's to MCentral. 2003 for(pp=runtime_allp; (p=*pp) != nil; pp++) { 2004 c = p->mcache; 2005 if(c==nil) 2006 continue; 2007 runtime_MCache_ReleaseAll(c); 2008 } 2009 } 2010 2011 void 2012 runtime_updatememstats(GCStats *stats) 2013 { 2014 M *mp; 2015 MSpan *s; 2016 uint32 i; 2017 uint64 stacks_inuse, smallfree; 2018 uint64 *src, *dst; 2019 2020 if(stats) 2021 runtime_memclr((byte*)stats, sizeof(*stats)); 2022 stacks_inuse = 0; 2023 for(mp=runtime_allm; mp; mp=mp->alllink) { 2024 //stacks_inuse += mp->stackinuse*FixedStack; 2025 if(stats) { 2026 src = (uint64*)&mp->gcstats; 2027 dst = (uint64*)stats; 2028 for(i=0; i<sizeof(*stats)/sizeof(uint64); i++) 2029 dst[i] += src[i]; 2030 runtime_memclr((byte*)&mp->gcstats, sizeof(mp->gcstats)); 2031 } 2032 } 2033 mstats.stacks_inuse = stacks_inuse; 2034 mstats.mcache_inuse = runtime_mheap.cachealloc.inuse; 2035 mstats.mspan_inuse = runtime_mheap.spanalloc.inuse; 2036 mstats.sys = mstats.heap_sys + mstats.stacks_sys + mstats.mspan_sys + 2037 mstats.mcache_sys + mstats.buckhash_sys + mstats.gc_sys + mstats.other_sys; 2038 2039 // Calculate memory allocator stats. 2040 // During program execution we only count number of frees and amount of freed memory. 2041 // Current number of alive object in the heap and amount of alive heap memory 2042 // are calculated by scanning all spans. 2043 // Total number of mallocs is calculated as number of frees plus number of alive objects. 2044 // Similarly, total amount of allocated memory is calculated as amount of freed memory 2045 // plus amount of alive heap memory. 2046 mstats.alloc = 0; 2047 mstats.total_alloc = 0; 2048 mstats.nmalloc = 0; 2049 mstats.nfree = 0; 2050 for(i = 0; i < nelem(mstats.by_size); i++) { 2051 mstats.by_size[i].nmalloc = 0; 2052 mstats.by_size[i].nfree = 0; 2053 } 2054 2055 // Flush MCache's to MCentral. 2056 flushallmcaches(); 2057 2058 // Aggregate local stats. 2059 cachestats(); 2060 2061 // Scan all spans and count number of alive objects. 
static void
cachestats(void)
{
	MCache *c;
	P *p, **pp;

	for(pp=runtime_allp; (p=*pp) != nil; pp++) {
		c = p->mcache;
		if(c==nil)
			continue;
		runtime_purgecachedstats(c);
	}
}

static void
flushallmcaches(void)
{
	P *p, **pp;
	MCache *c;

	// Flush MCache's to MCentral.
	for(pp=runtime_allp; (p=*pp) != nil; pp++) {
		c = p->mcache;
		if(c==nil)
			continue;
		runtime_MCache_ReleaseAll(c);
	}
}

void
runtime_updatememstats(GCStats *stats)
{
	M *mp;
	MSpan *s;
	uint32 i;
	uint64 stacks_inuse, smallfree;
	uint64 *src, *dst;

	if(stats)
		runtime_memclr((byte*)stats, sizeof(*stats));
	stacks_inuse = 0;
	for(mp=runtime_allm; mp; mp=mp->alllink) {
		//stacks_inuse += mp->stackinuse*FixedStack;
		if(stats) {
			src = (uint64*)&mp->gcstats;
			dst = (uint64*)stats;
			for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
				dst[i] += src[i];
			runtime_memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
		}
	}
	mstats.stacks_inuse = stacks_inuse;
	mstats.mcache_inuse = runtime_mheap.cachealloc.inuse;
	mstats.mspan_inuse = runtime_mheap.spanalloc.inuse;
	mstats.sys = mstats.heap_sys + mstats.stacks_sys + mstats.mspan_sys +
		mstats.mcache_sys + mstats.buckhash_sys + mstats.gc_sys + mstats.other_sys;

	// Calculate memory allocator stats.
	// During program execution we only count the number of frees and the amount of freed memory.
	// The current number of live objects in the heap and the amount of live heap memory
	// are calculated by scanning all spans.
	// The total number of mallocs is calculated as the number of frees plus the number of live objects.
	// Similarly, the total amount of allocated memory is calculated as the amount of freed memory
	// plus the amount of live heap memory.
	mstats.alloc = 0;
	mstats.total_alloc = 0;
	mstats.nmalloc = 0;
	mstats.nfree = 0;
	for(i = 0; i < nelem(mstats.by_size); i++) {
		mstats.by_size[i].nmalloc = 0;
		mstats.by_size[i].nfree = 0;
	}

	// Flush MCache's to MCentral.
	flushallmcaches();

	// Aggregate local stats.
	cachestats();

	// Scan all spans and count the number of live objects.
	for(i = 0; i < runtime_mheap.nspan; i++) {
		s = runtime_mheap.allspans[i];
		if(s->state != MSpanInUse)
			continue;
		if(s->sizeclass == 0) {
			mstats.nmalloc++;
			mstats.alloc += s->elemsize;
		} else {
			mstats.nmalloc += s->ref;
			mstats.by_size[s->sizeclass].nmalloc += s->ref;
			mstats.alloc += s->ref*s->elemsize;
		}
	}

	// Aggregate by size class.
	smallfree = 0;
	mstats.nfree = runtime_mheap.nlargefree;
	for(i = 0; i < nelem(mstats.by_size); i++) {
		mstats.nfree += runtime_mheap.nsmallfree[i];
		mstats.by_size[i].nfree = runtime_mheap.nsmallfree[i];
		mstats.by_size[i].nmalloc += runtime_mheap.nsmallfree[i];
		smallfree += runtime_mheap.nsmallfree[i] * runtime_class_to_size[i];
	}
	mstats.nmalloc += mstats.nfree;

	// Calculate derived stats.
	mstats.total_alloc = mstats.alloc + runtime_mheap.largefree + smallfree;
	mstats.heap_alloc = mstats.alloc;
	mstats.heap_objects = mstats.nmalloc - mstats.nfree;
}
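// Editor's note: illustrative sketch only (not part of the original file).
// It restates the identities runtime_updatememstats uses to rebuild totals:
// only frees are counted during execution, live objects and live bytes come
// from the span scan, and everything else is derived from those two.
// The helper name and its parameters are hypothetical.
#if 0
static void
derive_totals(uint64 live_objects, uint64 live_bytes,
	uint64 freed_objects, uint64 freed_bytes)
{
	mstats.nfree = freed_objects;
	mstats.nmalloc = live_objects + freed_objects;		// every object was allocated exactly once
	mstats.total_alloc = live_bytes + freed_bytes;		// the same identity, in bytes
	mstats.heap_objects = mstats.nmalloc - mstats.nfree;	// i.e. live_objects
}
#endif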
// Structure of arguments passed to function gc().
// This allows the arguments to be passed via runtime_mcall.
struct gc_args
{
	int64 start_time;	// start time of GC in ns (just before stoptheworld)
	bool  eagersweep;
};

static void gc(struct gc_args *args);
static void mgc(G *gp);

static int32
readgogc(void)
{
	String s;
	const byte *p;

	s = runtime_getenv("GOGC");
	if(s.len == 0)
		return 100;
	p = s.str;
	if(s.len == 3 && runtime_strcmp((const char *)p, "off") == 0)
		return -1;
	return runtime_atoi(p, s.len);
}

// force = 1 - do GC regardless of current heap usage
// force = 2 - do GC and eager sweep
void
runtime_gc(int32 force)
{
	M *m;
	G *g;
	struct gc_args a;
	int32 i;

	// The atomic operations are not atomic if the uint64s
	// are not aligned on uint64 boundaries. This has been
	// a problem in the past.
	if((((uintptr)&work.empty) & 7) != 0)
		runtime_throw("runtime: gc work buffer is misaligned");
	if((((uintptr)&work.full) & 7) != 0)
		runtime_throw("runtime: gc work buffer is misaligned");

	// Make sure all registers are saved on stack so that
	// scanstack sees them.
	__builtin_unwind_init();

	// The gc is turned off (via enablegc) until
	// the bootstrap has completed.
	// Also, malloc gets called in the guts
	// of a number of libraries that might be
	// holding locks. To avoid priority inversion
	// problems, don't bother trying to run gc
	// while holding a lock. The next mallocgc
	// without a lock will do the gc instead.
	m = runtime_m();
	if(!mstats.enablegc || runtime_g() == m->g0 || m->locks > 0 || runtime_panicking)
		return;

	if(gcpercent == GcpercentUnknown) {	// first time through
		runtime_lock(&runtime_mheap.lock);
		if(gcpercent == GcpercentUnknown)
			gcpercent = readgogc();
		runtime_unlock(&runtime_mheap.lock);
	}
	if(gcpercent < 0)
		return;

	runtime_semacquire(&runtime_worldsema, false);
	if(force==0 && mstats.heap_alloc < mstats.next_gc) {
		// typically threads which lost the race to grab
		// worldsema exit here when gc is done.
		runtime_semrelease(&runtime_worldsema);
		return;
	}

	// Ok, we're doing it!  Stop everybody else.
	a.start_time = runtime_nanotime();
	a.eagersweep = force >= 2;
	m->gcing = 1;
	runtime_stoptheworld();

	clearpools();

	// Run gc on the g0 stack. We do this so that the g stack
	// we're currently running on will no longer change. Cuts
	// the root set down a bit (g0 stacks are not scanned, and
	// we don't need to scan gc's internal state). Also an
	// enabler for copyable stacks.
	for(i = 0; i < (runtime_debug.gctrace > 1 ? 2 : 1); i++) {
		if(i > 0)
			a.start_time = runtime_nanotime();
		// switch to g0, call gc(&a), then switch back
		g = runtime_g();
		g->param = &a;
		g->status = Gwaiting;
		g->waitreason = "garbage collection";
		runtime_mcall(mgc);
		m = runtime_m();
	}

	// all done
	m->gcing = 0;
	m->locks++;
	runtime_semrelease(&runtime_worldsema);
	runtime_starttheworld();
	m->locks--;

	// now that gc is done, kick off finalizer thread if needed
	if(!ConcurrentSweep) {
		// give the queued finalizers, if any, a chance to run
		runtime_gosched();
	} else {
		// For gccgo, let other goroutines run.
		runtime_gosched();
	}
}

static void
mgc(G *gp)
{
	gc(gp->param);
	gp->param = nil;
	gp->status = Grunning;
	runtime_gogo(gp);
}
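// Editor's note: illustrative sketch only (not part of the original file).
// runtime_gc above throws if work.empty or work.full is not 8-byte aligned:
// 64-bit atomic operations need naturally aligned uint64s on some targets,
// and the check catches layout regressions before they corrupt the work queue.
// The helper name is hypothetical.
#if 0
static void
check_uint64_aligned(void *p)
{
	if((((uintptr)p) & 7) != 0)
		runtime_throw("runtime: 64-bit atomic field is misaligned");
}
#endif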
static void
gc(struct gc_args *args)
{
	M *m;
	int64 t0, t1, t2, t3, t4;
	uint64 heap0, heap1, obj, ninstr;
	GCStats stats;
	uint32 i;
	// Eface eface;

	m = runtime_m();

	if(runtime_debug.allocfreetrace)
		runtime_tracegc();

	m->traceback = 2;
	t0 = args->start_time;
	work.tstart = args->start_time;

	if(CollectStats)
		runtime_memclr((byte*)&gcstats, sizeof(gcstats));

	m->locks++;	// disable gc during mallocs in parforalloc
	if(work.markfor == nil)
		work.markfor = runtime_parforalloc(MaxGcproc);
	m->locks--;

	t1 = 0;
	if(runtime_debug.gctrace)
		t1 = runtime_nanotime();

	// Sweep whatever has not yet been swept by bgsweep.
	while(runtime_sweepone() != (uintptr)-1)
		gcstats.npausesweep++;

	work.nwait = 0;
	work.ndone = 0;
	work.nproc = runtime_gcprocs();
	runtime_parforsetup(work.markfor, work.nproc, RootCount + runtime_allglen, false, &markroot_funcval);
	if(work.nproc > 1) {
		runtime_noteclear(&work.alldone);
		runtime_helpgc(work.nproc);
	}

	t2 = 0;
	if(runtime_debug.gctrace)
		t2 = runtime_nanotime();

	gchelperstart();
	runtime_parfordo(work.markfor);
	scanblock(nil, true);

	t3 = 0;
	if(runtime_debug.gctrace)
		t3 = runtime_nanotime();

	bufferList[m->helpgc].busy = 0;
	if(work.nproc > 1)
		runtime_notesleep(&work.alldone);

	cachestats();
	// next_gc calculation is tricky with concurrent sweep since we don't know the size of the live heap.
	// Estimate what the live heap size was after the previous GC (for tracing only).
	heap0 = mstats.next_gc*100/(gcpercent+100);
	// Conservatively set next_gc to a high value assuming that everything is live;
	// concurrent/lazy sweep will reduce this number while discovering new garbage.
	mstats.next_gc = mstats.heap_alloc+(mstats.heap_alloc-runtime_stacks_sys)*gcpercent/100;

	t4 = runtime_nanotime();
	mstats.last_gc = runtime_unixnanotime();	// must be Unix time to make sense to user
	mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
	mstats.pause_end[mstats.numgc%nelem(mstats.pause_end)] = mstats.last_gc;
	mstats.pause_total_ns += t4 - t0;
	mstats.numgc++;
	if(mstats.debuggc)
		runtime_printf("pause %D\n", t4-t0);
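	// Editor's note (not part of the original file): the four durations
	// printed below as "%D+%D+%D+%D us" are, roughly:
	//	t1-t0	from GC start (just before stoptheworld) through stopping
	//		the world and setup, until the leftover sweep begins,
	//	t2-t1	finishing the sweep of unswept spans plus parallel-for setup,
	//	t3-t2	the parallel mark phase (root marking and block scanning),
	//	t4-t3	waiting for helpers, flushing stats and computing next_gc.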
%D\n", gcstats.putempty, gcstats.getfull); 2342 2343 runtime_printf("markonly base lookup: bit %D word %D span %D\n", gcstats.markonly.foundbit, gcstats.markonly.foundword, gcstats.markonly.foundspan); 2344 runtime_printf("flushptrbuf base lookup: bit %D word %D span %D\n", gcstats.flushptrbuf.foundbit, gcstats.flushptrbuf.foundword, gcstats.flushptrbuf.foundspan); 2345 } 2346 } 2347 2348 // We cache current runtime_mheap.allspans array in sweep.spans, 2349 // because the former can be resized and freed. 2350 // Otherwise we would need to take heap lock every time 2351 // we want to convert span index to span pointer. 2352 2353 // Free the old cached array if necessary. 2354 if(sweep.spans && sweep.spans != runtime_mheap.allspans) 2355 runtime_SysFree(sweep.spans, sweep.nspan*sizeof(sweep.spans[0]), &mstats.other_sys); 2356 // Cache the current array. 2357 runtime_mheap.sweepspans = runtime_mheap.allspans; 2358 runtime_mheap.sweepgen += 2; 2359 runtime_mheap.sweepdone = false; 2360 sweep.spans = runtime_mheap.allspans; 2361 sweep.nspan = runtime_mheap.nspan; 2362 sweep.spanidx = 0; 2363 2364 // Temporary disable concurrent sweep, because we see failures on builders. 2365 if(ConcurrentSweep && !args->eagersweep) { 2366 runtime_lock(&gclock); 2367 if(sweep.g == nil) 2368 sweep.g = __go_go(bgsweep, nil); 2369 else if(sweep.parked) { 2370 sweep.parked = false; 2371 runtime_ready(sweep.g); 2372 } 2373 runtime_unlock(&gclock); 2374 } else { 2375 // Sweep all spans eagerly. 2376 while(runtime_sweepone() != (uintptr)-1) 2377 gcstats.npausesweep++; 2378 // Do an additional mProf_GC, because all 'free' events are now real as well. 2379 runtime_MProf_GC(); 2380 } 2381 2382 runtime_MProf_GC(); 2383 m->traceback = 0; 2384 } 2385 2386 extern uintptr runtime_sizeof_C_MStats 2387 __asm__ (GOSYM_PREFIX "runtime.Sizeof_C_MStats"); 2388 2389 void runtime_ReadMemStats(MStats *) 2390 __asm__ (GOSYM_PREFIX "runtime.ReadMemStats"); 2391 2392 void 2393 runtime_ReadMemStats(MStats *stats) 2394 { 2395 M *m; 2396 2397 // Have to acquire worldsema to stop the world, 2398 // because stoptheworld can only be used by 2399 // one goroutine at a time, and there might be 2400 // a pending garbage collection already calling it. 2401 runtime_semacquire(&runtime_worldsema, false); 2402 m = runtime_m(); 2403 m->gcing = 1; 2404 runtime_stoptheworld(); 2405 runtime_updatememstats(nil); 2406 // Size of the trailing by_size array differs between Go and C, 2407 // NumSizeClasses was changed, but we can not change Go struct because of backward compatibility. 2408 runtime_memmove(stats, &mstats, runtime_sizeof_C_MStats); 2409 m->gcing = 0; 2410 m->locks++; 2411 runtime_semrelease(&runtime_worldsema); 2412 runtime_starttheworld(); 2413 m->locks--; 2414 } 2415 2416 void runtime_debug_readGCStats(Slice*) 2417 __asm__("runtime_debug.readGCStats"); 2418 2419 void 2420 runtime_debug_readGCStats(Slice *pauses) 2421 { 2422 uint64 *p; 2423 uint32 i, n; 2424 2425 // Calling code in runtime/debug should make the slice large enough. 2426 if((size_t)pauses->cap < nelem(mstats.pause_ns)+3) 2427 runtime_throw("runtime: short slice passed to readGCStats"); 2428 2429 // Pass back: pauses, last gc (absolute time), number of gc, total pause ns. 2430 p = (uint64*)pauses->array; 2431 runtime_lock(&runtime_mheap.lock); 2432 n = mstats.numgc; 2433 if(n > nelem(mstats.pause_ns)) 2434 n = nelem(mstats.pause_ns); 2435 2436 // The pause buffer is circular. 
void runtime_debug_readGCStats(Slice*)
	__asm__("runtime_debug.readGCStats");

void
runtime_debug_readGCStats(Slice *pauses)
{
	uint64 *p;
	uint32 i, n;

	// Calling code in runtime/debug should make the slice large enough.
	if((size_t)pauses->cap < nelem(mstats.pause_ns)+3)
		runtime_throw("runtime: short slice passed to readGCStats");

	// Pass back: pauses, last gc (absolute time), number of gc, total pause ns.
	p = (uint64*)pauses->array;
	runtime_lock(&runtime_mheap.lock);
	n = mstats.numgc;
	if(n > nelem(mstats.pause_ns))
		n = nelem(mstats.pause_ns);

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%nelem(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	for(i=0; i<n; i++)
		p[i] = mstats.pause_ns[(mstats.numgc-1-i)%nelem(mstats.pause_ns)];

	p[n] = mstats.last_gc;
	p[n+1] = mstats.numgc;
	p[n+2] = mstats.pause_total_ns;
	runtime_unlock(&runtime_mheap.lock);
	pauses->__count = n+3;
}

int32
runtime_setgcpercent(int32 in)
{
	int32 out;

	runtime_lock(&runtime_mheap.lock);
	if(gcpercent == GcpercentUnknown)
		gcpercent = readgogc();
	out = gcpercent;
	if(in < 0)
		in = -1;
	gcpercent = in;
	runtime_unlock(&runtime_mheap.lock);
	return out;
}

static void
gchelperstart(void)
{
	M *m;

	m = runtime_m();
	if(m->helpgc < 0 || m->helpgc >= MaxGcproc)
		runtime_throw("gchelperstart: bad m->helpgc");
	if(runtime_xchg(&bufferList[m->helpgc].busy, 1))
		runtime_throw("gchelperstart: already busy");
	if(runtime_g() != m->g0)
		runtime_throw("gchelper not running on g0 stack");
}
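// Editor's note: illustrative sketch only (not part of the original file).
// gchelperstart claims its scan buffer with an atomic exchange: whoever swaps
// 0 -> 1 first owns the buffer, and a second claimant sees 1 come back and
// throws. The helper name is hypothetical.
#if 0
static void
claim_busy(uint32 *busy)
{
	if(runtime_xchg(busy, 1))
		runtime_throw("buffer already busy");
}
#endif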
static void
runfinq(void* dummy __attribute__ ((unused)))
{
	Finalizer *f;
	FinBlock *fb, *next;
	uint32 i;
	Eface ef;
	Iface iface;

	// This function blocks for long periods of time, and because it is written in C
	// we have no liveness information. Zero everything so that uninitialized pointers
	// do not cause memory leaks.
	f = nil;
	fb = nil;
	next = nil;
	i = 0;
	ef.__type_descriptor = nil;
	ef.__object = nil;

	// force flush to memory
	USED(&f);
	USED(&fb);
	USED(&next);
	USED(&i);
	USED(&ef);

	for(;;) {
		runtime_lock(&finlock);
		fb = finq;
		finq = nil;
		if(fb == nil) {
			runtime_fingwait = true;
			runtime_g()->isbackground = true;
			runtime_parkunlock(&finlock, "finalizer wait");
			runtime_g()->isbackground = false;
			continue;
		}
		runtime_unlock(&finlock);
		for(; fb; fb=next) {
			next = fb->next;
			for(i=0; i<(uint32)fb->cnt; i++) {
				const Type *fint;
				void *param;

				f = &fb->fin[i];
				fint = ((const Type**)f->ft->__in.array)[0];
				if((fint->__code & kindMask) == KindPtr) {
					// direct use of pointer
					param = &f->arg;
				} else if(((const InterfaceType*)fint)->__methods.__count == 0) {
					// convert to empty interface
					ef.__type_descriptor = (const Type*)f->ot;
					ef.__object = f->arg;
					param = &ef;
				} else {
					// convert to interface with methods
					iface.__methods = __go_convert_interface_2((const Type*)fint,
						(const Type*)f->ot,
						1);
					iface.__object = f->arg;
					if(iface.__methods == nil)
						runtime_throw("invalid type conversion in runfinq");
					param = &iface;
				}
				reflect_call(f->ft, f->fn, 0, 0, &param, nil);
				f->fn = nil;
				f->arg = nil;
				f->ot = nil;
			}
			fb->cnt = 0;
			runtime_lock(&finlock);
			fb->next = finc;
			finc = fb;
			runtime_unlock(&finlock);
		}

		// Zero everything that's dead, to avoid memory leaks.
		// See comment at top of function.
		f = nil;
		fb = nil;
		next = nil;
		i = 0;
		ef.__type_descriptor = nil;
		ef.__object = nil;
		runtime_gc(1);	// trigger another gc to clean up the finalized objects, if possible
	}
}

void
runtime_createfing(void)
{
	if(fing != nil)
		return;
	// Here we use gclock instead of finlock,
	// because newproc1 can allocate, which can cause on-demand span sweep,
	// which can queue finalizers, which would deadlock.
	runtime_lock(&gclock);
	if(fing == nil)
		fing = __go_go(runfinq, nil);
	runtime_unlock(&gclock);
}

G*
runtime_wakefing(void)
{
	G *res;

	res = nil;
	runtime_lock(&finlock);
	if(runtime_fingwait && runtime_fingwake) {
		runtime_fingwait = false;
		runtime_fingwake = false;
		res = fing;
	}
	runtime_unlock(&finlock);
	return res;
}

void
runtime_marknogc(void *v)
{
	uintptr *b, off, shift;

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;
	*b = (*b & ~(bitAllocated<<shift)) | bitBlockBoundary<<shift;
}

void
runtime_markscan(void *v)
{
	uintptr *b, off, shift;

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;
	*b |= bitScan<<shift;
}

// mark the block at v as freed.
void
runtime_markfreed(void *v)
{
	uintptr *b, off, shift;

	if(0)
		runtime_printf("markfreed %p\n", v);

	if((byte*)v > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markfreed: bad pointer");

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;
	*b = (*b & ~(bitMask<<shift)) | (bitAllocated<<shift);
}

// check that the block at v of size n is marked freed.
void
runtime_checkfreed(void *v, uintptr n)
{
	uintptr *b, bits, off, shift;

	if(!runtime_checking)
		return;

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		return;	// not allocated, so okay

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	bits = *b>>shift;
	if((bits & bitAllocated) != 0) {
		runtime_printf("checkfreed %p+%p: off=%p have=%p\n",
			v, n, off, bits & bitMask);
		runtime_throw("checkfreed: not freed");
	}
}
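// Editor's note: illustrative sketch only (not part of the original file).
// The routines above all use the same addressing scheme for the GC bitmap,
// which grows downward from arena_start: a pointer is converted to a word
// offset into the arena, that offset selects a bitmap word just below
// arena_start, and the remainder selects the bit group within that word.
// The helper name is hypothetical.
#if 0
static void
bitmap_locate(void *v, uintptr **bitp, uintptr *shift)
{
	uintptr off;

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	*bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	*shift = off % wordsPerBitmapWord;
}
#endif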
// mark the span of memory at v as having n blocks of the given size.
// if leftover is true, there is left over space at the end of the span.
void
runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
{
	uintptr *b, *b0, off, shift, i, x;
	byte *p;

	if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markspan: bad pointer");

	if(runtime_checking) {
		// bits should be all zero at the start
		off = (byte*)v + size - runtime_mheap.arena_start;
		b = (uintptr*)(runtime_mheap.arena_start - off/wordsPerBitmapWord);
		for(i = 0; i < size/PtrSize/wordsPerBitmapWord; i++) {
			if(b[i] != 0)
				runtime_throw("markspan: span bits not zero");
		}
	}

	p = v;
	if(leftover)	// mark a boundary just past end of last block too
		n++;

	b0 = nil;
	x = 0;
	for(; n-- > 0; p += size) {
		// Okay to use non-atomic ops here, because we control
		// the entire span, and each bitmap word has bits for only
		// one span, so no other goroutines are changing these
		// bitmap words.
		off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;	// word offset
		b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		if(b0 != b) {
			if(b0 != nil)
				*b0 = x;
			b0 = b;
			x = 0;
		}
		x |= bitAllocated<<shift;
	}
	*b0 = x;
}

// unmark the span of memory at v of length n bytes.
void
runtime_unmarkspan(void *v, uintptr n)
{
	uintptr *p, *b, off;

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("unmarkspan: bad pointer");

	p = v;
	off = p - (uintptr*)runtime_mheap.arena_start;	// word offset
	if(off % wordsPerBitmapWord != 0)
		runtime_throw("unmarkspan: unaligned pointer");
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	n /= PtrSize;
	if(n%wordsPerBitmapWord != 0)
		runtime_throw("unmarkspan: unaligned length");
	// Okay to use non-atomic ops here, because we control
	// the entire span, and each bitmap word has bits for only
	// one span, so no other goroutines are changing these
	// bitmap words.
	n /= wordsPerBitmapWord;
	while(n-- > 0)
		*b-- = 0;
}

void
runtime_MHeap_MapBits(MHeap *h)
{
	size_t page_size;

	// Caller has added extra mappings to the arena.
	// Add extra mappings of bitmap words as needed.
	// We allocate extra bitmap pieces in chunks of bitmapChunk.
	enum {
		bitmapChunk = 8192
	};
	uintptr n;

	n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
	n = ROUND(n, bitmapChunk);
	n = ROUND(n, PageSize);
	page_size = getpagesize();
	n = ROUND(n, page_size);
	if(h->bitmap_mapped >= n)
		return;

	runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped, h->arena_reserved, &mstats.gc_sys);
	h->bitmap_mapped = n;
}
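// Editor's note: illustrative sketch only (not part of the original file).
// runtime_MHeap_MapBits rounds the required bitmap size up three times with
// ROUND (to bitmapChunk, to PageSize, and to the OS page size). ROUND rounds
// up to a multiple of a power-of-two n; the equivalent arithmetic is shown
// below with a hypothetical helper name.
#if 0
static uintptr
round_up(uintptr x, uintptr n)
{
	return (x + n - 1) & ~(n - 1);	// n must be a power of two
}
#endif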
// typedmemmove copies a value of type t to dst from src.

extern void typedmemmove(const Type* td, void *dst, const void *src)
	__asm__ (GOSYM_PREFIX "reflect.typedmemmove");

void
typedmemmove(const Type* td, void *dst, const void *src)
{
	runtime_memmove(dst, src, td->__size);
}

// typedslicecopy copies a slice of elemType values from src to dst,
// returning the number of elements copied.

extern intgo typedslicecopy(const Type* elem, Slice dst, Slice src)
	__asm__ (GOSYM_PREFIX "reflect.typedslicecopy");

intgo
typedslicecopy(const Type* elem, Slice dst, Slice src)
{
	intgo n;
	void *dstp;
	void *srcp;

	n = dst.__count;
	if (n > src.__count)
		n = src.__count;
	if (n == 0)
		return 0;
	dstp = dst.__values;
	srcp = src.__values;
	memmove(dstp, srcp, (uintptr_t)n * elem->__size);
	return n;
}