github.com/prattmic/llgo-embedded@v0.0.0-20150820070356-41cfecea0e1e/third_party/gofrontend/libgo/runtime/mprof.goc

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch.h"
#include "malloc.h"
#include "defs.h"
#include "go-type.h"
#include "go-string.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock;

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

enum { MProf, BProf };  // profile types

// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
struct Bucket
{
	Bucket	*next;		// next in hash list
	Bucket	*allnext;	// next in list of all mbuckets/bbuckets
	int32	typ;
	// Generally unions can break precise GC,
	// this one is fine because it does not contain pointers.
	union
	{
		struct  // typ == MProf
		{
			// The following complex 3-stage scheme of stats accumulation
			// is required to obtain a consistent picture of mallocs and frees
			// for some point in time.
			// The problem is that mallocs come in real time, while frees
			// come only after a GC during concurrent sweeping. So if we would
			// naively count them, we would get a skew toward mallocs.
			//
			// Mallocs are accounted in recent stats.
			// Explicit frees are accounted in recent stats.
			// GC frees are accounted in prev stats.
			// After GC prev stats are added to final stats and
			// recent stats are moved into prev stats.
			uintptr	allocs;
			uintptr	frees;
			uintptr	alloc_bytes;
			uintptr	free_bytes;

			uintptr	prev_allocs;  // since last but one till last gc
			uintptr	prev_frees;
			uintptr	prev_alloc_bytes;
			uintptr	prev_free_bytes;

			uintptr	recent_allocs;  // since last gc till now
			uintptr	recent_frees;
			uintptr	recent_alloc_bytes;
			uintptr	recent_free_bytes;

		};
		struct  // typ == BProf
		{
			int64	count;
			int64	cycles;
		};
	};
	uintptr	hash;	// hash of size + stk
	uintptr	size;
	uintptr	nstk;
	Location stk[1];
};
enum {
	BuckHashSize = 179999,
};
static Bucket **buckhash;
static Bucket *mbuckets;  // memory profile buckets
static Bucket *bbuckets;  // blocking profile buckets
static uintptr bucketmem;

// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
stkbucket(int32 typ, uintptr size, Location *stk, int32 nstk, bool alloc)
{
	int32 i, j;
	uintptr h;
	Bucket *b;

	if(buckhash == nil) {
		buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0], &mstats.buckhash_sys);
		if(buckhash == nil)
			runtime_throw("runtime: cannot allocate memory");
	}

	// Hash stack.
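	// Each frame's PC is folded into h with add/shift/xor mixing; the size
	// is mixed in the same way, and the final steps below spread the
	// accumulated bits before h is reduced modulo BuckHashSize.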
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i].pc;
		h += h<<10;
		h ^= h>>6;
	}
	// hash in size
	h += size;
	h += h<<10;
	h ^= h>>6;
	// finalize
	h += h<<3;
	h ^= h>>11;

	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next) {
		if(b->typ == typ && b->hash == h && b->size == size && b->nstk == (uintptr)nstk) {
			for(j = 0; j < nstk; j++) {
				if(b->stk[j].pc != stk[j].pc ||
				   b->stk[j].lineno != stk[j].lineno ||
				   !__go_strings_equal(b->stk[j].filename, stk[j].filename))
					break;
			}
			if (j == nstk)
				return b;
		}
	}

	if(!alloc)
		return nil;

	b = runtime_persistentalloc(sizeof *b + nstk*sizeof stk[0], 0, &mstats.buckhash_sys);
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->typ = typ;
	b->hash = h;
	b->size = size;
	b->nstk = nstk;
	b->next = buckhash[i];
	buckhash[i] = b;
	if(typ == MProf) {
		b->allnext = mbuckets;
		mbuckets = b;
	} else {
		b->allnext = bbuckets;
		bbuckets = b;
	}
	return b;
}

static void
MProf_GC(void)
{
	Bucket *b;

	for(b=mbuckets; b; b=b->allnext) {
		b->allocs += b->prev_allocs;
		b->frees += b->prev_frees;
		b->alloc_bytes += b->prev_alloc_bytes;
		b->free_bytes += b->prev_free_bytes;

		b->prev_allocs = b->recent_allocs;
		b->prev_frees = b->recent_frees;
		b->prev_alloc_bytes = b->recent_alloc_bytes;
		b->prev_free_bytes = b->recent_free_bytes;

		b->recent_allocs = 0;
		b->recent_frees = 0;
		b->recent_alloc_bytes = 0;
		b->recent_free_bytes = 0;
	}
}

// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime_MProf_GC(void)
{
	runtime_lock(&proflock);
	MProf_GC();
	runtime_unlock(&proflock);
}

// Called by malloc to record a profiled block.
void
runtime_MProf_Malloc(void *p, uintptr size)
{
	Location stk[32];
	Bucket *b;
	int32 nstk;

	nstk = runtime_callers(1, stk, nelem(stk), false);
	runtime_lock(&proflock);
	b = stkbucket(MProf, size, stk, nstk, true);
	b->recent_allocs++;
	b->recent_alloc_bytes += size;
	runtime_unlock(&proflock);

	// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
	// This reduces potential contention and chances of deadlocks.
	// Since the object must be alive during call to MProf_Malloc,
	// it's fine to do this non-atomically.
	runtime_setprofilebucket(p, b);
}

// Called when freeing a profiled block.
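// If freed is true, the block was freed explicitly and the free is charged
// to the recent stats; otherwise it was found dead by the garbage collector
// during sweep and is charged to the prev stats (see the 3-stage accounting
// scheme described in Bucket).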
void
runtime_MProf_Free(Bucket *b, uintptr size, bool freed)
{
	runtime_lock(&proflock);
	if(freed) {
		b->recent_frees++;
		b->recent_free_bytes += size;
	} else {
		b->prev_frees++;
		b->prev_free_bytes += size;
	}
	runtime_unlock(&proflock);
}

int64 runtime_blockprofilerate;  // in CPU ticks

void runtime_SetBlockProfileRate(intgo) __asm__ (GOSYM_PREFIX "runtime.SetBlockProfileRate");

void
runtime_SetBlockProfileRate(intgo rate)
{
	int64 r;

	if(rate <= 0)
		r = 0;  // disable profiling
	else {
		// convert ns to cycles, use float64 to prevent overflow during multiplication
		r = (float64)rate*runtime_tickspersecond()/(1000*1000*1000);
		if(r == 0)
			r = 1;
	}
	runtime_atomicstore64((uint64*)&runtime_blockprofilerate, r);
}

void
runtime_blockevent(int64 cycles, int32 skip)
{
	int32 nstk;
	int64 rate;
	Location stk[32];
	Bucket *b;

	if(cycles <= 0)
		return;
	rate = runtime_atomicload64((uint64*)&runtime_blockprofilerate);
	if(rate <= 0 || (rate > cycles && runtime_fastrand1()%rate > cycles))
		return;

	nstk = runtime_callers(skip, stk, nelem(stk), false);
	runtime_lock(&proflock);
	b = stkbucket(BProf, 0, stk, nstk, true);
	b->count++;
	b->cycles += cycles;
	runtime_unlock(&proflock);
}

// Go interface to profile data. (Declared in debug.go)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;
	int64 alloc_objects, free_objects;
	uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
	uint32 i;

	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;
	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
		r->stk[i] = b->stk[i].pc;
	for(; i<nelem(r->stk); i++)
		r->stk[i] = 0;
}

func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
	Bucket *b;
	Record *r;
	bool clear;

	runtime_lock(&proflock);
	n = 0;
	clear = true;
	for(b=mbuckets; b; b=b->allnext) {
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
		if(b->allocs != 0 || b->frees != 0)
			clear = false;
	}
	if(clear) {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
		MProf_GC();
		MProf_GC();
		n = 0;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				n++;
	}
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (Record*)p.__values;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime_unlock(&proflock);
}

void
runtime_MProf_Mark(struct Workbuf **wbufp, void (*enqueue1)(struct Workbuf**, Obj))
{
	// buckhash is not allocated via mallocgc.
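	// Hand only the heads of the mbuckets and bbuckets lists to the
	// collector; the hash table array itself is obtained from
	// runtime_SysAlloc in stkbucket, so it is not enqueued here.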
	enqueue1(wbufp, (Obj){(byte*)&mbuckets, sizeof mbuckets, 0});
	enqueue1(wbufp, (Obj){(byte*)&bbuckets, sizeof bbuckets, 0});
}

void
runtime_iterate_memprof(void (*callback)(Bucket*, uintptr, Location*, uintptr, uintptr, uintptr))
{
	Bucket *b;

	runtime_lock(&proflock);
	for(b=mbuckets; b; b=b->allnext) {
		callback(b, b->nstk, b->stk, b->size, b->allocs, b->frees);
	}
	runtime_unlock(&proflock);
}

// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
	int64 count;
	int64 cycles;
	uintptr stk[32];
};

func BlockProfile(p Slice) (n int, ok bool) {
	Bucket *b;
	BRecord *r;
	int32 i;

	runtime_lock(&proflock);
	n = 0;
	for(b=bbuckets; b; b=b->allnext)
		n++;
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (BRecord*)p.__values;
		for(b=bbuckets; b; b=b->allnext, r++) {
			r->count = b->count;
			r->cycles = b->cycles;
			for(i=0; (uintptr)i<b->nstk && (uintptr)i<nelem(r->stk); i++)
				r->stk[i] = b->stk[i].pc;
			for(; (uintptr)i<nelem(r->stk); i++)
				r->stk[i] = 0;
		}
	}
	runtime_unlock(&proflock);
}

// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
	uintptr stk[32];
};

func ThreadCreateProfile(p Slice) (n int, ok bool) {
	TRecord *r;
	M *first, *mp;
	int32 i;

	first = runtime_atomicloadp(&runtime_allm);
	n = 0;
	for(mp=first; mp; mp=mp->alllink)
		n++;
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (TRecord*)p.__values;
		for(mp=first; mp; mp=mp->alllink) {
			for(i = 0; (uintptr)i < nelem(r->stk); i++) {
				r->stk[i] = mp->createstack[i].pc;
			}
			r++;
		}
	}
}

func Stack(b Slice, all bool) (n int) {
	byte *pc, *sp;
	bool enablegc;

	sp = runtime_getcallersp(&b);
	pc = (byte*)(uintptr)runtime_getcallerpc(&b);

	if(all) {
		runtime_semacquire(&runtime_worldsema, false);
		runtime_m()->gcing = 1;
		runtime_stoptheworld();
		enablegc = mstats.enablegc;
		mstats.enablegc = false;
	}

	if(b.__count == 0)
		n = 0;
	else {
		G* g = runtime_g();
		g->writebuf = (byte*)b.__values;
		g->writenbuf = b.__count;
		USED(pc);
		USED(sp);
		runtime_goroutineheader(g);
		runtime_traceback();
		runtime_printcreatedby(g);
		if(all)
			runtime_tracebackothers(g);
		n = b.__count - g->writenbuf;
		g->writebuf = nil;
		g->writenbuf = 0;
	}

	if(all) {
		runtime_m()->gcing = 0;
		mstats.enablegc = enablegc;
		runtime_semrelease(&runtime_worldsema);
		runtime_starttheworld();
	}
}

static void
saveg(G *gp, TRecord *r)
{
	int32 n, i;
	Location locstk[nelem(r->stk)];

	if(gp == runtime_g()) {
		n = runtime_callers(0, locstk, nelem(r->stk), false);
		for(i = 0; i < n; i++)
			r->stk[i] = locstk[i].pc;
	}
	else {
		// FIXME: Not implemented.
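		// Capturing the stack of a goroutine other than the current one
		// is not supported here, so record an empty trace.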
		n = 0;
	}
	if((size_t)n < nelem(r->stk))
		r->stk[n] = 0;
}

func GoroutineProfile(b Slice) (n int, ok bool) {
	uintptr i;
	TRecord *r;
	G *gp;

	ok = false;
	n = runtime_gcount();
	if(n <= b.__count) {
		runtime_semacquire(&runtime_worldsema, false);
		runtime_m()->gcing = 1;
		runtime_stoptheworld();

		n = runtime_gcount();
		if(n <= b.__count) {
			G* g = runtime_g();
			ok = true;
			r = (TRecord*)b.__values;
			saveg(g, r++);
			for(i = 0; i < runtime_allglen; i++) {
				gp = runtime_allg[i];
				if(gp == g || gp->status == Gdead)
					continue;
				saveg(gp, r++);
			}
		}

		runtime_m()->gcing = 0;
		runtime_semrelease(&runtime_worldsema);
		runtime_starttheworld();
	}
}

// Tracing of alloc/free/gc.

static Lock tracelock;

static const char*
typeinfoname(int32 typeinfo)
{
	if(typeinfo == TypeInfo_SingleObject)
		return "single object";
	else if(typeinfo == TypeInfo_Array)
		return "array";
	else if(typeinfo == TypeInfo_Chan)
		return "channel";
	runtime_throw("typeinfoname: unknown type info");
	return nil;
}

void
runtime_tracealloc(void *p, uintptr size, uintptr typ)
{
	const char *name;
	Type *type;

	runtime_lock(&tracelock);
	runtime_m()->traceback = 2;
	type = (Type*)(typ & ~3);
	name = typeinfoname(typ & 3);
	if(type == nil)
		runtime_printf("tracealloc(%p, %p, %s)\n", p, size, name);
	else
		runtime_printf("tracealloc(%p, %p, %s of %S)\n", p, size, name, *type->__reflection);
	if(runtime_m()->curg == nil || runtime_g() == runtime_m()->curg) {
		runtime_goroutineheader(runtime_g());
		runtime_traceback();
	} else {
		runtime_goroutineheader(runtime_m()->curg);
		runtime_traceback();
	}
	runtime_printf("\n");
	runtime_m()->traceback = 0;
	runtime_unlock(&tracelock);
}

void
runtime_tracefree(void *p, uintptr size)
{
	runtime_lock(&tracelock);
	runtime_m()->traceback = 2;
	runtime_printf("tracefree(%p, %p)\n", p, size);
	runtime_goroutineheader(runtime_g());
	runtime_traceback();
	runtime_printf("\n");
	runtime_m()->traceback = 0;
	runtime_unlock(&tracelock);
}

void
runtime_tracegc(void)
{
	runtime_lock(&tracelock);
	runtime_m()->traceback = 2;
	runtime_printf("tracegc()\n");
	// running on m->g0 stack; show all non-g0 goroutines
	runtime_tracebackothers(runtime_g());
	runtime_printf("end tracegc\n");
	runtime_printf("\n");
	runtime_m()->traceback = 0;
	runtime_unlock(&tracelock);
}