github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/pkg/runtime/mprof.goc

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "defs_GOOS_GOARCH.h"
#include "type.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock;

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

enum { MProf, BProf };  // profile types

// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
	Bucket	*next;		// next in hash list
	Bucket	*allnext;	// next in list of all mbuckets/bbuckets
	int32	typ;
	// Generally unions can break precise GC,
	// but this one is fine because it does not contain pointers.
	union
	{
		struct  // typ == MProf
		{
			uintptr	allocs;
			uintptr	frees;
			uintptr	alloc_bytes;
			uintptr	free_bytes;
			uintptr	recent_allocs;  // since last gc
			uintptr	recent_frees;
			uintptr	recent_alloc_bytes;
			uintptr	recent_free_bytes;
		};
		struct  // typ == BProf
		{
			int64	count;
			int64	cycles;
		};
	};
	uintptr	hash;
	uintptr	nstk;
	uintptr	stk[1];
};
enum {
	BuckHashSize = 179999,
};
static Bucket **buckhash;
static Bucket *mbuckets;  // memory profile buckets
static Bucket *bbuckets;  // blocking profile buckets
static uintptr bucketmem;

// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc)
{
	int32 i;
	uintptr h;
	Bucket *b;

	if(buckhash == nil) {
		buckhash = runtime·SysAlloc(BuckHashSize*sizeof buckhash[0]);
		if(buckhash == nil)
			runtime·throw("runtime: cannot allocate memory");
		mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
	}

	// Hash stack.
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i];
		h += h<<10;
		h ^= h>>6;
	}
	h += h<<3;
	h ^= h>>11;

	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next)
		if(b->typ == typ && b->hash == h && b->nstk == nstk &&
		   runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
			return b;

	if(!alloc)
		return nil;

	b = runtime·persistentalloc(sizeof *b + nstk*sizeof stk[0], 0);
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime·memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->typ = typ;
	b->hash = h;
	b->nstk = nstk;
	b->next = buckhash[i];
	buckhash[i] = b;
	if(typ == MProf) {
		b->allnext = mbuckets;
		mbuckets = b;
	} else {
		b->allnext = bbuckets;
		bbuckets = b;
	}
	return b;
}
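// For reference: the mixing above is a shift-add-xor hash in the style
// of Jenkins's one-at-a-time hash. A minimal standalone Go sketch of
// the same function (hashStack and the fake stack values are
// illustrative only, not part of this file):
//
//	package main
//
//	import "fmt"
//
//	// hashStack mirrors stkbucket's mixing: fold in each return PC,
//	// then apply a final avalanche before the table-size mod.
//	func hashStack(stk []uintptr) uintptr {
//		var h uintptr
//		for _, pc := range stk {
//			h += pc
//			h += h << 10
//			h ^= h >> 6
//		}
//		h += h << 3
//		h ^= h >> 11
//		return h
//	}
//
//	func main() {
//		stk := []uintptr{0x401000, 0x402abc} // fake call stack
//		fmt.Printf("bucket %d\n", hashStack(stk)%179999)
//	}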
static void
MProf_GC(void)
{
	Bucket *b;

	for(b=mbuckets; b; b=b->allnext) {
		b->allocs += b->recent_allocs;
		b->frees += b->recent_frees;
		b->alloc_bytes += b->recent_alloc_bytes;
		b->free_bytes += b->recent_free_bytes;
		b->recent_allocs = 0;
		b->recent_frees = 0;
		b->recent_alloc_bytes = 0;
		b->recent_free_bytes = 0;
	}
}

// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime·MProf_GC(void)
{
	runtime·lock(&proflock);
	MProf_GC();
	runtime·unlock(&proflock);
}

// Map from pointer to Bucket* that allocated it.
// Three levels:
//	Linked-list hash table for top N-AddrHashShift bits.
//	Array index for next AddrDenseBits bits.
//	Linked list for next AddrHashShift-AddrDenseBits bits.
// This is more efficient than using a general map,
// because of the typical clustering of the pointer keys.

typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;

enum {
	AddrHashBits = 12,	// good for 4GB of used address space
	AddrHashShift = 20,	// each AddrHash knows about 1MB of address space
	AddrDenseBits = 8,	// good for a profiling rate of 4096 bytes
};

struct AddrHash
{
	AddrHash *next;	// next in top-level hash table linked list
	uintptr addr;	// addr>>20
	AddrEntry *dense[1<<AddrDenseBits];
};

struct AddrEntry
{
	AddrEntry *next;	// next in bottom-level linked list
	uint32 addr;
	Bucket *b;
};

static AddrHash **addrhash;	// points to (AddrHash*)[1<<AddrHashBits]
static AddrEntry *addrfree;
static uintptr addrmem;

// Multiplicative hash function:
// hashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
// This is a good multiplier as suggested in CLR, Knuth. The hash
// value is taken to be the top AddrHashBits bits of the bottom 32 bits
// of the multiplied value.
enum {
	HashMultiplier = 2654435769U
};

// Set the bucket associated with addr to b.
static void
setaddrbucket(uintptr addr, Bucket *b)
{
	int32 i;
	uint32 h;
	AddrHash *ah;
	AddrEntry *e;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;

	ah = runtime·persistentalloc(sizeof *ah, 0);
	addrmem += sizeof *ah;
	ah->next = addrhash[h];
	ah->addr = addr>>AddrHashShift;
	addrhash[h] = ah;

found:
	if((e = addrfree) == nil) {
		e = runtime·persistentalloc(64*sizeof *e, 0);
		addrmem += 64*sizeof *e;
		for(i=0; i+1<64; i++)
			e[i].next = &e[i+1];
		e[63].next = nil;
	}
	addrfree = e->next;
	e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
	e->b = b;
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	e->next = ah->dense[h];
	ah->dense[h] = e;
}

// Get the bucket associated with addr and clear the association.
static Bucket*
getaddrbucket(uintptr addr)
{
	uint32 h;
	AddrHash *ah;
	AddrEntry *e, **l;
	Bucket *b;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;
	return nil;

found:
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
		if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
			*l = e->next;
			b = e->b;
			e->next = addrfree;
			addrfree = e;
			return b;
		}
	}
	return nil;
}
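// For reference: a minimal Go sketch of how setaddrbucket and
// getaddrbucket carve up an address across the three levels, with the
// constants copied from the enums above (splitAddr and the fake heap
// address are illustrative only, not part of this file):
//
//	package main
//
//	import "fmt"
//
//	const (
//		addrHashBits   = 12
//		addrHashShift  = 20
//		addrDenseBits  = 8
//		hashMultiplier = 2654435769 // bottom 32 bits of ((sqrt(5)-1)/2)*2^32
//	)
//
//	// splitAddr returns the top-level hash slot, the dense-array
//	// index, and the low bits that AddrEntry stores (complemented).
//	func splitAddr(addr uintptr) (slot, dense, low uint32) {
//		slot = uint32((addr>>addrHashShift)*hashMultiplier) >> (32 - addrHashBits)
//		dense = uint32(addr>>(addrHashShift-addrDenseBits)) & (1<<addrDenseBits - 1)
//		low = uint32(addr) & (1<<addrHashShift - 1)
//		return
//	}
//
//	func main() {
//		fmt.Println(splitAddr(0xc2080f4010)) // fake 64-bit heap address
//	}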
// Called by malloc to record a profiled block.
void
runtime·MProf_Malloc(void *p, uintptr size)
{
	int32 nstk;
	uintptr stk[32];
	Bucket *b;

	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	nstk = runtime·callers(1, stk, 32);
	runtime·lock(&proflock);
	b = stkbucket(MProf, stk, nstk, true);
	b->recent_allocs++;
	b->recent_alloc_bytes += size;
	setaddrbucket((uintptr)p, b);
	runtime·unlock(&proflock);
	m->nomemprof--;
}

// Called when freeing a profiled block.
void
runtime·MProf_Free(void *p, uintptr size)
{
	Bucket *b;

	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	runtime·lock(&proflock);
	b = getaddrbucket((uintptr)p);
	if(b != nil) {
		b->recent_frees++;
		b->recent_free_bytes += size;
	}
	runtime·unlock(&proflock);
	m->nomemprof--;
}

int64 runtime·blockprofilerate;  // in CPU ticks

void
runtime·SetBlockProfileRate(intgo rate)
{
	int64 r;

	if(rate <= 0)
		r = 0;  // disable profiling
	else {
		// convert ns to cycles, use float64 to prevent overflow during multiplication
		r = (float64)rate*runtime·tickspersecond()/(1000*1000*1000);
		if(r == 0)
			r = 1;
	}
	runtime·atomicstore64((uint64*)&runtime·blockprofilerate, r);
}

void
runtime·blockevent(int64 cycles, int32 skip)
{
	int32 nstk;
	int64 rate;
	uintptr stk[32];
	Bucket *b;

	if(cycles <= 0)
		return;
	rate = runtime·atomicload64((uint64*)&runtime·blockprofilerate);
	if(rate <= 0 || (rate > cycles && runtime·fastrand1()%rate > cycles))
		return;

	nstk = runtime·callers(skip, stk, 32);
	runtime·lock(&proflock);
	b = stkbucket(BProf, stk, nstk, true);
	b->count++;
	b->cycles += cycles;
	runtime·unlock(&proflock);
}

// Go interface to profile data. (Declared in debug.go)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;
	int64 alloc_objects, free_objects;
	uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
	int32 i;

	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;
	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
		r->stk[i] = b->stk[i];
	for(; i<nelem(r->stk); i++)
		r->stk[i] = 0;
}

func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
	Bucket *b;
	Record *r;
	bool clear;

	runtime·lock(&proflock);
	n = 0;
	clear = true;
	for(b=mbuckets; b; b=b->allnext) {
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
		if(b->allocs != 0 || b->frees != 0)
			clear = false;
	}
	if(clear) {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
		MProf_GC();
		n = 0;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				n++;
	}
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (Record*)p.array;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime·unlock(&proflock);
}
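// For illustration: a minimal caller-side sketch of the n/ok contract
// implemented above, via the public runtime.MemProfile API that this
// func backs. If the slice is too small, n reports the required
// length and ok is false, so callers grow the slice and retry:
//
//	package main
//
//	import (
//		"fmt"
//		"runtime"
//	)
//
//	func main() {
//		var p []runtime.MemProfileRecord
//		n, ok := runtime.MemProfile(nil, true)
//		for !ok {
//			// Profile may grow between calls; allocate headroom and retry.
//			p = make([]runtime.MemProfileRecord, n+50)
//			n, ok = runtime.MemProfile(p, true)
//		}
//		fmt.Println("profile records:", n)
//	}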
// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
	int64 count;
	int64 cycles;
	uintptr stk[32];
};

func BlockProfile(p Slice) (n int, ok bool) {
	Bucket *b;
	BRecord *r;
	int32 i;

	runtime·lock(&proflock);
	n = 0;
	for(b=bbuckets; b; b=b->allnext)
		n++;
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (BRecord*)p.array;
		for(b=bbuckets; b; b=b->allnext, r++) {
			r->count = b->count;
			r->cycles = b->cycles;
			for(i=0; i<b->nstk && i<nelem(r->stk); i++)
				r->stk[i] = b->stk[i];
			for(; i<nelem(r->stk); i++)
				r->stk[i] = 0;
		}
	}
	runtime·unlock(&proflock);
}

// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
	uintptr stk[32];
};

func ThreadCreateProfile(p Slice) (n int, ok bool) {
	TRecord *r;
	M *first, *mp;

	first = runtime·atomicloadp(&runtime·allm);
	n = 0;
	for(mp=first; mp; mp=mp->alllink)
		n++;
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (TRecord*)p.array;
		for(mp=first; mp; mp=mp->alllink) {
			runtime·memmove(r->stk, mp->createstack, sizeof r->stk);
			r++;
		}
	}
}

func Stack(b Slice, all bool) (n int) {
	uintptr pc, sp;

	sp = runtime·getcallersp(&b);
	pc = (uintptr)runtime·getcallerpc(&b);

	if(all) {
		runtime·semacquire(&runtime·worldsema, false);
		m->gcing = 1;
		runtime·stoptheworld();
	}

	if(b.len == 0)
		n = 0;
	else {
		g->writebuf = (byte*)b.array;
		g->writenbuf = b.len;
		runtime·goroutineheader(g);
		runtime·traceback(pc, sp, 0, g);
		if(all)
			runtime·tracebackothers(g);
		n = b.len - g->writenbuf;
		g->writebuf = nil;
		g->writenbuf = 0;
	}

	if(all) {
		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}

static void
saveg(uintptr pc, uintptr sp, G *gp, TRecord *r)
{
	int32 n;

	n = runtime·gentraceback((uintptr)pc, (uintptr)sp, 0, gp, 0, r->stk, nelem(r->stk), nil, nil, false);
	if(n < nelem(r->stk))
		r->stk[n] = 0;
}

func GoroutineProfile(b Slice) (n int, ok bool) {
	uintptr pc, sp;
	TRecord *r;
	G *gp;

	sp = runtime·getcallersp(&b);
	pc = (uintptr)runtime·getcallerpc(&b);

	ok = false;
	n = runtime·gcount();
	if(n <= b.len) {
		runtime·semacquire(&runtime·worldsema, false);
		m->gcing = 1;
		runtime·stoptheworld();

		n = runtime·gcount();
		if(n <= b.len) {
			ok = true;
			r = (TRecord*)b.array;
			saveg(pc, sp, g, r++);
			for(gp = runtime·allg; gp != nil; gp = gp->alllink) {
				if(gp == g || gp->status == Gdead)
					continue;
				saveg(gp->sched.pc, gp->sched.sp, gp, r++);
			}
		}

		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}

void
runtime·mprofinit(void)
{
	addrhash = runtime·persistentalloc((1<<AddrHashBits)*sizeof *addrhash, 0);
}
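// For illustration: GoroutineProfile follows the same two-call
// contract from the caller's side (ThreadCreateProfile likewise). A
// minimal sketch using only the public runtime API:
//
//	package main
//
//	import (
//		"fmt"
//		"runtime"
//	)
//
//	func main() {
//		p := make([]runtime.StackRecord, runtime.NumGoroutine()+8)
//		for {
//			n, ok := runtime.GoroutineProfile(p)
//			if ok {
//				fmt.Println("goroutines captured:", n)
//				return
//			}
//			p = make([]runtime.StackRecord, n+8) // count grew; retry
//		}
//	}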