github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/runtime/mprof.goc

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "defs_GOOS_GOARCH.h"
#include "type.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock, alloclock;

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

static byte *pool;        // memory allocation pool
static uintptr poolfree;  // number of bytes left in the pool
enum {
	Chunk = 32*PageSize,  // initial size of the pool
};

// Memory allocation local to this file.
// There is no way to return the allocated memory back to the OS.
static void*
allocate(uintptr size)
{
	void *v;

	if(size == 0)
		return nil;

	if(size >= Chunk/2)
		return runtime·SysAlloc(size);

	runtime·lock(&alloclock);
	if(size > poolfree) {
		pool = runtime·SysAlloc(Chunk);
		if(pool == nil)
			runtime·throw("runtime: cannot allocate memory");
		poolfree = Chunk;
	}
	v = pool;
	pool += size;
	poolfree -= size;
	runtime·unlock(&alloclock);
	return v;
}

enum { MProf, BProf };  // profile types

// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
	Bucket	*next;     // next in hash list
	Bucket	*allnext;  // next in list of all mbuckets/bbuckets
	int32	typ;
	// Generally unions can break precise GC,
	// this one is fine because it does not contain pointers.
	union
	{
		struct  // typ == MProf
		{
			uintptr	allocs;
			uintptr	frees;
			uintptr	alloc_bytes;
			uintptr	free_bytes;
			uintptr	recent_allocs;  // since last gc
			uintptr	recent_frees;
			uintptr	recent_alloc_bytes;
			uintptr	recent_free_bytes;
		};
		struct  // typ == BProf
		{
			int64	count;
			int64	cycles;
		};
	};
	uintptr	hash;
	uintptr	nstk;
	uintptr	stk[1];
};
enum {
	BuckHashSize = 179999,
};
static Bucket **buckhash;
static Bucket *mbuckets;  // memory profile buckets
static Bucket *bbuckets;  // blocking profile buckets
static uintptr bucketmem;
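
// Note: stkbucket (below) folds the raw PCs with a Jenkins-style
// one-at-a-time hash and chains buckets at buckhash[h%BuckHashSize].
// Each bucket is allocated with sizeof(Bucket) + nstk*sizeof(uintptr)
// bytes, so the stk[1] field above is really a variable-length array
// holding the recorded call stack.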

// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc)
{
	int32 i;
	uintptr h;
	Bucket *b;

	if(buckhash == nil) {
		buckhash = runtime·SysAlloc(BuckHashSize*sizeof buckhash[0]);
		if(buckhash == nil)
			runtime·throw("runtime: cannot allocate memory");
		mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
	}

	// Hash stack.
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i];
		h += h<<10;
		h ^= h>>6;
	}
	h += h<<3;
	h ^= h>>11;

	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next)
		if(b->typ == typ && b->hash == h && b->nstk == nstk &&
		   runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
			return b;

	if(!alloc)
		return nil;

	b = allocate(sizeof *b + nstk*sizeof stk[0]);
	if(b == nil)
		runtime·throw("runtime: cannot allocate memory");
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime·memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->typ = typ;
	b->hash = h;
	b->nstk = nstk;
	b->next = buckhash[i];
	buckhash[i] = b;
	if(typ == MProf) {
		b->allnext = mbuckets;
		mbuckets = b;
	} else {
		b->allnext = bbuckets;
		bbuckets = b;
	}
	return b;
}

static void
MProf_GC(void)
{
	Bucket *b;

	for(b=mbuckets; b; b=b->allnext) {
		b->allocs += b->recent_allocs;
		b->frees += b->recent_frees;
		b->alloc_bytes += b->recent_alloc_bytes;
		b->free_bytes += b->recent_free_bytes;
		b->recent_allocs = 0;
		b->recent_frees = 0;
		b->recent_alloc_bytes = 0;
		b->recent_free_bytes = 0;
	}
}

// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime·MProf_GC(void)
{
	runtime·lock(&proflock);
	MProf_GC();
	runtime·unlock(&proflock);
}

// Map from pointer to Bucket* that allocated it.
// Three levels:
//	Linked-list hash table for top N-AddrHashShift bits.
//	Array index for next AddrDenseBits bits.
//	Linked list for next AddrHashShift-AddrDenseBits bits.
// This is more efficient than using a general map,
// because of the typical clustering of the pointer keys.

typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;

enum {
	AddrHashBits = 12,	// good for 4GB of used address space
	AddrHashShift = 20,	// each AddrHash knows about 1MB of address space
	AddrDenseBits = 8,	// good for a profiling rate of 4096 bytes
};

struct AddrHash
{
	AddrHash *next;	// next in top-level hash table linked list
	uintptr addr;	// addr>>20
	AddrEntry *dense[1<<AddrDenseBits];
};

struct AddrEntry
{
	AddrEntry *next;	// next in bottom-level linked list
	uint32 addr;
	Bucket *b;
};

static AddrHash **addrhash;	// points to (AddrHash*)[1<<AddrHashBits]
static AddrEntry *addrfree;
static uintptr addrmem;

// Multiplicative hash function:
// hashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
// This is a good multiplier as suggested in CLR, Knuth.  The hash
// value is taken to be the top AddrHashBits bits of the bottom 32 bits
// of the multiplied value.
enum {
	HashMultiplier = 2654435769U
};
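
// Worked example (illustrative address): a profiled pointer
// addr = 0xABCDE123 is filed as follows:
//	addr>>AddrHashShift = 0xABC names the AddrHash; its chain in
//	addrhash[] is picked by the multiplicative hash of that value;
//	(addr>>12) & 0xFF = 0xDE indexes ah->dense;
//	the low 20 bits are stored complemented in e->addr
//	(~(addr & 0xFFFFF) = ~0xDE123) to identify the entry in that list.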

// Set the bucket associated with addr to b.
static void
setaddrbucket(uintptr addr, Bucket *b)
{
	int32 i;
	uint32 h;
	AddrHash *ah;
	AddrEntry *e;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;

	ah = allocate(sizeof *ah);
	addrmem += sizeof *ah;
	ah->next = addrhash[h];
	ah->addr = addr>>AddrHashShift;
	addrhash[h] = ah;

found:
	if((e = addrfree) == nil) {
		e = allocate(64*sizeof *e);
		addrmem += 64*sizeof *e;
		for(i=0; i+1<64; i++)
			e[i].next = &e[i+1];
		e[63].next = nil;
	}
	addrfree = e->next;
	e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
	e->b = b;
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	e->next = ah->dense[h];
	ah->dense[h] = e;
}

// Get the bucket associated with addr and clear the association.
static Bucket*
getaddrbucket(uintptr addr)
{
	uint32 h;
	AddrHash *ah;
	AddrEntry *e, **l;
	Bucket *b;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;
	return nil;

found:
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
		if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
			*l = e->next;
			b = e->b;
			e->next = addrfree;
			addrfree = e;
			return b;
		}
	}
	return nil;
}

// Called by malloc to record a profiled block.
void
runtime·MProf_Malloc(void *p, uintptr size)
{
	int32 nstk;
	uintptr stk[32];
	Bucket *b;

	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	nstk = runtime·callers(1, stk, 32);
	runtime·lock(&proflock);
	b = stkbucket(MProf, stk, nstk, true);
	b->recent_allocs++;
	b->recent_alloc_bytes += size;
	setaddrbucket((uintptr)p, b);
	runtime·unlock(&proflock);
	m->nomemprof--;
}

// Called when freeing a profiled block.
void
runtime·MProf_Free(void *p, uintptr size)
{
	Bucket *b;

	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	runtime·lock(&proflock);
	b = getaddrbucket((uintptr)p);
	if(b != nil) {
		b->recent_frees++;
		b->recent_free_bytes += size;
	}
	runtime·unlock(&proflock);
	m->nomemprof--;
}

int64 runtime·blockprofilerate;  // in CPU ticks

void
runtime·SetBlockProfileRate(intgo rate)
{
	runtime·atomicstore64((uint64*)&runtime·blockprofilerate, rate * runtime·tickspersecond() / (1000*1000*1000));
}
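
// The rate passed to SetBlockProfileRate is in nanoseconds and is stored
// above converted to CPU ticks. blockevent, below, always records a
// blocking event of at least blockprofilerate ticks and samples shorter
// ones with probability roughly cycles/rate, so on average about one
// event is recorded per rate ticks spent blocked.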

void
runtime·blockevent(int64 cycles, int32 skip)
{
	int32 nstk;
	int64 rate;
	uintptr stk[32];
	Bucket *b;

	if(cycles <= 0)
		return;
	rate = runtime·atomicload64((uint64*)&runtime·blockprofilerate);
	if(rate <= 0 || (rate > cycles && runtime·fastrand1()%rate > cycles))
		return;

	nstk = runtime·callers(skip, stk, 32);
	runtime·lock(&proflock);
	b = stkbucket(BProf, stk, nstk, true);
	b->count++;
	b->cycles += cycles;
	runtime·unlock(&proflock);
}

// Go interface to profile data.  (Declared in debug.go)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;
	int64 alloc_objects, free_objects;
	uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
	int32 i;

	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;
	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
		r->stk[i] = b->stk[i];
	for(; i<nelem(r->stk); i++)
		r->stk[i] = 0;
}

func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
	Bucket *b;
	Record *r;
	bool clear;

	runtime·lock(&proflock);
	n = 0;
	clear = true;
	for(b=mbuckets; b; b=b->allnext) {
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
		if(b->allocs != 0 || b->frees != 0)
			clear = false;
	}
	if(clear) {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
		MProf_GC();
		n = 0;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				n++;
	}
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (Record*)p.array;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime·unlock(&proflock);
}

// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
	int64 count;
	int64 cycles;
	uintptr stk[32];
};

func BlockProfile(p Slice) (n int, ok bool) {
	Bucket *b;
	BRecord *r;
	int32 i;

	runtime·lock(&proflock);
	n = 0;
	for(b=bbuckets; b; b=b->allnext)
		n++;
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (BRecord*)p.array;
		for(b=bbuckets; b; b=b->allnext, r++) {
			r->count = b->count;
			r->cycles = b->cycles;
			for(i=0; i<b->nstk && i<nelem(r->stk); i++)
				r->stk[i] = b->stk[i];
			for(; i<nelem(r->stk); i++)
				r->stk[i] = 0;
		}
	}
	runtime·unlock(&proflock);
}
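
// Typical Go-side use (sketch; mirrors what runtime/pprof does): probe
// for the record count first, then retry until the snapshot fits, e.g.
//	n, _ := runtime.MemProfile(nil, true)
//	p := make([]runtime.MemProfileRecord, n+50)
//	n, ok := runtime.MemProfile(p, true)
//	// if !ok, more records appeared; grow p and call again.
// BlockProfile follows the same too-small-slice convention.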

// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
	uintptr stk[32];
};

func ThreadCreateProfile(p Slice) (n int, ok bool) {
	TRecord *r;
	M *first, *mp;

	first = runtime·atomicloadp(&runtime·allm);
	n = 0;
	for(mp=first; mp; mp=mp->alllink)
		n++;
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (TRecord*)p.array;
		for(mp=first; mp; mp=mp->alllink) {
			runtime·memmove(r->stk, mp->createstack, sizeof r->stk);
			r++;
		}
	}
}

func Stack(b Slice, all bool) (n int) {
	byte *pc, *sp;

	sp = runtime·getcallersp(&b);
	pc = runtime·getcallerpc(&b);

	if(all) {
		runtime·semacquire(&runtime·worldsema);
		m->gcing = 1;
		runtime·stoptheworld();
	}

	if(b.len == 0)
		n = 0;
	else{
		g->writebuf = (byte*)b.array;
		g->writenbuf = b.len;
		runtime·goroutineheader(g);
		runtime·traceback(pc, sp, 0, g);
		if(all)
			runtime·tracebackothers(g);
		n = b.len - g->writenbuf;
		g->writebuf = nil;
		g->writenbuf = 0;
	}

	if(all) {
		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}

static void
saveg(byte *pc, byte *sp, G *gp, TRecord *r)
{
	int32 n;

	n = runtime·gentraceback(pc, sp, 0, gp, 0, r->stk, nelem(r->stk), nil, nil);
	if(n < nelem(r->stk))
		r->stk[n] = 0;
}

func GoroutineProfile(b Slice) (n int, ok bool) {
	byte *pc, *sp;
	TRecord *r;
	G *gp;

	sp = runtime·getcallersp(&b);
	pc = runtime·getcallerpc(&b);

	ok = false;
	n = runtime·gcount();
	if(n <= b.len) {
		runtime·semacquire(&runtime·worldsema);
		m->gcing = 1;
		runtime·stoptheworld();

		n = runtime·gcount();
		if(n <= b.len) {
			ok = true;
			r = (TRecord*)b.array;
			saveg(pc, sp, g, r++);
			for(gp = runtime·allg; gp != nil; gp = gp->alllink) {
				if(gp == g || gp->status == Gdead)
					continue;
				saveg(gp->sched.pc, (byte*)gp->sched.sp, gp, r++);
			}
		}

		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}

void
runtime·mprofinit(void)
{
	addrhash = allocate((1<<AddrHashBits)*sizeof *addrhash);
}