github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/runtime/mprof.goc

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "defs_GOOS_GOARCH.h"
#include "type.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock;

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

enum { MProf, BProf };  // profile types

// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
	Bucket	*next;	// next in hash list
	Bucket	*allnext;	// next in list of all mbuckets/bbuckets
	int32	typ;
	// Generally unions can break precise GC,
	// this one is fine because it does not contain pointers.
	union
	{
		struct  // typ == MProf
		{
			uintptr	allocs;
			uintptr	frees;
			uintptr	alloc_bytes;
			uintptr	free_bytes;
			uintptr	recent_allocs;  // since last gc
			uintptr	recent_frees;
			uintptr	recent_alloc_bytes;
			uintptr	recent_free_bytes;
		};
		struct  // typ == BProf
		{
			int64	count;
			int64	cycles;
		};
	};
	uintptr	hash;
	uintptr	nstk;
	uintptr	stk[1];
};
enum {
	BuckHashSize = 179999,
};
static Bucket **buckhash;
static Bucket *mbuckets;  // memory profile buckets
static Bucket *bbuckets;  // blocking profile buckets
static uintptr bucketmem;

// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc)
{
	int32 i;
	uintptr h;
	Bucket *b;

	if(buckhash == nil) {
		buckhash = runtime·SysAlloc(BuckHashSize*sizeof buckhash[0], &mstats.buckhash_sys);
		if(buckhash == nil)
			runtime·throw("runtime: cannot allocate memory");
	}

	// Hash stack.
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i];
		h += h<<10;
		h ^= h>>6;
	}
	h += h<<3;
	h ^= h>>11;

	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next)
		if(b->typ == typ && b->hash == h && b->nstk == nstk &&
		   runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
			return b;

	if(!alloc)
		return nil;

	b = runtime·persistentalloc(sizeof *b + nstk*sizeof stk[0], 0, &mstats.buckhash_sys);
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime·memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->typ = typ;
	b->hash = h;
	b->nstk = nstk;
	b->next = buckhash[i];
	buckhash[i] = b;
	if(typ == MProf) {
		b->allnext = mbuckets;
		mbuckets = b;
	} else {
		b->allnext = bbuckets;
		bbuckets = b;
	}
	return b;
}

static void
MProf_GC(void)
{
	Bucket *b;

	for(b=mbuckets; b; b=b->allnext) {
		b->allocs += b->recent_allocs;
		b->frees += b->recent_frees;
		b->alloc_bytes += b->recent_alloc_bytes;
		b->free_bytes += b->recent_free_bytes;
		b->recent_allocs = 0;
		b->recent_frees = 0;
		b->recent_alloc_bytes = 0;
		b->recent_free_bytes = 0;
	}
}
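// Illustrative sketch (not part of this file): the call-stack hash used by
// stkbucket above, rewritten as a small standalone Go program. Each PC is
// folded into the hash with the same add/shift/xor mixing steps, and the
// result is reduced modulo BuckHashSize to pick a chain in buckhash. The
// names stkhash and buckHashSize are local to this sketch.
package main

import "fmt"

const buckHashSize = 179999 // mirrors BuckHashSize above

// stkhash returns the mixed hash of a call stack and the buckhash slot it maps to.
func stkhash(stk []uintptr) (h, slot uintptr) {
	for _, pc := range stk {
		h += pc
		h += h << 10
		h ^= h >> 6
	}
	h += h << 3
	h ^= h >> 11
	return h, h % buckHashSize
}

func main() {
	h, slot := stkhash([]uintptr{0x401000, 0x402abc, 0x403f00})
	fmt.Printf("hash=%#x slot=%d\n", h, slot)
}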
// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime·MProf_GC(void)
{
	runtime·lock(&proflock);
	MProf_GC();
	runtime·unlock(&proflock);
}

// Map from pointer to Bucket* that allocated it.
// Three levels:
//	Linked-list hash table for top N-AddrHashShift bits.
//	Array index for next AddrDenseBits bits.
//	Linked list for next AddrHashShift-AddrDenseBits bits.
// This is more efficient than using a general map,
// because of the typical clustering of the pointer keys.

typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;

enum {
	AddrHashBits = 12,	// good for 4GB of used address space
	AddrHashShift = 20,	// each AddrHash knows about 1MB of address space
	AddrDenseBits = 8,	// good for a profiling rate of 4096 bytes
};

struct AddrHash
{
	AddrHash *next;	// next in top-level hash table linked list
	uintptr addr;	// addr>>20
	AddrEntry *dense[1<<AddrDenseBits];
};

struct AddrEntry
{
	AddrEntry *next;	// next in bottom-level linked list
	uint32 addr;
	Bucket *b;
};

static AddrHash **addrhash;	// points to (AddrHash*)[1<<AddrHashBits]
static AddrEntry *addrfree;
static uintptr addrmem;

// Multiplicative hash function:
// hashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
// This is a good multiplier as suggested in CLR, Knuth.  The hash
// value is taken to be the top AddrHashBits bits of the bottom 32 bits
// of the multiplied value.
enum {
	HashMultiplier = 2654435769U
};

// Set the bucket associated with addr to b.
static void
setaddrbucket(uintptr addr, Bucket *b)
{
	int32 i;
	uint32 h;
	AddrHash *ah;
	AddrEntry *e;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;

	ah = runtime·persistentalloc(sizeof *ah, 0, &mstats.buckhash_sys);
	addrmem += sizeof *ah;
	ah->next = addrhash[h];
	ah->addr = addr>>AddrHashShift;
	addrhash[h] = ah;

found:
	if((e = addrfree) == nil) {
		e = runtime·persistentalloc(64*sizeof *e, 0, &mstats.buckhash_sys);
		addrmem += 64*sizeof *e;
		for(i=0; i+1<64; i++)
			e[i].next = &e[i+1];
		e[63].next = nil;
	}
	addrfree = e->next;
	e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
	e->b = b;
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	e->next = ah->dense[h];
	ah->dense[h] = e;
}

// Get the bucket associated with addr and clear the association.
static Bucket*
getaddrbucket(uintptr addr)
{
	uint32 h;
	AddrHash *ah;
	AddrEntry *e, **l;
	Bucket *b;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;
	return nil;

found:
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
		if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
			*l = e->next;
			b = e->b;
			e->next = addrfree;
			addrfree = e;
			return b;
		}
	}
	return nil;
}
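// Illustrative sketch (not part of this file): how setaddrbucket/getaddrbucket
// above split an address across the three lookup levels. The top-level slot is
// the multiplicative (Fibonacci) hash of addr>>AddrHashShift, the dense-array
// index is the top AddrDenseBits bits of the low AddrHashShift bits, and the
// per-entry key is the bitwise complement of the low AddrHashShift bits.
// The function name split and the sample address are made up for this sketch.
package main

import "fmt"

const (
	addrHashBits   = 12
	addrHashShift  = 20
	addrDenseBits  = 8
	hashMultiplier = 2654435769 // bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32))
)

// split returns the top-level hash slot, the dense-array index, and the
// per-entry key for addr, using the same arithmetic as the C code above.
func split(addr uintptr) (top, dense, key uint32) {
	top = uint32((addr>>addrHashShift)*hashMultiplier) >> (32 - addrHashBits)
	dense = uint32(addr>>(addrHashShift-addrDenseBits)) & (1<<addrDenseBits - 1)
	key = ^uint32(addr & (1<<addrHashShift - 1))
	return
}

func main() {
	top, dense, key := split(0xc0045abc)
	fmt.Printf("top=%d dense=%d key=%#x\n", top, dense, key)
}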
// Called by malloc to record a profiled block.
void
runtime·MProf_Malloc(void *p, uintptr size)
{
	int32 nstk;
	uintptr stk[32];
	Bucket *b;

	nstk = runtime·callers(1, stk, 32);
	runtime·lock(&proflock);
	b = stkbucket(MProf, stk, nstk, true);
	b->recent_allocs++;
	b->recent_alloc_bytes += size;
	setaddrbucket((uintptr)p, b);
	runtime·unlock(&proflock);
}

// Called when freeing a profiled block.
void
runtime·MProf_Free(void *p, uintptr size)
{
	Bucket *b;

	runtime·lock(&proflock);
	b = getaddrbucket((uintptr)p);
	if(b != nil) {
		b->recent_frees++;
		b->recent_free_bytes += size;
	}
	runtime·unlock(&proflock);
}

int64 runtime·blockprofilerate;  // in CPU ticks

void
runtime·SetBlockProfileRate(intgo rate)
{
	int64 r;

	if(rate <= 0)
		r = 0;  // disable profiling
	else {
		// convert ns to cycles, use float64 to prevent overflow during multiplication
		r = (float64)rate*runtime·tickspersecond()/(1000*1000*1000);
		if(r == 0)
			r = 1;
	}
	runtime·atomicstore64((uint64*)&runtime·blockprofilerate, r);
}

void
runtime·blockevent(int64 cycles, int32 skip)
{
	int32 nstk;
	int64 rate;
	uintptr stk[32];
	Bucket *b;

	if(cycles <= 0)
		return;
	rate = runtime·atomicload64((uint64*)&runtime·blockprofilerate);
	if(rate <= 0 || (rate > cycles && runtime·fastrand1()%rate > cycles))
		return;

	nstk = runtime·callers(skip, stk, 32);
	runtime·lock(&proflock);
	b = stkbucket(BProf, stk, nstk, true);
	b->count++;
	b->cycles += cycles;
	runtime·unlock(&proflock);
}

// Go interface to profile data.  (Declared in debug.go)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;
	int64 alloc_objects, free_objects;
	uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
	int32 i;

	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;
	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
		r->stk[i] = b->stk[i];
	for(; i<nelem(r->stk); i++)
		r->stk[i] = 0;
}

func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
	Bucket *b;
	Record *r;
	bool clear;

	runtime·lock(&proflock);
	n = 0;
	clear = true;
	for(b=mbuckets; b; b=b->allnext) {
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
		if(b->allocs != 0 || b->frees != 0)
			clear = false;
	}
	if(clear) {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
		MProf_GC();
		n = 0;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				n++;
	}
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (Record*)p.array;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime·unlock(&proflock);
}
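// Illustrative sketch (not part of this file): the sampling decision made by
// runtime·blockevent above. The configured rate is stored in CPU ticks; an
// event that blocked for `cycles` ticks is always recorded when cycles >= rate,
// and otherwise with probability roughly cycles/rate, so on average each
// `rate` ticks of blocking contribute one sample. shouldRecord is a local name,
// and rand.Int63n stands in for runtime·fastrand1()%rate.
package main

import (
	"fmt"
	"math/rand"
)

// shouldRecord mirrors the early-return tests in runtime·blockevent.
func shouldRecord(cycles, rate int64) bool {
	if cycles <= 0 || rate <= 0 {
		return false
	}
	return rate <= cycles || rand.Int63n(rate) <= cycles
}

func main() {
	const rate = 1000000 // ticks
	hits := 0
	for i := 0; i < 100000; i++ {
		if shouldRecord(5000, rate) { // short events are sampled ~cycles/rate of the time
			hits++
		}
	}
	fmt.Printf("recorded %d of 100000 short events (~%.2f%%, expected ~0.50%%)\n",
		hits, float64(hits)/1000)
}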
// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
	int64 count;
	int64 cycles;
	uintptr stk[32];
};

func BlockProfile(p Slice) (n int, ok bool) {
	Bucket *b;
	BRecord *r;
	int32 i;

	runtime·lock(&proflock);
	n = 0;
	for(b=bbuckets; b; b=b->allnext)
		n++;
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (BRecord*)p.array;
		for(b=bbuckets; b; b=b->allnext, r++) {
			r->count = b->count;
			r->cycles = b->cycles;
			for(i=0; i<b->nstk && i<nelem(r->stk); i++)
				r->stk[i] = b->stk[i];
			for(; i<nelem(r->stk); i++)
				r->stk[i] = 0;
		}
	}
	runtime·unlock(&proflock);
}

// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
	uintptr stk[32];
};

func ThreadCreateProfile(p Slice) (n int, ok bool) {
	TRecord *r;
	M *first, *mp;

	first = runtime·atomicloadp(&runtime·allm);
	n = 0;
	for(mp=first; mp; mp=mp->alllink)
		n++;
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (TRecord*)p.array;
		for(mp=first; mp; mp=mp->alllink) {
			runtime·memmove(r->stk, mp->createstack, sizeof r->stk);
			r++;
		}
	}
}

func Stack(b Slice, all bool) (n int) {
	uintptr pc, sp;

	sp = runtime·getcallersp(&b);
	pc = (uintptr)runtime·getcallerpc(&b);

	if(all) {
		runtime·semacquire(&runtime·worldsema, false);
		m->gcing = 1;
		runtime·stoptheworld();
	}

	if(b.len == 0)
		n = 0;
	else {
		g->writebuf = (byte*)b.array;
		g->writenbuf = b.len;
		runtime·goroutineheader(g);
		runtime·traceback(pc, sp, 0, g);
		if(all)
			runtime·tracebackothers(g);
		n = b.len - g->writenbuf;
		g->writebuf = nil;
		g->writenbuf = 0;
	}

	if(all) {
		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}

static void
saveg(uintptr pc, uintptr sp, G *gp, TRecord *r)
{
	int32 n;

	n = runtime·gentraceback(pc, sp, 0, gp, 0, r->stk, nelem(r->stk), nil, nil, false);
	if(n < nelem(r->stk))
		r->stk[n] = 0;
}

func GoroutineProfile(b Slice) (n int, ok bool) {
	uintptr pc, sp;
	TRecord *r;
	G *gp;

	sp = runtime·getcallersp(&b);
	pc = (uintptr)runtime·getcallerpc(&b);

	ok = false;
	n = runtime·gcount();
	if(n <= b.len) {
		runtime·semacquire(&runtime·worldsema, false);
		m->gcing = 1;
		runtime·stoptheworld();

		n = runtime·gcount();
		if(n <= b.len) {
			ok = true;
			r = (TRecord*)b.array;
			saveg(pc, sp, g, r++);
			for(gp = runtime·allg; gp != nil; gp = gp->alllink) {
				if(gp == g || gp->status == Gdead)
					continue;
				saveg(~(uintptr)0, ~(uintptr)0, gp, r++);
			}
		}

		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}

void
runtime·mprofinit(void)
{
	addrhash = runtime·persistentalloc((1<<AddrHashBits)*sizeof *addrhash, 0, &mstats.buckhash_sys);
}
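// Illustrative sketch (not part of this file): how the exported functions
// defined above are reached from ordinary Go code through the runtime package
// wrappers declared in debug.go. The grow-and-retry loop follows MemProfile's
// documented contract: it reports ok=false, with n giving the required length,
// when the record slice is too small.
package main

import (
	"fmt"
	"runtime"
)

func main() {
	runtime.SetBlockProfileRate(1) // record (almost) every blocking event

	// Grow the record slice until runtime.MemProfile reports ok == true.
	rec := make([]runtime.MemProfileRecord, 64)
	for {
		n, ok := runtime.MemProfile(rec, true)
		if ok {
			rec = rec[:n]
			break
		}
		rec = make([]runtime.MemProfileRecord, n+64)
	}

	var inuse int64
	for i := range rec {
		inuse += rec[i].InUseBytes()
	}
	fmt.Printf("%d profiled allocation sites, %d bytes in use\n", len(rec), inuse)
}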