github.com/lxt1045/json@v0.0.0-20231013032136-54d6b1d6e525/cpu.txt

Total: 2.07s
ROUTINE ======================== aeshashbody in /usr/local/go/src/runtime/asm_amd64.s
  70ms   70ms (flat, cum)  3.38% of Total
     .      .   909:// DX: address to put return value
     .      .   910:TEXT aeshashbody<>(SB),NOSPLIT,$0-0
     .      .   911:	// Fill an SSE register with our seeds.
     .      .   912:	MOVQ	h+8(FP), X0	// 64 bits of per-table hash seed
     .      .   913:	PINSRW	$4, CX, X0	// 16 bits of length
  10ms   10ms   914:	PSHUFHW	$0, X0, X0	// repeat length 4 times total
     .      .   915:	MOVO	X0, X1	// save unscrambled seed
     .      .   916:	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
     .      .   917:	AESENC	X0, X0	// scramble seed
     .      .   918:
  10ms   10ms   919:	CMPQ	CX, $16
     .      .   920:	JB	aes0to15
     .      .   921:	JE	aes16
     .      .   922:	CMPQ	CX, $32
     .      .   923:	JBE	aes17to32
     .      .   924:	CMPQ	CX, $64
     .      .   925:	JBE	aes33to64
     .      .   926:	CMPQ	CX, $128
     .      .   927:	JBE	aes65to128
     .      .   928:	JMP	aes129plus
     .      .   929:
     .      .   930:aes0to15:
     .      .   931:	TESTQ	CX, CX
     .      .   932:	JE	aes0
     .      .   933:
     .      .   934:	ADDQ	$16, AX
     .      .   935:	TESTW	$0xff0, AX
     .      .   936:	JE	endofpage
     .      .   937:
     .      .   938:	// 16 bytes loaded at this address won't cross
     .      .   939:	// a page boundary, so we can load it directly.
     .      .   940:	MOVOU	-16(AX), X1
     .      .   941:	ADDQ	CX, CX
     .      .   942:	MOVQ	$masks<>(SB), AX
     .      .   943:	PAND	(AX)(CX*8), X1
     .      .   944:final1:
  10ms   10ms   945:	PXOR	X0, X1	// xor data with seed
  10ms   10ms   946:	AESENC	X1, X1	// scramble combo 3 times
     .      .   947:	AESENC	X1, X1
  20ms   20ms   948:	AESENC	X1, X1
  10ms   10ms   949:	MOVQ	X1, (DX)
     .      .   950:	RET
     .      .   951:
     .      .   952:endofpage:
     .      .   953:	// address ends in 1111xxxx. Might be up against
     .      .   954:	// a page boundary, so load ending at last byte.
ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.(*TagInfo).Set in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/struct.go
  20ms  150ms (flat, cum)  7.25% of Total
     .      .   128:
     .      .   129:	Marshalable marshalable `json:"-"`
     .      .   130:}
     .      .   131:
     .      .   132:func (t *TagInfo) Set(pStruct unsafe.Pointer, pIn unsafe.Pointer) {
  20ms  150ms   133:	t.Marshalable.Set(t.StructField, pStruct, pIn)
     .      .   134:}
     .      .   135:func (t *TagInfo) Get(pStruct unsafe.Pointer, pOut unsafe.Pointer) {
     .      .   136:	t.Marshalable.Get(t.StructField, pStruct, pOut)
     .      .   137:}
     .      .   138:
ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.BenchmarkMyUnmarshal.func1 in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/struct_bench_test.go
  10ms  1.73s (flat, cum) 83.57% of Total
     .      .   204:	}
     .      .   205:
     .      .   206:	name := "Unmarshal"
     .      .   207:	b.Run(name, func(b *testing.B) {
     .      .   208:		b.ReportAllocs()
  10ms   10ms   209:		for i := 0; i < b.N; i++ {
     .  1.72s   210:			Unmarshal(bsJSON, &d)
     .      .   211:		}
     .      .   212:		b.SetBytes(int64(b.N))
     .      .   213:		b.StopTimer()
     .      .   214:	})
     .      .   215:}
ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.IsSpace in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/json.go
 110ms  110ms (flat, cum)  5.31% of Total
     .      .   132:}
     .      .   133:
     .      .   134:const charSpace uint32 = 1<<('\t'-1) | 1<<('\n'-1) | 1<<('\v'-1) | 1<<('\f'-1) | 1<<('\r'-1) | 1<<(' '-1)
     .      .   135:
     .      .   136:func IsSpace(b byte) bool {
 110ms  110ms   137:	return b == 0x85 || b == 0xA0 || (charSpace>>(b-1)&0x1 > 0)
     .      .   138:	// switch b {
     .      .   139:	// case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
     .      .   140:	// return true
     .      .   141:	// }
     .      .   142:	// return false
ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.LoadTagNode in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/struct.go
  10ms   50ms (flat, cum)  2.42% of Total
     .      .   13:var (
     .      .   14:	cacheStructTagInfo = make(map[string]*tagNode) //map[type]map[string]TagInfo
     .      .   15:	cacheLock sync.RWMutex
     .      .   16:)
     .      .   17:
  10ms   10ms   18:func LoadTagNode(key string) (n *tagNode) {
     .   20ms   19:	cacheLock.RLock()
     .   20ms   20:	n = cacheStructTagInfo[key]
     .      .   21:	if n != nil {
     .      .   22:		cacheLock.RUnlock()
     .      .   23:		return
     .      .   24:	}
     .      .   25:	cacheLock.RUnlock()
ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.Unmarshal in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/struct.go
  30ms  1.72s (flat, cum) 83.09% of Total
     .      .   225:	for typ.Kind() == reflect.Ptr {
     .      .   226:		vi.Set(reflect.New(vi.Type().Elem()))
     .      .   227:		vi = vi.Elem()
     .      .   228:		typ = typ.Elem()
     .      .   229:	}
  30ms  180ms   230:	node := LoadTagNode(typ.PkgPath() + "." + typ.Name())
     .      .   231:	tagInfo, err := node.GetTagInfo(typ)
     .      .   232:	if err != nil {
     .      .   233:		return
     .      .   234:	}
     .      .   235:
     .      .   236:	defer func() {
     .      .   237:		if e := recover(); e != nil {
     .      .   238:			err = e.(error)
     .      .   239:			err = errors.New(err.Error())
     .      .   240:			return
     .      .   241:		}
     .      .   242:	}()
     .      .   243:	empty := (*emptyInterface)(unsafe.Pointer(&in))
     .  1.54s   244:	parseNextUnit(bs, empty.word, tagInfo)
     .      .   245:	if err != nil {
     .      .   246:		return
     .      .   247:	}
     .      .   248:
     .      .   249:	return
ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.parseNum in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/json.go
     0   20ms (flat, cum)  0.97% of Total
     .      .   262:}
     .      .   263:
     .      .   264:func parseNum(stream []byte) (raw []byte, i int) {
     .      .   265:	for ; i < len(stream); i++ {
     .      .   266:		c := stream[i]
     .   20ms   267:		if IsSpace(c) || c == ']' || c == '}' || c == ',' {
     .      .   268:			raw, i = stream[:i], i+1
     .      .   269:			return
     .      .   270:		}
     .      .   271:	}
     .      .   272:	raw = stream
ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.parseNextUnit in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/json.go
 220ms  2.09s (flat, cum) 100.97% of Total
     .      .   155:	if len(stream) < 2 || stream[0] != '{' {
     .      .   156:		panicIncorrectFormat(stream[:])
     .      .   157:	}
     .      .   158:	var key []byte
     .      .   159:	for i = 1; i < len(stream); {
  40ms  140ms   160:		i += trimSpace(stream[i:])
     .      .   161:		if stream[i] == ']' || stream[i] == '}' {
     .      .   162:			i++
     .      .   163:			break // 此 struct 结束语法分析
     .      .   164:		}
     .      .   165:		switch stream[i] {
     .      .   166:		default:
     .      .   167:			if (stream[i] >= '0' && stream[i] <= '9') || stream[i] == '-' {
     .      .   168:				if len(key) <= 0 {
     .      .   169:					panicIncorrectFormat(stream[i:])
     .      .   170:				}
     .   20ms   171:				raw, size := parseNum(stream[i:])
     .      .   172:				i += size
  10ms   90ms   173:				if tag, ok := tis[string(key)]; ok && pObj != nil {
     .  210ms   174:					setNumberField(pObj, tag, raw, Number)
     .      .   175:				}
     .      .   176:				key = nil
     .      .   177:			} else {
     .      .   178:				panicIncorrectFormat(stream[i:])
     .      .   179:			}
     .      .   180:		case '{': // obj
  10ms   10ms   181:			if len(key) <= 0 {
     .      .   182:				panicIncorrectFormat(stream[i:])
     .      .   183:			}
  10ms   50ms   184:			if tag, ok := tis[string(key)]; ok {
  20ms  570ms   185:				i += setObjField(pObj, tag, stream[i:])
     .      .   186:			} else {
     .      .   187:				i += parseNextUnit(stream[i:], nil, tag.Children)
     .      .   188:			}
     .      .   189:			key = nil
     .      .   190:		case '[': // obj
     .      .   191:			if len(key) <= 0 {
     .      .   192:				panicIncorrectFormat(stream[i:])
     .      .   193:			}
     .      .   194:			if tag, ok := tis[string(key)]; ok {
     .      .   195:				i += setObjField(pObj, tag, stream[i:])
     .      .   196:			} else {
     .      .   197:				i += parseNextUnit(stream[i:], nil, tag.Children)
     .      .   198:			}
     .      .   199:			key = nil
  10ms   10ms   200:		case 'n':
     .      .   201:			if len(key) <= 0 {
     .      .   202:				panicIncorrectFormat(stream[i:])
     .      .   203:			}
     .      .   204:			if stream[i+1] != 'u' || stream[i+2] != 'l' || stream[i+3] != 'l' {
     .      .   205:				panicIncorrectFormat(stream[i:])
     .      .   206:			}
     .      .   207:			i += 4
     .      .   208:			key = nil
     .      .   209:		case 't':
     .      .   210:			if len(key) <= 0 {
     .      .   211:				panicIncorrectFormat(stream[i:])
     .      .   212:			}
     .      .   213:			if stream[i+1] != 'r' || stream[i+2] != 'u' || stream[i+3] != 'e' {
     .      .   214:				panicIncorrectFormat(stream[i:])
     .      .   215:			}
     .      .   216:			i += 4
     .      .   217:			if tag, ok := tis[string(key)]; ok && pObj != nil {
     .      .   218:				setBoolField(pObj, tag, true)
     .      .   219:			}
     .      .   220:			key = nil
     .      .   221:		case 'f':
     .      .   222:			if len(key) <= 0 {
     .      .   223:				panicIncorrectFormat(stream[i:])
     .      .   224:			}
     .      .   225:			if stream[i+1] != 'a' || stream[i+2] != 'l' || stream[i+3] != 's' || stream[i+4] != 'e' {
     .      .   226:				panicIncorrectFormat(stream[i:])
     .      .   227:			}
     .      .   228:			i += 5
     .      .   229:			if tag, ok := tis[string(key)]; ok && pObj != nil {
     .      .   230:				setBoolField(pObj, tag, false)
     .      .   231:			}
     .      .   232:			key = nil
     .      .   233:		case '"':
     .      .   234:			if len(key) <= 0 {
  30ms   50ms   235:				i += trimSpace(stream[i:])
     .      .   236:				size := 0
  20ms  190ms   237:				key, size = parseStr(stream[i:]) //先解析key 再解析value
     .      .   238:				i += size
  20ms   60ms   239:				i += trimSpace(stream[i:])
     .      .   240:				if stream[i] != ':' {
     .      .   241:					panicIncorrectFormat(stream[i:])
     .      .   242:				}
     .      .   243:				i++
  20ms   20ms   244:				i += trimSpace(stream[i:])
     .      .   245:				continue
     .      .   246:			} else {
     .   80ms   247:				raw, size := parseStr(stream[i:])
     .      .   248:				i += size
     .  160ms   249:				if tag, ok := tis[string(key)]; ok && pObj != nil {
  10ms  360ms   250:					setStringField(pObj, tag, raw)
     .      .   251:				}
     .      .   252:				key = nil
     .      .   253:			}
     .      .   254:		}
  10ms   60ms   255:		i += trimSpace(stream[i:])
  10ms   10ms   256:		if stream[i] == ',' {
     .      .   257:			i++
     .      .   258:			continue
     .      .   259:		}
     .      .   260:	}
     .      .   261:	return
ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.parseStr in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/json.go
 250ms  250ms (flat, cum) 12.08% of Total
     .      .   273:	return
     .      .   274:}
     .      .   275:
     .      .   276://stream: "fgshw1321"...
     .      .   277:func parseStr(stream []byte) (raw []byte, i int) {
  70ms   70ms   278:	for i = 1; i < len(stream); {
  20ms   20ms   279:		if stream[i] == '"' {
  10ms   10ms   280:			if len(raw) <= 0 {
  10ms   10ms   281:				raw = stream[1:i]
     .      .   282:			}
     .      .   283:			return raw, i + 1
     .      .   284:		}
  10ms   10ms   285:		if stream[i] == '\\' {
     .      .   286:			word, wordSize := unescapeStr(stream[i:])
 100ms  100ms   287:			if len(raw) <= 0 {
     .      .   288:				raw = stream[1:i]
     .      .   289:			}
     .      .   290:			raw = append(raw, word...)
     .      .   291:			i += wordSize
     .      .   292:			continue
     .      .   293:		}
     .      .   294:		if len(raw) > 0 {
     .      .   295:			raw = append(raw, stream[i])
     .      .   296:		}
  30ms   30ms   297:		i++
     .      .   298:	}
     .      .   299:	return
     .      .   300:}
     .      .   301:
     .      .   302:// unescape unescapes a string
ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.setField in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/marshalable_func.go
  20ms   60ms (flat, cum)  2.90% of Total
     .      .   95:}
     .      .   96:
     .      .   97:func setField(field reflect.StructField, pStruct unsafe.Pointer, pIn unsafe.Pointer) {
     .      .   98:	pValue := unsafe.Pointer(uintptr(pStruct) + uintptr(field.Offset))
     .      .   99:	typ := field.Type
     .   10ms   100:	if typ.Kind() != reflect.Ptr {
     .      .   101:		from := SliceHeader{
     .      .   102:			Data: uintptr(pIn),
     .   20ms   103:			Len: int(typ.Size()),
     .      .   104:			Cap: int(typ.Size()),
     .      .   105:		}
     .      .   106:		to := SliceHeader{
     .      .   107:			Data: uintptr(pValue),
     .      .   108:			Len: int(typ.Size()),
  10ms   10ms   109:			Cap: int(typ.Size()),
     .      .   110:		}
  10ms   20ms   111:		copy(*(*[]byte)(unsafe.Pointer(&to)), *(*[]byte)(unsafe.Pointer(&from)))
     .      .   112:		return
     .      .   113:	}
     .      .   114:	setPointerField(field, pStruct, pIn)
     .      .   115:	return
     .      .   116:}
ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.setFieldString in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/marshalable_func.go
  30ms   30ms (flat, cum)  1.45% of Total
     .      .   127:	*(*unsafe.Pointer)(pValue) = *(*unsafe.Pointer)(pIn)
     .      .   128:	return
     .      .   129:}
     .      .   130:
     .      .   131:func setFieldString(field reflect.StructField, pStruct unsafe.Pointer, pIn unsafe.Pointer) {
  10ms   10ms   132:	pValue := unsafe.Pointer(uintptr(pStruct) + uintptr(field.Offset))
     .      .   133:	typ := field.Type
     .      .   134:	if typ.Kind() != reflect.Ptr {
     .      .   135:		*(*string)(pValue) = *(*string)(pIn)
  20ms   20ms   136:		return
     .      .   137:	}
     .      .   138:	setPointerField(field, pStruct, pIn)
     .      .   139:	return
     .      .   140:}
ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.setNumberField in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/json.go
     0  210ms (flat, cum) 10.14% of Total
     .      .   57:}
     .      .   58:func setNumberField(pObj unsafe.Pointer, tag *TagInfo, raw []byte, typ Type) (i int) {
     .      .   59:	if tag.Kind < reflect.Int || tag.Kind > reflect.Float64 {
     .      .   60:		panicIncorrectType(False, tag)
     .      .   61:	}
     .  120ms   62:	num, err := strconv.ParseFloat(bytesString(raw), 64)
     .      .   63:	if err != nil {
     .      .   64:		panicIncorrectFormat([]byte("error:" + err.Error() + ", stream:" + string(raw)))
     .      .   65:	}
     .      .   66:	switch tag.Kind {
     .      .   67:	case reflect.Int8:
     .      .   68:		i8 := int8(num)
     .      .   69:		tag.Set(pObj, unsafe.Pointer(&i8))
     .      .   70:	case reflect.Uint8:
     .      .   71:		u8 := int8(num)
     .      .   72:		tag.Set(pObj, unsafe.Pointer(&u8))
     .      .   73:	case reflect.Uint16:
     .      .   74:		u := uint16(num)
     .      .   75:		tag.Set(pObj, unsafe.Pointer(&u))
     .      .   76:	case reflect.Int16:
     .      .   77:		i := int16(num)
     .      .   78:		tag.Set(pObj, unsafe.Pointer(&i))
     .      .   79:	case reflect.Uint32:
     .      .   80:		u := uint32(num)
     .      .   81:		tag.Set(pObj, unsafe.Pointer(&u))
     .      .   82:	case reflect.Int32:
     .      .   83:		i := int32(num)
     .      .   84:		tag.Set(pObj, unsafe.Pointer(&i))
     .      .   85:	case reflect.Uint64:
     .      .   86:		u := uint64(num)
     .      .   87:		tag.Set(pObj, unsafe.Pointer(&u))
     .      .   88:	case reflect.Int64:
     .   10ms   89:		i := int64(num)
     .   40ms   90:		tag.Set(pObj, unsafe.Pointer(&i))
     .      .   91:	case reflect.Int:
     .      .   92:		u := int(num)
     .   40ms   93:		tag.Set(pObj, unsafe.Pointer(&u))
     .      .   94:	case reflect.Uint:
     .      .   95:		i := uint(num)
     .      .   96:		tag.Set(pObj, unsafe.Pointer(&i))
     .      .   97:	case reflect.Float32:
     .      .   98:		u := float32(num)
ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.setObjField in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/json.go
     0  550ms (flat, cum) 26.57% of Total
     .      .   124:func setObjField(pObj unsafe.Pointer, tag *TagInfo, raw []byte) (i int) {
     .      .   125:	if tag.Kind != reflect.Struct {
     .      .   126:		panicIncorrectType(False, tag)
     .      .   127:	}
     .      .   128:	pField := unsafe.Pointer(uintptr(pObj) + uintptr(tag.StructField.Offset))
     .  550ms   129:	size := parseNextUnit(raw, pField, tag.Children)
     .      .   130:	i += size
     .      .   131:	return
     .      .   132:}
     .      .   133:
     .      .   134:const charSpace uint32 = 1<<('\t'-1) | 1<<('\n'-1) | 1<<('\v'-1) | 1<<('\f'-1) | 1<<('\r'-1) | 1<<(' '-1)
ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.setStringField in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/json.go
  30ms  350ms (flat, cum) 16.91% of Total
     .      .   110:		panicIncorrectType(typ, tag)
     .      .   111:	}
     .      .   112:
     .      .   113:	return
     .      .   114:}
  30ms  280ms   115:func setStringField(pObj unsafe.Pointer, tag *TagInfo, raw []byte) {
     .      .   116:	if tag.Kind != reflect.String {
     .      .   117:		panicIncorrectType(False, tag)
     .      .   118:	}
     .      .   119:	// str := bytesString(raw)
     .      .   120:	// tag.Set(pObj, unsafe.Pointer(&str))
     .   70ms   121:	tag.Set(pObj, unsafe.Pointer(&raw))
     .      .   122:	return
     .      .   123:}
     .      .   124:func setObjField(pObj unsafe.Pointer, tag *TagInfo, raw []byte) (i int) {
     .      .   125:	if tag.Kind != reflect.Struct {
     .      .   126:		panicIncorrectType(False, tag)
ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.trimSpace in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/json.go
 120ms  210ms (flat, cum) 10.14% of Total
     .      .   140:	// return true
     .      .   141:	// }
     .      .   142:	// return false
     .      .   143:}
     .      .   144:func trimSpace(stream []byte) (i int) {
 100ms  100ms   145:	for i = range stream {
     .   90ms   146:		if !IsSpace(stream[i]) {
     .      .   147:			break
     .      .   148:		}
     .      .   149:	}
  20ms   20ms   150:	return
     .      .   151:}
     .      .   152:
     .      .   153://解析 obj: {}, 或 []
     .      .   154:func parseNextUnit(stream []byte, pObj unsafe.Pointer, tis map[string]*TagInfo) (i int) {
     .      .   155:	if len(stream) < 2 || stream[0] != '{' {
ROUTINE ======================== memeqbody in /usr/local/go/src/internal/bytealg/equal_amd64.s
  40ms   40ms (flat, cum)  1.93% of Total
     .      .   98:	// 8 bytes at a time using 64-bit register
     .      .   99:bigloop:
     .      .   100:	CMPQ	BX, $8
     .      .   101:	JBE	leftover
     .      .   102:	MOVQ	(SI), CX
  10ms   10ms   103:	MOVQ	(DI), DX
     .      .   104:	ADDQ	$8, SI
     .      .   105:	ADDQ	$8, DI
     .      .   106:	SUBQ	$8, BX
     .      .   107:	CMPQ	CX, DX
     .      .   108:	JEQ	bigloop
     .      .   109:	MOVB	$0, (AX)
     .      .   110:	RET
     .      .   111:
     .      .   112:	// remaining 0-8 bytes
     .      .   113:leftover:
  10ms   10ms   114:	MOVQ	-8(SI)(BX*1), CX
     .      .   115:	MOVQ	-8(DI)(BX*1), DX
     .      .   116:	CMPQ	CX, DX
     .      .
117: SETEQ (AX) 442 . . 118: RET 443 . . 119: 444 . . 120:small: 445 . . 121: CMPQ BX, $0 446 . . 122: JEQ equal 447 . . 123: 448 . . 124: LEAQ 0(BX*8), CX 449 . . 125: NEGQ CX 450 . . 126: 451 . . 127: CMPB SI, $0xf8 452 . . 128: JA si_high 453 . . 129: 454 . . 130: // load at SI won't cross a page boundary. 455 . . 131: MOVQ (SI), SI 456 . . 132: JMP si_finish 457 . . 133:si_high: 458 . . 134: // address ends in 11111xxx. Load up to bytes we want, move to correct position. 459 . . 135: MOVQ -8(SI)(BX*1), SI 460 . . 136: SHRQ CX, SI 461 . . 137:si_finish: 462 . . 138: 463 . . 139: // same for DI. 464 . . 140: CMPB DI, $0xf8 465 . . 141: JA di_high 466 . . 142: MOVQ (DI), DI 467 . . 143: JMP di_finish 468 . . 144:di_high: 469 . . 145: MOVQ -8(DI)(BX*1), DI 470 . . 146: SHRQ CX, DI 471 . . 147:di_finish: 472 . . 148: 473 10ms 10ms 149: SUBQ SI, DI 474 . . 150: SHLQ CX, DI 475 . . 151:equal: 476 10ms 10ms 152: SETEQ (AX) 477 . . 153: RET 478 . . 154: 479 ROUTINE ======================== reflect.(*rtype).Kind in /usr/local/go/src/reflect/type.go 480 10ms 10ms (flat, cum) 0.48% of Total 481 . . 775: 482 . . 776:func (t *rtype) Align() int { return int(t.align) } 483 . . 777: 484 . . 778:func (t *rtype) FieldAlign() int { return int(t.fieldAlign) } 485 . . 779: 486 10ms 10ms 780:func (t *rtype) Kind() Kind { return Kind(t.kind & kindMask) } 487 . . 781: 488 . . 782:func (t *rtype) pointers() bool { return t.ptrdata != 0 } 489 . . 783: 490 . . 784:func (t *rtype) common() *rtype { return t } 491 . . 785: 492 ROUTINE ======================== reflect.(*rtype).PkgPath in /usr/local/go/src/reflect/type.go 493 10ms 20ms (flat, cum) 0.97% of Total 494 . . 857: } 495 . . 858: ut := t.uncommon() 496 . . 859: if ut == nil { 497 . . 860: return "" 498 . . 861: } 499 10ms 20ms 862: return t.nameOff(ut.pkgPath).name() 500 . . 863:} 501 . . 864: 502 . . 865:func (t *rtype) hasName() bool { 503 . . 866: return t.tflag&tflagNamed != 0 504 . . 867:} 505 ROUTINE ======================== reflect.(*rtype).Size in /usr/local/go/src/reflect/type.go 506 20ms 20ms (flat, cum) 0.97% of Total 507 . . 758: return s[1:] 508 . . 759: } 509 . . 760: return s 510 . . 761:} 511 . . 762: 512 20ms 20ms 763:func (t *rtype) Size() uintptr { return t.size } 513 . . 764: 514 . . 765:func (t *rtype) Bits() int { 515 . . 766: if t == nil { 516 . . 767: panic("reflect: Bits of nil Type") 517 . . 768: } 518 ROUTINE ======================== reflect.(*rtype).nameOff in /usr/local/go/src/reflect/type.go 519 10ms 10ms (flat, cum) 0.48% of Total 520 . . 681:type nameOff int32 // offset to a name 521 . . 682:type typeOff int32 // offset to an *rtype 522 . . 683:type textOff int32 // offset from top of text section 523 . . 684: 524 . . 685:func (t *rtype) nameOff(off nameOff) name { 525 10ms 10ms 686: return name{(*byte)(resolveNameOff(unsafe.Pointer(t), int32(off)))} 526 . . 687:} 527 . . 688: 528 . . 689:func (t *rtype) typeOff(off typeOff) *rtype { 529 . . 690: return (*rtype)(resolveTypeOff(unsafe.Pointer(t), int32(off))) 530 . . 691:} 531 ROUTINE ======================== runtime.(*addrRanges).removeGreaterEqual in /usr/local/go/src/runtime/mranges.go 532 0 10ms (flat, cum) 0.48% of Total 533 . . 346: removed += r.size() 534 . . 347: r = r.removeGreaterEqual(addr) 535 . . 348: if r.size() == 0 { 536 . . 349: pivot-- 537 . . 350: } else { 538 . 10ms 351: removed -= r.size() 539 . . 352: a.ranges[pivot-1] = r 540 . . 353: } 541 . . 354: } 542 . . 355: a.ranges = a.ranges[:pivot] 543 . . 
356: a.totalBytes -= removed 544 ROUTINE ======================== runtime.(*gcControllerState).enlistWorker in /usr/local/go/src/runtime/mgc.go 545 0 30ms (flat, cum) 1.45% of Total 546 . . 705: } 547 . . 706: p := allp[id] 548 . . 707: if p.status != _Prunning { 549 . . 708: continue 550 . . 709: } 551 . 30ms 710: if preemptone(p) { 552 . . 711: return 553 . . 712: } 554 . . 713: } 555 . . 714:} 556 . . 715: 557 ROUTINE ======================== runtime.(*gcWork).balance in /usr/local/go/src/runtime/mgcwork.go 558 0 40ms (flat, cum) 1.93% of Total 559 . . 290: if wbuf := w.wbuf2; wbuf.nobj != 0 { 560 . . 291: putfull(wbuf) 561 . . 292: w.flushedWork = true 562 . . 293: w.wbuf2 = getempty() 563 . . 294: } else if wbuf := w.wbuf1; wbuf.nobj > 4 { 564 . 10ms 295: w.wbuf1 = handoff(wbuf) 565 . . 296: w.flushedWork = true // handoff did putfull 566 . . 297: } else { 567 . . 298: return 568 . . 299: } 569 . . 300: // We flushed a buffer to the full list, so wake a worker. 570 . . 301: if gcphase == _GCmark { 571 . 30ms 302: gcController.enlistWorker() 572 . . 303: } 573 . . 304:} 574 . . 305: 575 . . 306:// empty reports whether w has no mark work available. 576 . . 307://go:nowritebarrierrec 577 ROUTINE ======================== runtime.(*lfstack).push in /usr/local/go/src/runtime/lfstack.go 578 10ms 10ms (flat, cum) 0.48% of Total 579 . . 30: throw("lfstack.push") 580 . . 31: } 581 . . 32: for { 582 . . 33: old := atomic.Load64((*uint64)(head)) 583 . . 34: node.next = old 584 10ms 10ms 35: if atomic.Cas64((*uint64)(head), old, new) { 585 . . 36: break 586 . . 37: } 587 . . 38: } 588 . . 39:} 589 . . 40: 590 ROUTINE ======================== runtime.(*mcache).nextFree in /usr/local/go/src/runtime/malloc.go 591 0 80ms (flat, cum) 3.86% of Total 592 . . 877: // The span is full. 593 . . 878: if uintptr(s.allocCount) != s.nelems { 594 . . 879: println("runtime: s.allocCount=", s.allocCount, "s.nelems=", s.nelems) 595 . . 880: throw("s.allocCount != s.nelems && freeIndex == s.nelems") 596 . . 881: } 597 . 80ms 882: c.refill(spc) 598 . . 883: shouldhelpgc = true 599 . . 884: s = c.alloc[spc] 600 . . 885: 601 . . 886: freeIndex = s.nextFreeIndex() 602 . . 887: } 603 ROUTINE ======================== runtime.(*mcache).refill in /usr/local/go/src/runtime/mcache.go 604 0 80ms (flat, cum) 3.86% of Total 605 . . 157: } 606 . . 158: mheap_.central[spc].mcentral.uncacheSpan(s) 607 . . 159: } 608 . . 160: 609 . . 161: // Get a new cached span from the central lists. 610 . 80ms 162: s = mheap_.central[spc].mcentral.cacheSpan() 611 . . 163: if s == nil { 612 . . 164: throw("out of memory") 613 . . 165: } 614 . . 166: 615 . . 167: if uintptr(s.allocCount) == s.nelems { 616 ROUTINE ======================== runtime.(*mcentral).cacheSpan in /usr/local/go/src/runtime/mcentral.go 617 0 80ms (flat, cum) 3.86% of Total 618 . . 153: traceGCSweepDone() 619 . . 154: traceDone = true 620 . . 155: } 621 . . 156: 622 . . 157: // We failed to get a span from the mcentral so get one from mheap. 623 . 80ms 158: s = c.grow() 624 . . 159: if s == nil { 625 . . 160: return nil 626 . . 161: } 627 . . 162: 628 . . 163: // At this point s is a span that should have free slots. 629 ROUTINE ======================== runtime.(*mcentral).grow in /usr/local/go/src/runtime/mcentral.go 630 0 80ms (flat, cum) 3.86% of Total 631 . . 227:// grow allocates a new empty span from the heap and initializes it for c's size class. 632 . . 228:func (c *mcentral) grow() *mspan { 633 . . 
229: npages := uintptr(class_to_allocnpages[c.spanclass.sizeclass()]) 634 . . 230: size := uintptr(class_to_size[c.spanclass.sizeclass()]) 635 . . 231: 636 . 70ms 232: s := mheap_.alloc(npages, c.spanclass, true) 637 . . 233: if s == nil { 638 . . 234: return nil 639 . . 235: } 640 . . 236: 641 . . 237: // Use division by multiplication and shifts to quickly compute: 642 . . 238: // n := (npages << _PageShift) / size 643 . . 239: n := (npages << _PageShift) >> s.divShift * uintptr(s.divMul) >> s.divShift2 644 . . 240: s.limit = s.base() + size*n 645 . 10ms 241: heapBitsForAddr(s.base()).initSpan(s) 646 . . 242: return s 647 . . 243:} 648 ROUTINE ======================== runtime.(*mheap).alloc in /usr/local/go/src/runtime/mheap.go 649 0 70ms (flat, cum) 3.38% of Total 650 . . 899:func (h *mheap) alloc(npages uintptr, spanclass spanClass, needzero bool) *mspan { 651 . . 900: // Don't do any operations that lock the heap on the G stack. 652 . . 901: // It might trigger stack growth, and the stack growth code needs 653 . . 902: // to be able to allocate heap. 654 . . 903: var s *mspan 655 . 30ms 904: systemstack(func() { 656 . . 905: // To prevent excessive heap growth, before allocating n pages 657 . . 906: // we need to sweep and reclaim at least n pages. 658 . . 907: if h.sweepdone == 0 { 659 . . 908: h.reclaim(npages) 660 . . 909: } 661 . . 910: s = h.allocSpan(npages, spanAllocHeap, spanclass) 662 . . 911: }) 663 . . 912: 664 . . 913: if s != nil { 665 . . 914: if needzero && s.needzero != 0 { 666 . 40ms 915: memclrNoHeapPointers(unsafe.Pointer(s.base()), s.npages<<_PageShift) 667 . . 916: } 668 . . 917: s.needzero = 0 669 . . 918: } 670 . . 919: return s 671 . . 920:} 672 ROUTINE ======================== runtime.(*mheap).alloc.func1 in /usr/local/go/src/runtime/mheap.go 673 0 150ms (flat, cum) 7.25% of Total 674 . . 905: // To prevent excessive heap growth, before allocating n pages 675 . . 906: // we need to sweep and reclaim at least n pages. 676 . . 907: if h.sweepdone == 0 { 677 . . 908: h.reclaim(npages) 678 . . 909: } 679 . 150ms 910: s = h.allocSpan(npages, spanAllocHeap, spanclass) 680 . . 911: }) 681 . . 912: 682 . . 913: if s != nil { 683 . . 914: if needzero && s.needzero != 0 { 684 . . 915: memclrNoHeapPointers(unsafe.Pointer(s.base()), s.npages<<_PageShift) 685 ROUTINE ======================== runtime.(*mheap).allocSpan in /usr/local/go/src/runtime/mheap.go 686 0 150ms (flat, cum) 7.25% of Total 687 . . 1205: unlock(&h.lock) 688 . . 1206: 689 . . 1207:HaveSpan: 690 . . 1208: // At this point, both s != nil and base != 0, and the heap 691 . . 1209: // lock is no longer held. Initialize the span. 692 . 30ms 1210: s.init(base, npages) 693 . . 1211: if h.allocNeedsZero(base, npages) { 694 . . 1212: s.needzero = 1 695 . . 1213: } 696 . . 1214: nbytes := npages * pageSize 697 . . 1215: if typ.manual() { 698 . . 1216: s.manualFreeList = 0 699 . . 1217: s.nelems = 0 700 . . 1218: s.limit = s.base() + s.npages*pageSize 701 . . 1219: s.state.set(mSpanManual) 702 . . 1220: } else { 703 . . 1221: // We must set span properties before the span is published anywhere 704 . . 1222: // since we're not holding the heap lock. 705 . . 1223: s.spanclass = spanclass 706 . . 1224: if sizeclass := spanclass.sizeclass(); sizeclass == 0 { 707 . . 1225: s.elemsize = nbytes 708 . . 1226: s.nelems = 1 709 . . 1227: 710 . . 1228: s.divShift = 0 711 . . 1229: s.divMul = 0 712 . . 1230: s.divShift2 = 0 713 . . 1231: s.baseMask = 0 714 . . 1232: } else { 715 . . 
1233: s.elemsize = uintptr(class_to_size[sizeclass]) 716 . . 1234: s.nelems = nbytes / s.elemsize 717 . . 1235: 718 . . 1236: m := &class_to_divmagic[sizeclass] 719 . . 1237: s.divShift = m.shift 720 . . 1238: s.divMul = m.mul 721 . . 1239: s.divShift2 = m.shift2 722 . . 1240: s.baseMask = m.baseMask 723 . . 1241: } 724 . . 1242: 725 . . 1243: // Initialize mark and allocation structures. 726 . . 1244: s.freeindex = 0 727 . . 1245: s.allocCache = ^uint64(0) // all 1s indicating all free. 728 . . 1246: s.gcmarkBits = newMarkBits(s.nelems) 729 . . 1247: s.allocBits = newAllocBits(s.nelems) 730 . . 1248: 731 . . 1249: // It's safe to access h.sweepgen without the heap lock because it's 732 . . 1250: // only ever updated with the world stopped and we run on the 733 . . 1251: // systemstack which blocks a STW transition. 734 . . 1252: atomic.PoolStore(&s.sweepgen, h.sweepgen) 735 . . 1253: 736 . . 1254: // Now that the span is filled in, set its state. This 737 . . 1255: // is a publication barrier for the other fields in 738 . . 1256: // the span. While valid pointers into this span 739 . . 1257: // should never be visible until the span is returned, 740 . . 1258: // if the garbage collector finds an invalid pointer, 741 . . 1259: // access to the span may race with initialization of 742 . . 1260: // the span. We resolve this race by atomically 743 . . 1261: // setting the state after the span is fully 744 . . 1262: // initialized, and atomically checking the state in 745 . . 1263: // any situation where a pointer is suspect. 746 . . 1264: s.state.set(mSpanInUse) 747 . . 1265: } 748 . . 1266: 749 . . 1267: // Commit and account for any scavenged memory that the span now owns. 750 . . 1268: if scav != 0 { 751 . . 1269: // sysUsed all the pages that are actually available 752 . . 1270: // in the span since some of them might be scavenged. 753 . 120ms 1271: sysUsed(unsafe.Pointer(base), nbytes) 754 . . 1272: atomic.Xadd64(&memstats.heap_released, -int64(scav)) 755 . . 1273: } 756 . . 1274: // Update stats. 757 . . 1275: if typ == spanAllocHeap { 758 . . 1276: atomic.Xadd64(&memstats.heap_inuse, int64(nbytes)) 759 ROUTINE ======================== runtime.(*mspan).init in /usr/local/go/src/runtime/mheap.go 760 30ms 30ms (flat, cum) 1.45% of Total 761 . . 1522:} 762 . . 1523: 763 . . 1524:// Initialize a new span with the given start and npages. 764 . . 1525:func (span *mspan) init(base uintptr, npages uintptr) { 765 . . 1526: // span is *not* zeroed. 766 30ms 30ms 1527: span.next = nil 767 . . 1528: span.prev = nil 768 . . 1529: span.list = nil 769 . . 1530: span.startAddr = base 770 . . 1531: span.npages = npages 771 . . 1532: span.allocCount = 0 772 ROUTINE ======================== runtime.(*pageAlloc).scavenge in /usr/local/go/src/runtime/mgcscavenge.go 773 0 10ms (flat, cum) 0.48% of Total 774 . . 404: gen uint32 775 . . 405: ) 776 . . 406: released := uintptr(0) 777 . . 407: for released < nbytes { 778 . . 408: if addrs.size() == 0 { 779 . 10ms 409: if addrs, gen = p.scavengeReserve(); addrs.size() == 0 { 780 . . 410: break 781 . . 411: } 782 . . 412: } 783 . . 413: r, a := p.scavengeOne(addrs, nbytes-released, mayUnlock) 784 . . 414: released += r 785 ROUTINE ======================== runtime.(*pageAlloc).scavengeReserve in /usr/local/go/src/runtime/mgcscavenge.go 786 0 10ms (flat, cum) 0.48% of Total 787 . . 515: // the scavenger, so align down, potentially extending 788 . . 516: // the range. 789 . . 517: newBase := alignDown(r.base.addr(), pallocChunkBytes) 790 . . 518: 791 . . 
519: // Remove from inUse however much extra we just pulled out. 792 . 10ms 520: p.scav.inUse.removeGreaterEqual(newBase) 793 . . 521: r.base = offAddr{newBase} 794 . . 522: return r, p.scav.gen 795 . . 523:} 796 . . 524: 797 . . 525:// scavengeUnreserve returns an unscavenged portion of a range that was 798 ROUTINE ======================== runtime.add in /usr/local/go/src/runtime/stubs.go 799 10ms 10ms (flat, cum) 0.48% of Total 800 . . 7:import "unsafe" 801 . . 8: 802 . . 9:// Should be a built-in for unsafe.Pointer? 803 . . 10://go:nosplit 804 . . 11:func add(p unsafe.Pointer, x uintptr) unsafe.Pointer { 805 10ms 10ms 12: return unsafe.Pointer(uintptr(p) + x) 806 . . 13:} 807 . . 14: 808 . . 15:// getg returns the pointer to the current g. 809 . . 16:// The compiler rewrites calls to this function into instructions 810 . . 17:// that fetch the g directly (from TLS or from the dedicated register). 811 ROUTINE ======================== runtime.addrRange.size in /usr/local/go/src/runtime/mranges.go 812 10ms 10ms (flat, cum) 0.48% of Total 813 . . 42: if !a.base.lessThan(a.limit) { 814 . . 43: return 0 815 . . 44: } 816 . . 45: // Subtraction is safe because limit and base must be in the same 817 . . 46: // segment of the address space. 818 10ms 10ms 47: return a.limit.diff(a.base) 819 . . 48:} 820 . . 49: 821 . . 50:// contains returns whether or not the range contains a given address. 822 . . 51:func (a addrRange) contains(addr uintptr) bool { 823 . . 52: return a.base.lessEqual(offAddr{addr}) && (offAddr{addr}).lessThan(a.limit) 824 ROUTINE ======================== runtime.bgscavenge in /usr/local/go/src/runtime/mgcscavenge.go 825 0 10ms (flat, cum) 0.48% of Total 826 . . 287: // Time in scavenging critical section. 827 . . 288: crit := float64(0) 828 . . 289: 829 . . 290: // Run on the system stack since we grab the heap lock, 830 . . 291: // and a stack growth with the heap lock means a deadlock. 831 . 10ms 292: systemstack(func() { 832 . . 293: lock(&mheap_.lock) 833 . . 294: 834 . . 295: // If background scavenging is disabled or if there's no work to do just park. 835 . . 296: retained, goal := heapRetained(), mheap_.scavengeGoal 836 . . 297: if retained <= goal { 837 ROUTINE ======================== runtime.bgscavenge.func2 in /usr/local/go/src/runtime/mgcscavenge.go 838 0 10ms (flat, cum) 0.48% of Total 839 . . 299: return 840 . . 300: } 841 . . 301: 842 . . 302: // Scavenge one page, and measure the amount of time spent scavenging. 843 . . 303: start := nanotime() 844 . 10ms 304: released = mheap_.pages.scavenge(physPageSize, true) 845 . . 305: mheap_.pages.scav.released += released 846 . . 306: crit = float64(nanotime() - start) 847 . . 307: 848 . . 308: unlock(&mheap_.lock) 849 . . 309: }) 850 ROUTINE ======================== runtime.bucketMask in /usr/local/go/src/runtime/map.go 851 10ms 20ms (flat, cum) 0.97% of Total 852 . . 185: return uintptr(1) << (b & (sys.PtrSize*8 - 1)) 853 . . 186:} 854 . . 187: 855 . . 188:// bucketMask returns 1<<b - 1, optimized for code generation. 856 . . 189:func bucketMask(b uint8) uintptr { 857 10ms 20ms 190: return bucketShift(b) - 1 858 . . 191:} 859 . . 192: 860 . . 193:// tophash calculates the tophash value for hash. 861 . . 194:func tophash(hash uintptr) uint8 { 862 . . 195: top := uint8(hash >> (sys.PtrSize*8 - 8)) 863 ROUTINE ======================== runtime.bucketShift in /usr/local/go/src/runtime/map.go 864 10ms 10ms (flat, cum) 0.48% of Total 865 . . 180:} 866 . . 181: 867 . . 
182:// bucketShift returns 1<<b, optimized for code generation. 868 . . 183:func bucketShift(b uint8) uintptr { 869 . . 184: // Masking the shift amount allows overflow checks to be elided. 870 10ms 10ms 185: return uintptr(1) << (b & (sys.PtrSize*8 - 1)) 871 . . 186:} 872 . . 187: 873 . . 188:// bucketMask returns 1<<b - 1, optimized for code generation. 874 . . 189:func bucketMask(b uint8) uintptr { 875 . . 190: return bucketShift(b) - 1 876 ROUTINE ======================== runtime.checkTimers in /usr/local/go/src/runtime/proc.go 877 0 10ms (flat, cum) 0.48% of Total 878 . . 3244: // No timers to run or adjust. 879 . . 3245: return now, 0, false 880 . . 3246: } 881 . . 3247: 882 . . 3248: if now == 0 { 883 . 10ms 3249: now = nanotime() 884 . . 3250: } 885 . . 3251: if now < next { 886 . . 3252: // Next timer is not ready to run, but keep going 887 . . 3253: // if we would clear deleted timers. 888 . . 3254: // This corresponds to the condition below where 889 ROUTINE ======================== runtime.concatstring3 in /usr/local/go/src/runtime/string.go 890 10ms 80ms (flat, cum) 3.86% of Total 891 . . 58:func concatstring2(buf *tmpBuf, a [2]string) string { 892 . . 59: return concatstrings(buf, a[:]) 893 . . 60:} 894 . . 61: 895 . . 62:func concatstring3(buf *tmpBuf, a [3]string) string { 896 10ms 80ms 63: return concatstrings(buf, a[:]) 897 . . 64:} 898 . . 65: 899 . . 66:func concatstring4(buf *tmpBuf, a [4]string) string { 900 . . 67: return concatstrings(buf, a[:]) 901 . . 68:} 902 ROUTINE ======================== runtime.concatstrings in /usr/local/go/src/runtime/string.go 903 10ms 70ms (flat, cum) 3.38% of Total 904 . . 45: // or our result does not escape the calling frame (buf != nil), 905 . . 46: // then we can return that string directly. 906 . . 47: if count == 1 && (buf != nil || !stringDataOnStack(a[idx])) { 907 . . 48: return a[idx] 908 . . 49: } 909 . 40ms 50: s, b := rawstringtmp(buf, l) 910 . . 51: for _, x := range a { 911 10ms 30ms 52: copy(b, x) 912 . . 53: b = b[len(x):] 913 . . 54: } 914 . . 55: return s 915 . . 56:} 916 . . 57: 917 ROUTINE ======================== runtime.duffcopy in /usr/local/go/src/runtime/duff_amd64.s 918 40ms 40ms (flat, cum) 1.93% of Total 919 . . 398: ADDQ $16, SI 920 . . 399: MOVUPS X0, (DI) 921 . . 400: ADDQ $16, DI 922 . . 401: 923 . . 402: MOVUPS (SI), X0 924 10ms 10ms 403: ADDQ $16, SI 925 10ms 10ms 404: MOVUPS X0, (DI) 926 . . 405: ADDQ $16, DI 927 . . 406: 928 . . 407: MOVUPS (SI), X0 929 . . 408: ADDQ $16, SI 930 . . 409: MOVUPS X0, (DI) 931 . . 410: ADDQ $16, DI 932 . . 411: 933 . . 412: MOVUPS (SI), X0 934 . . 413: ADDQ $16, SI 935 . . 414: MOVUPS X0, (DI) 936 10ms 10ms 415: ADDQ $16, DI 937 . . 416: 938 . . 417: MOVUPS (SI), X0 939 . . 418: ADDQ $16, SI 940 . . 419: MOVUPS X0, (DI) 941 . . 420: ADDQ $16, DI 942 . . 421: 943 . . 422: MOVUPS (SI), X0 944 . . 423: ADDQ $16, SI 945 . . 424: MOVUPS X0, (DI) 946 10ms 10ms 425: ADDQ $16, DI 947 . . 426: 948 . . 427: RET 949 ROUTINE ======================== runtime.findrunnable in /usr/local/go/src/runtime/proc.go 950 0 60ms (flat, cum) 2.90% of Total 951 . . 2695: // is probably a waste of time. 952 . . 2696: // 953 . . 2697: // timerpMask tells us whether the P may have timers at all. If it 954 . . 2698: // can't, no need to check at all. 955 . . 2699: if stealTimersOrRunNextG && timerpMask.read(enum.position()) { 956 . 10ms 2700: tnow, w, ran := checkTimers(p2, now) 957 . . 2701: now = tnow 958 . . 2702: if w != 0 && (pollUntil == 0 || w < pollUntil) { 959 . . 2703: pollUntil = w 960 . . 
2704: } 961 . . 2705: if ran { 962 . . 2706: // Running the timers may have 963 . . 2707: // made an arbitrary number of G's 964 . . 2708: // ready and added them to this P's 965 . . 2709: // local run queue. That invalidates 966 . . 2710: // the assumption of runqsteal 967 . . 2711: // that is always has room to add 968 . . 2712: // stolen G's. So check now if there 969 . . 2713: // is a local G to run. 970 . . 2714: if gp, inheritTime := runqget(_p_); gp != nil { 971 . . 2715: return gp, inheritTime 972 . . 2716: } 973 . . 2717: ranTimer = true 974 . . 2718: } 975 . . 2719: } 976 . . 2720: 977 . . 2721: // Don't bother to attempt to steal if p2 is idle. 978 . . 2722: if !idlepMask.read(enum.position()) { 979 . . 2723: if gp := runqsteal(_p_, p2, stealTimersOrRunNextG); gp != nil { 980 . . 2724: return gp, false 981 . . 2725: } 982 . . 2726: } 983 . . 2727: } 984 . . 2728: } 985 . . 2729: if ranTimer { 986 . . 2730: // Running a timer may have made some goroutine ready. 987 . . 2731: goto top 988 . . 2732: } 989 . . 2733: 990 . . 2734:stop: 991 . . 2735: 992 . . 2736: // We have nothing to do. If we're in the GC mark phase, can 993 . . 2737: // safely scan and blacken objects, and have work to do, run 994 . . 2738: // idle-time marking rather than give up the P. 995 . . 2739: if gcBlackenEnabled != 0 && gcMarkWorkAvailable(_p_) { 996 . . 2740: node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop()) 997 . . 2741: if node != nil { 998 . . 2742: _p_.gcMarkWorkerMode = gcMarkWorkerIdleMode 999 . . 2743: gp := node.gp.ptr() 1000 . . 2744: casgstatus(gp, _Gwaiting, _Grunnable) 1001 . . 2745: if trace.enabled { 1002 . . 2746: traceGoUnpark(gp, 0) 1003 . . 2747: } 1004 . . 2748: return gp, false 1005 . . 2749: } 1006 . . 2750: } 1007 . . 2751: 1008 . . 2752: delta := int64(-1) 1009 . . 2753: if pollUntil != 0 { 1010 . . 2754: // checkTimers ensures that polluntil > now. 1011 . . 2755: delta = pollUntil - now 1012 . . 2756: } 1013 . . 2757: 1014 . . 2758: // wasm only: 1015 . . 2759: // If a callback returned and no other goroutine is awake, 1016 . . 2760: // then wake event handler goroutine which pauses execution 1017 . . 2761: // until a callback was triggered. 1018 . . 2762: gp, otherReady := beforeIdle(delta) 1019 . . 2763: if gp != nil { 1020 . . 2764: casgstatus(gp, _Gwaiting, _Grunnable) 1021 . . 2765: if trace.enabled { 1022 . . 2766: traceGoUnpark(gp, 0) 1023 . . 2767: } 1024 . . 2768: return gp, false 1025 . . 2769: } 1026 . . 2770: if otherReady { 1027 . . 2771: goto top 1028 . . 2772: } 1029 . . 2773: 1030 . . 2774: // Before we drop our P, make a snapshot of the allp slice, 1031 . . 2775: // which can change underfoot once we no longer block 1032 . . 2776: // safe-points. We don't need to snapshot the contents because 1033 . . 2777: // everything up to cap(allp) is immutable. 1034 . . 2778: allpSnapshot := allp 1035 . . 2779: // Also snapshot masks. Value changes are OK, but we can't allow 1036 . . 2780: // len to change out from under us. 1037 . . 2781: idlepMaskSnapshot := idlepMask 1038 . . 2782: timerpMaskSnapshot := timerpMask 1039 . . 2783: 1040 . . 2784: // return P and block 1041 . . 2785: lock(&sched.lock) 1042 . . 2786: if sched.gcwaiting != 0 || _p_.runSafePointFn != 0 { 1043 . . 2787: unlock(&sched.lock) 1044 . . 2788: goto top 1045 . . 2789: } 1046 . . 2790: if sched.runqsize != 0 { 1047 . . 2791: gp := globrunqget(_p_, 0) 1048 . . 2792: unlock(&sched.lock) 1049 . . 2793: return gp, false 1050 . . 2794: } 1051 . . 2795: if releasep() != _p_ { 1052 . . 
2796: throw("findrunnable: wrong p") 1053 . . 2797: } 1054 . . 2798: pidleput(_p_) 1055 . . 2799: unlock(&sched.lock) 1056 . . 2800: 1057 . . 2801: // Delicate dance: thread transitions from spinning to non-spinning state, 1058 . . 2802: // potentially concurrently with submission of new goroutines. We must 1059 . . 2803: // drop nmspinning first and then check all per-P queues again (with 1060 . . 2804: // #StoreLoad memory barrier in between). If we do it the other way around, 1061 . . 2805: // another thread can submit a goroutine after we've checked all run queues 1062 . . 2806: // but before we drop nmspinning; as a result nobody will unpark a thread 1063 . . 2807: // to run the goroutine. 1064 . . 2808: // If we discover new work below, we need to restore m.spinning as a signal 1065 . . 2809: // for resetspinning to unpark a new worker thread (because there can be more 1066 . . 2810: // than one starving goroutine). However, if after discovering new work 1067 . . 2811: // we also observe no idle Ps, it is OK to just park the current thread: 1068 . . 2812: // the system is fully loaded so no spinning threads are required. 1069 . . 2813: // Also see "Worker thread parking/unparking" comment at the top of the file. 1070 . . 2814: wasSpinning := _g_.m.spinning 1071 . . 2815: if _g_.m.spinning { 1072 . . 2816: _g_.m.spinning = false 1073 . . 2817: if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 { 1074 . . 2818: throw("findrunnable: negative nmspinning") 1075 . . 2819: } 1076 . . 2820: } 1077 . . 2821: 1078 . . 2822: // check all runqueues once again 1079 . . 2823: for id, _p_ := range allpSnapshot { 1080 . . 2824: if !idlepMaskSnapshot.read(uint32(id)) && !runqempty(_p_) { 1081 . . 2825: lock(&sched.lock) 1082 . . 2826: _p_ = pidleget() 1083 . . 2827: unlock(&sched.lock) 1084 . . 2828: if _p_ != nil { 1085 . . 2829: acquirep(_p_) 1086 . . 2830: if wasSpinning { 1087 . . 2831: _g_.m.spinning = true 1088 . . 2832: atomic.Xadd(&sched.nmspinning, 1) 1089 . . 2833: } 1090 . . 2834: goto top 1091 . . 2835: } 1092 . . 2836: break 1093 . . 2837: } 1094 . . 2838: } 1095 . . 2839: 1096 . . 2840: // Similar to above, check for timer creation or expiry concurrently with 1097 . . 2841: // transitioning from spinning to non-spinning. Note that we cannot use 1098 . . 2842: // checkTimers here because it calls adjusttimers which may need to allocate 1099 . . 2843: // memory, and that isn't allowed when we don't have an active P. 1100 . . 2844: for id, _p_ := range allpSnapshot { 1101 . . 2845: if timerpMaskSnapshot.read(uint32(id)) { 1102 . . 2846: w := nobarrierWakeTime(_p_) 1103 . . 2847: if w != 0 && (pollUntil == 0 || w < pollUntil) { 1104 . . 2848: pollUntil = w 1105 . . 2849: } 1106 . . 2850: } 1107 . . 2851: } 1108 . . 2852: if pollUntil != 0 { 1109 . . 2853: if now == 0 { 1110 . . 2854: now = nanotime() 1111 . . 2855: } 1112 . . 2856: delta = pollUntil - now 1113 . . 2857: if delta < 0 { 1114 . . 2858: delta = 0 1115 . . 2859: } 1116 . . 2860: } 1117 . . 2861: 1118 . . 2862: // Check for idle-priority GC work again. 1119 . . 2863: // 1120 . . 2864: // N.B. Since we have no P, gcBlackenEnabled may change at any time; we 1121 . . 2865: // must check again after acquiring a P. 1122 . . 2866: if atomic.Load(&gcBlackenEnabled) != 0 && gcMarkWorkAvailable(nil) { 1123 . . 2867: // Work is available; we can start an idle GC worker only if 1124 . . 2868: // there is an available P and available worker G. 1125 . . 2869: // 1126 . . 2870: // We can attempt to acquire these in either order. 
Workers are 1127 . . 2871: // almost always available (see comment in findRunnableGCWorker 1128 . . 2872: // for the one case there may be none). Since we're slightly 1129 . . 2873: // less likely to find a P, check for that first. 1130 . . 2874: lock(&sched.lock) 1131 . . 2875: var node *gcBgMarkWorkerNode 1132 . . 2876: _p_ = pidleget() 1133 . . 2877: if _p_ != nil { 1134 . . 2878: // Now that we own a P, gcBlackenEnabled can't change 1135 . . 2879: // (as it requires STW). 1136 . . 2880: if gcBlackenEnabled != 0 { 1137 . . 2881: node = (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop()) 1138 . . 2882: if node == nil { 1139 . . 2883: pidleput(_p_) 1140 . . 2884: _p_ = nil 1141 . . 2885: } 1142 . . 2886: } else { 1143 . . 2887: pidleput(_p_) 1144 . . 2888: _p_ = nil 1145 . . 2889: } 1146 . . 2890: } 1147 . . 2891: unlock(&sched.lock) 1148 . . 2892: if _p_ != nil { 1149 . . 2893: acquirep(_p_) 1150 . . 2894: if wasSpinning { 1151 . . 2895: _g_.m.spinning = true 1152 . . 2896: atomic.Xadd(&sched.nmspinning, 1) 1153 . . 2897: } 1154 . . 2898: 1155 . . 2899: // Run the idle worker. 1156 . . 2900: _p_.gcMarkWorkerMode = gcMarkWorkerIdleMode 1157 . . 2901: gp := node.gp.ptr() 1158 . . 2902: casgstatus(gp, _Gwaiting, _Grunnable) 1159 . . 2903: if trace.enabled { 1160 . . 2904: traceGoUnpark(gp, 0) 1161 . . 2905: } 1162 . . 2906: return gp, false 1163 . . 2907: } 1164 . . 2908: } 1165 . . 2909: 1166 . . 2910: // poll network 1167 . . 2911: if netpollinited() && (atomic.Load(&netpollWaiters) > 0 || pollUntil != 0) && atomic.Xchg64(&sched.lastpoll, 0) != 0 { 1168 . . 2912: atomic.Store64(&sched.pollUntil, uint64(pollUntil)) 1169 . . 2913: if _g_.m.p != 0 { 1170 . . 2914: throw("findrunnable: netpoll with p") 1171 . . 2915: } 1172 . . 2916: if _g_.m.spinning { 1173 . . 2917: throw("findrunnable: netpoll with spinning") 1174 . . 2918: } 1175 . . 2919: if faketime != 0 { 1176 . . 2920: // When using fake time, just poll. 1177 . . 2921: delta = 0 1178 . . 2922: } 1179 . 10ms 2923: list := netpoll(delta) // block until new work is available 1180 . . 2924: atomic.Store64(&sched.pollUntil, 0) 1181 . . 2925: atomic.Store64(&sched.lastpoll, uint64(nanotime())) 1182 . . 2926: if faketime != 0 && list.empty() { 1183 . . 2927: // Using fake time and nothing is ready; stop M. 1184 . . 2928: // When all M's stop, checkdead will call timejump. 1185 . . 2929: stopm() 1186 . . 2930: goto top 1187 . . 2931: } 1188 . . 2932: lock(&sched.lock) 1189 . . 2933: _p_ = pidleget() 1190 . . 2934: unlock(&sched.lock) 1191 . . 2935: if _p_ == nil { 1192 . . 2936: injectglist(&list) 1193 . . 2937: } else { 1194 . . 2938: acquirep(_p_) 1195 . . 2939: if !list.empty() { 1196 . . 2940: gp := list.pop() 1197 . . 2941: injectglist(&list) 1198 . . 2942: casgstatus(gp, _Gwaiting, _Grunnable) 1199 . . 2943: if trace.enabled { 1200 . . 2944: traceGoUnpark(gp, 0) 1201 . . 2945: } 1202 . . 2946: return gp, false 1203 . . 2947: } 1204 . . 2948: if wasSpinning { 1205 . . 2949: _g_.m.spinning = true 1206 . . 2950: atomic.Xadd(&sched.nmspinning, 1) 1207 . . 2951: } 1208 . . 2952: goto top 1209 . . 2953: } 1210 . . 2954: } else if pollUntil != 0 && netpollinited() { 1211 . . 2955: pollerPollUntil := int64(atomic.Load64(&sched.pollUntil)) 1212 . . 2956: if pollerPollUntil == 0 || pollerPollUntil > pollUntil { 1213 . . 2957: netpollBreak() 1214 . . 2958: } 1215 . . 2959: } 1216 . 40ms 2960: stopm() 1217 . . 2961: goto top 1218 . . 2962:} 1219 . . 2963: 1220 . . 2964:// pollWork reports whether there is non-background work this P could 1221 . . 
2965:// be doing. This is a fairly lightweight check to be used for 1222 ROUTINE ======================== runtime.gcAssistAlloc.func1 in /usr/local/go/src/runtime/mgcmark.go 1223 0 10ms (flat, cum) 0.48% of Total 1224 . . 444: traceGCMarkAssistStart() 1225 . . 445: } 1226 . . 446: 1227 . . 447: // Perform assist work 1228 . . 448: systemstack(func() { 1229 . 10ms 449: gcAssistAlloc1(gp, scanWork) 1230 . . 450: // The user stack may have moved, so this can't touch 1231 . . 451: // anything on it until it returns from systemstack. 1232 . . 452: }) 1233 . . 453: 1234 . . 454: completed := gp.param != nil 1235 ROUTINE ======================== runtime.gcAssistAlloc1 in /usr/local/go/src/runtime/mgcmark.go 1236 0 10ms (flat, cum) 0.48% of Total 1237 . . 533: gp.waitreason = waitReasonGCAssistMarking 1238 . . 534: 1239 . . 535: // drain own cached work first in the hopes that it 1240 . . 536: // will be more cache friendly. 1241 . . 537: gcw := &getg().m.p.ptr().gcw 1242 . 10ms 538: workDone := gcDrainN(gcw, scanWork) 1243 . . 539: 1244 . . 540: casgstatus(gp, _Gwaiting, _Grunning) 1245 . . 541: 1246 . . 542: // Record that we did this much scan work. 1247 . . 543: // 1248 ROUTINE ======================== runtime.gcBgMarkWorker in /usr/local/go/src/runtime/mgc.go 1249 0 30ms (flat, cum) 1.45% of Total 1250 . . 1962: if decnwait == work.nproc { 1251 . . 1963: println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc) 1252 . . 1964: throw("work.nwait was > work.nproc") 1253 . . 1965: } 1254 . . 1966: 1255 . 30ms 1967: systemstack(func() { 1256 . . 1968: // Mark our goroutine preemptible so its stack 1257 . . 1969: // can be scanned. This lets two mark workers 1258 . . 1970: // scan each other (otherwise, they would 1259 . . 1971: // deadlock). We must not modify anything on 1260 . . 1972: // the G stack. However, stack shrinking is 1261 ROUTINE ======================== runtime.gcBgMarkWorker.func1 in /usr/local/go/src/runtime/mgc.go 1262 0 10ms (flat, cum) 0.48% of Total 1263 . . 1929: // after parking the G. 1264 . . 1930: releasem(mp) 1265 . . 1931: } 1266 . . 1932: 1267 . . 1933: // Release this G to the pool. 1268 . 10ms 1934: gcBgMarkWorkerPool.push(&node.node) 1269 . . 1935: // Note that at this point, the G may immediately be 1270 . . 1936: // rescheduled and may be running. 1271 . . 1937: return true 1272 . . 1938: }, unsafe.Pointer(node), waitReasonGCWorkerIdle, traceEvGoBlock, 0) 1273 . . 1939: 1274 ROUTINE ======================== runtime.gcBgMarkWorker.func2 in /usr/local/go/src/runtime/mgc.go 1275 0 50ms (flat, cum) 2.42% of Total 1276 . . 1975: casgstatus(gp, _Grunning, _Gwaiting) 1277 . . 1976: switch pp.gcMarkWorkerMode { 1278 . . 1977: default: 1279 . . 1978: throw("gcBgMarkWorker: unexpected gcMarkWorkerMode") 1280 . . 1979: case gcMarkWorkerDedicatedMode: 1281 . 10ms 1980: gcDrain(&pp.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit) 1282 . . 1981: if gp.preempt { 1283 . . 1982: // We were preempted. This is 1284 . . 1983: // a useful signal to kick 1285 . . 1984: // everything out of the run 1286 . . 1985: // queue so it can run 1287 . . 1986: // somewhere else. 1288 . . 1987: lock(&sched.lock) 1289 . . 1988: for { 1290 . . 1989: gp, _ := runqget(pp) 1291 . . 1990: if gp == nil { 1292 . . 1991: break 1293 . . 1992: } 1294 . . 1993: globrunqput(gp) 1295 . . 1994: } 1296 . . 1995: unlock(&sched.lock) 1297 . . 1996: } 1298 . . 1997: // Go back to draining, this time 1299 . . 1998: // without preemption. 1300 . 30ms 1999: gcDrain(&pp.gcw, gcDrainFlushBgCredit) 1301 . . 
2000: case gcMarkWorkerFractionalMode: 1302 . . 2001: gcDrain(&pp.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit) 1303 . . 2002: case gcMarkWorkerIdleMode: 1304 . 10ms 2003: gcDrain(&pp.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit) 1305 . . 2004: } 1306 . . 2005: casgstatus(gp, _Gwaiting, _Grunning) 1307 . . 2006: }) 1308 . . 2007: 1309 . . 2008: // Account for time. 1310 ROUTINE ======================== runtime.gcDrain in /usr/local/go/src/runtime/mgcmark.go 1311 0 50ms (flat, cum) 2.42% of Total 1312 . . 1009: for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) { 1313 . . 1010: job := atomic.Xadd(&work.markrootNext, +1) - 1 1314 . . 1011: if job >= work.markrootJobs { 1315 . . 1012: break 1316 . . 1013: } 1317 . 10ms 1014: markroot(gcw, job) 1318 . . 1015: if check != nil && check() { 1319 . . 1016: goto done 1320 . . 1017: } 1321 . . 1018: } 1322 . . 1019: } 1323 . . 1020: 1324 . . 1021: // Drain heap marking jobs. 1325 . . 1022: // Stop if we're preemptible or if someone wants to STW. 1326 . . 1023: for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) { 1327 . . 1024: // Try to keep work available on the global queue. We used to 1328 . . 1025: // check if there were waiting workers, but it's better to 1329 . . 1026: // just keep work available than to make workers wait. In the 1330 . . 1027: // worst case, we'll do O(log(_WorkbufSize)) unnecessary 1331 . . 1028: // balances. 1332 . . 1029: if work.full == 0 { 1333 . 30ms 1030: gcw.balance() 1334 . . 1031: } 1335 . . 1032: 1336 . . 1033: b := gcw.tryGetFast() 1337 . . 1034: if b == 0 { 1338 . . 1035: b = gcw.tryGet() 1339 . . 1036: if b == 0 { 1340 . . 1037: // Flush the write barrier 1341 . . 1038: // buffer; this may create 1342 . . 1039: // more work. 1343 . . 1040: wbBufFlush(nil, 0) 1344 . . 1041: b = gcw.tryGet() 1345 . . 1042: } 1346 . . 1043: } 1347 . . 1044: if b == 0 { 1348 . . 1045: // Unable to get work. 1349 . . 1046: break 1350 . . 1047: } 1351 . 10ms 1048: scanobject(b, gcw) 1352 . . 1049: 1353 . . 1050: // Flush background scan work credit to the global 1354 . . 1051: // account if we've accumulated enough locally so 1355 . . 1052: // mutator assists can draw on it. 1356 . . 1053: if gcw.scanWork >= gcCreditSlack { 1357 ROUTINE ======================== runtime.gcDrainN in /usr/local/go/src/runtime/mgcmark.go 1358 0 10ms (flat, cum) 0.48% of Total 1359 . . 1103: 1360 . . 1104: gp := getg().m.curg 1361 . . 1105: for !gp.preempt && workFlushed+gcw.scanWork < scanWork { 1362 . . 1106: // See gcDrain comment. 1363 . . 1107: if work.full == 0 { 1364 . 10ms 1108: gcw.balance() 1365 . . 1109: } 1366 . . 1110: 1367 . . 1111: // This might be a good place to add prefetch code... 1368 . . 1112: // if(wbuf.nobj > 4) { 1369 . . 1113: // PREFETCH(wbuf->obj[wbuf.nobj - 3]; 1370 ROUTINE ======================== runtime.gcStart.func2 in /usr/local/go/src/runtime/mgc.go 1371 0 70ms (flat, cum) 3.38% of Total 1372 . . 1438: // returns, so make sure we're not preemptible. 1373 . . 1439: mp = acquirem() 1374 . . 1440: 1375 . . 1441: // Concurrent mark. 1376 . . 1442: systemstack(func() { 1377 . 70ms 1443: now = startTheWorldWithSema(trace.enabled) 1378 . . 1444: work.pauseNS += now - work.pauseStart 1379 . . 1445: work.tMark = now 1380 . . 1446: memstats.gcPauseDist.record(now - work.pauseStart) 1381 . . 1447: }) 1382 . . 1448: 1383 ROUTINE ======================== runtime.gentraceback in /usr/local/go/src/runtime/traceback.go 1384 0 10ms (flat, cum) 0.48% of Total 1385 . 
. 317: frame.continpc = 0 1386 . . 318: } 1387 . . 319: } 1388 . . 320: 1389 . . 321: if callback != nil { 1390 . 10ms 322: if !callback((*stkframe)(noescape(unsafe.Pointer(&frame))), v) { 1391 . . 323: return n 1392 . . 324: } 1393 . . 325: } 1394 . . 326: 1395 . . 327: if pcbuf != nil { 1396 ROUTINE ======================== runtime.handoff in /usr/local/go/src/runtime/mgcwork.go 1397 0 10ms (flat, cum) 0.48% of Total 1398 . . 431: // Make new buffer with half of b's pointers. 1399 . . 432: b1 := getempty() 1400 . . 433: n := b.nobj / 2 1401 . . 434: b.nobj -= n 1402 . . 435: b1.nobj = n 1403 . 10ms 436: memmove(unsafe.Pointer(&b1.obj[0]), unsafe.Pointer(&b.obj[b.nobj]), uintptr(n)*unsafe.Sizeof(b1.obj[0])) 1404 . . 437: 1405 . . 438: // Put b on full list - let first half of b get stolen. 1406 . . 439: putfull(b) 1407 . . 440: return b1 1408 . . 441:} 1409 ROUTINE ======================== runtime.heapBits.initSpan in /usr/local/go/src/runtime/mbitmap.go 1410 0 10ms (flat, cum) 0.48% of Total 1411 . . 760: for i := uintptr(0); i < nbyte; i++ { 1412 . . 761: *bitp = bitPointerAll | bitScanAll 1413 . . 762: bitp = add1(bitp) 1414 . . 763: } 1415 . . 764: } else { 1416 . 10ms 765: memclrNoHeapPointers(unsafe.Pointer(h.bitp), nbyte) 1417 . . 766: } 1418 . . 767: h = hNext 1419 . . 768: nw -= anw 1420 . . 769: } 1421 . . 770:} 1422 ROUTINE ======================== runtime.heapBitsForAddr in /usr/local/go/src/runtime/mbitmap.go 1423 20ms 20ms (flat, cum) 0.97% of Total 1424 . . 306:// nosplit because it is used during write barriers and must not be preempted. 1425 . . 307://go:nosplit 1426 . . 308:func heapBitsForAddr(addr uintptr) (h heapBits) { 1427 . . 309: // 2 bits per word, 4 pairs per byte, and a mask is hard coded. 1428 . . 310: arena := arenaIndex(addr) 1429 10ms 10ms 311: ha := mheap_.arenas[arena.l1()][arena.l2()] 1430 . . 312: // The compiler uses a load for nil checking ha, but in this 1431 . . 313: // case we'll almost never hit that cache line again, so it 1432 . . 314: // makes more sense to do a value check. 1433 . . 315: if ha == nil { 1434 . . 316: // addr is not in the heap. Return nil heapBits, which 1435 . . 317: // we expect to crash in the caller. 1436 . . 318: return 1437 . . 319: } 1438 . . 320: h.bitp = &ha.bitmap[(addr/(sys.PtrSize*4))%heapArenaBitmapBytes] 1439 10ms 10ms 321: h.shift = uint32((addr / sys.PtrSize) & 3) 1440 . . 322: h.arena = uint32(arena) 1441 . . 323: h.last = &ha.bitmap[len(ha.bitmap)-1] 1442 . . 324: return 1443 . . 325:} 1444 . . 326: 1445 ROUTINE ======================== runtime.heapBitsSetType in /usr/local/go/src/runtime/mbitmap.go 1446 40ms 60ms (flat, cum) 2.90% of Total 1447 . . 844: } 1448 . . 845: } 1449 . . 846: return 1450 . . 847: } 1451 . . 848: 1452 . 20ms 849: h := heapBitsForAddr(x) 1453 . . 850: ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below) 1454 . . 851: 1455 . . 852: // 2-word objects only have 4 bitmap bits and 3-word objects only have 6 bitmap bits. 1456 . . 853: // Therefore, these objects share a heap bitmap byte with the objects next to them. 1457 . . 854: // These are called out as a special case primarily so the code below can assume all 1458 . . 855: // objects are at least 4 words long and that their bitmaps start either at the beginning 1459 . . 856: // of a bitmap byte, or half-way in (h.shift of 0 and 2 respectively). 1460 . . 857: 1461 . . 858: if size == 2*sys.PtrSize { 1462 . . 859: if typ.size == sys.PtrSize { 1463 . . 
860: // We're allocating a block big enough to hold two pointers. 1464 . . 861: // On 64-bit, that means the actual object must be two pointers, 1465 . . 862: // or else we'd have used the one-pointer-sized block. 1466 . . 863: // On 32-bit, however, this is the 8-byte block, the smallest one. 1467 . . 864: // So it could be that we're allocating one pointer and this was 1468 . . 865: // just the smallest block available. Distinguish by checking dataSize. 1469 . . 866: // (In general the number of instances of typ being allocated is 1470 . . 867: // dataSize/typ.size.) 1471 . . 868: if sys.PtrSize == 4 && dataSize == sys.PtrSize { 1472 . . 869: // 1 pointer object. On 32-bit machines clear the bit for the 1473 . . 870: // unused second word. 1474 . . 871: *h.bitp &^= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift 1475 . . 872: *h.bitp |= (bitPointer | bitScan) << h.shift 1476 . . 873: } else { 1477 . . 874: // 2-element array of pointer. 1478 . . 875: *h.bitp |= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift 1479 . . 876: } 1480 . . 877: return 1481 . . 878: } 1482 . . 879: // Otherwise typ.size must be 2*sys.PtrSize, 1483 . . 880: // and typ.kind&kindGCProg == 0. 1484 . . 881: if doubleCheck { 1485 . . 882: if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 { 1486 . . 883: print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n") 1487 . . 884: throw("heapBitsSetType") 1488 . . 885: } 1489 . . 886: } 1490 . . 887: b := uint32(*ptrmask) 1491 . . 888: hb := b & 3 1492 . . 889: hb |= bitScanAll & ((bitScan << (typ.ptrdata / sys.PtrSize)) - 1) 1493 . . 890: // Clear the bits for this object so we can set the 1494 . . 891: // appropriate ones. 1495 . . 892: *h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift 1496 . . 893: *h.bitp |= uint8(hb << h.shift) 1497 . . 894: return 1498 10ms 10ms 895: } else if size == 3*sys.PtrSize { 1499 . . 896: b := uint8(*ptrmask) 1500 . . 897: if doubleCheck { 1501 . . 898: if b == 0 { 1502 . . 899: println("runtime: invalid type ", typ.string()) 1503 . . 900: throw("heapBitsSetType: called with non-pointer type") 1504 . . 901: } 1505 . . 902: if sys.PtrSize != 8 { 1506 . . 903: throw("heapBitsSetType: unexpected 3 pointer wide size class on 32 bit") 1507 . . 904: } 1508 . . 905: if typ.kind&kindGCProg != 0 { 1509 . . 906: throw("heapBitsSetType: unexpected GC prog for 3 pointer wide size class") 1510 . . 907: } 1511 . . 908: if typ.size == 2*sys.PtrSize { 1512 . . 909: print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, "\n") 1513 . . 910: throw("heapBitsSetType: inconsistent object sizes") 1514 . . 911: } 1515 . . 912: } 1516 . . 913: if typ.size == sys.PtrSize { 1517 . . 914: // The type contains a pointer otherwise heapBitsSetType wouldn't have been called. 1518 . . 915: // Since the type is only 1 pointer wide and contains a pointer, its gcdata must be exactly 1. 1519 . . 916: if doubleCheck && *typ.gcdata != 1 { 1520 . . 917: print("runtime: heapBitsSetType size=", size, " typ.size=", typ.size, "but *typ.gcdata", *typ.gcdata, "\n") 1521 . . 918: throw("heapBitsSetType: unexpected gcdata for 1 pointer wide type size in 3 pointer wide size class") 1522 . . 919: } 1523 . . 920: // 3 element array of pointers. Unrolling ptrmask 3 times into p yields 00000111. 1524 . . 921: b = 7 1525 . . 922: } 1526 . . 923: 1527 . . 924: hb := b & 7 1528 . . 925: // Set bitScan bits for all pointers. 
1529 . . 926: hb |= hb << wordsPerBitmapByte 1530 . . 927: // First bitScan bit is always set since the type contains pointers. 1531 . . 928: hb |= bitScan 1532 . . 929: // Second bitScan bit needs to also be set if the third bitScan bit is set. 1533 . . 930: hb |= hb & (bitScan << (2 * heapBitsShift)) >> 1 1534 . . 931: 1535 . . 932: // For h.shift > 1 heap bits cross a byte boundary and need to be written part 1536 . . 933: // to h.bitp and part to the next h.bitp. 1537 . . 934: switch h.shift { 1538 . . 935: case 0: 1539 20ms 20ms 936: *h.bitp &^= mask3 << 0 1540 10ms 10ms 937: *h.bitp |= hb << 0 1541 . . 938: case 1: 1542 . . 939: *h.bitp &^= mask3 << 1 1543 . . 940: *h.bitp |= hb << 1 1544 . . 941: case 2: 1545 . . 942: *h.bitp &^= mask2 << 2 1546 ROUTINE ======================== runtime.kevent in /usr/local/go/src/runtime/sys_darwin.go 1547 90ms 90ms (flat, cum) 4.35% of Total 1548 . . 344:func kqueue_trampoline() 1549 . . 345: 1550 . . 346://go:nosplit 1551 . . 347://go:cgo_unsafe_args 1552 . . 348:func kevent(kq int32, ch *keventt, nch int32, ev *keventt, nev int32, ts *timespec) int32 { 1553 90ms 90ms 349: return libcCall(unsafe.Pointer(funcPC(kevent_trampoline)), unsafe.Pointer(&kq)) 1554 . . 350:} 1555 . . 351:func kevent_trampoline() 1556 . . 352: 1557 . . 353://go:nosplit 1558 . . 354://go:cgo_unsafe_args 1559 ROUTINE ======================== runtime.mPark in /usr/local/go/src/runtime/proc.go 1560 0 40ms (flat, cum) 1.93% of Total 1561 . . 1335:// only way that m's should park themselves. 1562 . . 1336://go:nosplit 1563 . . 1337:func mPark() { 1564 . . 1338: g := getg() 1565 . . 1339: for { 1566 . 40ms 1340: notesleep(&g.m.park) 1567 . . 1341: // Note, because of signal handling by this parked m, 1568 . . 1342: // a preemptive mDoFixup() may actually occur via 1569 . . 1343: // mDoFixupAndOSYield(). (See golang.org/issue/44193) 1570 . . 1344: noteclear(&g.m.park) 1571 . . 1345: if !mDoFixup() { 1572 ROUTINE ======================== runtime.madvise in /usr/local/go/src/runtime/sys_darwin.go 1573 120ms 120ms (flat, cum) 5.80% of Total 1574 . . 176:func munmap_trampoline() 1575 . . 177: 1576 . . 178://go:nosplit 1577 . . 179://go:cgo_unsafe_args 1578 . . 180:func madvise(addr unsafe.Pointer, n uintptr, flags int32) { 1579 120ms 120ms 181: libcCall(unsafe.Pointer(funcPC(madvise_trampoline)), unsafe.Pointer(&addr)) 1580 . . 182:} 1581 . . 183:func madvise_trampoline() 1582 . . 184: 1583 . . 185://go:nosplit 1584 . . 186://go:cgo_unsafe_args 1585 ROUTINE ======================== runtime.mallocgc in /usr/local/go/src/runtime/malloc.go 1586 120ms 310ms (flat, cum) 14.98% of Total 1587 . . 900:} 1588 . . 901: 1589 . . 902:// Allocate an object of size bytes. 1590 . . 903:// Small objects are allocated from the per-P cache's free lists. 1591 . . 904:// Large objects (> 32 kB) are allocated straight from the heap. 1592 20ms 20ms 905:func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { 1593 . . 906: if gcphase == _GCmarktermination { 1594 . . 907: throw("mallocgc called with gcphase == _GCmarktermination") 1595 . . 908: } 1596 . . 909: 1597 . . 910: if size == 0 { 1598 . . 911: return unsafe.Pointer(&zerobase) 1599 . . 912: } 1600 . . 913: 1601 . . 914: if debug.malloc { 1602 . . 915: if debug.sbrk != 0 { 1603 . . 916: align := uintptr(16) 1604 . . 917: if typ != nil { 1605 . . 918: // TODO(austin): This should be just 1606 . . 919: // align = uintptr(typ.align) 1607 . . 920: // but that's only 4 on 32-bit platforms, 1608 . . 
921: // even if there's a uint64 field in typ (see #599). 1609 . . 922: // This causes 64-bit atomic accesses to panic. 1610 . . 923: // Hence, we use stricter alignment that matches 1611 . . 924: // the normal allocator better. 1612 . . 925: if size&7 == 0 { 1613 . . 926: align = 8 1614 . . 927: } else if size&3 == 0 { 1615 . . 928: align = 4 1616 . . 929: } else if size&1 == 0 { 1617 . . 930: align = 2 1618 . . 931: } else { 1619 . . 932: align = 1 1620 . . 933: } 1621 . . 934: } 1622 . . 935: return persistentalloc(size, align, &memstats.other_sys) 1623 . . 936: } 1624 . . 937: 1625 . . 938: if inittrace.active && inittrace.id == getg().goid { 1626 . . 939: // Init functions are executed sequentially in a single Go routine. 1627 . . 940: inittrace.allocs += 1 1628 . . 941: } 1629 . . 942: } 1630 . . 943: 1631 . . 944: // assistG is the G to charge for this allocation, or nil if 1632 . . 945: // GC is not currently active. 1633 . . 946: var assistG *g 1634 10ms 10ms 947: if gcBlackenEnabled != 0 { 1635 . . 948: // Charge the current user G for this allocation. 1636 . . 949: assistG = getg() 1637 . . 950: if assistG.m.curg != nil { 1638 . . 951: assistG = assistG.m.curg 1639 . . 952: } 1640 . . 953: // Charge the allocation against the G. We'll account 1641 . . 954: // for internal fragmentation at the end of mallocgc. 1642 . . 955: assistG.gcAssistBytes -= int64(size) 1643 . . 956: 1644 . . 957: if assistG.gcAssistBytes < 0 { 1645 . . 958: // This G is in debt. Assist the GC to correct 1646 . . 959: // this before allocating. This must happen 1647 . . 960: // before disabling preemption. 1648 . . 961: gcAssistAlloc(assistG) 1649 . . 962: } 1650 . . 963: } 1651 . . 964: 1652 . . 965: // Set mp.mallocing to keep from being preempted by GC. 1653 . . 966: mp := acquirem() 1654 10ms 10ms 967: if mp.mallocing != 0 { 1655 . . 968: throw("malloc deadlock") 1656 . . 969: } 1657 . . 970: if mp.gsignal == getg() { 1658 . . 971: throw("malloc during signal") 1659 . . 972: } 1660 . . 973: mp.mallocing = 1 1661 . . 974: 1662 . . 975: shouldhelpgc := false 1663 . . 976: dataSize := size 1664 . . 977: c := getMCache() 1665 . . 978: if c == nil { 1666 . . 979: throw("mallocgc called without a P or outside bootstrapping") 1667 . . 980: } 1668 . . 981: var span *mspan 1669 . . 982: var x unsafe.Pointer 1670 . . 983: noscan := typ == nil || typ.ptrdata == 0 1671 . . 984: if size <= maxSmallSize { 1672 . . 985: if noscan && size < maxTinySize { 1673 . . 986: // Tiny allocator. 1674 . . 987: // 1675 . . 988: // Tiny allocator combines several tiny allocation requests 1676 . . 989: // into a single memory block. The resulting memory block 1677 . . 990: // is freed when all subobjects are unreachable. The subobjects 1678 . . 991: // must be noscan (don't have pointers), this ensures that 1679 . . 992: // the amount of potentially wasted memory is bounded. 1680 . . 993: // 1681 . . 994: // Size of the memory block used for combining (maxTinySize) is tunable. 1682 . . 995: // Current setting is 16 bytes, which relates to 2x worst case memory 1683 . . 996: // wastage (when all but one subobjects are unreachable). 1684 . . 997: // 8 bytes would result in no wastage at all, but provides less 1685 . . 998: // opportunities for combining. 1686 . . 999: // 32 bytes provides more opportunities for combining, 1687 . . 1000: // but can lead to 4x worst case wastage. 1688 . . 1001: // The best case winning is 8x regardless of block size. 1689 . . 1002: // 1690 . . 
1003: // Objects obtained from tiny allocator must not be freed explicitly. 1691 . . 1004: // So when an object will be freed explicitly, we ensure that 1692 . . 1005: // its size >= maxTinySize. 1693 . . 1006: // 1694 . . 1007: // SetFinalizer has a special case for objects potentially coming 1695 . . 1008: // from tiny allocator, it such case it allows to set finalizers 1696 . . 1009: // for an inner byte of a memory block. 1697 . . 1010: // 1698 . . 1011: // The main targets of tiny allocator are small strings and 1699 . . 1012: // standalone escaping variables. On a json benchmark 1700 . . 1013: // the allocator reduces number of allocations by ~12% and 1701 . . 1014: // reduces heap size by ~20%. 1702 . . 1015: off := c.tinyoffset 1703 . . 1016: // Align tiny pointer for required (conservative) alignment. 1704 . . 1017: if size&7 == 0 { 1705 . . 1018: off = alignUp(off, 8) 1706 . . 1019: } else if sys.PtrSize == 4 && size == 12 { 1707 . . 1020: // Conservatively align 12-byte objects to 8 bytes on 32-bit 1708 . . 1021: // systems so that objects whose first field is a 64-bit 1709 . . 1022: // value is aligned to 8 bytes and does not cause a fault on 1710 . . 1023: // atomic access. See issue 37262. 1711 . . 1024: // TODO(mknyszek): Remove this workaround if/when issue 36606 1712 . . 1025: // is resolved. 1713 . . 1026: off = alignUp(off, 8) 1714 . . 1027: } else if size&3 == 0 { 1715 . . 1028: off = alignUp(off, 4) 1716 . . 1029: } else if size&1 == 0 { 1717 . . 1030: off = alignUp(off, 2) 1718 . . 1031: } 1719 . . 1032: if off+size <= maxTinySize && c.tiny != 0 { 1720 . . 1033: // The object fits into existing tiny block. 1721 . . 1034: x = unsafe.Pointer(c.tiny + off) 1722 . . 1035: c.tinyoffset = off + size 1723 . . 1036: c.tinyAllocs++ 1724 . . 1037: mp.mallocing = 0 1725 . . 1038: releasem(mp) 1726 . . 1039: return x 1727 . . 1040: } 1728 . . 1041: // Allocate a new maxTinySize block. 1729 . . 1042: span = c.alloc[tinySpanClass] 1730 . . 1043: v := nextFreeFast(span) 1731 . . 1044: if v == 0 { 1732 . . 1045: v, span, shouldhelpgc = c.nextFree(tinySpanClass) 1733 . . 1046: } 1734 . . 1047: x = unsafe.Pointer(v) 1735 10ms 10ms 1048: (*[2]uint64)(x)[0] = 0 1736 . . 1049: (*[2]uint64)(x)[1] = 0 1737 . . 1050: // See if we need to replace the existing tiny block with the new one 1738 . . 1051: // based on amount of remaining free space. 1739 . . 1052: if size < c.tinyoffset || c.tiny == 0 { 1740 . . 1053: c.tiny = uintptr(x) 1741 . . 1054: c.tinyoffset = size 1742 . . 1055: } 1743 . . 1056: size = maxTinySize 1744 . . 1057: } else { 1745 . . 1058: var sizeclass uint8 1746 . . 1059: if size <= smallSizeMax-8 { 1747 20ms 20ms 1060: sizeclass = size_to_class8[divRoundUp(size, smallSizeDiv)] 1748 . . 1061: } else { 1749 . . 1062: sizeclass = size_to_class128[divRoundUp(size-smallSizeMax, largeSizeDiv)] 1750 . . 1063: } 1751 10ms 10ms 1064: size = uintptr(class_to_size[sizeclass]) 1752 . . 1065: spc := makeSpanClass(sizeclass, noscan) 1753 . . 1066: span = c.alloc[spc] 1754 . 30ms 1067: v := nextFreeFast(span) 1755 . . 1068: if v == 0 { 1756 . 80ms 1069: v, span, shouldhelpgc = c.nextFree(spc) 1757 . . 1070: } 1758 . . 1071: x = unsafe.Pointer(v) 1759 . . 1072: if needzero && span.needzero != 0 { 1760 . . 1073: memclrNoHeapPointers(unsafe.Pointer(v), size) 1761 . . 1074: } 1762 . . 1075: } 1763 . . 1076: } else { 1764 . . 1077: shouldhelpgc = true 1765 . . 1078: span = c.allocLarge(size, needzero, noscan) 1766 . . 1079: span.freeindex = 1 1767 . . 1080: span.allocCount = 1 1768 . . 
1081: x = unsafe.Pointer(span.base()) 1769 . . 1082: size = span.elemsize 1770 . . 1083: } 1771 . . 1084: 1772 . . 1085: var scanSize uintptr 1773 . . 1086: if !noscan { 1774 . . 1087: // If allocating a defer+arg block, now that we've picked a malloc size 1775 . . 1088: // large enough to hold everything, cut the "asked for" size down to 1776 . . 1089: // just the defer header, so that the GC bitmap will record the arg block 1777 . . 1090: // as containing nothing at all (as if it were unused space at the end of 1778 . . 1091: // a malloc block caused by size rounding). 1779 . . 1092: // The defer arg areas are scanned as part of scanstack. 1780 . . 1093: if typ == deferType { 1781 . . 1094: dataSize = unsafe.Sizeof(_defer{}) 1782 . . 1095: } 1783 10ms 70ms 1096: heapBitsSetType(uintptr(x), size, dataSize, typ) 1784 . . 1097: if dataSize > typ.size { 1785 . . 1098: // Array allocation. If there are any 1786 . . 1099: // pointers, GC has to scan to the last 1787 . . 1100: // element. 1788 . . 1101: if typ.ptrdata != 0 { 1789 . . 1102: scanSize = dataSize - typ.size + typ.ptrdata 1790 . . 1103: } 1791 . . 1104: } else { 1792 . . 1105: scanSize = typ.ptrdata 1793 . . 1106: } 1794 . . 1107: c.scanAlloc += scanSize 1795 . . 1108: } 1796 . . 1109: 1797 . . 1110: // Ensure that the stores above that initialize x to 1798 . . 1111: // type-safe memory and set the heap bits occur before 1799 . . 1112: // the caller can make x observable to the garbage 1800 . . 1113: // collector. Otherwise, on weakly ordered machines, 1801 . . 1114: // the garbage collector could follow a pointer to x, 1802 . . 1115: // but see uninitialized memory or stale heap bits. 1803 . . 1116: publicationBarrier() 1804 . . 1117: 1805 . . 1118: // Allocate black during GC. 1806 . . 1119: // All slots hold nil so no scanning is needed. 1807 . . 1120: // This may be racing with GC so do it atomically if there can be 1808 . . 1121: // a race marking the bit. 1809 10ms 10ms 1122: if gcphase != _GCoff { 1810 . . 1123: gcmarknewobject(span, uintptr(x), size, scanSize) 1811 . . 1124: } 1812 . . 1125: 1813 . . 1126: if raceenabled { 1814 . . 1127: racemalloc(x, size) 1815 . . 1128: } 1816 . . 1129: 1817 . . 1130: if msanenabled { 1818 . . 1131: msanmalloc(x, size) 1819 . . 1132: } 1820 . . 1133: 1821 . . 1134: mp.mallocing = 0 1822 . 20ms 1135: releasem(mp) 1823 . . 1136: 1824 . . 1137: if debug.malloc { 1825 . . 1138: if debug.allocfreetrace != 0 { 1826 . . 1139: tracealloc(x, size, typ) 1827 . . 1140: } 1828 . . 1141: 1829 . . 1142: if inittrace.active && inittrace.id == getg().goid { 1830 . . 1143: // Init functions are executed sequentially in a single Go routine. 1831 . . 1144: inittrace.bytes += uint64(size) 1832 . . 1145: } 1833 . . 1146: } 1834 . . 1147: 1835 . . 1148: if rate := MemProfileRate; rate > 0 { 1836 20ms 20ms 1149: if rate != 1 && size < c.nextSample { 1837 . . 1150: c.nextSample -= size 1838 . . 1151: } else { 1839 . . 1152: mp := acquirem() 1840 . . 1153: profilealloc(mp, x, size) 1841 . . 1154: releasem(mp) 1842 ROUTINE ======================== runtime.mapaccess1_faststr in /usr/local/go/src/runtime/map_faststr.go 1843 20ms 20ms (flat, cum) 0.97% of Total 1844 . . 42: } 1845 . . 43: // long key, try not to do more comparisons than necessary 1846 . . 44: keymaybe := uintptr(bucketCnt) 1847 . . 45: for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { 1848 . . 46: k := (*stringStruct)(kptr) 1849 10ms 10ms 47: if k.len != key.len || isEmpty(b.tophash[i]) { 1850 . . 
48: if b.tophash[i] == emptyRest { 1851 . . 49: break 1852 . . 50: } 1853 . . 51: continue 1854 . . 52: } 1855 . . 53: if k.str == key.str { 1856 . . 54: return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize)) 1857 . . 55: } 1858 . . 56: // check first 4 bytes 1859 . . 57: if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) { 1860 . . 58: continue 1861 . . 59: } 1862 . . 60: // check last 4 bytes 1863 . . 61: if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) { 1864 . . 62: continue 1865 . . 63: } 1866 . . 64: if keymaybe != bucketCnt { 1867 . . 65: // Two keys are potential matches. Use hash to distinguish them. 1868 . . 66: goto dohash 1869 . . 67: } 1870 . . 68: keymaybe = i 1871 . . 69: } 1872 . . 70: if keymaybe != bucketCnt { 1873 . . 71: k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize)) 1874 10ms 10ms 72: if memequal(k.str, key.str, uintptr(key.len)) { 1875 . . 73: return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.elemsize)) 1876 . . 74: } 1877 . . 75: } 1878 . . 76: return unsafe.Pointer(&zeroVal[0]) 1879 . . 77: } 1880 ROUTINE ======================== runtime.mapaccess2_faststr in /usr/local/go/src/runtime/map_faststr.go 1881 120ms 280ms (flat, cum) 13.53% of Total 1882 . . 102: } 1883 . . 103: } 1884 . . 104: return unsafe.Pointer(&zeroVal[0]) 1885 . . 105:} 1886 . . 106: 1887 10ms 10ms 107:func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { 1888 . . 108: if raceenabled && h != nil { 1889 . . 109: callerpc := getcallerpc() 1890 . . 110: racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_faststr)) 1891 . . 111: } 1892 . . 112: if h == nil || h.count == 0 { 1893 . . 113: return unsafe.Pointer(&zeroVal[0]), false 1894 . . 114: } 1895 . . 115: if h.flags&hashWriting != 0 { 1896 . . 116: throw("concurrent map read and map write") 1897 . . 117: } 1898 . . 118: key := stringStructOf(&ky) 1899 10ms 10ms 119: if h.B == 0 { 1900 . . 120: // One-bucket table. 1901 . . 121: b := (*bmap)(h.buckets) 1902 . . 122: if key.len < 32 { 1903 . . 123: // short key, doing lots of comparisons is ok 1904 . . 124: for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { 1905 . . 125: k := (*stringStruct)(kptr) 1906 . . 126: if k.len != key.len || isEmpty(b.tophash[i]) { 1907 . . 127: if b.tophash[i] == emptyRest { 1908 . . 128: break 1909 . . 129: } 1910 . . 130: continue 1911 . . 131: } 1912 . . 132: if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { 1913 . . 133: return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize)), true 1914 . . 134: } 1915 . . 135: } 1916 . . 136: return unsafe.Pointer(&zeroVal[0]), false 1917 . . 137: } 1918 . . 138: // long key, try not to do more comparisons than necessary 1919 . . 139: keymaybe := uintptr(bucketCnt) 1920 . . 140: for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { 1921 . . 141: k := (*stringStruct)(kptr) 1922 . . 142: if k.len != key.len || isEmpty(b.tophash[i]) { 1923 . . 143: if b.tophash[i] == emptyRest { 1924 . . 144: break 1925 . . 145: } 1926 . . 146: continue 1927 . . 147: } 1928 . . 148: if k.str == key.str { 1929 . . 149: return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize)), true 1930 . . 150: } 1931 . . 151: // check first 4 bytes 1932 . . 152: if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) { 1933 . . 
153: continue 1934 . . 154: } 1935 . . 155: // check last 4 bytes 1936 . . 156: if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) { 1937 . . 157: continue 1938 . . 158: } 1939 . . 159: if keymaybe != bucketCnt { 1940 . . 160: // Two keys are potential matches. Use hash to distinguish them. 1941 . . 161: goto dohash 1942 . . 162: } 1943 . . 163: keymaybe = i 1944 . . 164: } 1945 . . 165: if keymaybe != bucketCnt { 1946 . . 166: k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize)) 1947 . . 167: if memequal(k.str, key.str, uintptr(key.len)) { 1948 . . 168: return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.elemsize)), true 1949 . . 169: } 1950 . . 170: } 1951 . . 171: return unsafe.Pointer(&zeroVal[0]), false 1952 . . 172: } 1953 . . 173:dohash: 1954 10ms 90ms 174: hash := t.hasher(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) 1955 20ms 40ms 175: m := bucketMask(h.B) 1956 . . 176: b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) 1957 . . 177: if c := h.oldbuckets; c != nil { 1958 . . 178: if !h.sameSizeGrow() { 1959 . . 179: // There used to be half as many buckets; mask down one more power of two. 1960 . . 180: m >>= 1 1961 . . 181: } 1962 . . 182: oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) 1963 . . 183: if !evacuated(oldb) { 1964 . . 184: b = oldb 1965 . . 185: } 1966 . . 186: } 1967 . . 187: top := tophash(hash) 1968 . . 188: for ; b != nil; b = b.overflow(t) { 1969 . . 189: for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { 1970 . . 190: k := (*stringStruct)(kptr) 1971 50ms 50ms 191: if k.len != key.len || b.tophash[i] != top { 1972 . . 192: continue 1973 . . 193: } 1974 10ms 60ms 194: if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { 1975 10ms 20ms 195: return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize)), true 1976 . . 196: } 1977 . . 197: } 1978 . . 198: } 1979 . . 199: return unsafe.Pointer(&zeroVal[0]), false 1980 . . 200:} 1981 ROUTINE ======================== runtime.markroot in /usr/local/go/src/runtime/mgcmark.go 1982 0 10ms (flat, cum) 0.48% of Total 1983 . . 201: gp.waitsince = work.tstart 1984 . . 202: } 1985 . . 203: 1986 . . 204: // scanstack must be done on the system stack in case 1987 . . 205: // we're trying to scan our own stack. 1988 . 10ms 206: systemstack(func() { 1989 . . 207: // If this is a self-scan, put the user G in 1990 . . 208: // _Gwaiting to prevent self-deadlock. It may 1991 . . 209: // already be in _Gwaiting if this is a mark 1992 . . 210: // worker or we're in mark termination. 1993 . . 211: userG := getg().m.curg 1994 ROUTINE ======================== runtime.markroot.func1 in /usr/local/go/src/runtime/mgcmark.go 1995 0 10ms (flat, cum) 0.48% of Total 1996 . . 228: return 1997 . . 229: } 1998 . . 230: if gp.gcscandone { 1999 . . 231: throw("g already scanned") 2000 . . 232: } 2001 . 10ms 233: scanstack(gp, gcw) 2002 . . 234: gp.gcscandone = true 2003 . . 235: resumeG(stopped) 2004 . . 236: 2005 . . 237: if selfScan { 2006 . . 238: casgstatus(userG, _Gwaiting, _Grunning) 2007 ROUTINE ======================== runtime.mcall in /usr/local/go/src/runtime/asm_amd64.s 2008 0 70ms (flat, cum) 3.38% of Total 2009 . . 322: MOVQ SI, g(CX) // g = m->g0 2010 . . 323: MOVQ (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp 2011 . . 324: PUSHQ AX 2012 . . 325: MOVQ DI, DX 2013 . . 326: MOVQ 0(DI), DI 2014 . 70ms 327: CALL DI 2015 . . 
328: POPQ AX 2016 . . 329: MOVQ $runtime·badmcall2(SB), AX 2017 . . 330: JMP AX 2018 . . 331: RET 2019 . . 332: 2020 ROUTINE ======================== runtime.memclrNoHeapPointers in /usr/local/go/src/runtime/memclr_amd64.s 2021 50ms 50ms (flat, cum) 2.42% of Total 2022 . . 34: PXOR X0, X0 2023 . . 35: CMPQ BX, $32 2024 . . 36: JBE _17through32 2025 . . 37: CMPQ BX, $64 2026 . . 38: JBE _33through64 2027 10ms 10ms 39: CMPQ BX, $128 2028 . . 40: JBE _65through128 2029 . . 41: CMPQ BX, $256 2030 . . 42: JBE _129through256 2031 . . 43: CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1 2032 . . 44: JE loop_preheader_avx2 2033 . . 45: // TODO: for really big clears, use MOVNTDQ, even without AVX2. 2034 . . 46: 2035 . . 47:loop: 2036 . . 48: MOVOU X0, 0(DI) 2037 . . 49: MOVOU X0, 16(DI) 2038 . . 50: MOVOU X0, 32(DI) 2039 . . 51: MOVOU X0, 48(DI) 2040 . . 52: MOVOU X0, 64(DI) 2041 . . 53: MOVOU X0, 80(DI) 2042 . . 54: MOVOU X0, 96(DI) 2043 . . 55: MOVOU X0, 112(DI) 2044 . . 56: MOVOU X0, 128(DI) 2045 . . 57: MOVOU X0, 144(DI) 2046 . . 58: MOVOU X0, 160(DI) 2047 . . 59: MOVOU X0, 176(DI) 2048 . . 60: MOVOU X0, 192(DI) 2049 . . 61: MOVOU X0, 208(DI) 2050 . . 62: MOVOU X0, 224(DI) 2051 . . 63: MOVOU X0, 240(DI) 2052 . . 64: SUBQ $256, BX 2053 . . 65: ADDQ $256, DI 2054 . . 66: CMPQ BX, $256 2055 . . 67: JAE loop 2056 . . 68: JMP tail 2057 . . 69: 2058 . . 70:loop_preheader_avx2: 2059 . . 71: VPXOR Y0, Y0, Y0 2060 . . 72: // For smaller sizes MOVNTDQ may be faster or slower depending on hardware. 2061 . . 73: // For larger sizes it is always faster, even on dual Xeons with 30M cache. 2062 . . 74: // TODO take into account actual LLC size. E. g. glibc uses LLC size/2. 2063 . . 75: CMPQ BX, $0x2000000 2064 . . 76: JAE loop_preheader_avx2_huge 2065 . . 77:loop_avx2: 2066 10ms 10ms 78: VMOVDQU Y0, 0(DI) 2067 20ms 20ms 79: VMOVDQU Y0, 32(DI) 2068 . . 80: VMOVDQU Y0, 64(DI) 2069 10ms 10ms 81: VMOVDQU Y0, 96(DI) 2070 . . 82: SUBQ $128, BX 2071 . . 83: ADDQ $128, DI 2072 . . 84: CMPQ BX, $128 2073 . . 85: JAE loop_avx2 2074 . . 86: VMOVDQU Y0, -32(DI)(BX*1) 2075 ROUTINE ======================== runtime.memequal in /usr/local/go/src/internal/bytealg/equal_amd64.s 2076 10ms 10ms (flat, cum) 0.48% of Total 2077 . . 5:#include "go_asm.h" 2078 . . 6:#include "textflag.h" 2079 . . 7: 2080 . . 8:// memequal(a, b unsafe.Pointer, size uintptr) bool 2081 . . 9:TEXT runtime·memequal(SB),NOSPLIT,$0-25 2082 10ms 10ms 10: MOVQ a+0(FP), SI 2083 . . 11: MOVQ b+8(FP), DI 2084 . . 12: CMPQ SI, DI 2085 . . 13: JEQ eq 2086 . . 14: MOVQ size+16(FP), BX 2087 . . 15: LEAQ ret+24(FP), AX 2088 ROUTINE ======================== runtime.memmove in /usr/local/go/src/runtime/memmove_amd64.s 2089 40ms 40ms (flat, cum) 1.93% of Total 2090 . . 50: // BSR+branch table make almost all memmove/memclr benchmarks worse. Not worth doing. 2091 . . 51: TESTQ BX, BX 2092 . . 52: JEQ move_0 2093 . . 53: CMPQ BX, $2 2094 . . 54: JBE move_1or2 2095 10ms 10ms 55: CMPQ BX, $4 2096 . . 56: JB move_3 2097 . . 57: JBE move_4 2098 . . 58: CMPQ BX, $8 2099 . . 59: JB move_5through7 2100 . . 60: JE move_8 2101 . . 61: CMPQ BX, $16 2102 . . 62: JBE move_9through16 2103 . . 63: CMPQ BX, $32 2104 . . 64: JBE move_17through32 2105 . . 65: CMPQ BX, $64 2106 . . 66: JBE move_33through64 2107 . . 67: CMPQ BX, $128 2108 . . 68: JBE move_65through128 2109 . . 69: CMPQ BX, $256 2110 . . 70: JBE move_129through256 2111 . . 71: 2112 . . 72: TESTB $1, runtime·useAVXmemmove(SB) 2113 . . 73: JNZ avxUnaligned 2114 . . 74: 2115 . . 75:/* 2116 . . 
76: * check and set for backwards 2117 . . 77: */ 2118 . . 78: CMPQ SI, DI 2119 . . 79: JLS back 2120 . . 80: 2121 . . 81:/* 2122 . . 82: * forward copy loop 2123 . . 83: */ 2124 . . 84:forward: 2125 . . 85: CMPQ BX, $2048 2126 . . 86: JLS move_256through2048 2127 . . 87: 2128 . . 88: // If REP MOVSB isn't fast, don't use it 2129 . . 89: CMPB internal∕cpu·X86+const_offsetX86HasERMS(SB), $1 // enhanced REP MOVSB/STOSB 2130 . . 90: JNE fwdBy8 2131 . . 91: 2132 . . 92: // Check alignment 2133 . . 93: MOVL SI, AX 2134 . . 94: ORL DI, AX 2135 . . 95: TESTL $7, AX 2136 . . 96: JEQ fwdBy8 2137 . . 97: 2138 . . 98: // Do 1 byte at a time 2139 . . 99: MOVQ BX, CX 2140 . . 100: REP; MOVSB 2141 . . 101: RET 2142 . . 102: 2143 . . 103:fwdBy8: 2144 . . 104: // Do 8 bytes at a time 2145 . . 105: MOVQ BX, CX 2146 . . 106: SHRQ $3, CX 2147 . . 107: ANDQ $7, BX 2148 . . 108: REP; MOVSQ 2149 . . 109: JMP tail 2150 . . 110: 2151 . . 111:back: 2152 . . 112:/* 2153 . . 113: * check overlap 2154 . . 114: */ 2155 . . 115: MOVQ SI, CX 2156 . . 116: ADDQ BX, CX 2157 . . 117: CMPQ CX, DI 2158 . . 118: JLS forward 2159 . . 119:/* 2160 . . 120: * whole thing backwards has 2161 . . 121: * adjusted addresses 2162 . . 122: */ 2163 . . 123: ADDQ BX, DI 2164 . . 124: ADDQ BX, SI 2165 . . 125: STD 2166 . . 126: 2167 . . 127:/* 2168 . . 128: * copy 2169 . . 129: */ 2170 . . 130: MOVQ BX, CX 2171 . . 131: SHRQ $3, CX 2172 . . 132: ANDQ $7, BX 2173 . . 133: 2174 . . 134: SUBQ $8, DI 2175 . . 135: SUBQ $8, SI 2176 . . 136: REP; MOVSQ 2177 . . 137: 2178 . . 138: CLD 2179 . . 139: ADDQ $8, DI 2180 . . 140: ADDQ $8, SI 2181 . . 141: SUBQ BX, DI 2182 . . 142: SUBQ BX, SI 2183 . . 143: JMP tail 2184 . . 144: 2185 . . 145:move_1or2: 2186 . . 146: MOVB (SI), AX 2187 . . 147: MOVB -1(SI)(BX*1), CX 2188 10ms 10ms 148: MOVB AX, (DI) 2189 . . 149: MOVB CX, -1(DI)(BX*1) 2190 . . 150: RET 2191 . . 151:move_0: 2192 . . 152: RET 2193 . . 153:move_4: 2194 . . 154: MOVL (SI), AX 2195 . . 155: MOVL AX, (DI) 2196 . . 156: RET 2197 . . 157:move_3: 2198 . . 158: MOVW (SI), AX 2199 . . 159: MOVB 2(SI), CX 2200 . . 160: MOVW AX, (DI) 2201 . . 161: MOVB CX, 2(DI) 2202 . . 162: RET 2203 . . 163:move_5through7: 2204 . . 164: MOVL (SI), AX 2205 . . 165: MOVL -4(SI)(BX*1), CX 2206 . . 166: MOVL AX, (DI) 2207 . . 167: MOVL CX, -4(DI)(BX*1) 2208 . . 168: RET 2209 . . 169:move_8: 2210 . . 170: // We need a separate case for 8 to make sure we write pointers atomically. 2211 . . 171: MOVQ (SI), AX 2212 . . 172: MOVQ AX, (DI) 2213 . . 173: RET 2214 . . 174:move_9through16: 2215 . . 175: MOVQ (SI), AX 2216 . . 176: MOVQ -8(SI)(BX*1), CX 2217 . . 177: MOVQ AX, (DI) 2218 . . 178: MOVQ CX, -8(DI)(BX*1) 2219 . . 179: RET 2220 . . 180:move_17through32: 2221 . . 181: MOVOU (SI), X0 2222 . . 182: MOVOU -16(SI)(BX*1), X1 2223 . . 183: MOVOU X0, (DI) 2224 . . 184: MOVOU X1, -16(DI)(BX*1) 2225 . . 185: RET 2226 . . 186:move_33through64: 2227 . . 187: MOVOU (SI), X0 2228 . . 188: MOVOU 16(SI), X1 2229 . . 189: MOVOU -32(SI)(BX*1), X2 2230 10ms 10ms 190: MOVOU -16(SI)(BX*1), X3 2231 . . 191: MOVOU X0, (DI) 2232 . . 192: MOVOU X1, 16(DI) 2233 . . 193: MOVOU X2, -32(DI)(BX*1) 2234 . . 194: MOVOU X3, -16(DI)(BX*1) 2235 . . 195: RET 2236 . . 196:move_65through128: 2237 . . 197: MOVOU (SI), X0 2238 . . 198: MOVOU 16(SI), X1 2239 . . 199: MOVOU 32(SI), X2 2240 . . 200: MOVOU 48(SI), X3 2241 . . 201: MOVOU -64(SI)(BX*1), X4 2242 . . 202: MOVOU -48(SI)(BX*1), X5 2243 . . 203: MOVOU -32(SI)(BX*1), X6 2244 . . 204: MOVOU -16(SI)(BX*1), X7 2245 . . 205: MOVOU X0, (DI) 2246 . . 
206: MOVOU X1, 16(DI) 2247 . . 207: MOVOU X2, 32(DI) 2248 . . 208: MOVOU X3, 48(DI) 2249 . . 209: MOVOU X4, -64(DI)(BX*1) 2250 . . 210: MOVOU X5, -48(DI)(BX*1) 2251 . . 211: MOVOU X6, -32(DI)(BX*1) 2252 . . 212: MOVOU X7, -16(DI)(BX*1) 2253 . . 213: RET 2254 . . 214:move_129through256: 2255 10ms 10ms 215: MOVOU (SI), X0 2256 . . 216: MOVOU 16(SI), X1 2257 . . 217: MOVOU 32(SI), X2 2258 . . 218: MOVOU 48(SI), X3 2259 . . 219: MOVOU 64(SI), X4 2260 . . 220: MOVOU 80(SI), X5 2261 ROUTINE ======================== runtime.mstart in /usr/local/go/src/runtime/proc.go 2262 0 230ms (flat, cum) 11.11% of Total 2263 . . 1241:// May run during STW (because it doesn't have a P yet), so write 2264 . . 1242:// barriers are not allowed. 2265 . . 1243:// 2266 . . 1244://go:nosplit 2267 . . 1245://go:nowritebarrierrec 2268 . 230ms 1246:func mstart() { 2269 . . 1247: _g_ := getg() 2270 . . 1248: 2271 . . 1249: osStack := _g_.stack.lo == 0 2272 . . 1250: if osStack { 2273 . . 1251: // Initialize stack bounds from system stack. 2274 ROUTINE ======================== runtime.nanotime in /usr/local/go/src/runtime/time_nofake.go 2275 0 10ms (flat, cum) 0.48% of Total 2276 . . 14:// Zero means not to use faketime. 2277 . . 15:var faketime int64 2278 . . 16: 2279 . . 17://go:nosplit 2280 . . 18:func nanotime() int64 { 2281 . 10ms 19: return nanotime1() 2282 . . 20:} 2283 . . 21: 2284 . . 22:func walltime() (sec int64, nsec int32) { 2285 . . 23: return walltime1() 2286 . . 24:} 2287 ROUTINE ======================== runtime.nanotime1 in /usr/local/go/src/runtime/sys_darwin.go 2288 10ms 10ms (flat, cum) 0.48% of Total 2289 . . 242:func open_trampoline() 2290 . . 243: 2291 . . 244://go:nosplit 2292 . . 245://go:cgo_unsafe_args 2293 . . 246:func nanotime1() int64 { 2294 10ms 10ms 247: var r struct { 2295 . . 248: t int64 // raw timer 2296 . . 249: numer, denom uint32 // conversion factors. nanoseconds = t * numer / denom. 2297 . . 250: } 2298 . . 251: libcCall(unsafe.Pointer(funcPC(nanotime_trampoline)), unsafe.Pointer(&r)) 2299 . . 252: // Note: Apple seems unconcerned about overflow here. See 2300 ROUTINE ======================== runtime.netpoll in /usr/local/go/src/runtime/netpoll_kqueue.go 2301 0 90ms (flat, cum) 4.35% of Total 2302 . . 122: } 2303 . . 123: tp = &ts 2304 . . 124: } 2305 . . 125: var events [64]keventt 2306 . . 126:retry: 2307 . 90ms 127: n := kevent(kq, nil, 0, &events[0], int32(len(events)), tp) 2308 . . 128: if n < 0 { 2309 . . 129: if n != -_EINTR { 2310 . . 130: println("runtime: kevent on fd", kq, "failed with", -n) 2311 . . 131: throw("runtime: netpoll failed") 2312 . . 132: } 2313 ROUTINE ======================== runtime.newobject in /usr/local/go/src/runtime/malloc.go 2314 10ms 290ms (flat, cum) 14.01% of Total 2315 . . 1172: 2316 . . 1173:// implementation of new builtin 2317 . . 1174:// compiler (both frontend and SSA backend) knows the signature 2318 . . 1175:// of this function 2319 . . 1176:func newobject(typ *_type) unsafe.Pointer { 2320 10ms 290ms 1177: return mallocgc(typ.size, typ, true) 2321 . . 1178:} 2322 . . 1179: 2323 . . 1180://go:linkname reflect_unsafe_New reflect.unsafe_New 2324 . . 1181:func reflect_unsafe_New(typ *_type) unsafe.Pointer { 2325 . . 1182: return mallocgc(typ.size, typ, true) 2326 ROUTINE ======================== runtime.nextFreeFast in /usr/local/go/src/runtime/malloc.go 2327 30ms 30ms (flat, cum) 1.45% of Total 2328 . . 841:var zerobase uintptr 2329 . . 842: 2330 . . 843:// nextFreeFast returns the next free object if one is quickly available. 2331 . . 
844:// Otherwise it returns 0. 2332 . . 845:func nextFreeFast(s *mspan) gclinkptr { 2333 30ms 30ms 846: theBit := sys.Ctz64(s.allocCache) // Is there a free object in the allocCache? 2334 . . 847: if theBit < 64 { 2335 . . 848: result := s.freeindex + uintptr(theBit) 2336 . . 849: if result < s.nelems { 2337 . . 850: freeidx := result + 1 2338 . . 851: if freeidx%64 == 0 && freeidx != s.nelems { 2339 ROUTINE ======================== runtime.notesleep in /usr/local/go/src/runtime/lock_sema.go 2340 0 40ms (flat, cum) 1.93% of Total 2341 . . 176: return 2342 . . 177: } 2343 . . 178: // Queued. Sleep. 2344 . . 179: gp.m.blocked = true 2345 . . 180: if *cgo_yield == nil { 2346 . 40ms 181: semasleep(-1) 2347 . . 182: } else { 2348 . . 183: // Sleep for an arbitrary-but-moderate interval to poll libc interceptors. 2349 . . 184: const ns = 10e6 2350 . . 185: for atomic.Loaduintptr(&n.key) == 0 { 2351 . . 186: semasleep(ns) 2352 ROUTINE ======================== runtime.park_m in /usr/local/go/src/runtime/proc.go 2353 0 70ms (flat, cum) 3.38% of Total 2354 . . 3302: 2355 . . 3303: casgstatus(gp, _Grunning, _Gwaiting) 2356 . . 3304: dropg() 2357 . . 3305: 2358 . . 3306: if fn := _g_.m.waitunlockf; fn != nil { 2359 . 10ms 3307: ok := fn(gp, _g_.m.waitlock) 2360 . . 3308: _g_.m.waitunlockf = nil 2361 . . 3309: _g_.m.waitlock = nil 2362 . . 3310: if !ok { 2363 . . 3311: if trace.enabled { 2364 . . 3312: traceGoUnpark(gp, 2) 2365 . . 3313: } 2366 . . 3314: casgstatus(gp, _Gwaiting, _Grunnable) 2367 . . 3315: execute(gp, true) // Schedule it back, never returns. 2368 . . 3316: } 2369 . . 3317: } 2370 . 60ms 3318: schedule() 2371 . . 3319:} 2372 . . 3320: 2373 . . 3321:func goschedImpl(gp *g) { 2374 . . 3322: status := readgstatus(gp) 2375 . . 3323: if status&^_Gscan != _Grunning { 2376 ROUTINE ======================== runtime.preemptM in /usr/local/go/src/runtime/signal_unix.go 2377 0 30ms (flat, cum) 1.45% of Total 2378 . . 364: // If multiple threads are preempting the same M, it may send many 2379 . . 365: // signals to the same M such that it hardly make progress, causing 2380 . . 366: // live-lock problem. Apparently this could happen on darwin. See 2381 . . 367: // issue #37741. 2382 . . 368: // Only send a signal if there isn't already one pending. 2383 . 30ms 369: signalM(mp, sigPreempt) 2384 . . 370: } 2385 . . 371: 2386 . . 372: if GOOS == "darwin" || GOOS == "ios" { 2387 . . 373: execLock.runlock() 2388 . . 374: } 2389 ROUTINE ======================== runtime.preemptone in /usr/local/go/src/runtime/proc.go 2390 0 30ms (flat, cum) 1.45% of Total 2391 . . 5420: gp.stackguard0 = stackPreempt 2392 . . 5421: 2393 . . 5422: // Request an async preemption of this P. 2394 . . 5423: if preemptMSupported && debug.asyncpreemptoff == 0 { 2395 . . 5424: _p_.preempt = true 2396 . 30ms 5425: preemptM(mp) 2397 . . 5426: } 2398 . . 5427: 2399 . . 5428: return true 2400 . . 5429:} 2401 . . 5430: 2402 ROUTINE ======================== runtime.pthread_cond_wait in /usr/local/go/src/runtime/sys_darwin.go 2403 40ms 40ms (flat, cum) 1.93% of Total 2404 . . 379:func pthread_cond_init_trampoline() 2405 . . 380: 2406 . . 381://go:nosplit 2407 . . 382://go:cgo_unsafe_args 2408 . . 383:func pthread_cond_wait(c *pthreadcond, m *pthreadmutex) int32 { 2409 40ms 40ms 384: return libcCall(unsafe.Pointer(funcPC(pthread_cond_wait_trampoline)), unsafe.Pointer(&c)) 2410 . . 385:} 2411 . . 386:func pthread_cond_wait_trampoline() 2412 . . 387: 2413 . . 388://go:nosplit 2414 . . 
389://go:cgo_unsafe_args 2415 ROUTINE ======================== runtime.pthread_kill in /usr/local/go/src/runtime/sys_darwin.go 2416 30ms 30ms (flat, cum) 1.45% of Total 2417 . . 143:func pthread_self_trampoline() 2418 . . 144: 2419 . . 145://go:nosplit 2420 . . 146://go:cgo_unsafe_args 2421 . . 147:func pthread_kill(t pthread, sig uint32) { 2422 30ms 30ms 148: libcCall(unsafe.Pointer(funcPC(pthread_kill_trampoline)), unsafe.Pointer(&t)) 2423 . . 149: return 2424 . . 150:} 2425 . . 151:func pthread_kill_trampoline() 2426 . . 152: 2427 . . 153:// mmap is used to do low-level memory allocation via mmap. Don't allow stack 2428 ROUTINE ======================== runtime.rawstring in /usr/local/go/src/runtime/string.go 2429 0 30ms (flat, cum) 1.45% of Total 2430 . . 258:// rawstring allocates storage for a new string. The returned 2431 . . 259:// string and byte slice both refer to the same storage. 2432 . . 260:// The storage is not zeroed. Callers should use 2433 . . 261:// b to set the string contents and then drop b. 2434 . . 262:func rawstring(size int) (s string, b []byte) { 2435 . 30ms 263: p := mallocgc(uintptr(size), nil, false) 2436 . . 264: 2437 . . 265: stringStructOf(&s).str = p 2438 . . 266: stringStructOf(&s).len = size 2439 . . 267: 2440 . . 268: *(*slice)(unsafe.Pointer(&b)) = slice{p, size, size} 2441 ROUTINE ======================== runtime.rawstringtmp in /usr/local/go/src/runtime/string.go 2442 10ms 40ms (flat, cum) 1.93% of Total 2443 . . 122: stk := getg().stack 2444 . . 123: return stk.lo <= ptr && ptr < stk.hi 2445 . . 124:} 2446 . . 125: 2447 . . 126:func rawstringtmp(buf *tmpBuf, l int) (s string, b []byte) { 2448 10ms 10ms 127: if buf != nil && l <= len(buf) { 2449 . . 128: b = buf[:l] 2450 . . 129: s = slicebytetostringtmp(&b[0], len(b)) 2451 . . 130: } else { 2452 . 30ms 131: s, b = rawstring(l) 2453 . . 132: } 2454 . . 133: return 2455 . . 134:} 2456 . . 135: 2457 . . 136:// slicebytetostringtmp returns a "string" referring to the actual []byte bytes. 2458 ROUTINE ======================== runtime.releasem in /usr/local/go/src/runtime/runtime1.go 2459 20ms 20ms (flat, cum) 0.97% of Total 2460 . . 471:} 2461 . . 472: 2462 . . 473://go:nosplit 2463 . . 474:func releasem(mp *m) { 2464 . . 475: _g_ := getg() 2465 10ms 10ms 476: mp.locks-- 2466 10ms 10ms 477: if mp.locks == 0 && _g_.preempt { 2467 . . 478: // restore the preemption request in case we've cleared it in newstack 2468 . . 479: _g_.stackguard0 = stackPreempt 2469 . . 480: } 2470 . . 481:} 2471 . . 482: 2472 ROUTINE ======================== runtime.scanblock in /usr/local/go/src/runtime/mgcmark.go 2473 10ms 10ms (flat, cum) 0.48% of Total 2474 . . 1176: bits := uint32(*addb(ptrmask, i/(sys.PtrSize*8))) 2475 . . 1177: if bits == 0 { 2476 . . 1178: i += sys.PtrSize * 8 2477 . . 1179: continue 2478 . . 1180: } 2479 10ms 10ms 1181: for j := 0; j < 8 && i < n; j++ { 2480 . . 1182: if bits&1 != 0 { 2481 . . 1183: // Same work as in scanobject; see comments there. 2482 . . 1184: p := *(*uintptr)(unsafe.Pointer(b + i)) 2483 . . 1185: if p != 0 { 2484 . . 1186: if obj, span, objIndex := findObject(p, b, i); obj != 0 { 2485 ROUTINE ======================== runtime.scanframeworker in /usr/local/go/src/runtime/mgcmark.go 2486 0 10ms (flat, cum) 0.48% of Total 2487 . . 913: locals, args, objs := getStackMap(frame, &state.cache, false) 2488 . . 914: 2489 . . 915: // Scan local variables if stack frame has been allocated. 2490 . . 916: if locals.n > 0 { 2491 . . 917: size := uintptr(locals.n) * sys.PtrSize 2492 . 
10ms 918: scanblock(frame.varp-size, size, locals.bytedata, gcw, state) 2493 . . 919: } 2494 . . 920: 2495 . . 921: // Scan arguments. 2496 . . 922: if args.n > 0 { 2497 . . 923: scanblock(frame.argp, uintptr(args.n)*sys.PtrSize, args.bytedata, gcw, state) 2498 ROUTINE ======================== runtime.scanobject in /usr/local/go/src/runtime/mgcmark.go 2499 10ms 10ms (flat, cum) 0.48% of Total 2500 . . 1258: // Avoid needless hbits.next() on last iteration. 2501 . . 1259: hbits = hbits.next() 2502 . . 1260: } 2503 . . 1261: // Load bits once. See CL 22712 and issue 16973 for discussion. 2504 . . 1262: bits := hbits.bits() 2505 10ms 10ms 1263: if bits&bitScan == 0 { 2506 . . 1264: break // no more pointers in this object 2507 . . 1265: } 2508 . . 1266: if bits&bitPointer == 0 { 2509 . . 1267: continue // not a pointer 2510 . . 1268: } 2511 ROUTINE ======================== runtime.scanstack in /usr/local/go/src/runtime/mgcmark.go 2512 0 10ms (flat, cum) 0.48% of Total 2513 . . 744: // Scan the stack. Accumulate a list of stack objects. 2514 . . 745: scanframe := func(frame *stkframe, unused unsafe.Pointer) bool { 2515 . . 746: scanframeworker(frame, &state, gcw) 2516 . . 747: return true 2517 . . 748: } 2518 . 10ms 749: gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0) 2519 . . 750: 2520 . . 751: // Find additional pointers that point into the stack from the heap. 2521 . . 752: // Currently this includes defers and panics. See also function copystack. 2522 . . 753: 2523 . . 754: // Find and trace all defer arguments. 2524 ROUTINE ======================== runtime.scanstack.func1 in /usr/local/go/src/runtime/mgcmark.go 2525 0 10ms (flat, cum) 0.48% of Total 2526 . . 741: scanblock(uintptr(unsafe.Pointer(&gp.sched.ctxt)), sys.PtrSize, &oneptrmask[0], gcw, &state) 2527 . . 742: } 2528 . . 743: 2529 . . 744: // Scan the stack. Accumulate a list of stack objects. 2530 . . 745: scanframe := func(frame *stkframe, unused unsafe.Pointer) bool { 2531 . 10ms 746: scanframeworker(frame, &state, gcw) 2532 . . 747: return true 2533 . . 748: } 2534 . . 749: gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0) 2535 . . 750: 2536 . . 751: // Find additional pointers that point into the stack from the heap. 2537 ROUTINE ======================== runtime.schedule in /usr/local/go/src/runtime/proc.go 2538 0 60ms (flat, cum) 2.90% of Total 2539 . . 3164: gp, inheritTime = runqget(_g_.m.p.ptr()) 2540 . . 3165: // We can see gp != nil here even if the M is spinning, 2541 . . 3166: // if checkTimers added a local goroutine via goready. 2542 . . 3167: } 2543 . . 3168: if gp == nil { 2544 . 60ms 3169: gp, inheritTime = findrunnable() // blocks until work is available 2545 . . 3170: } 2546 . . 3171: 2547 . . 3172: // This thread is going to run a goroutine and is not spinning anymore, 2548 . . 3173: // so if it was marked as spinning we need to reset it now and potentially 2549 . . 3174: // start a new spinning M. 2550 ROUTINE ======================== runtime.semasleep in /usr/local/go/src/runtime/os_darwin.go 2551 0 40ms (flat, cum) 1.93% of Total 2552 . . 58: if err == _ETIMEDOUT { 2553 . . 59: pthread_mutex_unlock(&mp.mutex) 2554 . . 60: return -1 2555 . . 61: } 2556 . . 62: } else { 2557 . 40ms 63: pthread_cond_wait(&mp.cond, &mp.mutex) 2558 . . 64: } 2559 . . 65: } 2560 . . 66:} 2561 . . 67: 2562 . . 
68://go:nosplit 2563 ROUTINE ======================== runtime.signalM in /usr/local/go/src/runtime/os_darwin.go 2564 0 30ms (flat, cum) 1.45% of Total 2565 . . 429: executablePath = executablePath[len(prefix):] 2566 . . 430: } 2567 . . 431:} 2568 . . 432: 2569 . . 433:func signalM(mp *m, sig int) { 2570 . 30ms 434: pthread_kill(pthread(mp.procid), uint32(sig)) 2571 . . 435:} 2572 ROUTINE ======================== runtime.startTheWorld.func1 in /usr/local/go/src/runtime/proc.go 2573 0 10ms (flat, cum) 0.48% of Total 2574 . . 998: }) 2575 . . 999:} 2576 . . 1000: 2577 . . 1001:// startTheWorld undoes the effects of stopTheWorld. 2578 . . 1002:func startTheWorld() { 2579 . 10ms 1003: systemstack(func() { startTheWorldWithSema(false) }) 2580 . . 1004: 2581 . . 1005: // worldsema must be held over startTheWorldWithSema to ensure 2582 . . 1006: // gomaxprocs cannot change while worldsema is held. 2583 . . 1007: // 2584 . . 1008: // Release worldsema with direct handoff to the next waiter, but 2585 ROUTINE ======================== runtime.startTheWorldWithSema in /usr/local/go/src/runtime/proc.go 2586 0 80ms (flat, cum) 3.86% of Total 2587 . . 1151:func startTheWorldWithSema(emitTraceEvent bool) int64 { 2588 . . 1152: assertWorldStopped() 2589 . . 1153: 2590 . . 1154: mp := acquirem() // disable preemption because it can be holding p in a local var 2591 . . 1155: if netpollinited() { 2592 . 80ms 1156: list := netpoll(0) // non-blocking 2593 . . 1157: injectglist(&list) 2594 . . 1158: } 2595 . . 1159: lock(&sched.lock) 2596 . . 1160: 2597 . . 1161: procs := gomaxprocs 2598 ROUTINE ======================== runtime.stopm in /usr/local/go/src/runtime/proc.go 2599 0 40ms (flat, cum) 1.93% of Total 2600 . . 2296: } 2601 . . 2297: 2602 . . 2298: lock(&sched.lock) 2603 . . 2299: mput(_g_.m) 2604 . . 2300: unlock(&sched.lock) 2605 . 40ms 2301: mPark() 2606 . . 2302: acquirep(_g_.m.nextp.ptr()) 2607 . . 2303: _g_.m.nextp = 0 2608 . . 2304:} 2609 . . 2305: 2610 . . 2306:func mspinning() { 2611 ROUTINE ======================== runtime.strhash in /usr/local/go/src/runtime/asm_amd64.s 2612 10ms 10ms (flat, cum) 0.48% of Total 2613 . . 895:// func strhash(p unsafe.Pointer, h uintptr) uintptr 2614 . . 896:TEXT runtime·strhash(SB),NOSPLIT,$0-24 2615 . . 897: CMPB runtime·useAeshash(SB), $0 2616 . . 898: JEQ noaes 2617 . . 899: MOVQ p+0(FP), AX // ptr to string struct 2618 10ms 10ms 900: MOVQ 8(AX), CX // length of string 2619 . . 901: MOVQ (AX), AX // string data 2620 . . 902: LEAQ ret+16(FP), DX 2621 . . 903: JMP aeshashbody<>(SB) 2622 . . 904:noaes: 2623 . . 905: JMP runtime·strhashFallback(SB) 2624 ROUTINE ======================== runtime.sysUsed in /usr/local/go/src/runtime/mem_darwin.go 2625 0 120ms (flat, cum) 5.80% of Total 2626 . . 28: 2627 . . 29:func sysUsed(v unsafe.Pointer, n uintptr) { 2628 . . 30: // MADV_FREE_REUSE is necessary to keep the kernel's accounting 2629 . . 31: // accurate. If called on any memory region that hasn't been 2630 . . 32: // MADV_FREE_REUSABLE'd, it's a no-op. 2631 . 120ms 33: madvise(v, n, _MADV_FREE_REUSE) 2632 . . 34:} 2633 . . 35: 2634 . . 36:func sysHugePage(v unsafe.Pointer, n uintptr) { 2635 . . 37:} 2636 . . 38: 2637 ROUTINE ======================== runtime.systemstack in /usr/local/go/src/runtime/asm_amd64.s 2638 0 300ms (flat, cum) 14.49% of Total 2639 . . 374: MOVQ BX, SP 2640 . . 375: 2641 . . 376: // call target function 2642 . . 377: MOVQ DI, DX 2643 . . 378: MOVQ 0(DI), DI 2644 . 300ms 379: CALL DI 2645 . . 380: 2646 . . 381: // switch back to g 2647 . . 
382: get_tls(CX) 2648 . . 383: MOVQ g(CX), AX 2649 . . 384: MOVQ g_m(AX), BX 2650 ROUTINE ======================== strconv.ParseFloat in /usr/local/go/src/strconv/atof.go 2651 0 90ms (flat, cum) 4.35% of Total 2652 . . 686:// ParseFloat returns f = ±Inf, err.Err = ErrRange. 2653 . . 687:// 2654 . . 688:// ParseFloat recognizes the strings "NaN", and the (possibly signed) strings "Inf" and "Infinity" 2655 . . 689:// as their respective special floating point values. It ignores case when matching. 2656 . . 690:func ParseFloat(s string, bitSize int) (float64, error) { 2657 . 90ms 691: f, n, err := parseFloatPrefix(s, bitSize) 2658 . . 692: if err == nil && n != len(s) { 2659 . . 693: return 0, syntaxError(fnParseFloat, s) 2660 . . 694: } 2661 . . 695: return f, err 2662 . . 696:} 2663 ROUTINE ======================== strconv.atof64 in /usr/local/go/src/strconv/atof.go 2664 10ms 80ms (flat, cum) 3.86% of Total 2665 . . 615:func atof64(s string) (f float64, n int, err error) { 2666 . . 616: if val, n, ok := special(s); ok { 2667 . . 617: return val, n, nil 2668 . . 618: } 2669 . . 619: 2670 10ms 60ms 620: mantissa, exp, neg, trunc, hex, n, ok := readFloat(s) 2671 . . 621: if !ok { 2672 . . 622: return 0, n, syntaxError(fnParseFloat, s) 2673 . . 623: } 2674 . . 624: 2675 . . 625: if hex { 2676 . . 626: f, err := atofHex(s[:n], &float64info, mantissa, exp, neg, trunc) 2677 . . 627: return f, n, err 2678 . . 628: } 2679 . . 629: 2680 . . 630: if optimize { 2681 . . 631: // Try pure floating-point arithmetic conversion, and if that fails, 2682 . . 632: // the Eisel-Lemire algorithm. 2683 . . 633: if !trunc { 2684 . 20ms 634: if f, ok := atof64exact(mantissa, exp, neg); ok { 2685 . . 635: return f, n, nil 2686 . . 636: } 2687 . . 637: } 2688 . . 638: f, ok := eiselLemire64(mantissa, exp, neg) 2689 . . 639: if ok { 2690 ROUTINE ======================== strconv.atof64exact in /usr/local/go/src/strconv/atof.go 2691 20ms 20ms (flat, cum) 0.97% of Total 2692 . . 422:// Three common cases: 2693 . . 423:// value is exact integer 2694 . . 424:// value is exact integer * exact power of ten 2695 . . 425:// value is exact integer / exact power of ten 2696 . . 426:// These all produce potentially inexact but correctly rounded answers. 2697 10ms 10ms 427:func atof64exact(mantissa uint64, exp int, neg bool) (f float64, ok bool) { 2698 . . 428: if mantissa>>float64info.mantbits != 0 { 2699 . . 429: return 2700 . . 430: } 2701 . . 431: f = float64(mantissa) 2702 . . 432: if neg { 2703 . . 433: f = -f 2704 . . 434: } 2705 . . 435: switch { 2706 10ms 10ms 436: case exp == 0: 2707 . . 437: // an integer. 2708 . . 438: return f, true 2709 . . 439: // Exact integers are <= 10^15. 2710 . . 440: // Exact powers of ten are <= 10^22. 2711 . . 441: case exp > 0 && exp <= 15+22: // int * 10^k 2712 ROUTINE ======================== strconv.parseFloatPrefix in /usr/local/go/src/strconv/atof.go 2713 10ms 90ms (flat, cum) 4.35% of Total 2714 . . 693: return 0, syntaxError(fnParseFloat, s) 2715 . . 694: } 2716 . . 695: return f, err 2717 . . 696:} 2718 . . 697: 2719 10ms 10ms 698:func parseFloatPrefix(s string, bitSize int) (float64, int, error) { 2720 . . 699: if bitSize == 32 { 2721 . . 700: f, n, err := atof32(s) 2722 . . 701: return float64(f), n, err 2723 . . 702: } 2724 . 80ms 703: return atof64(s) 2725 . . 704:} 2726 ROUTINE ======================== strconv.readFloat in /usr/local/go/src/strconv/atof.go 2727 50ms 50ms (flat, cum) 2.42% of Total 2728 . . 202: sawdigits := false 2729 . . 203: nd := 0 2730 . . 
204: ndMant := 0 2731 . . 205: dp := 0 2732 . . 206:loop: 2733 10ms 10ms 207: for ; i < len(s); i++ { 2734 . . 208: switch c := s[i]; true { 2735 . . 209: case c == '_': 2736 . . 210: underscores = true 2737 . . 211: continue 2738 . . 212: 2739 10ms 10ms 213: case c == '.': 2740 . . 214: if sawdot { 2741 . . 215: break loop 2742 . . 216: } 2743 . . 217: sawdot = true 2744 . . 218: dp = nd 2745 . . 219: continue 2746 . . 220: 2747 . . 221: case '0' <= c && c <= '9': 2748 . . 222: sawdigits = true 2749 . . 223: if c == '0' && nd == 0 { // ignore leading zeros 2750 . . 224: dp-- 2751 . . 225: continue 2752 . . 226: } 2753 . . 227: nd++ 2754 . . 228: if ndMant < maxMantDigits { 2755 . . 229: mantissa *= base 2756 . . 230: mantissa += uint64(c - '0') 2757 . . 231: ndMant++ 2758 . . 232: } else if c != '0' { 2759 . . 233: trunc = true 2760 . . 234: } 2761 . . 235: continue 2762 . . 236: 2763 . . 237: case base == 16 && 'a' <= lower(c) && lower(c) <= 'f': 2764 . . 238: sawdigits = true 2765 . . 239: nd++ 2766 . . 240: if ndMant < maxMantDigits { 2767 . . 241: mantissa *= 16 2768 . . 242: mantissa += uint64(lower(c) - 'a' + 10) 2769 . . 243: ndMant++ 2770 . . 244: } else { 2771 . . 245: trunc = true 2772 . . 246: } 2773 . . 247: continue 2774 . . 248: } 2775 . . 249: break 2776 . . 250: } 2777 . . 251: if !sawdigits { 2778 . . 252: return 2779 . . 253: } 2780 . . 254: if !sawdot { 2781 . . 255: dp = nd 2782 . . 256: } 2783 . . 257: 2784 . . 258: if base == 16 { 2785 . . 259: dp *= 4 2786 . . 260: ndMant *= 4 2787 . . 261: } 2788 . . 262: 2789 . . 263: // optional exponent moves decimal point. 2790 . . 264: // if we read a very large, very long number, 2791 . . 265: // just be sure to move the decimal point by 2792 . . 266: // a lot (say, 100000). it doesn't matter if it's 2793 . . 267: // not the exact number. 2794 10ms 10ms 268: if i < len(s) && lower(s[i]) == expChar { 2795 . . 269: i++ 2796 . . 270: if i >= len(s) { 2797 . . 271: return 2798 . . 272: } 2799 . . 273: esign := 1 2800 . . 274: if s[i] == '+' { 2801 . . 275: i++ 2802 . . 276: } else if s[i] == '-' { 2803 . . 277: i++ 2804 . . 278: esign = -1 2805 . . 279: } 2806 . . 280: if i >= len(s) || s[i] < '0' || s[i] > '9' { 2807 . . 281: return 2808 . . 282: } 2809 . . 283: e := 0 2810 . . 284: for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ { 2811 . . 285: if s[i] == '_' { 2812 . . 286: underscores = true 2813 . . 287: continue 2814 . . 288: } 2815 . . 289: if e < 10000 { 2816 . . 290: e = e*10 + int(s[i]) - '0' 2817 . . 291: } 2818 . . 292: } 2819 . . 293: dp += e * esign 2820 . . 294: } else if base == 16 { 2821 . . 295: // Must have exponent. 2822 20ms 20ms 296: return 2823 . . 297: } 2824 . . 298: 2825 . . 299: if mantissa != 0 { 2826 . . 300: exp = dp - ndMant 2827 . . 301: } 2828 ROUTINE ======================== sync.(*RWMutex).RLock in /usr/local/go/src/sync/rwmutex.go 2829 20ms 20ms (flat, cum) 0.97% of Total 2830 . . 56:func (rw *RWMutex) RLock() { 2831 . . 57: if race.Enabled { 2832 . . 58: _ = rw.w.state 2833 . . 59: race.Disable() 2834 . . 60: } 2835 20ms 20ms 61: if atomic.AddInt32(&rw.readerCount, 1) < 0 { 2836 . . 62: // A writer is pending, wait for it. 2837 . . 63: runtime_SemacquireMutex(&rw.readerSem, false, 0) 2838 . . 64: } 2839 . . 65: if race.Enabled { 2840 . . 66: race.Enable() 2841 ROUTINE ======================== testing.(*B).launch in /usr/local/go/src/testing/benchmark.go 2842 0 1.73s (flat, cum) 83.57% of Total 2843 . . 320: n = min(n, 100*last) 2844 . . 
321: // Be sure to run at least one more than last time. 2845 . . 322: n = max(n, last+1) 2846 . . 323: // Don't run more than 1e9 times. (This also keeps n in int range on 32 bit platforms.) 2847 . . 324: n = min(n, 1e9) 2848 . 1.73s 325: b.runN(int(n)) 2849 . . 326: } 2850 . . 327: } 2851 . . 328: b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes, b.extra} 2852 . . 329:} 2853 . . 330: 2854 ROUTINE ======================== testing.(*B).runN in /usr/local/go/src/testing/benchmark.go 2855 0 1.73s (flat, cum) 83.57% of Total 2856 . . 187: b.raceErrors = -race.Errors() 2857 . . 188: b.N = n 2858 . . 189: b.parallelism = 1 2859 . . 190: b.ResetTimer() 2860 . . 191: b.StartTimer() 2861 . 1.73s 192: b.benchFunc(b) 2862 . . 193: b.StopTimer() 2863 . . 194: b.previousN = n 2864 . . 195: b.previousDuration = b.duration 2865 . . 196: b.raceErrors += race.Errors() 2866 . . 197: if b.raceErrors > 0 {