github.com/lxt1045/json@v0.0.0-20231013032136-54d6b1d6e525/cpu.txt

     1  Total: 2.07s
     2  ROUTINE ======================== aeshashbody in /usr/local/go/src/runtime/asm_amd64.s
     3        70ms       70ms (flat, cum)  3.38% of Total
     4           .          .    909:// DX: address to put return value
     5           .          .    910:TEXT aeshashbody<>(SB),NOSPLIT,$0-0
     6           .          .    911:	// Fill an SSE register with our seeds.
     7           .          .    912:	MOVQ	h+8(FP), X0			// 64 bits of per-table hash seed
     8           .          .    913:	PINSRW	$4, CX, X0			// 16 bits of length
     9        10ms       10ms    914:	PSHUFHW $0, X0, X0			// repeat length 4 times total
    10           .          .    915:	MOVO	X0, X1				// save unscrambled seed
    11           .          .    916:	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
    12           .          .    917:	AESENC	X0, X0				// scramble seed
    13           .          .    918:
    14        10ms       10ms    919:	CMPQ	CX, $16
    15           .          .    920:	JB	aes0to15
    16           .          .    921:	JE	aes16
    17           .          .    922:	CMPQ	CX, $32
    18           .          .    923:	JBE	aes17to32
    19           .          .    924:	CMPQ	CX, $64
    20           .          .    925:	JBE	aes33to64
    21           .          .    926:	CMPQ	CX, $128
    22           .          .    927:	JBE	aes65to128
    23           .          .    928:	JMP	aes129plus
    24           .          .    929:
    25           .          .    930:aes0to15:
    26           .          .    931:	TESTQ	CX, CX
    27           .          .    932:	JE	aes0
    28           .          .    933:
    29           .          .    934:	ADDQ	$16, AX
    30           .          .    935:	TESTW	$0xff0, AX
    31           .          .    936:	JE	endofpage
    32           .          .    937:
    33           .          .    938:	// 16 bytes loaded at this address won't cross
    34           .          .    939:	// a page boundary, so we can load it directly.
    35           .          .    940:	MOVOU	-16(AX), X1
    36           .          .    941:	ADDQ	CX, CX
    37           .          .    942:	MOVQ	$masks<>(SB), AX
    38           .          .    943:	PAND	(AX)(CX*8), X1
    39           .          .    944:final1:
    40        10ms       10ms    945:	PXOR	X0, X1	// xor data with seed
    41        10ms       10ms    946:	AESENC	X1, X1	// scramble combo 3 times
    42           .          .    947:	AESENC	X1, X1
    43        20ms       20ms    948:	AESENC	X1, X1
    44        10ms       10ms    949:	MOVQ	X1, (DX)
    45           .          .    950:	RET
    46           .          .    951:
    47           .          .    952:endofpage:
    48           .          .    953:	// address ends in 1111xxxx. Might be up against
    49           .          .    954:	// a page boundary, so load ending at last byte.
    50  ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.(*TagInfo).Set in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/struct.go
    51        20ms      150ms (flat, cum)  7.25% of Total
    52           .          .    128:
    53           .          .    129:	Marshalable marshalable `json:"-"`
    54           .          .    130:}
    55           .          .    131:
    56           .          .    132:func (t *TagInfo) Set(pStruct unsafe.Pointer, pIn unsafe.Pointer) {
    57        20ms      150ms    133:	t.Marshalable.Set(t.StructField, pStruct, pIn)
    58           .          .    134:}
    59           .          .    135:func (t *TagInfo) Get(pStruct unsafe.Pointer, pOut unsafe.Pointer) {
    60           .          .    136:	t.Marshalable.Get(t.StructField, pStruct, pOut)
    61           .          .    137:}
    62           .          .    138:
    63  ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.BenchmarkMyUnmarshal.func1 in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/struct_bench_test.go
    64        10ms      1.73s (flat, cum) 83.57% of Total
    65           .          .    204:	}
    66           .          .    205:
    67           .          .    206:	name := "Unmarshal"
    68           .          .    207:	b.Run(name, func(b *testing.B) {
    69           .          .    208:		b.ReportAllocs()
    70        10ms       10ms    209:		for i := 0; i < b.N; i++ {
    71           .      1.72s    210:			Unmarshal(bsJSON, &d)
    72           .          .    211:		}
    73           .          .    212:		b.SetBytes(int64(b.N))
    74           .          .    213:		b.StopTimer()
    75           .          .    214:	})
    76           .          .    215:}
    77  ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.IsSpace in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/json.go
    78       110ms      110ms (flat, cum)  5.31% of Total
    79           .          .    132:}
    80           .          .    133:
    81           .          .    134:const charSpace uint32 = 1<<('\t'-1) | 1<<('\n'-1) | 1<<('\v'-1) | 1<<('\f'-1) | 1<<('\r'-1) | 1<<(' '-1)
    82           .          .    135:
    83           .          .    136:func IsSpace(b byte) bool {
    84       110ms      110ms    137:	return b == 0x85 || b == 0xA0 || (charSpace>>(b-1)&0x1 > 0)
    85           .          .    138:	// switch b {
    86           .          .    139:	// case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
    87           .          .    140:	// 	return true
    88           .          .    141:	// }
    89           .          .    142:	// return false
    90  ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.LoadTagNode in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/struct.go
    91        10ms       50ms (flat, cum)  2.42% of Total
    92           .          .     13:var (
    93           .          .     14:	cacheStructTagInfo = make(map[string]*tagNode) //map[type]map[string]TagInfo
    94           .          .     15:	cacheLock          sync.RWMutex
    95           .          .     16:)
    96           .          .     17:
    97        10ms       10ms     18:func LoadTagNode(key string) (n *tagNode) {
    98           .       20ms     19:	cacheLock.RLock()
    99           .       20ms     20:	n = cacheStructTagInfo[key]
   100           .          .     21:	if n != nil {
   101           .          .     22:		cacheLock.RUnlock()
   102           .          .     23:		return
   103           .          .     24:	}
   104           .          .     25:	cacheLock.RUnlock()
   105  ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.Unmarshal in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/struct.go
   106        30ms      1.72s (flat, cum) 83.09% of Total
   107           .          .    225:	for typ.Kind() == reflect.Ptr {
   108           .          .    226:		vi.Set(reflect.New(vi.Type().Elem()))
   109           .          .    227:		vi = vi.Elem()
   110           .          .    228:		typ = typ.Elem()
   111           .          .    229:	}
   112        30ms      180ms    230:	node := LoadTagNode(typ.PkgPath() + "." + typ.Name())
   113           .          .    231:	tagInfo, err := node.GetTagInfo(typ)
   114           .          .    232:	if err != nil {
   115           .          .    233:		return
   116           .          .    234:	}
   117           .          .    235:
   118           .          .    236:	defer func() {
   119           .          .    237:		if e := recover(); e != nil {
   120           .          .    238:			err = e.(error)
   121           .          .    239:			err = errors.New(err.Error())
   122           .          .    240:			return
   123           .          .    241:		}
   124           .          .    242:	}()
   125           .          .    243:	empty := (*emptyInterface)(unsafe.Pointer(&in))
   126           .      1.54s    244:	parseNextUnit(bs, empty.word, tagInfo)
   127           .          .    245:	if err != nil {
   128           .          .    246:		return
   129           .          .    247:	}
   130           .          .    248:
   131           .          .    249:	return
   132  ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.parseNum in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/json.go
   133           0       20ms (flat, cum)  0.97% of Total
   134           .          .    262:}
   135           .          .    263:
   136           .          .    264:func parseNum(stream []byte) (raw []byte, i int) {
   137           .          .    265:	for ; i < len(stream); i++ {
   138           .          .    266:		c := stream[i]
   139           .       20ms    267:		if IsSpace(c) || c == ']' || c == '}' || c == ',' {
   140           .          .    268:			raw, i = stream[:i], i+1
   141           .          .    269:			return
   142           .          .    270:		}
   143           .          .    271:	}
   144           .          .    272:	raw = stream
   145  ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.parseNextUnit in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/json.go
   146       220ms      2.09s (flat, cum) 100.97% of Total
   147           .          .    155:	if len(stream) < 2 || stream[0] != '{' {
   148           .          .    156:		panicIncorrectFormat(stream[:])
   149           .          .    157:	}
   150           .          .    158:	var key []byte
   151           .          .    159:	for i = 1; i < len(stream); {
   152        40ms      140ms    160:		i += trimSpace(stream[i:])
   153           .          .    161:		if stream[i] == ']' || stream[i] == '}' {
   154           .          .    162:			i++
   155           .          .    163:			break // end of parsing this struct
   156           .          .    164:		}
   157           .          .    165:		switch stream[i] {
   158           .          .    166:		default:
   159           .          .    167:			if (stream[i] >= '0' && stream[i] <= '9') || stream[i] == '-' {
   160           .          .    168:				if len(key) <= 0 {
   161           .          .    169:					panicIncorrectFormat(stream[i:])
   162           .          .    170:				}
   163           .       20ms    171:				raw, size := parseNum(stream[i:])
   164           .          .    172:				i += size
   165        10ms       90ms    173:				if tag, ok := tis[string(key)]; ok && pObj != nil {
   166           .      210ms    174:					setNumberField(pObj, tag, raw, Number)
   167           .          .    175:				}
   168           .          .    176:				key = nil
   169           .          .    177:			} else {
   170           .          .    178:				panicIncorrectFormat(stream[i:])
   171           .          .    179:			}
   172           .          .    180:		case '{': // obj
   173        10ms       10ms    181:			if len(key) <= 0 {
   174           .          .    182:				panicIncorrectFormat(stream[i:])
   175           .          .    183:			}
   176        10ms       50ms    184:			if tag, ok := tis[string(key)]; ok {
   177        20ms      570ms    185:				i += setObjField(pObj, tag, stream[i:])
   178           .          .    186:			} else {
   179           .          .    187:				i += parseNextUnit(stream[i:], nil, tag.Children)
   180           .          .    188:			}
   181           .          .    189:			key = nil
   182           .          .    190:		case '[': // obj
   183           .          .    191:			if len(key) <= 0 {
   184           .          .    192:				panicIncorrectFormat(stream[i:])
   185           .          .    193:			}
   186           .          .    194:			if tag, ok := tis[string(key)]; ok {
   187           .          .    195:				i += setObjField(pObj, tag, stream[i:])
   188           .          .    196:			} else {
   189           .          .    197:				i += parseNextUnit(stream[i:], nil, tag.Children)
   190           .          .    198:			}
   191           .          .    199:			key = nil
   192        10ms       10ms    200:		case 'n':
   193           .          .    201:			if len(key) <= 0 {
   194           .          .    202:				panicIncorrectFormat(stream[i:])
   195           .          .    203:			}
   196           .          .    204:			if stream[i+1] != 'u' || stream[i+2] != 'l' || stream[i+3] != 'l' {
   197           .          .    205:				panicIncorrectFormat(stream[i:])
   198           .          .    206:			}
   199           .          .    207:			i += 4
   200           .          .    208:			key = nil
   201           .          .    209:		case 't':
   202           .          .    210:			if len(key) <= 0 {
   203           .          .    211:				panicIncorrectFormat(stream[i:])
   204           .          .    212:			}
   205           .          .    213:			if stream[i+1] != 'r' || stream[i+2] != 'u' || stream[i+3] != 'e' {
   206           .          .    214:				panicIncorrectFormat(stream[i:])
   207           .          .    215:			}
   208           .          .    216:			i += 4
   209           .          .    217:			if tag, ok := tis[string(key)]; ok && pObj != nil {
   210           .          .    218:				setBoolField(pObj, tag, true)
   211           .          .    219:			}
   212           .          .    220:			key = nil
   213           .          .    221:		case 'f':
   214           .          .    222:			if len(key) <= 0 {
   215           .          .    223:				panicIncorrectFormat(stream[i:])
   216           .          .    224:			}
   217           .          .    225:			if stream[i+1] != 'a' || stream[i+2] != 'l' || stream[i+3] != 's' || stream[i+4] != 'e' {
   218           .          .    226:				panicIncorrectFormat(stream[i:])
   219           .          .    227:			}
   220           .          .    228:			i += 5
   221           .          .    229:			if tag, ok := tis[string(key)]; ok && pObj != nil {
   222           .          .    230:				setBoolField(pObj, tag, false)
   223           .          .    231:			}
   224           .          .    232:			key = nil
   225           .          .    233:		case '"':
   226           .          .    234:			if len(key) <= 0 {
   227        30ms       50ms    235:				i += trimSpace(stream[i:])
   228           .          .    236:				size := 0
   229        20ms      190ms    237:				key, size = parseStr(stream[i:]) // parse the key first, then the value
   230           .          .    238:				i += size
   231        20ms       60ms    239:				i += trimSpace(stream[i:])
   232           .          .    240:				if stream[i] != ':' {
   233           .          .    241:					panicIncorrectFormat(stream[i:])
   234           .          .    242:				}
   235           .          .    243:				i++
   236        20ms       20ms    244:				i += trimSpace(stream[i:])
   237           .          .    245:				continue
   238           .          .    246:			} else {
   239           .       80ms    247:				raw, size := parseStr(stream[i:])
   240           .          .    248:				i += size
   241           .      160ms    249:				if tag, ok := tis[string(key)]; ok && pObj != nil {
   242        10ms      360ms    250:					setStringField(pObj, tag, raw)
   243           .          .    251:				}
   244           .          .    252:				key = nil
   245           .          .    253:			}
   246           .          .    254:		}
   247        10ms       60ms    255:		i += trimSpace(stream[i:])
   248        10ms       10ms    256:		if stream[i] == ',' {
   249           .          .    257:			i++
   250           .          .    258:			continue
   251           .          .    259:		}
   252           .          .    260:	}
   253           .          .    261:	return
   254  ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.parseStr in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/json.go
   255       250ms      250ms (flat, cum) 12.08% of Total
   256           .          .    273:	return
   257           .          .    274:}
   258           .          .    275:
   259           .          .    276://stream: "fgshw1321"...
   260           .          .    277:func parseStr(stream []byte) (raw []byte, i int) {
   261        70ms       70ms    278:	for i = 1; i < len(stream); {
   262        20ms       20ms    279:		if stream[i] == '"' {
   263        10ms       10ms    280:			if len(raw) <= 0 {
   264        10ms       10ms    281:				raw = stream[1:i]
   265           .          .    282:			}
   266           .          .    283:			return raw, i + 1
   267           .          .    284:		}
   268        10ms       10ms    285:		if stream[i] == '\\' {
   269           .          .    286:			word, wordSize := unescapeStr(stream[i:])
   270       100ms      100ms    287:			if len(raw) <= 0 {
   271           .          .    288:				raw = stream[1:i]
   272           .          .    289:			}
   273           .          .    290:			raw = append(raw, word...)
   274           .          .    291:			i += wordSize
   275           .          .    292:			continue
   276           .          .    293:		}
   277           .          .    294:		if len(raw) > 0 {
   278           .          .    295:			raw = append(raw, stream[i])
   279           .          .    296:		}
   280        30ms       30ms    297:		i++
   281           .          .    298:	}
   282           .          .    299:	return
   283           .          .    300:}
   284           .          .    301:
   285           .          .    302:// unescape unescapes a string
   286  ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.setField in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/marshalable_func.go
   287        20ms       60ms (flat, cum)  2.90% of Total
   288           .          .     95:}
   289           .          .     96:
   290           .          .     97:func setField(field reflect.StructField, pStruct unsafe.Pointer, pIn unsafe.Pointer) {
   291           .          .     98:	pValue := unsafe.Pointer(uintptr(pStruct) + uintptr(field.Offset))
   292           .          .     99:	typ := field.Type
   293           .       10ms    100:	if typ.Kind() != reflect.Ptr {
   294           .          .    101:		from := SliceHeader{
   295           .          .    102:			Data: uintptr(pIn),
   296           .       20ms    103:			Len:  int(typ.Size()),
   297           .          .    104:			Cap:  int(typ.Size()),
   298           .          .    105:		}
   299           .          .    106:		to := SliceHeader{
   300           .          .    107:			Data: uintptr(pValue),
   301           .          .    108:			Len:  int(typ.Size()),
   302        10ms       10ms    109:			Cap:  int(typ.Size()),
   303           .          .    110:		}
   304        10ms       20ms    111:		copy(*(*[]byte)(unsafe.Pointer(&to)), *(*[]byte)(unsafe.Pointer(&from)))
   305           .          .    112:		return
   306           .          .    113:	}
   307           .          .    114:	setPointerField(field, pStruct, pIn)
   308           .          .    115:	return
   309           .          .    116:}
   310  ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.setFieldString in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/marshalable_func.go
   311        30ms       30ms (flat, cum)  1.45% of Total
   312           .          .    127:	*(*unsafe.Pointer)(pValue) = *(*unsafe.Pointer)(pIn)
   313           .          .    128:	return
   314           .          .    129:}
   315           .          .    130:
   316           .          .    131:func setFieldString(field reflect.StructField, pStruct unsafe.Pointer, pIn unsafe.Pointer) {
   317        10ms       10ms    132:	pValue := unsafe.Pointer(uintptr(pStruct) + uintptr(field.Offset))
   318           .          .    133:	typ := field.Type
   319           .          .    134:	if typ.Kind() != reflect.Ptr {
   320           .          .    135:		*(*string)(pValue) = *(*string)(pIn)
   321        20ms       20ms    136:		return
   322           .          .    137:	}
   323           .          .    138:	setPointerField(field, pStruct, pIn)
   324           .          .    139:	return
   325           .          .    140:}
   326  ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.setNumberField in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/json.go
   327           0      210ms (flat, cum) 10.14% of Total
   328           .          .     57:}
   329           .          .     58:func setNumberField(pObj unsafe.Pointer, tag *TagInfo, raw []byte, typ Type) (i int) {
   330           .          .     59:	if tag.Kind < reflect.Int || tag.Kind > reflect.Float64 {
   331           .          .     60:		panicIncorrectType(False, tag)
   332           .          .     61:	}
   333           .      120ms     62:	num, err := strconv.ParseFloat(bytesString(raw), 64)
   334           .          .     63:	if err != nil {
   335           .          .     64:		panicIncorrectFormat([]byte("error:" + err.Error() + ", stream:" + string(raw)))
   336           .          .     65:	}
   337           .          .     66:	switch tag.Kind {
   338           .          .     67:	case reflect.Int8:
   339           .          .     68:		i8 := int8(num)
   340           .          .     69:		tag.Set(pObj, unsafe.Pointer(&i8))
   341           .          .     70:	case reflect.Uint8:
   342           .          .     71:		u8 := int8(num)
   343           .          .     72:		tag.Set(pObj, unsafe.Pointer(&u8))
   344           .          .     73:	case reflect.Uint16:
   345           .          .     74:		u := uint16(num)
   346           .          .     75:		tag.Set(pObj, unsafe.Pointer(&u))
   347           .          .     76:	case reflect.Int16:
   348           .          .     77:		i := int16(num)
   349           .          .     78:		tag.Set(pObj, unsafe.Pointer(&i))
   350           .          .     79:	case reflect.Uint32:
   351           .          .     80:		u := uint32(num)
   352           .          .     81:		tag.Set(pObj, unsafe.Pointer(&u))
   353           .          .     82:	case reflect.Int32:
   354           .          .     83:		i := int32(num)
   355           .          .     84:		tag.Set(pObj, unsafe.Pointer(&i))
   356           .          .     85:	case reflect.Uint64:
   357           .          .     86:		u := uint64(num)
   358           .          .     87:		tag.Set(pObj, unsafe.Pointer(&u))
   359           .          .     88:	case reflect.Int64:
   360           .       10ms     89:		i := int64(num)
   361           .       40ms     90:		tag.Set(pObj, unsafe.Pointer(&i))
   362           .          .     91:	case reflect.Int:
   363           .          .     92:		u := int(num)
   364           .       40ms     93:		tag.Set(pObj, unsafe.Pointer(&u))
   365           .          .     94:	case reflect.Uint:
   366           .          .     95:		i := uint(num)
   367           .          .     96:		tag.Set(pObj, unsafe.Pointer(&i))
   368           .          .     97:	case reflect.Float32:
   369           .          .     98:		u := float32(num)
   370  ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.setObjField in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/json.go
   371           0      550ms (flat, cum) 26.57% of Total
   372           .          .    124:func setObjField(pObj unsafe.Pointer, tag *TagInfo, raw []byte) (i int) {
   373           .          .    125:	if tag.Kind != reflect.Struct {
   374           .          .    126:		panicIncorrectType(False, tag)
   375           .          .    127:	}
   376           .          .    128:	pField := unsafe.Pointer(uintptr(pObj) + uintptr(tag.StructField.Offset))
   377           .      550ms    129:	size := parseNextUnit(raw, pField, tag.Children)
   378           .          .    130:	i += size
   379           .          .    131:	return
   380           .          .    132:}
   381           .          .    133:
   382           .          .    134:const charSpace uint32 = 1<<('\t'-1) | 1<<('\n'-1) | 1<<('\v'-1) | 1<<('\f'-1) | 1<<('\r'-1) | 1<<(' '-1)
   383  ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.setStringField in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/json.go
   384        30ms      350ms (flat, cum) 16.91% of Total
   385           .          .    110:		panicIncorrectType(typ, tag)
   386           .          .    111:	}
   387           .          .    112:
   388           .          .    113:	return
   389           .          .    114:}
   390        30ms      280ms    115:func setStringField(pObj unsafe.Pointer, tag *TagInfo, raw []byte) {
   391           .          .    116:	if tag.Kind != reflect.String {
   392           .          .    117:		panicIncorrectType(False, tag)
   393           .          .    118:	}
   394           .          .    119:	// str := bytesString(raw)
   395           .          .    120:	// tag.Set(pObj, unsafe.Pointer(&str))
   396           .       70ms    121:	tag.Set(pObj, unsafe.Pointer(&raw))
   397           .          .    122:	return
   398           .          .    123:}
   399           .          .    124:func setObjField(pObj unsafe.Pointer, tag *TagInfo, raw []byte) (i int) {
   400           .          .    125:	if tag.Kind != reflect.Struct {
   401           .          .    126:		panicIncorrectType(False, tag)
   402  ROUTINE ======================== github.com/lxt1045/Experiment/golang/json/pkg/json.trimSpace in /Users/bytedance/go/src/github.com/lxt1045/Experiment/golang/json/pkg/json/json.go
   403       120ms      210ms (flat, cum) 10.14% of Total
   404           .          .    140:	// 	return true
   405           .          .    141:	// }
   406           .          .    142:	// return false
   407           .          .    143:}
   408           .          .    144:func trimSpace(stream []byte) (i int) {
   409       100ms      100ms    145:	for i = range stream {
   410           .       90ms    146:		if !IsSpace(stream[i]) {
   411           .          .    147:			break
   412           .          .    148:		}
   413           .          .    149:	}
   414        20ms       20ms    150:	return
   415           .          .    151:}
   416           .          .    152:
   417           .          .    153:// parse obj: {} or []
   418           .          .    154:func parseNextUnit(stream []byte, pObj unsafe.Pointer, tis map[string]*TagInfo) (i int) {
   419           .          .    155:	if len(stream) < 2 || stream[0] != '{' {
   420  ROUTINE ======================== memeqbody in /usr/local/go/src/internal/bytealg/equal_amd64.s
   421        40ms       40ms (flat, cum)  1.93% of Total
   422           .          .     98:	// 8 bytes at a time using 64-bit register
   423           .          .     99:bigloop:
   424           .          .    100:	CMPQ	BX, $8
   425           .          .    101:	JBE	leftover
   426           .          .    102:	MOVQ	(SI), CX
   427        10ms       10ms    103:	MOVQ	(DI), DX
   428           .          .    104:	ADDQ	$8, SI
   429           .          .    105:	ADDQ	$8, DI
   430           .          .    106:	SUBQ	$8, BX
   431           .          .    107:	CMPQ	CX, DX
   432           .          .    108:	JEQ	bigloop
   433           .          .    109:	MOVB	$0, (AX)
   434           .          .    110:	RET
   435           .          .    111:
   436           .          .    112:	// remaining 0-8 bytes
   437           .          .    113:leftover:
   438        10ms       10ms    114:	MOVQ	-8(SI)(BX*1), CX
   439           .          .    115:	MOVQ	-8(DI)(BX*1), DX
   440           .          .    116:	CMPQ	CX, DX
   441           .          .    117:	SETEQ	(AX)
   442           .          .    118:	RET
   443           .          .    119:
   444           .          .    120:small:
   445           .          .    121:	CMPQ	BX, $0
   446           .          .    122:	JEQ	equal
   447           .          .    123:
   448           .          .    124:	LEAQ	0(BX*8), CX
   449           .          .    125:	NEGQ	CX
   450           .          .    126:
   451           .          .    127:	CMPB	SI, $0xf8
   452           .          .    128:	JA	si_high
   453           .          .    129:
   454           .          .    130:	// load at SI won't cross a page boundary.
   455           .          .    131:	MOVQ	(SI), SI
   456           .          .    132:	JMP	si_finish
   457           .          .    133:si_high:
   458           .          .    134:	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
   459           .          .    135:	MOVQ	-8(SI)(BX*1), SI
   460           .          .    136:	SHRQ	CX, SI
   461           .          .    137:si_finish:
   462           .          .    138:
   463           .          .    139:	// same for DI.
   464           .          .    140:	CMPB	DI, $0xf8
   465           .          .    141:	JA	di_high
   466           .          .    142:	MOVQ	(DI), DI
   467           .          .    143:	JMP	di_finish
   468           .          .    144:di_high:
   469           .          .    145:	MOVQ	-8(DI)(BX*1), DI
   470           .          .    146:	SHRQ	CX, DI
   471           .          .    147:di_finish:
   472           .          .    148:
   473        10ms       10ms    149:	SUBQ	SI, DI
   474           .          .    150:	SHLQ	CX, DI
   475           .          .    151:equal:
   476        10ms       10ms    152:	SETEQ	(AX)
   477           .          .    153:	RET
   478           .          .    154:
   479  ROUTINE ======================== reflect.(*rtype).Kind in /usr/local/go/src/reflect/type.go
   480        10ms       10ms (flat, cum)  0.48% of Total
   481           .          .    775:
   482           .          .    776:func (t *rtype) Align() int { return int(t.align) }
   483           .          .    777:
   484           .          .    778:func (t *rtype) FieldAlign() int { return int(t.fieldAlign) }
   485           .          .    779:
   486        10ms       10ms    780:func (t *rtype) Kind() Kind { return Kind(t.kind & kindMask) }
   487           .          .    781:
   488           .          .    782:func (t *rtype) pointers() bool { return t.ptrdata != 0 }
   489           .          .    783:
   490           .          .    784:func (t *rtype) common() *rtype { return t }
   491           .          .    785:
   492  ROUTINE ======================== reflect.(*rtype).PkgPath in /usr/local/go/src/reflect/type.go
   493        10ms       20ms (flat, cum)  0.97% of Total
   494           .          .    857:	}
   495           .          .    858:	ut := t.uncommon()
   496           .          .    859:	if ut == nil {
   497           .          .    860:		return ""
   498           .          .    861:	}
   499        10ms       20ms    862:	return t.nameOff(ut.pkgPath).name()
   500           .          .    863:}
   501           .          .    864:
   502           .          .    865:func (t *rtype) hasName() bool {
   503           .          .    866:	return t.tflag&tflagNamed != 0
   504           .          .    867:}
   505  ROUTINE ======================== reflect.(*rtype).Size in /usr/local/go/src/reflect/type.go
   506        20ms       20ms (flat, cum)  0.97% of Total
   507           .          .    758:		return s[1:]
   508           .          .    759:	}
   509           .          .    760:	return s
   510           .          .    761:}
   511           .          .    762:
   512        20ms       20ms    763:func (t *rtype) Size() uintptr { return t.size }
   513           .          .    764:
   514           .          .    765:func (t *rtype) Bits() int {
   515           .          .    766:	if t == nil {
   516           .          .    767:		panic("reflect: Bits of nil Type")
   517           .          .    768:	}
   518  ROUTINE ======================== reflect.(*rtype).nameOff in /usr/local/go/src/reflect/type.go
   519        10ms       10ms (flat, cum)  0.48% of Total
   520           .          .    681:type nameOff int32 // offset to a name
   521           .          .    682:type typeOff int32 // offset to an *rtype
   522           .          .    683:type textOff int32 // offset from top of text section
   523           .          .    684:
   524           .          .    685:func (t *rtype) nameOff(off nameOff) name {
   525        10ms       10ms    686:	return name{(*byte)(resolveNameOff(unsafe.Pointer(t), int32(off)))}
   526           .          .    687:}
   527           .          .    688:
   528           .          .    689:func (t *rtype) typeOff(off typeOff) *rtype {
   529           .          .    690:	return (*rtype)(resolveTypeOff(unsafe.Pointer(t), int32(off)))
   530           .          .    691:}
   531  ROUTINE ======================== runtime.(*addrRanges).removeGreaterEqual in /usr/local/go/src/runtime/mranges.go
   532           0       10ms (flat, cum)  0.48% of Total
   533           .          .    346:		removed += r.size()
   534           .          .    347:		r = r.removeGreaterEqual(addr)
   535           .          .    348:		if r.size() == 0 {
   536           .          .    349:			pivot--
   537           .          .    350:		} else {
   538           .       10ms    351:			removed -= r.size()
   539           .          .    352:			a.ranges[pivot-1] = r
   540           .          .    353:		}
   541           .          .    354:	}
   542           .          .    355:	a.ranges = a.ranges[:pivot]
   543           .          .    356:	a.totalBytes -= removed
   544  ROUTINE ======================== runtime.(*gcControllerState).enlistWorker in /usr/local/go/src/runtime/mgc.go
   545           0       30ms (flat, cum)  1.45% of Total
   546           .          .    705:		}
   547           .          .    706:		p := allp[id]
   548           .          .    707:		if p.status != _Prunning {
   549           .          .    708:			continue
   550           .          .    709:		}
   551           .       30ms    710:		if preemptone(p) {
   552           .          .    711:			return
   553           .          .    712:		}
   554           .          .    713:	}
   555           .          .    714:}
   556           .          .    715:
   557  ROUTINE ======================== runtime.(*gcWork).balance in /usr/local/go/src/runtime/mgcwork.go
   558           0       40ms (flat, cum)  1.93% of Total
   559           .          .    290:	if wbuf := w.wbuf2; wbuf.nobj != 0 {
   560           .          .    291:		putfull(wbuf)
   561           .          .    292:		w.flushedWork = true
   562           .          .    293:		w.wbuf2 = getempty()
   563           .          .    294:	} else if wbuf := w.wbuf1; wbuf.nobj > 4 {
   564           .       10ms    295:		w.wbuf1 = handoff(wbuf)
   565           .          .    296:		w.flushedWork = true // handoff did putfull
   566           .          .    297:	} else {
   567           .          .    298:		return
   568           .          .    299:	}
   569           .          .    300:	// We flushed a buffer to the full list, so wake a worker.
   570           .          .    301:	if gcphase == _GCmark {
   571           .       30ms    302:		gcController.enlistWorker()
   572           .          .    303:	}
   573           .          .    304:}
   574           .          .    305:
   575           .          .    306:// empty reports whether w has no mark work available.
   576           .          .    307://go:nowritebarrierrec
   577  ROUTINE ======================== runtime.(*lfstack).push in /usr/local/go/src/runtime/lfstack.go
   578        10ms       10ms (flat, cum)  0.48% of Total
   579           .          .     30:		throw("lfstack.push")
   580           .          .     31:	}
   581           .          .     32:	for {
   582           .          .     33:		old := atomic.Load64((*uint64)(head))
   583           .          .     34:		node.next = old
   584        10ms       10ms     35:		if atomic.Cas64((*uint64)(head), old, new) {
   585           .          .     36:			break
   586           .          .     37:		}
   587           .          .     38:	}
   588           .          .     39:}
   589           .          .     40:
   590  ROUTINE ======================== runtime.(*mcache).nextFree in /usr/local/go/src/runtime/malloc.go
   591           0       80ms (flat, cum)  3.86% of Total
   592           .          .    877:		// The span is full.
   593           .          .    878:		if uintptr(s.allocCount) != s.nelems {
   594           .          .    879:			println("runtime: s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
   595           .          .    880:			throw("s.allocCount != s.nelems && freeIndex == s.nelems")
   596           .          .    881:		}
   597           .       80ms    882:		c.refill(spc)
   598           .          .    883:		shouldhelpgc = true
   599           .          .    884:		s = c.alloc[spc]
   600           .          .    885:
   601           .          .    886:		freeIndex = s.nextFreeIndex()
   602           .          .    887:	}
   603  ROUTINE ======================== runtime.(*mcache).refill in /usr/local/go/src/runtime/mcache.go
   604           0       80ms (flat, cum)  3.86% of Total
   605           .          .    157:		}
   606           .          .    158:		mheap_.central[spc].mcentral.uncacheSpan(s)
   607           .          .    159:	}
   608           .          .    160:
   609           .          .    161:	// Get a new cached span from the central lists.
   610           .       80ms    162:	s = mheap_.central[spc].mcentral.cacheSpan()
   611           .          .    163:	if s == nil {
   612           .          .    164:		throw("out of memory")
   613           .          .    165:	}
   614           .          .    166:
   615           .          .    167:	if uintptr(s.allocCount) == s.nelems {
   616  ROUTINE ======================== runtime.(*mcentral).cacheSpan in /usr/local/go/src/runtime/mcentral.go
   617           0       80ms (flat, cum)  3.86% of Total
   618           .          .    153:		traceGCSweepDone()
   619           .          .    154:		traceDone = true
   620           .          .    155:	}
   621           .          .    156:
   622           .          .    157:	// We failed to get a span from the mcentral so get one from mheap.
   623           .       80ms    158:	s = c.grow()
   624           .          .    159:	if s == nil {
   625           .          .    160:		return nil
   626           .          .    161:	}
   627           .          .    162:
   628           .          .    163:	// At this point s is a span that should have free slots.
   629  ROUTINE ======================== runtime.(*mcentral).grow in /usr/local/go/src/runtime/mcentral.go
   630           0       80ms (flat, cum)  3.86% of Total
   631           .          .    227:// grow allocates a new empty span from the heap and initializes it for c's size class.
   632           .          .    228:func (c *mcentral) grow() *mspan {
   633           .          .    229:	npages := uintptr(class_to_allocnpages[c.spanclass.sizeclass()])
   634           .          .    230:	size := uintptr(class_to_size[c.spanclass.sizeclass()])
   635           .          .    231:
   636           .       70ms    232:	s := mheap_.alloc(npages, c.spanclass, true)
   637           .          .    233:	if s == nil {
   638           .          .    234:		return nil
   639           .          .    235:	}
   640           .          .    236:
   641           .          .    237:	// Use division by multiplication and shifts to quickly compute:
   642           .          .    238:	// n := (npages << _PageShift) / size
   643           .          .    239:	n := (npages << _PageShift) >> s.divShift * uintptr(s.divMul) >> s.divShift2
   644           .          .    240:	s.limit = s.base() + size*n
   645           .       10ms    241:	heapBitsForAddr(s.base()).initSpan(s)
   646           .          .    242:	return s
   647           .          .    243:}
   648  ROUTINE ======================== runtime.(*mheap).alloc in /usr/local/go/src/runtime/mheap.go
   649           0       70ms (flat, cum)  3.38% of Total
   650           .          .    899:func (h *mheap) alloc(npages uintptr, spanclass spanClass, needzero bool) *mspan {
   651           .          .    900:	// Don't do any operations that lock the heap on the G stack.
   652           .          .    901:	// It might trigger stack growth, and the stack growth code needs
   653           .          .    902:	// to be able to allocate heap.
   654           .          .    903:	var s *mspan
   655           .       30ms    904:	systemstack(func() {
   656           .          .    905:		// To prevent excessive heap growth, before allocating n pages
   657           .          .    906:		// we need to sweep and reclaim at least n pages.
   658           .          .    907:		if h.sweepdone == 0 {
   659           .          .    908:			h.reclaim(npages)
   660           .          .    909:		}
   661           .          .    910:		s = h.allocSpan(npages, spanAllocHeap, spanclass)
   662           .          .    911:	})
   663           .          .    912:
   664           .          .    913:	if s != nil {
   665           .          .    914:		if needzero && s.needzero != 0 {
   666           .       40ms    915:			memclrNoHeapPointers(unsafe.Pointer(s.base()), s.npages<<_PageShift)
   667           .          .    916:		}
   668           .          .    917:		s.needzero = 0
   669           .          .    918:	}
   670           .          .    919:	return s
   671           .          .    920:}
   672  ROUTINE ======================== runtime.(*mheap).alloc.func1 in /usr/local/go/src/runtime/mheap.go
   673           0      150ms (flat, cum)  7.25% of Total
   674           .          .    905:		// To prevent excessive heap growth, before allocating n pages
   675           .          .    906:		// we need to sweep and reclaim at least n pages.
   676           .          .    907:		if h.sweepdone == 0 {
   677           .          .    908:			h.reclaim(npages)
   678           .          .    909:		}
   679           .      150ms    910:		s = h.allocSpan(npages, spanAllocHeap, spanclass)
   680           .          .    911:	})
   681           .          .    912:
   682           .          .    913:	if s != nil {
   683           .          .    914:		if needzero && s.needzero != 0 {
   684           .          .    915:			memclrNoHeapPointers(unsafe.Pointer(s.base()), s.npages<<_PageShift)
   685  ROUTINE ======================== runtime.(*mheap).allocSpan in /usr/local/go/src/runtime/mheap.go
   686           0      150ms (flat, cum)  7.25% of Total
   687           .          .   1205:	unlock(&h.lock)
   688           .          .   1206:
   689           .          .   1207:HaveSpan:
   690           .          .   1208:	// At this point, both s != nil and base != 0, and the heap
   691           .          .   1209:	// lock is no longer held. Initialize the span.
   692           .       30ms   1210:	s.init(base, npages)
   693           .          .   1211:	if h.allocNeedsZero(base, npages) {
   694           .          .   1212:		s.needzero = 1
   695           .          .   1213:	}
   696           .          .   1214:	nbytes := npages * pageSize
   697           .          .   1215:	if typ.manual() {
   698           .          .   1216:		s.manualFreeList = 0
   699           .          .   1217:		s.nelems = 0
   700           .          .   1218:		s.limit = s.base() + s.npages*pageSize
   701           .          .   1219:		s.state.set(mSpanManual)
   702           .          .   1220:	} else {
   703           .          .   1221:		// We must set span properties before the span is published anywhere
   704           .          .   1222:		// since we're not holding the heap lock.
   705           .          .   1223:		s.spanclass = spanclass
   706           .          .   1224:		if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
   707           .          .   1225:			s.elemsize = nbytes
   708           .          .   1226:			s.nelems = 1
   709           .          .   1227:
   710           .          .   1228:			s.divShift = 0
   711           .          .   1229:			s.divMul = 0
   712           .          .   1230:			s.divShift2 = 0
   713           .          .   1231:			s.baseMask = 0
   714           .          .   1232:		} else {
   715           .          .   1233:			s.elemsize = uintptr(class_to_size[sizeclass])
   716           .          .   1234:			s.nelems = nbytes / s.elemsize
   717           .          .   1235:
   718           .          .   1236:			m := &class_to_divmagic[sizeclass]
   719           .          .   1237:			s.divShift = m.shift
   720           .          .   1238:			s.divMul = m.mul
   721           .          .   1239:			s.divShift2 = m.shift2
   722           .          .   1240:			s.baseMask = m.baseMask
   723           .          .   1241:		}
   724           .          .   1242:
   725           .          .   1243:		// Initialize mark and allocation structures.
   726           .          .   1244:		s.freeindex = 0
   727           .          .   1245:		s.allocCache = ^uint64(0) // all 1s indicating all free.
   728           .          .   1246:		s.gcmarkBits = newMarkBits(s.nelems)
   729           .          .   1247:		s.allocBits = newAllocBits(s.nelems)
   730           .          .   1248:
   731           .          .   1249:		// It's safe to access h.sweepgen without the heap lock because it's
   732           .          .   1250:		// only ever updated with the world stopped and we run on the
   733           .          .   1251:		// systemstack which blocks a STW transition.
   734           .          .   1252:		atomic.Store(&s.sweepgen, h.sweepgen)
   735           .          .   1253:
   736           .          .   1254:		// Now that the span is filled in, set its state. This
   737           .          .   1255:		// is a publication barrier for the other fields in
   738           .          .   1256:		// the span. While valid pointers into this span
   739           .          .   1257:		// should never be visible until the span is returned,
   740           .          .   1258:		// if the garbage collector finds an invalid pointer,
   741           .          .   1259:		// access to the span may race with initialization of
   742           .          .   1260:		// the span. We resolve this race by atomically
   743           .          .   1261:		// setting the state after the span is fully
   744           .          .   1262:		// initialized, and atomically checking the state in
   745           .          .   1263:		// any situation where a pointer is suspect.
   746           .          .   1264:		s.state.set(mSpanInUse)
   747           .          .   1265:	}
   748           .          .   1266:
   749           .          .   1267:	// Commit and account for any scavenged memory that the span now owns.
   750           .          .   1268:	if scav != 0 {
   751           .          .   1269:		// sysUsed all the pages that are actually available
   752           .          .   1270:		// in the span since some of them might be scavenged.
   753           .      120ms   1271:		sysUsed(unsafe.Pointer(base), nbytes)
   754           .          .   1272:		atomic.Xadd64(&memstats.heap_released, -int64(scav))
   755           .          .   1273:	}
   756           .          .   1274:	// Update stats.
   757           .          .   1275:	if typ == spanAllocHeap {
   758           .          .   1276:		atomic.Xadd64(&memstats.heap_inuse, int64(nbytes))
   759  ROUTINE ======================== runtime.(*mspan).init in /usr/local/go/src/runtime/mheap.go
   760        30ms       30ms (flat, cum)  1.45% of Total
   761           .          .   1522:}
   762           .          .   1523:
   763           .          .   1524:// Initialize a new span with the given start and npages.
   764           .          .   1525:func (span *mspan) init(base uintptr, npages uintptr) {
   765           .          .   1526:	// span is *not* zeroed.
   766        30ms       30ms   1527:	span.next = nil
   767           .          .   1528:	span.prev = nil
   768           .          .   1529:	span.list = nil
   769           .          .   1530:	span.startAddr = base
   770           .          .   1531:	span.npages = npages
   771           .          .   1532:	span.allocCount = 0
   772  ROUTINE ======================== runtime.(*pageAlloc).scavenge in /usr/local/go/src/runtime/mgcscavenge.go
   773           0       10ms (flat, cum)  0.48% of Total
   774           .          .    404:		gen   uint32
   775           .          .    405:	)
   776           .          .    406:	released := uintptr(0)
   777           .          .    407:	for released < nbytes {
   778           .          .    408:		if addrs.size() == 0 {
   779           .       10ms    409:			if addrs, gen = p.scavengeReserve(); addrs.size() == 0 {
   780           .          .    410:				break
   781           .          .    411:			}
   782           .          .    412:		}
   783           .          .    413:		r, a := p.scavengeOne(addrs, nbytes-released, mayUnlock)
   784           .          .    414:		released += r
   785  ROUTINE ======================== runtime.(*pageAlloc).scavengeReserve in /usr/local/go/src/runtime/mgcscavenge.go
   786           0       10ms (flat, cum)  0.48% of Total
   787           .          .    515:	// the scavenger, so align down, potentially extending
   788           .          .    516:	// the range.
   789           .          .    517:	newBase := alignDown(r.base.addr(), pallocChunkBytes)
   790           .          .    518:
   791           .          .    519:	// Remove from inUse however much extra we just pulled out.
   792           .       10ms    520:	p.scav.inUse.removeGreaterEqual(newBase)
   793           .          .    521:	r.base = offAddr{newBase}
   794           .          .    522:	return r, p.scav.gen
   795           .          .    523:}
   796           .          .    524:
   797           .          .    525:// scavengeUnreserve returns an unscavenged portion of a range that was
   798  ROUTINE ======================== runtime.add in /usr/local/go/src/runtime/stubs.go
   799        10ms       10ms (flat, cum)  0.48% of Total
   800           .          .      7:import "unsafe"
   801           .          .      8:
   802           .          .      9:// Should be a built-in for unsafe.Pointer?
   803           .          .     10://go:nosplit
   804           .          .     11:func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
   805        10ms       10ms     12:	return unsafe.Pointer(uintptr(p) + x)
   806           .          .     13:}
   807           .          .     14:
   808           .          .     15:// getg returns the pointer to the current g.
   809           .          .     16:// The compiler rewrites calls to this function into instructions
   810           .          .     17:// that fetch the g directly (from TLS or from the dedicated register).
   811  ROUTINE ======================== runtime.addrRange.size in /usr/local/go/src/runtime/mranges.go
   812        10ms       10ms (flat, cum)  0.48% of Total
   813           .          .     42:	if !a.base.lessThan(a.limit) {
   814           .          .     43:		return 0
   815           .          .     44:	}
   816           .          .     45:	// Subtraction is safe because limit and base must be in the same
   817           .          .     46:	// segment of the address space.
   818        10ms       10ms     47:	return a.limit.diff(a.base)
   819           .          .     48:}
   820           .          .     49:
   821           .          .     50:// contains returns whether or not the range contains a given address.
   822           .          .     51:func (a addrRange) contains(addr uintptr) bool {
   823           .          .     52:	return a.base.lessEqual(offAddr{addr}) && (offAddr{addr}).lessThan(a.limit)
   824  ROUTINE ======================== runtime.bgscavenge in /usr/local/go/src/runtime/mgcscavenge.go
   825           0       10ms (flat, cum)  0.48% of Total
   826           .          .    287:		// Time in scavenging critical section.
   827           .          .    288:		crit := float64(0)
   828           .          .    289:
   829           .          .    290:		// Run on the system stack since we grab the heap lock,
   830           .          .    291:		// and a stack growth with the heap lock means a deadlock.
   831           .       10ms    292:		systemstack(func() {
   832           .          .    293:			lock(&mheap_.lock)
   833           .          .    294:
   834           .          .    295:			// If background scavenging is disabled or if there's no work to do just park.
   835           .          .    296:			retained, goal := heapRetained(), mheap_.scavengeGoal
   836           .          .    297:			if retained <= goal {
   837  ROUTINE ======================== runtime.bgscavenge.func2 in /usr/local/go/src/runtime/mgcscavenge.go
   838           0       10ms (flat, cum)  0.48% of Total
   839           .          .    299:				return
   840           .          .    300:			}
   841           .          .    301:
   842           .          .    302:			// Scavenge one page, and measure the amount of time spent scavenging.
   843           .          .    303:			start := nanotime()
   844           .       10ms    304:			released = mheap_.pages.scavenge(physPageSize, true)
   845           .          .    305:			mheap_.pages.scav.released += released
   846           .          .    306:			crit = float64(nanotime() - start)
   847           .          .    307:
   848           .          .    308:			unlock(&mheap_.lock)
   849           .          .    309:		})
   850  ROUTINE ======================== runtime.bucketMask in /usr/local/go/src/runtime/map.go
   851        10ms       20ms (flat, cum)  0.97% of Total
   852           .          .    185:	return uintptr(1) << (b & (sys.PtrSize*8 - 1))
   853           .          .    186:}
   854           .          .    187:
   855           .          .    188:// bucketMask returns 1<<b - 1, optimized for code generation.
   856           .          .    189:func bucketMask(b uint8) uintptr {
   857        10ms       20ms    190:	return bucketShift(b) - 1
   858           .          .    191:}
   859           .          .    192:
   860           .          .    193:// tophash calculates the tophash value for hash.
   861           .          .    194:func tophash(hash uintptr) uint8 {
   862           .          .    195:	top := uint8(hash >> (sys.PtrSize*8 - 8))
   863  ROUTINE ======================== runtime.bucketShift in /usr/local/go/src/runtime/map.go
   864        10ms       10ms (flat, cum)  0.48% of Total
   865           .          .    180:}
   866           .          .    181:
   867           .          .    182:// bucketShift returns 1<<b, optimized for code generation.
   868           .          .    183:func bucketShift(b uint8) uintptr {
   869           .          .    184:	// Masking the shift amount allows overflow checks to be elided.
   870        10ms       10ms    185:	return uintptr(1) << (b & (sys.PtrSize*8 - 1))
   871           .          .    186:}
   872           .          .    187:
   873           .          .    188:// bucketMask returns 1<<b - 1, optimized for code generation.
   874           .          .    189:func bucketMask(b uint8) uintptr {
   875           .          .    190:	return bucketShift(b) - 1
   876  ROUTINE ======================== runtime.checkTimers in /usr/local/go/src/runtime/proc.go
   877           0       10ms (flat, cum)  0.48% of Total
   878           .          .   3244:		// No timers to run or adjust.
   879           .          .   3245:		return now, 0, false
   880           .          .   3246:	}
   881           .          .   3247:
   882           .          .   3248:	if now == 0 {
   883           .       10ms   3249:		now = nanotime()
   884           .          .   3250:	}
   885           .          .   3251:	if now < next {
   886           .          .   3252:		// Next timer is not ready to run, but keep going
   887           .          .   3253:		// if we would clear deleted timers.
   888           .          .   3254:		// This corresponds to the condition below where
   889  ROUTINE ======================== runtime.concatstring3 in /usr/local/go/src/runtime/string.go
   890        10ms       80ms (flat, cum)  3.86% of Total
   891           .          .     58:func concatstring2(buf *tmpBuf, a [2]string) string {
   892           .          .     59:	return concatstrings(buf, a[:])
   893           .          .     60:}
   894           .          .     61:
   895           .          .     62:func concatstring3(buf *tmpBuf, a [3]string) string {
   896        10ms       80ms     63:	return concatstrings(buf, a[:])
   897           .          .     64:}
   898           .          .     65:
   899           .          .     66:func concatstring4(buf *tmpBuf, a [4]string) string {
   900           .          .     67:	return concatstrings(buf, a[:])
   901           .          .     68:}
   902  ROUTINE ======================== runtime.concatstrings in /usr/local/go/src/runtime/string.go
   903        10ms       70ms (flat, cum)  3.38% of Total
   904           .          .     45:	// or our result does not escape the calling frame (buf != nil),
   905           .          .     46:	// then we can return that string directly.
   906           .          .     47:	if count == 1 && (buf != nil || !stringDataOnStack(a[idx])) {
   907           .          .     48:		return a[idx]
   908           .          .     49:	}
   909           .       40ms     50:	s, b := rawstringtmp(buf, l)
   910           .          .     51:	for _, x := range a {
   911        10ms       30ms     52:		copy(b, x)
   912           .          .     53:		b = b[len(x):]
   913           .          .     54:	}
   914           .          .     55:	return s
   915           .          .     56:}
   916           .          .     57:
   917  ROUTINE ======================== runtime.duffcopy in /usr/local/go/src/runtime/duff_amd64.s
   918        40ms       40ms (flat, cum)  1.93% of Total
   919           .          .    398:	ADDQ	$16, SI
   920           .          .    399:	MOVUPS	X0, (DI)
   921           .          .    400:	ADDQ	$16, DI
   922           .          .    401:
   923           .          .    402:	MOVUPS	(SI), X0
   924        10ms       10ms    403:	ADDQ	$16, SI
   925        10ms       10ms    404:	MOVUPS	X0, (DI)
   926           .          .    405:	ADDQ	$16, DI
   927           .          .    406:
   928           .          .    407:	MOVUPS	(SI), X0
   929           .          .    408:	ADDQ	$16, SI
   930           .          .    409:	MOVUPS	X0, (DI)
   931           .          .    410:	ADDQ	$16, DI
   932           .          .    411:
   933           .          .    412:	MOVUPS	(SI), X0
   934           .          .    413:	ADDQ	$16, SI
   935           .          .    414:	MOVUPS	X0, (DI)
   936        10ms       10ms    415:	ADDQ	$16, DI
   937           .          .    416:
   938           .          .    417:	MOVUPS	(SI), X0
   939           .          .    418:	ADDQ	$16, SI
   940           .          .    419:	MOVUPS	X0, (DI)
   941           .          .    420:	ADDQ	$16, DI
   942           .          .    421:
   943           .          .    422:	MOVUPS	(SI), X0
   944           .          .    423:	ADDQ	$16, SI
   945           .          .    424:	MOVUPS	X0, (DI)
   946        10ms       10ms    425:	ADDQ	$16, DI
   947           .          .    426:
   948           .          .    427:	RET
   949  ROUTINE ======================== runtime.findrunnable in /usr/local/go/src/runtime/proc.go
   950           0       60ms (flat, cum)  2.90% of Total
   951           .          .   2695:			// is probably a waste of time.
   952           .          .   2696:			//
   953           .          .   2697:			// timerpMask tells us whether the P may have timers at all. If it
   954           .          .   2698:			// can't, no need to check at all.
   955           .          .   2699:			if stealTimersOrRunNextG && timerpMask.read(enum.position()) {
   956           .       10ms   2700:				tnow, w, ran := checkTimers(p2, now)
   957           .          .   2701:				now = tnow
   958           .          .   2702:				if w != 0 && (pollUntil == 0 || w < pollUntil) {
   959           .          .   2703:					pollUntil = w
   960           .          .   2704:				}
   961           .          .   2705:				if ran {
   962           .          .   2706:					// Running the timers may have
   963           .          .   2707:					// made an arbitrary number of G's
   964           .          .   2708:					// ready and added them to this P's
   965           .          .   2709:					// local run queue. That invalidates
   966           .          .   2710:					// the assumption of runqsteal
   967           .          .   2711:					// that it always has room to add
   968           .          .   2712:					// stolen G's. So check now if there
   969           .          .   2713:					// is a local G to run.
   970           .          .   2714:					if gp, inheritTime := runqget(_p_); gp != nil {
   971           .          .   2715:						return gp, inheritTime
   972           .          .   2716:					}
   973           .          .   2717:					ranTimer = true
   974           .          .   2718:				}
   975           .          .   2719:			}
   976           .          .   2720:
   977           .          .   2721:			// Don't bother to attempt to steal if p2 is idle.
   978           .          .   2722:			if !idlepMask.read(enum.position()) {
   979           .          .   2723:				if gp := runqsteal(_p_, p2, stealTimersOrRunNextG); gp != nil {
   980           .          .   2724:					return gp, false
   981           .          .   2725:				}
   982           .          .   2726:			}
   983           .          .   2727:		}
   984           .          .   2728:	}
   985           .          .   2729:	if ranTimer {
   986           .          .   2730:		// Running a timer may have made some goroutine ready.
   987           .          .   2731:		goto top
   988           .          .   2732:	}
   989           .          .   2733:
   990           .          .   2734:stop:
   991           .          .   2735:
   992           .          .   2736:	// We have nothing to do. If we're in the GC mark phase, can
   993           .          .   2737:	// safely scan and blacken objects, and have work to do, run
   994           .          .   2738:	// idle-time marking rather than give up the P.
   995           .          .   2739:	if gcBlackenEnabled != 0 && gcMarkWorkAvailable(_p_) {
   996           .          .   2740:		node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())
   997           .          .   2741:		if node != nil {
   998           .          .   2742:			_p_.gcMarkWorkerMode = gcMarkWorkerIdleMode
   999           .          .   2743:			gp := node.gp.ptr()
  1000           .          .   2744:			casgstatus(gp, _Gwaiting, _Grunnable)
  1001           .          .   2745:			if trace.enabled {
  1002           .          .   2746:				traceGoUnpark(gp, 0)
  1003           .          .   2747:			}
  1004           .          .   2748:			return gp, false
  1005           .          .   2749:		}
  1006           .          .   2750:	}
  1007           .          .   2751:
  1008           .          .   2752:	delta := int64(-1)
  1009           .          .   2753:	if pollUntil != 0 {
  1010           .          .   2754:		// checkTimers ensures that polluntil > now.
  1011           .          .   2755:		delta = pollUntil - now
  1012           .          .   2756:	}
  1013           .          .   2757:
  1014           .          .   2758:	// wasm only:
  1015           .          .   2759:	// If a callback returned and no other goroutine is awake,
  1016           .          .   2760:	// then wake event handler goroutine which pauses execution
  1017           .          .   2761:	// until a callback was triggered.
  1018           .          .   2762:	gp, otherReady := beforeIdle(delta)
  1019           .          .   2763:	if gp != nil {
  1020           .          .   2764:		casgstatus(gp, _Gwaiting, _Grunnable)
  1021           .          .   2765:		if trace.enabled {
  1022           .          .   2766:			traceGoUnpark(gp, 0)
  1023           .          .   2767:		}
  1024           .          .   2768:		return gp, false
  1025           .          .   2769:	}
  1026           .          .   2770:	if otherReady {
  1027           .          .   2771:		goto top
  1028           .          .   2772:	}
  1029           .          .   2773:
  1030           .          .   2774:	// Before we drop our P, make a snapshot of the allp slice,
  1031           .          .   2775:	// which can change underfoot once we no longer block
  1032           .          .   2776:	// safe-points. We don't need to snapshot the contents because
  1033           .          .   2777:	// everything up to cap(allp) is immutable.
  1034           .          .   2778:	allpSnapshot := allp
  1035           .          .   2779:	// Also snapshot masks. Value changes are OK, but we can't allow
  1036           .          .   2780:	// len to change out from under us.
  1037           .          .   2781:	idlepMaskSnapshot := idlepMask
  1038           .          .   2782:	timerpMaskSnapshot := timerpMask
  1039           .          .   2783:
  1040           .          .   2784:	// return P and block
  1041           .          .   2785:	lock(&sched.lock)
  1042           .          .   2786:	if sched.gcwaiting != 0 || _p_.runSafePointFn != 0 {
  1043           .          .   2787:		unlock(&sched.lock)
  1044           .          .   2788:		goto top
  1045           .          .   2789:	}
  1046           .          .   2790:	if sched.runqsize != 0 {
  1047           .          .   2791:		gp := globrunqget(_p_, 0)
  1048           .          .   2792:		unlock(&sched.lock)
  1049           .          .   2793:		return gp, false
  1050           .          .   2794:	}
  1051           .          .   2795:	if releasep() != _p_ {
  1052           .          .   2796:		throw("findrunnable: wrong p")
  1053           .          .   2797:	}
  1054           .          .   2798:	pidleput(_p_)
  1055           .          .   2799:	unlock(&sched.lock)
  1056           .          .   2800:
  1057           .          .   2801:	// Delicate dance: thread transitions from spinning to non-spinning state,
  1058           .          .   2802:	// potentially concurrently with submission of new goroutines. We must
  1059           .          .   2803:	// drop nmspinning first and then check all per-P queues again (with
  1060           .          .   2804:	// #StoreLoad memory barrier in between). If we do it the other way around,
  1061           .          .   2805:	// another thread can submit a goroutine after we've checked all run queues
  1062           .          .   2806:	// but before we drop nmspinning; as a result nobody will unpark a thread
  1063           .          .   2807:	// to run the goroutine.
  1064           .          .   2808:	// If we discover new work below, we need to restore m.spinning as a signal
  1065           .          .   2809:	// for resetspinning to unpark a new worker thread (because there can be more
  1066           .          .   2810:	// than one starving goroutine). However, if after discovering new work
  1067           .          .   2811:	// we also observe no idle Ps, it is OK to just park the current thread:
  1068           .          .   2812:	// the system is fully loaded so no spinning threads are required.
  1069           .          .   2813:	// Also see "Worker thread parking/unparking" comment at the top of the file.
  1070           .          .   2814:	wasSpinning := _g_.m.spinning
  1071           .          .   2815:	if _g_.m.spinning {
  1072           .          .   2816:		_g_.m.spinning = false
  1073           .          .   2817:		if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
  1074           .          .   2818:			throw("findrunnable: negative nmspinning")
  1075           .          .   2819:		}
  1076           .          .   2820:	}
  1077           .          .   2821:
  1078           .          .   2822:	// check all runqueues once again
  1079           .          .   2823:	for id, _p_ := range allpSnapshot {
  1080           .          .   2824:		if !idlepMaskSnapshot.read(uint32(id)) && !runqempty(_p_) {
  1081           .          .   2825:			lock(&sched.lock)
  1082           .          .   2826:			_p_ = pidleget()
  1083           .          .   2827:			unlock(&sched.lock)
  1084           .          .   2828:			if _p_ != nil {
  1085           .          .   2829:				acquirep(_p_)
  1086           .          .   2830:				if wasSpinning {
  1087           .          .   2831:					_g_.m.spinning = true
  1088           .          .   2832:					atomic.Xadd(&sched.nmspinning, 1)
  1089           .          .   2833:				}
  1090           .          .   2834:				goto top
  1091           .          .   2835:			}
  1092           .          .   2836:			break
  1093           .          .   2837:		}
  1094           .          .   2838:	}
  1095           .          .   2839:
  1096           .          .   2840:	// Similar to above, check for timer creation or expiry concurrently with
  1097           .          .   2841:	// transitioning from spinning to non-spinning. Note that we cannot use
  1098           .          .   2842:	// checkTimers here because it calls adjusttimers which may need to allocate
  1099           .          .   2843:	// memory, and that isn't allowed when we don't have an active P.
  1100           .          .   2844:	for id, _p_ := range allpSnapshot {
  1101           .          .   2845:		if timerpMaskSnapshot.read(uint32(id)) {
  1102           .          .   2846:			w := nobarrierWakeTime(_p_)
  1103           .          .   2847:			if w != 0 && (pollUntil == 0 || w < pollUntil) {
  1104           .          .   2848:				pollUntil = w
  1105           .          .   2849:			}
  1106           .          .   2850:		}
  1107           .          .   2851:	}
  1108           .          .   2852:	if pollUntil != 0 {
  1109           .          .   2853:		if now == 0 {
  1110           .          .   2854:			now = nanotime()
  1111           .          .   2855:		}
  1112           .          .   2856:		delta = pollUntil - now
  1113           .          .   2857:		if delta < 0 {
  1114           .          .   2858:			delta = 0
  1115           .          .   2859:		}
  1116           .          .   2860:	}
  1117           .          .   2861:
  1118           .          .   2862:	// Check for idle-priority GC work again.
  1119           .          .   2863:	//
  1120           .          .   2864:	// N.B. Since we have no P, gcBlackenEnabled may change at any time; we
  1121           .          .   2865:	// must check again after acquiring a P.
  1122           .          .   2866:	if atomic.Load(&gcBlackenEnabled) != 0 && gcMarkWorkAvailable(nil) {
  1123           .          .   2867:		// Work is available; we can start an idle GC worker only if
  1124           .          .   2868:		// there is an available P and available worker G.
  1125           .          .   2869:		//
  1126           .          .   2870:		// We can attempt to acquire these in either order. Workers are
  1127           .          .   2871:		// almost always available (see comment in findRunnableGCWorker
  1128           .          .   2872:		// for the one case there may be none). Since we're slightly
  1129           .          .   2873:		// less likely to find a P, check for that first.
  1130           .          .   2874:		lock(&sched.lock)
  1131           .          .   2875:		var node *gcBgMarkWorkerNode
  1132           .          .   2876:		_p_ = pidleget()
  1133           .          .   2877:		if _p_ != nil {
  1134           .          .   2878:			// Now that we own a P, gcBlackenEnabled can't change
  1135           .          .   2879:			// (as it requires STW).
  1136           .          .   2880:			if gcBlackenEnabled != 0 {
  1137           .          .   2881:				node = (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())
  1138           .          .   2882:				if node == nil {
  1139           .          .   2883:					pidleput(_p_)
  1140           .          .   2884:					_p_ = nil
  1141           .          .   2885:				}
  1142           .          .   2886:			} else {
  1143           .          .   2887:				pidleput(_p_)
  1144           .          .   2888:				_p_ = nil
  1145           .          .   2889:			}
  1146           .          .   2890:		}
  1147           .          .   2891:		unlock(&sched.lock)
  1148           .          .   2892:		if _p_ != nil {
  1149           .          .   2893:			acquirep(_p_)
  1150           .          .   2894:			if wasSpinning {
  1151           .          .   2895:				_g_.m.spinning = true
  1152           .          .   2896:				atomic.Xadd(&sched.nmspinning, 1)
  1153           .          .   2897:			}
  1154           .          .   2898:
  1155           .          .   2899:			// Run the idle worker.
  1156           .          .   2900:			_p_.gcMarkWorkerMode = gcMarkWorkerIdleMode
  1157           .          .   2901:			gp := node.gp.ptr()
  1158           .          .   2902:			casgstatus(gp, _Gwaiting, _Grunnable)
  1159           .          .   2903:			if trace.enabled {
  1160           .          .   2904:				traceGoUnpark(gp, 0)
  1161           .          .   2905:			}
  1162           .          .   2906:			return gp, false
  1163           .          .   2907:		}
  1164           .          .   2908:	}
  1165           .          .   2909:
  1166           .          .   2910:	// poll network
  1167           .          .   2911:	if netpollinited() && (atomic.Load(&netpollWaiters) > 0 || pollUntil != 0) && atomic.Xchg64(&sched.lastpoll, 0) != 0 {
  1168           .          .   2912:		atomic.Store64(&sched.pollUntil, uint64(pollUntil))
  1169           .          .   2913:		if _g_.m.p != 0 {
  1170           .          .   2914:			throw("findrunnable: netpoll with p")
  1171           .          .   2915:		}
  1172           .          .   2916:		if _g_.m.spinning {
  1173           .          .   2917:			throw("findrunnable: netpoll with spinning")
  1174           .          .   2918:		}
  1175           .          .   2919:		if faketime != 0 {
  1176           .          .   2920:			// When using fake time, just poll.
  1177           .          .   2921:			delta = 0
  1178           .          .   2922:		}
  1179           .       10ms   2923:		list := netpoll(delta) // block until new work is available
  1180           .          .   2924:		atomic.Store64(&sched.pollUntil, 0)
  1181           .          .   2925:		atomic.Store64(&sched.lastpoll, uint64(nanotime()))
  1182           .          .   2926:		if faketime != 0 && list.empty() {
  1183           .          .   2927:			// Using fake time and nothing is ready; stop M.
  1184           .          .   2928:			// When all M's stop, checkdead will call timejump.
  1185           .          .   2929:			stopm()
  1186           .          .   2930:			goto top
  1187           .          .   2931:		}
  1188           .          .   2932:		lock(&sched.lock)
  1189           .          .   2933:		_p_ = pidleget()
  1190           .          .   2934:		unlock(&sched.lock)
  1191           .          .   2935:		if _p_ == nil {
  1192           .          .   2936:			injectglist(&list)
  1193           .          .   2937:		} else {
  1194           .          .   2938:			acquirep(_p_)
  1195           .          .   2939:			if !list.empty() {
  1196           .          .   2940:				gp := list.pop()
  1197           .          .   2941:				injectglist(&list)
  1198           .          .   2942:				casgstatus(gp, _Gwaiting, _Grunnable)
  1199           .          .   2943:				if trace.enabled {
  1200           .          .   2944:					traceGoUnpark(gp, 0)
  1201           .          .   2945:				}
  1202           .          .   2946:				return gp, false
  1203           .          .   2947:			}
  1204           .          .   2948:			if wasSpinning {
  1205           .          .   2949:				_g_.m.spinning = true
  1206           .          .   2950:				atomic.Xadd(&sched.nmspinning, 1)
  1207           .          .   2951:			}
  1208           .          .   2952:			goto top
  1209           .          .   2953:		}
  1210           .          .   2954:	} else if pollUntil != 0 && netpollinited() {
  1211           .          .   2955:		pollerPollUntil := int64(atomic.Load64(&sched.pollUntil))
  1212           .          .   2956:		if pollerPollUntil == 0 || pollerPollUntil > pollUntil {
  1213           .          .   2957:			netpollBreak()
  1214           .          .   2958:		}
  1215           .          .   2959:	}
  1216           .       40ms   2960:	stopm()
  1217           .          .   2961:	goto top
  1218           .          .   2962:}
  1219           .          .   2963:
  1220           .          .   2964:// pollWork reports whether there is non-background work this P could
  1221           .          .   2965:// be doing. This is a fairly lightweight check to be used for
  1222  ROUTINE ======================== runtime.gcAssistAlloc.func1 in /usr/local/go/src/runtime/mgcmark.go
  1223           0       10ms (flat, cum)  0.48% of Total
  1224           .          .    444:		traceGCMarkAssistStart()
  1225           .          .    445:	}
  1226           .          .    446:
  1227           .          .    447:	// Perform assist work
  1228           .          .    448:	systemstack(func() {
  1229           .       10ms    449:		gcAssistAlloc1(gp, scanWork)
  1230           .          .    450:		// The user stack may have moved, so this can't touch
  1231           .          .    451:		// anything on it until it returns from systemstack.
  1232           .          .    452:	})
  1233           .          .    453:
  1234           .          .    454:	completed := gp.param != nil
  1235  ROUTINE ======================== runtime.gcAssistAlloc1 in /usr/local/go/src/runtime/mgcmark.go
  1236           0       10ms (flat, cum)  0.48% of Total
  1237           .          .    533:	gp.waitreason = waitReasonGCAssistMarking
  1238           .          .    534:
  1239           .          .    535:	// drain own cached work first in the hopes that it
  1240           .          .    536:	// will be more cache friendly.
  1241           .          .    537:	gcw := &getg().m.p.ptr().gcw
  1242           .       10ms    538:	workDone := gcDrainN(gcw, scanWork)
  1243           .          .    539:
  1244           .          .    540:	casgstatus(gp, _Gwaiting, _Grunning)
  1245           .          .    541:
  1246           .          .    542:	// Record that we did this much scan work.
  1247           .          .    543:	//
  1248  ROUTINE ======================== runtime.gcBgMarkWorker in /usr/local/go/src/runtime/mgc.go
  1249           0       30ms (flat, cum)  1.45% of Total
  1250           .          .   1962:		if decnwait == work.nproc {
  1251           .          .   1963:			println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc)
  1252           .          .   1964:			throw("work.nwait was > work.nproc")
  1253           .          .   1965:		}
  1254           .          .   1966:
  1255           .       30ms   1967:		systemstack(func() {
  1256           .          .   1968:			// Mark our goroutine preemptible so its stack
  1257           .          .   1969:			// can be scanned. This lets two mark workers
  1258           .          .   1970:			// scan each other (otherwise, they would
  1259           .          .   1971:			// deadlock). We must not modify anything on
  1260           .          .   1972:			// the G stack. However, stack shrinking is
  1261  ROUTINE ======================== runtime.gcBgMarkWorker.func1 in /usr/local/go/src/runtime/mgc.go
  1262           0       10ms (flat, cum)  0.48% of Total
  1263           .          .   1929:				// after parking the G.
  1264           .          .   1930:				releasem(mp)
  1265           .          .   1931:			}
  1266           .          .   1932:
  1267           .          .   1933:			// Release this G to the pool.
  1268           .       10ms   1934:			gcBgMarkWorkerPool.push(&node.node)
  1269           .          .   1935:			// Note that at this point, the G may immediately be
  1270           .          .   1936:			// rescheduled and may be running.
  1271           .          .   1937:			return true
  1272           .          .   1938:		}, unsafe.Pointer(node), waitReasonGCWorkerIdle, traceEvGoBlock, 0)
  1273           .          .   1939:
  1274  ROUTINE ======================== runtime.gcBgMarkWorker.func2 in /usr/local/go/src/runtime/mgc.go
  1275           0       50ms (flat, cum)  2.42% of Total
  1276           .          .   1975:			casgstatus(gp, _Grunning, _Gwaiting)
  1277           .          .   1976:			switch pp.gcMarkWorkerMode {
  1278           .          .   1977:			default:
  1279           .          .   1978:				throw("gcBgMarkWorker: unexpected gcMarkWorkerMode")
  1280           .          .   1979:			case gcMarkWorkerDedicatedMode:
  1281           .       10ms   1980:				gcDrain(&pp.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
  1282           .          .   1981:				if gp.preempt {
  1283           .          .   1982:					// We were preempted. This is
  1284           .          .   1983:					// a useful signal to kick
  1285           .          .   1984:					// everything out of the run
  1286           .          .   1985:					// queue so it can run
  1287           .          .   1986:					// somewhere else.
  1288           .          .   1987:					lock(&sched.lock)
  1289           .          .   1988:					for {
  1290           .          .   1989:						gp, _ := runqget(pp)
  1291           .          .   1990:						if gp == nil {
  1292           .          .   1991:							break
  1293           .          .   1992:						}
  1294           .          .   1993:						globrunqput(gp)
  1295           .          .   1994:					}
  1296           .          .   1995:					unlock(&sched.lock)
  1297           .          .   1996:				}
  1298           .          .   1997:				// Go back to draining, this time
  1299           .          .   1998:				// without preemption.
  1300           .       30ms   1999:				gcDrain(&pp.gcw, gcDrainFlushBgCredit)
  1301           .          .   2000:			case gcMarkWorkerFractionalMode:
  1302           .          .   2001:				gcDrain(&pp.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit)
  1303           .          .   2002:			case gcMarkWorkerIdleMode:
  1304           .       10ms   2003:				gcDrain(&pp.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit)
  1305           .          .   2004:			}
  1306           .          .   2005:			casgstatus(gp, _Gwaiting, _Grunning)
  1307           .          .   2006:		})
  1308           .          .   2007:
  1309           .          .   2008:		// Account for time.
  1310  ROUTINE ======================== runtime.gcDrain in /usr/local/go/src/runtime/mgcmark.go
  1311           0       50ms (flat, cum)  2.42% of Total
  1312           .          .   1009:		for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
  1313           .          .   1010:			job := atomic.Xadd(&work.markrootNext, +1) - 1
  1314           .          .   1011:			if job >= work.markrootJobs {
  1315           .          .   1012:				break
  1316           .          .   1013:			}
  1317           .       10ms   1014:			markroot(gcw, job)
  1318           .          .   1015:			if check != nil && check() {
  1319           .          .   1016:				goto done
  1320           .          .   1017:			}
  1321           .          .   1018:		}
  1322           .          .   1019:	}
  1323           .          .   1020:
  1324           .          .   1021:	// Drain heap marking jobs.
  1325           .          .   1022:	// Stop if we're preemptible or if someone wants to STW.
  1326           .          .   1023:	for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
  1327           .          .   1024:		// Try to keep work available on the global queue. We used to
  1328           .          .   1025:		// check if there were waiting workers, but it's better to
  1329           .          .   1026:		// just keep work available than to make workers wait. In the
  1330           .          .   1027:		// worst case, we'll do O(log(_WorkbufSize)) unnecessary
  1331           .          .   1028:		// balances.
  1332           .          .   1029:		if work.full == 0 {
  1333           .       30ms   1030:			gcw.balance()
  1334           .          .   1031:		}
  1335           .          .   1032:
  1336           .          .   1033:		b := gcw.tryGetFast()
  1337           .          .   1034:		if b == 0 {
  1338           .          .   1035:			b = gcw.tryGet()
  1339           .          .   1036:			if b == 0 {
  1340           .          .   1037:				// Flush the write barrier
  1341           .          .   1038:				// buffer; this may create
  1342           .          .   1039:				// more work.
  1343           .          .   1040:				wbBufFlush(nil, 0)
  1344           .          .   1041:				b = gcw.tryGet()
  1345           .          .   1042:			}
  1346           .          .   1043:		}
  1347           .          .   1044:		if b == 0 {
  1348           .          .   1045:			// Unable to get work.
  1349           .          .   1046:			break
  1350           .          .   1047:		}
  1351           .       10ms   1048:		scanobject(b, gcw)
  1352           .          .   1049:
  1353           .          .   1050:		// Flush background scan work credit to the global
  1354           .          .   1051:		// account if we've accumulated enough locally so
  1355           .          .   1052:		// mutator assists can draw on it.
  1356           .          .   1053:		if gcw.scanWork >= gcCreditSlack {
  1357  ROUTINE ======================== runtime.gcDrainN in /usr/local/go/src/runtime/mgcmark.go
  1358           0       10ms (flat, cum)  0.48% of Total
  1359           .          .   1103:
  1360           .          .   1104:	gp := getg().m.curg
  1361           .          .   1105:	for !gp.preempt && workFlushed+gcw.scanWork < scanWork {
  1362           .          .   1106:		// See gcDrain comment.
  1363           .          .   1107:		if work.full == 0 {
  1364           .       10ms   1108:			gcw.balance()
  1365           .          .   1109:		}
  1366           .          .   1110:
  1367           .          .   1111:		// This might be a good place to add prefetch code...
  1368           .          .   1112:		// if(wbuf.nobj > 4) {
  1369           .          .   1113:		//         PREFETCH(wbuf->obj[wbuf.nobj - 3];
  1370  ROUTINE ======================== runtime.gcStart.func2 in /usr/local/go/src/runtime/mgc.go
  1371           0       70ms (flat, cum)  3.38% of Total
  1372           .          .   1438:	// returns, so make sure we're not preemptible.
  1373           .          .   1439:	mp = acquirem()
  1374           .          .   1440:
  1375           .          .   1441:	// Concurrent mark.
  1376           .          .   1442:	systemstack(func() {
  1377           .       70ms   1443:		now = startTheWorldWithSema(trace.enabled)
  1378           .          .   1444:		work.pauseNS += now - work.pauseStart
  1379           .          .   1445:		work.tMark = now
  1380           .          .   1446:		memstats.gcPauseDist.record(now - work.pauseStart)
  1381           .          .   1447:	})
  1382           .          .   1448:
  1383  ROUTINE ======================== runtime.gentraceback in /usr/local/go/src/runtime/traceback.go
  1384           0       10ms (flat, cum)  0.48% of Total
  1385           .          .    317:				frame.continpc = 0
  1386           .          .    318:			}
  1387           .          .    319:		}
  1388           .          .    320:
  1389           .          .    321:		if callback != nil {
  1390           .       10ms    322:			if !callback((*stkframe)(noescape(unsafe.Pointer(&frame))), v) {
  1391           .          .    323:				return n
  1392           .          .    324:			}
  1393           .          .    325:		}
  1394           .          .    326:
  1395           .          .    327:		if pcbuf != nil {
  1396  ROUTINE ======================== runtime.handoff in /usr/local/go/src/runtime/mgcwork.go
  1397           0       10ms (flat, cum)  0.48% of Total
  1398           .          .    431:	// Make new buffer with half of b's pointers.
  1399           .          .    432:	b1 := getempty()
  1400           .          .    433:	n := b.nobj / 2
  1401           .          .    434:	b.nobj -= n
  1402           .          .    435:	b1.nobj = n
  1403           .       10ms    436:	memmove(unsafe.Pointer(&b1.obj[0]), unsafe.Pointer(&b.obj[b.nobj]), uintptr(n)*unsafe.Sizeof(b1.obj[0]))
  1404           .          .    437:
  1405           .          .    438:	// Put b on full list - let first half of b get stolen.
  1406           .          .    439:	putfull(b)
  1407           .          .    440:	return b1
  1408           .          .    441:}
  1409  ROUTINE ======================== runtime.heapBits.initSpan in /usr/local/go/src/runtime/mbitmap.go
  1410           0       10ms (flat, cum)  0.48% of Total
  1411           .          .    760:			for i := uintptr(0); i < nbyte; i++ {
  1412           .          .    761:				*bitp = bitPointerAll | bitScanAll
  1413           .          .    762:				bitp = add1(bitp)
  1414           .          .    763:			}
  1415           .          .    764:		} else {
  1416           .       10ms    765:			memclrNoHeapPointers(unsafe.Pointer(h.bitp), nbyte)
  1417           .          .    766:		}
  1418           .          .    767:		h = hNext
  1419           .          .    768:		nw -= anw
  1420           .          .    769:	}
  1421           .          .    770:}
  1422  ROUTINE ======================== runtime.heapBitsForAddr in /usr/local/go/src/runtime/mbitmap.go
  1423        20ms       20ms (flat, cum)  0.97% of Total
  1424           .          .    306:// nosplit because it is used during write barriers and must not be preempted.
  1425           .          .    307://go:nosplit
  1426           .          .    308:func heapBitsForAddr(addr uintptr) (h heapBits) {
  1427           .          .    309:	// 2 bits per word, 4 pairs per byte, and a mask is hard coded.
  1428           .          .    310:	arena := arenaIndex(addr)
  1429        10ms       10ms    311:	ha := mheap_.arenas[arena.l1()][arena.l2()]
  1430           .          .    312:	// The compiler uses a load for nil checking ha, but in this
  1431           .          .    313:	// case we'll almost never hit that cache line again, so it
  1432           .          .    314:	// makes more sense to do a value check.
  1433           .          .    315:	if ha == nil {
  1434           .          .    316:		// addr is not in the heap. Return nil heapBits, which
  1435           .          .    317:		// we expect to crash in the caller.
  1436           .          .    318:		return
  1437           .          .    319:	}
  1438           .          .    320:	h.bitp = &ha.bitmap[(addr/(sys.PtrSize*4))%heapArenaBitmapBytes]
  1439        10ms       10ms    321:	h.shift = uint32((addr / sys.PtrSize) & 3)
  1440           .          .    322:	h.arena = uint32(arena)
  1441           .          .    323:	h.last = &ha.bitmap[len(ha.bitmap)-1]
  1442           .          .    324:	return
  1443           .          .    325:}
  1444           .          .    326:
  1445  ROUTINE ======================== runtime.heapBitsSetType in /usr/local/go/src/runtime/mbitmap.go
  1446        40ms       60ms (flat, cum)  2.90% of Total
  1447           .          .    844:			}
  1448           .          .    845:		}
  1449           .          .    846:		return
  1450           .          .    847:	}
  1451           .          .    848:
  1452           .       20ms    849:	h := heapBitsForAddr(x)
  1453           .          .    850:	ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below)
  1454           .          .    851:
  1455           .          .    852:	// 2-word objects only have 4 bitmap bits and 3-word objects only have 6 bitmap bits.
  1456           .          .    853:	// Therefore, these objects share a heap bitmap byte with the objects next to them.
  1457           .          .    854:	// These are called out as a special case primarily so the code below can assume all
  1458           .          .    855:	// objects are at least 4 words long and that their bitmaps start either at the beginning
  1459           .          .    856:	// of a bitmap byte, or half-way in (h.shift of 0 and 2 respectively).
  1460           .          .    857:
  1461           .          .    858:	if size == 2*sys.PtrSize {
  1462           .          .    859:		if typ.size == sys.PtrSize {
  1463           .          .    860:			// We're allocating a block big enough to hold two pointers.
  1464           .          .    861:			// On 64-bit, that means the actual object must be two pointers,
  1465           .          .    862:			// or else we'd have used the one-pointer-sized block.
  1466           .          .    863:			// On 32-bit, however, this is the 8-byte block, the smallest one.
  1467           .          .    864:			// So it could be that we're allocating one pointer and this was
  1468           .          .    865:			// just the smallest block available. Distinguish by checking dataSize.
  1469           .          .    866:			// (In general the number of instances of typ being allocated is
  1470           .          .    867:			// dataSize/typ.size.)
  1471           .          .    868:			if sys.PtrSize == 4 && dataSize == sys.PtrSize {
  1472           .          .    869:				// 1 pointer object. On 32-bit machines clear the bit for the
  1473           .          .    870:				// unused second word.
  1474           .          .    871:				*h.bitp &^= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift
  1475           .          .    872:				*h.bitp |= (bitPointer | bitScan) << h.shift
  1476           .          .    873:			} else {
  1477           .          .    874:				// 2-element array of pointer.
  1478           .          .    875:				*h.bitp |= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift
  1479           .          .    876:			}
  1480           .          .    877:			return
  1481           .          .    878:		}
  1482           .          .    879:		// Otherwise typ.size must be 2*sys.PtrSize,
  1483           .          .    880:		// and typ.kind&kindGCProg == 0.
  1484           .          .    881:		if doubleCheck {
  1485           .          .    882:			if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 {
  1486           .          .    883:				print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
  1487           .          .    884:				throw("heapBitsSetType")
  1488           .          .    885:			}
  1489           .          .    886:		}
  1490           .          .    887:		b := uint32(*ptrmask)
  1491           .          .    888:		hb := b & 3
  1492           .          .    889:		hb |= bitScanAll & ((bitScan << (typ.ptrdata / sys.PtrSize)) - 1)
  1493           .          .    890:		// Clear the bits for this object so we can set the
  1494           .          .    891:		// appropriate ones.
  1495           .          .    892:		*h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift
  1496           .          .    893:		*h.bitp |= uint8(hb << h.shift)
  1497           .          .    894:		return
  1498        10ms       10ms    895:	} else if size == 3*sys.PtrSize {
  1499           .          .    896:		b := uint8(*ptrmask)
  1500           .          .    897:		if doubleCheck {
  1501           .          .    898:			if b == 0 {
  1502           .          .    899:				println("runtime: invalid type ", typ.string())
  1503           .          .    900:				throw("heapBitsSetType: called with non-pointer type")
  1504           .          .    901:			}
  1505           .          .    902:			if sys.PtrSize != 8 {
  1506           .          .    903:				throw("heapBitsSetType: unexpected 3 pointer wide size class on 32 bit")
  1507           .          .    904:			}
  1508           .          .    905:			if typ.kind&kindGCProg != 0 {
  1509           .          .    906:				throw("heapBitsSetType: unexpected GC prog for 3 pointer wide size class")
  1510           .          .    907:			}
  1511           .          .    908:			if typ.size == 2*sys.PtrSize {
  1512           .          .    909:				print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, "\n")
  1513           .          .    910:				throw("heapBitsSetType: inconsistent object sizes")
  1514           .          .    911:			}
  1515           .          .    912:		}
  1516           .          .    913:		if typ.size == sys.PtrSize {
  1517           .          .    914:			// The type contains a pointer otherwise heapBitsSetType wouldn't have been called.
  1518           .          .    915:			// Since the type is only 1 pointer wide and contains a pointer, its gcdata must be exactly 1.
  1519           .          .    916:			if doubleCheck && *typ.gcdata != 1 {
  1520           .          .    917:				print("runtime: heapBitsSetType size=", size, " typ.size=", typ.size, "but *typ.gcdata", *typ.gcdata, "\n")
  1521           .          .    918:				throw("heapBitsSetType: unexpected gcdata for 1 pointer wide type size in 3 pointer wide size class")
  1522           .          .    919:			}
  1523           .          .    920:			// 3 element array of pointers. Unrolling ptrmask 3 times into p yields 00000111.
  1524           .          .    921:			b = 7
  1525           .          .    922:		}
  1526           .          .    923:
  1527           .          .    924:		hb := b & 7
  1528           .          .    925:		// Set bitScan bits for all pointers.
  1529           .          .    926:		hb |= hb << wordsPerBitmapByte
  1530           .          .    927:		// First bitScan bit is always set since the type contains pointers.
  1531           .          .    928:		hb |= bitScan
  1532           .          .    929:		// Second bitScan bit needs to also be set if the third bitScan bit is set.
  1533           .          .    930:		hb |= hb & (bitScan << (2 * heapBitsShift)) >> 1
  1534           .          .    931:
  1535           .          .    932:		// For h.shift > 1 heap bits cross a byte boundary and need to be written part
  1536           .          .    933:		// to h.bitp and part to the next h.bitp.
  1537           .          .    934:		switch h.shift {
  1538           .          .    935:		case 0:
  1539        20ms       20ms    936:			*h.bitp &^= mask3 << 0
  1540        10ms       10ms    937:			*h.bitp |= hb << 0
  1541           .          .    938:		case 1:
  1542           .          .    939:			*h.bitp &^= mask3 << 1
  1543           .          .    940:			*h.bitp |= hb << 1
  1544           .          .    941:		case 2:
  1545           .          .    942:			*h.bitp &^= mask2 << 2
  1546  ROUTINE ======================== runtime.kevent in /usr/local/go/src/runtime/sys_darwin.go
  1547        90ms       90ms (flat, cum)  4.35% of Total
  1548           .          .    344:func kqueue_trampoline()
  1549           .          .    345:
  1550           .          .    346://go:nosplit
  1551           .          .    347://go:cgo_unsafe_args
  1552           .          .    348:func kevent(kq int32, ch *keventt, nch int32, ev *keventt, nev int32, ts *timespec) int32 {
  1553        90ms       90ms    349:	return libcCall(unsafe.Pointer(funcPC(kevent_trampoline)), unsafe.Pointer(&kq))
  1554           .          .    350:}
  1555           .          .    351:func kevent_trampoline()
  1556           .          .    352:
  1557           .          .    353://go:nosplit
  1558           .          .    354://go:cgo_unsafe_args
  1559  ROUTINE ======================== runtime.mPark in /usr/local/go/src/runtime/proc.go
  1560           0       40ms (flat, cum)  1.93% of Total
  1561           .          .   1335:// only way that m's should park themselves.
  1562           .          .   1336://go:nosplit
  1563           .          .   1337:func mPark() {
  1564           .          .   1338:	g := getg()
  1565           .          .   1339:	for {
  1566           .       40ms   1340:		notesleep(&g.m.park)
  1567           .          .   1341:		// Note, because of signal handling by this parked m,
  1568           .          .   1342:		// a preemptive mDoFixup() may actually occur via
  1569           .          .   1343:		// mDoFixupAndOSYield(). (See golang.org/issue/44193)
  1570           .          .   1344:		noteclear(&g.m.park)
  1571           .          .   1345:		if !mDoFixup() {
  1572  ROUTINE ======================== runtime.madvise in /usr/local/go/src/runtime/sys_darwin.go
  1573       120ms      120ms (flat, cum)  5.80% of Total
  1574           .          .    176:func munmap_trampoline()
  1575           .          .    177:
  1576           .          .    178://go:nosplit
  1577           .          .    179://go:cgo_unsafe_args
  1578           .          .    180:func madvise(addr unsafe.Pointer, n uintptr, flags int32) {
  1579       120ms      120ms    181:	libcCall(unsafe.Pointer(funcPC(madvise_trampoline)), unsafe.Pointer(&addr))
  1580           .          .    182:}
  1581           .          .    183:func madvise_trampoline()
  1582           .          .    184:
  1583           .          .    185://go:nosplit
  1584           .          .    186://go:cgo_unsafe_args
  1585  ROUTINE ======================== runtime.mallocgc in /usr/local/go/src/runtime/malloc.go
  1586       120ms      310ms (flat, cum) 14.98% of Total
  1587           .          .    900:}
  1588           .          .    901:
  1589           .          .    902:// Allocate an object of size bytes.
  1590           .          .    903:// Small objects are allocated from the per-P cache's free lists.
  1591           .          .    904:// Large objects (> 32 kB) are allocated straight from the heap.
  1592        20ms       20ms    905:func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
  1593           .          .    906:	if gcphase == _GCmarktermination {
  1594           .          .    907:		throw("mallocgc called with gcphase == _GCmarktermination")
  1595           .          .    908:	}
  1596           .          .    909:
  1597           .          .    910:	if size == 0 {
  1598           .          .    911:		return unsafe.Pointer(&zerobase)
  1599           .          .    912:	}
  1600           .          .    913:
  1601           .          .    914:	if debug.malloc {
  1602           .          .    915:		if debug.sbrk != 0 {
  1603           .          .    916:			align := uintptr(16)
  1604           .          .    917:			if typ != nil {
  1605           .          .    918:				// TODO(austin): This should be just
  1606           .          .    919:				//   align = uintptr(typ.align)
  1607           .          .    920:				// but that's only 4 on 32-bit platforms,
  1608           .          .    921:				// even if there's a uint64 field in typ (see #599).
  1609           .          .    922:				// This causes 64-bit atomic accesses to panic.
  1610           .          .    923:				// Hence, we use stricter alignment that matches
  1611           .          .    924:				// the normal allocator better.
  1612           .          .    925:				if size&7 == 0 {
  1613           .          .    926:					align = 8
  1614           .          .    927:				} else if size&3 == 0 {
  1615           .          .    928:					align = 4
  1616           .          .    929:				} else if size&1 == 0 {
  1617           .          .    930:					align = 2
  1618           .          .    931:				} else {
  1619           .          .    932:					align = 1
  1620           .          .    933:				}
  1621           .          .    934:			}
  1622           .          .    935:			return persistentalloc(size, align, &memstats.other_sys)
  1623           .          .    936:		}
  1624           .          .    937:
  1625           .          .    938:		if inittrace.active && inittrace.id == getg().goid {
  1626           .          .    939:			// Init functions are executed sequentially in a single Go routine.
  1627           .          .    940:			inittrace.allocs += 1
  1628           .          .    941:		}
  1629           .          .    942:	}
  1630           .          .    943:
  1631           .          .    944:	// assistG is the G to charge for this allocation, or nil if
  1632           .          .    945:	// GC is not currently active.
  1633           .          .    946:	var assistG *g
  1634        10ms       10ms    947:	if gcBlackenEnabled != 0 {
  1635           .          .    948:		// Charge the current user G for this allocation.
  1636           .          .    949:		assistG = getg()
  1637           .          .    950:		if assistG.m.curg != nil {
  1638           .          .    951:			assistG = assistG.m.curg
  1639           .          .    952:		}
  1640           .          .    953:		// Charge the allocation against the G. We'll account
  1641           .          .    954:		// for internal fragmentation at the end of mallocgc.
  1642           .          .    955:		assistG.gcAssistBytes -= int64(size)
  1643           .          .    956:
  1644           .          .    957:		if assistG.gcAssistBytes < 0 {
  1645           .          .    958:			// This G is in debt. Assist the GC to correct
  1646           .          .    959:			// this before allocating. This must happen
  1647           .          .    960:			// before disabling preemption.
  1648           .          .    961:			gcAssistAlloc(assistG)
  1649           .          .    962:		}
  1650           .          .    963:	}
  1651           .          .    964:
  1652           .          .    965:	// Set mp.mallocing to keep from being preempted by GC.
  1653           .          .    966:	mp := acquirem()
  1654        10ms       10ms    967:	if mp.mallocing != 0 {
  1655           .          .    968:		throw("malloc deadlock")
  1656           .          .    969:	}
  1657           .          .    970:	if mp.gsignal == getg() {
  1658           .          .    971:		throw("malloc during signal")
  1659           .          .    972:	}
  1660           .          .    973:	mp.mallocing = 1
  1661           .          .    974:
  1662           .          .    975:	shouldhelpgc := false
  1663           .          .    976:	dataSize := size
  1664           .          .    977:	c := getMCache()
  1665           .          .    978:	if c == nil {
  1666           .          .    979:		throw("mallocgc called without a P or outside bootstrapping")
  1667           .          .    980:	}
  1668           .          .    981:	var span *mspan
  1669           .          .    982:	var x unsafe.Pointer
  1670           .          .    983:	noscan := typ == nil || typ.ptrdata == 0
  1671           .          .    984:	if size <= maxSmallSize {
  1672           .          .    985:		if noscan && size < maxTinySize {
  1673           .          .    986:			// Tiny allocator.
  1674           .          .    987:			//
  1675           .          .    988:			// Tiny allocator combines several tiny allocation requests
  1676           .          .    989:			// into a single memory block. The resulting memory block
  1677           .          .    990:			// is freed when all subobjects are unreachable. The subobjects
  1678           .          .    991:			// must be noscan (don't have pointers), this ensures that
  1679           .          .    992:			// the amount of potentially wasted memory is bounded.
  1680           .          .    993:			//
  1681           .          .    994:			// Size of the memory block used for combining (maxTinySize) is tunable.
  1682           .          .    995:			// Current setting is 16 bytes, which relates to 2x worst case memory
  1683           .          .    996:			// wastage (when all but one subobjects are unreachable).
  1684           .          .    997:			// 8 bytes would result in no wastage at all, but provides less
  1685           .          .    998:			// opportunities for combining.
  1686           .          .    999:			// 32 bytes provides more opportunities for combining,
  1687           .          .   1000:			// but can lead to 4x worst case wastage.
  1688           .          .   1001:			// The best case winning is 8x regardless of block size.
  1689           .          .   1002:			//
  1690           .          .   1003:			// Objects obtained from tiny allocator must not be freed explicitly.
  1691           .          .   1004:			// So when an object will be freed explicitly, we ensure that
  1692           .          .   1005:			// its size >= maxTinySize.
  1693           .          .   1006:			//
  1694           .          .   1007:			// SetFinalizer has a special case for objects potentially coming
  1695           .          .   1008:			// from tiny allocator, in such case it allows to set finalizers
  1696           .          .   1009:			// for an inner byte of a memory block.
  1697           .          .   1010:			//
  1698           .          .   1011:			// The main targets of tiny allocator are small strings and
  1699           .          .   1012:			// standalone escaping variables. On a json benchmark
  1700           .          .   1013:			// the allocator reduces number of allocations by ~12% and
  1701           .          .   1014:			// reduces heap size by ~20%.
  1702           .          .   1015:			off := c.tinyoffset
  1703           .          .   1016:			// Align tiny pointer for required (conservative) alignment.
  1704           .          .   1017:			if size&7 == 0 {
  1705           .          .   1018:				off = alignUp(off, 8)
  1706           .          .   1019:			} else if sys.PtrSize == 4 && size == 12 {
  1707           .          .   1020:				// Conservatively align 12-byte objects to 8 bytes on 32-bit
  1708           .          .   1021:				// systems so that objects whose first field is a 64-bit
  1709           .          .   1022:				// value is aligned to 8 bytes and does not cause a fault on
  1710           .          .   1023:				// atomic access. See issue 37262.
  1711           .          .   1024:				// TODO(mknyszek): Remove this workaround if/when issue 36606
  1712           .          .   1025:				// is resolved.
  1713           .          .   1026:				off = alignUp(off, 8)
  1714           .          .   1027:			} else if size&3 == 0 {
  1715           .          .   1028:				off = alignUp(off, 4)
  1716           .          .   1029:			} else if size&1 == 0 {
  1717           .          .   1030:				off = alignUp(off, 2)
  1718           .          .   1031:			}
  1719           .          .   1032:			if off+size <= maxTinySize && c.tiny != 0 {
  1720           .          .   1033:				// The object fits into existing tiny block.
  1721           .          .   1034:				x = unsafe.Pointer(c.tiny + off)
  1722           .          .   1035:				c.tinyoffset = off + size
  1723           .          .   1036:				c.tinyAllocs++
  1724           .          .   1037:				mp.mallocing = 0
  1725           .          .   1038:				releasem(mp)
  1726           .          .   1039:				return x
  1727           .          .   1040:			}
  1728           .          .   1041:			// Allocate a new maxTinySize block.
  1729           .          .   1042:			span = c.alloc[tinySpanClass]
  1730           .          .   1043:			v := nextFreeFast(span)
  1731           .          .   1044:			if v == 0 {
  1732           .          .   1045:				v, span, shouldhelpgc = c.nextFree(tinySpanClass)
  1733           .          .   1046:			}
  1734           .          .   1047:			x = unsafe.Pointer(v)
  1735        10ms       10ms   1048:			(*[2]uint64)(x)[0] = 0
  1736           .          .   1049:			(*[2]uint64)(x)[1] = 0
  1737           .          .   1050:			// See if we need to replace the existing tiny block with the new one
  1738           .          .   1051:			// based on amount of remaining free space.
  1739           .          .   1052:			if size < c.tinyoffset || c.tiny == 0 {
  1740           .          .   1053:				c.tiny = uintptr(x)
  1741           .          .   1054:				c.tinyoffset = size
  1742           .          .   1055:			}
  1743           .          .   1056:			size = maxTinySize
  1744           .          .   1057:		} else {
  1745           .          .   1058:			var sizeclass uint8
  1746           .          .   1059:			if size <= smallSizeMax-8 {
  1747        20ms       20ms   1060:				sizeclass = size_to_class8[divRoundUp(size, smallSizeDiv)]
  1748           .          .   1061:			} else {
  1749           .          .   1062:				sizeclass = size_to_class128[divRoundUp(size-smallSizeMax, largeSizeDiv)]
  1750           .          .   1063:			}
  1751        10ms       10ms   1064:			size = uintptr(class_to_size[sizeclass])
  1752           .          .   1065:			spc := makeSpanClass(sizeclass, noscan)
  1753           .          .   1066:			span = c.alloc[spc]
  1754           .       30ms   1067:			v := nextFreeFast(span)
  1755           .          .   1068:			if v == 0 {
  1756           .       80ms   1069:				v, span, shouldhelpgc = c.nextFree(spc)
  1757           .          .   1070:			}
  1758           .          .   1071:			x = unsafe.Pointer(v)
  1759           .          .   1072:			if needzero && span.needzero != 0 {
  1760           .          .   1073:				memclrNoHeapPointers(unsafe.Pointer(v), size)
  1761           .          .   1074:			}
  1762           .          .   1075:		}
  1763           .          .   1076:	} else {
  1764           .          .   1077:		shouldhelpgc = true
  1765           .          .   1078:		span = c.allocLarge(size, needzero, noscan)
  1766           .          .   1079:		span.freeindex = 1
  1767           .          .   1080:		span.allocCount = 1
  1768           .          .   1081:		x = unsafe.Pointer(span.base())
  1769           .          .   1082:		size = span.elemsize
  1770           .          .   1083:	}
  1771           .          .   1084:
  1772           .          .   1085:	var scanSize uintptr
  1773           .          .   1086:	if !noscan {
  1774           .          .   1087:		// If allocating a defer+arg block, now that we've picked a malloc size
  1775           .          .   1088:		// large enough to hold everything, cut the "asked for" size down to
  1776           .          .   1089:		// just the defer header, so that the GC bitmap will record the arg block
  1777           .          .   1090:		// as containing nothing at all (as if it were unused space at the end of
  1778           .          .   1091:		// a malloc block caused by size rounding).
  1779           .          .   1092:		// The defer arg areas are scanned as part of scanstack.
  1780           .          .   1093:		if typ == deferType {
  1781           .          .   1094:			dataSize = unsafe.Sizeof(_defer{})
  1782           .          .   1095:		}
  1783        10ms       70ms   1096:		heapBitsSetType(uintptr(x), size, dataSize, typ)
  1784           .          .   1097:		if dataSize > typ.size {
  1785           .          .   1098:			// Array allocation. If there are any
  1786           .          .   1099:			// pointers, GC has to scan to the last
  1787           .          .   1100:			// element.
  1788           .          .   1101:			if typ.ptrdata != 0 {
  1789           .          .   1102:				scanSize = dataSize - typ.size + typ.ptrdata
  1790           .          .   1103:			}
  1791           .          .   1104:		} else {
  1792           .          .   1105:			scanSize = typ.ptrdata
  1793           .          .   1106:		}
  1794           .          .   1107:		c.scanAlloc += scanSize
  1795           .          .   1108:	}
  1796           .          .   1109:
  1797           .          .   1110:	// Ensure that the stores above that initialize x to
  1798           .          .   1111:	// type-safe memory and set the heap bits occur before
  1799           .          .   1112:	// the caller can make x observable to the garbage
  1800           .          .   1113:	// collector. Otherwise, on weakly ordered machines,
  1801           .          .   1114:	// the garbage collector could follow a pointer to x,
  1802           .          .   1115:	// but see uninitialized memory or stale heap bits.
  1803           .          .   1116:	publicationBarrier()
  1804           .          .   1117:
  1805           .          .   1118:	// Allocate black during GC.
  1806           .          .   1119:	// All slots hold nil so no scanning is needed.
  1807           .          .   1120:	// This may be racing with GC so do it atomically if there can be
  1808           .          .   1121:	// a race marking the bit.
  1809        10ms       10ms   1122:	if gcphase != _GCoff {
  1810           .          .   1123:		gcmarknewobject(span, uintptr(x), size, scanSize)
  1811           .          .   1124:	}
  1812           .          .   1125:
  1813           .          .   1126:	if raceenabled {
  1814           .          .   1127:		racemalloc(x, size)
  1815           .          .   1128:	}
  1816           .          .   1129:
  1817           .          .   1130:	if msanenabled {
  1818           .          .   1131:		msanmalloc(x, size)
  1819           .          .   1132:	}
  1820           .          .   1133:
  1821           .          .   1134:	mp.mallocing = 0
  1822           .       20ms   1135:	releasem(mp)
  1823           .          .   1136:
  1824           .          .   1137:	if debug.malloc {
  1825           .          .   1138:		if debug.allocfreetrace != 0 {
  1826           .          .   1139:			tracealloc(x, size, typ)
  1827           .          .   1140:		}
  1828           .          .   1141:
  1829           .          .   1142:		if inittrace.active && inittrace.id == getg().goid {
  1830           .          .   1143:			// Init functions are executed sequentially in a single goroutine.
  1831           .          .   1144:			inittrace.bytes += uint64(size)
  1832           .          .   1145:		}
  1833           .          .   1146:	}
  1834           .          .   1147:
  1835           .          .   1148:	if rate := MemProfileRate; rate > 0 {
  1836        20ms       20ms   1149:		if rate != 1 && size < c.nextSample {
  1837           .          .   1150:			c.nextSample -= size
  1838           .          .   1151:		} else {
  1839           .          .   1152:			mp := acquirem()
  1840           .          .   1153:			profilealloc(mp, x, size)
  1841           .          .   1154:			releasem(mp)
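
The mallocgc listing above is dominated by small, escaping objects: the tiny-allocator branch packs pointer-free objects under 16 bytes into shared blocks, and the size-class branch covers everything up to smallSizeMax. In a JSON decoder this traffic is likely to come from freshly allocated scratch buffers and boxed values. The following is a minimal, illustrative sketch (not code from this repository; quoteFresh, quotePooled and sinkBuf are made-up names) of how reusing a pooled buffer keeps a hot path off this allocator:

package main

import (
	"bytes"
	"fmt"
	"sync"
	"testing"
)

// A pool of reusable buffers. *bytes.Buffer is pointer-shaped, so
// Get and Put do not allocate by themselves.
var bufPool = sync.Pool{New: func() interface{} { return new(bytes.Buffer) }}

var sinkBuf []byte // package-level sink so the result escapes to the heap

// quoteFresh allocates a new byte slice per call, so every call goes
// through the mallocgc paths shown above.
func quoteFresh(s string) []byte {
	b := make([]byte, 0, len(s)+2)
	b = append(b, '"')
	b = append(b, s...)
	return append(b, '"')
}

// quotePooled reuses a pooled buffer; steady-state calls perform no
// heap allocation.
func quotePooled(s string) {
	b := bufPool.Get().(*bytes.Buffer)
	b.Reset()
	b.WriteByte('"')
	b.WriteString(s)
	b.WriteByte('"')
	bufPool.Put(b)
}

func main() {
	fmt.Println("fresh buffer allocs/op :", testing.AllocsPerRun(1000, func() { sinkBuf = quoteFresh("hello") }))
	fmt.Println("pooled buffer allocs/op:", testing.AllocsPerRun(1000, func() { quotePooled("hello") }))
}
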
  1842  ROUTINE ======================== runtime.mapaccess1_faststr in /usr/local/go/src/runtime/map_faststr.go
  1843        20ms       20ms (flat, cum)  0.97% of Total
  1844           .          .     42:		}
  1845           .          .     43:		// long key, try not to do more comparisons than necessary
  1846           .          .     44:		keymaybe := uintptr(bucketCnt)
  1847           .          .     45:		for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
  1848           .          .     46:			k := (*stringStruct)(kptr)
  1849        10ms       10ms     47:			if k.len != key.len || isEmpty(b.tophash[i]) {
  1850           .          .     48:				if b.tophash[i] == emptyRest {
  1851           .          .     49:					break
  1852           .          .     50:				}
  1853           .          .     51:				continue
  1854           .          .     52:			}
  1855           .          .     53:			if k.str == key.str {
  1856           .          .     54:				return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize))
  1857           .          .     55:			}
  1858           .          .     56:			// check first 4 bytes
  1859           .          .     57:			if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) {
  1860           .          .     58:				continue
  1861           .          .     59:			}
  1862           .          .     60:			// check last 4 bytes
  1863           .          .     61:			if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) {
  1864           .          .     62:				continue
  1865           .          .     63:			}
  1866           .          .     64:			if keymaybe != bucketCnt {
  1867           .          .     65:				// Two keys are potential matches. Use hash to distinguish them.
  1868           .          .     66:				goto dohash
  1869           .          .     67:			}
  1870           .          .     68:			keymaybe = i
  1871           .          .     69:		}
  1872           .          .     70:		if keymaybe != bucketCnt {
  1873           .          .     71:			k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize))
  1874        10ms       10ms     72:			if memequal(k.str, key.str, uintptr(key.len)) {
  1875           .          .     73:				return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.elemsize))
  1876           .          .     74:			}
  1877           .          .     75:		}
  1878           .          .     76:		return unsafe.Pointer(&zeroVal[0])
  1879           .          .     77:	}
  1880  ROUTINE ======================== runtime.mapaccess2_faststr in /usr/local/go/src/runtime/map_faststr.go
  1881       120ms      280ms (flat, cum) 13.53% of Total
  1882           .          .    102:		}
  1883           .          .    103:	}
  1884           .          .    104:	return unsafe.Pointer(&zeroVal[0])
  1885           .          .    105:}
  1886           .          .    106:
  1887        10ms       10ms    107:func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
  1888           .          .    108:	if raceenabled && h != nil {
  1889           .          .    109:		callerpc := getcallerpc()
  1890           .          .    110:		racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_faststr))
  1891           .          .    111:	}
  1892           .          .    112:	if h == nil || h.count == 0 {
  1893           .          .    113:		return unsafe.Pointer(&zeroVal[0]), false
  1894           .          .    114:	}
  1895           .          .    115:	if h.flags&hashWriting != 0 {
  1896           .          .    116:		throw("concurrent map read and map write")
  1897           .          .    117:	}
  1898           .          .    118:	key := stringStructOf(&ky)
  1899        10ms       10ms    119:	if h.B == 0 {
  1900           .          .    120:		// One-bucket table.
  1901           .          .    121:		b := (*bmap)(h.buckets)
  1902           .          .    122:		if key.len < 32 {
  1903           .          .    123:			// short key, doing lots of comparisons is ok
  1904           .          .    124:			for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
  1905           .          .    125:				k := (*stringStruct)(kptr)
  1906           .          .    126:				if k.len != key.len || isEmpty(b.tophash[i]) {
  1907           .          .    127:					if b.tophash[i] == emptyRest {
  1908           .          .    128:						break
  1909           .          .    129:					}
  1910           .          .    130:					continue
  1911           .          .    131:				}
  1912           .          .    132:				if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
  1913           .          .    133:					return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize)), true
  1914           .          .    134:				}
  1915           .          .    135:			}
  1916           .          .    136:			return unsafe.Pointer(&zeroVal[0]), false
  1917           .          .    137:		}
  1918           .          .    138:		// long key, try not to do more comparisons than necessary
  1919           .          .    139:		keymaybe := uintptr(bucketCnt)
  1920           .          .    140:		for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
  1921           .          .    141:			k := (*stringStruct)(kptr)
  1922           .          .    142:			if k.len != key.len || isEmpty(b.tophash[i]) {
  1923           .          .    143:				if b.tophash[i] == emptyRest {
  1924           .          .    144:					break
  1925           .          .    145:				}
  1926           .          .    146:				continue
  1927           .          .    147:			}
  1928           .          .    148:			if k.str == key.str {
  1929           .          .    149:				return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize)), true
  1930           .          .    150:			}
  1931           .          .    151:			// check first 4 bytes
  1932           .          .    152:			if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) {
  1933           .          .    153:				continue
  1934           .          .    154:			}
  1935           .          .    155:			// check last 4 bytes
  1936           .          .    156:			if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) {
  1937           .          .    157:				continue
  1938           .          .    158:			}
  1939           .          .    159:			if keymaybe != bucketCnt {
  1940           .          .    160:				// Two keys are potential matches. Use hash to distinguish them.
  1941           .          .    161:				goto dohash
  1942           .          .    162:			}
  1943           .          .    163:			keymaybe = i
  1944           .          .    164:		}
  1945           .          .    165:		if keymaybe != bucketCnt {
  1946           .          .    166:			k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize))
  1947           .          .    167:			if memequal(k.str, key.str, uintptr(key.len)) {
  1948           .          .    168:				return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.elemsize)), true
  1949           .          .    169:			}
  1950           .          .    170:		}
  1951           .          .    171:		return unsafe.Pointer(&zeroVal[0]), false
  1952           .          .    172:	}
  1953           .          .    173:dohash:
  1954        10ms       90ms    174:	hash := t.hasher(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0))
  1955        20ms       40ms    175:	m := bucketMask(h.B)
  1956           .          .    176:	b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
  1957           .          .    177:	if c := h.oldbuckets; c != nil {
  1958           .          .    178:		if !h.sameSizeGrow() {
  1959           .          .    179:			// There used to be half as many buckets; mask down one more power of two.
  1960           .          .    180:			m >>= 1
  1961           .          .    181:		}
  1962           .          .    182:		oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize)))
  1963           .          .    183:		if !evacuated(oldb) {
  1964           .          .    184:			b = oldb
  1965           .          .    185:		}
  1966           .          .    186:	}
  1967           .          .    187:	top := tophash(hash)
  1968           .          .    188:	for ; b != nil; b = b.overflow(t) {
  1969           .          .    189:		for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
  1970           .          .    190:			k := (*stringStruct)(kptr)
  1971        50ms       50ms    191:			if k.len != key.len || b.tophash[i] != top {
  1972           .          .    192:				continue
  1973           .          .    193:			}
  1974        10ms       60ms    194:			if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
  1975        10ms       20ms    195:				return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize)), true
  1976           .          .    196:			}
  1977           .          .    197:		}
  1978           .          .    198:	}
  1979           .          .    199:	return unsafe.Pointer(&zeroVal[0]), false
  1980           .          .    200:}
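
mapaccess2_faststr is the runtime fast path behind every comma-ok lookup in a map keyed by strings; at 13.53% of samples it suggests field names are resolved through such a map on a hot path. The listing shows its two strategies: a one-bucket table with keys shorter than 32 bytes is searched by direct comparison, while everything else hashes the key and walks buckets (the dohash label). A small sketch of the construct that lowers to this routine; fieldIndex and lookup are illustrative names, not identifiers from this repository:

package main

import "fmt"

// fieldIndex stands in for a field-name lookup table. Any comma-ok
// index expression on a map[string]T compiles to a call to
// runtime.mapaccess2_faststr, the routine profiled above.
var fieldIndex = map[string]int{"id": 0, "name": 1, "avatar_url": 2}

func lookup(key string) (int, bool) {
	i, ok := fieldIndex[key] // -> mapaccess2_faststr(t, h, key)
	return i, ok
}

func main() {
	fmt.Println(lookup("name"))    // 1 true
	fmt.Println(lookup("missing")) // 0 false
}
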
  1981  ROUTINE ======================== runtime.markroot in /usr/local/go/src/runtime/mgcmark.go
  1982           0       10ms (flat, cum)  0.48% of Total
  1983           .          .    201:			gp.waitsince = work.tstart
  1984           .          .    202:		}
  1985           .          .    203:
  1986           .          .    204:		// scanstack must be done on the system stack in case
  1987           .          .    205:		// we're trying to scan our own stack.
  1988           .       10ms    206:		systemstack(func() {
  1989           .          .    207:			// If this is a self-scan, put the user G in
  1990           .          .    208:			// _Gwaiting to prevent self-deadlock. It may
  1991           .          .    209:			// already be in _Gwaiting if this is a mark
  1992           .          .    210:			// worker or we're in mark termination.
  1993           .          .    211:			userG := getg().m.curg
  1994  ROUTINE ======================== runtime.markroot.func1 in /usr/local/go/src/runtime/mgcmark.go
  1995           0       10ms (flat, cum)  0.48% of Total
  1996           .          .    228:				return
  1997           .          .    229:			}
  1998           .          .    230:			if gp.gcscandone {
  1999           .          .    231:				throw("g already scanned")
  2000           .          .    232:			}
  2001           .       10ms    233:			scanstack(gp, gcw)
  2002           .          .    234:			gp.gcscandone = true
  2003           .          .    235:			resumeG(stopped)
  2004           .          .    236:
  2005           .          .    237:			if selfScan {
  2006           .          .    238:				casgstatus(userG, _Gwaiting, _Grunning)
  2007  ROUTINE ======================== runtime.mcall in /usr/local/go/src/runtime/asm_amd64.s
  2008           0       70ms (flat, cum)  3.38% of Total
  2009           .          .    322:	MOVQ	SI, g(CX)	// g = m->g0
  2010           .          .    323:	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
  2011           .          .    324:	PUSHQ	AX
  2012           .          .    325:	MOVQ	DI, DX
  2013           .          .    326:	MOVQ	0(DI), DI
  2014           .       70ms    327:	CALL	DI
  2015           .          .    328:	POPQ	AX
  2016           .          .    329:	MOVQ	$runtime·badmcall2(SB), AX
  2017           .          .    330:	JMP	AX
  2018           .          .    331:	RET
  2019           .          .    332:
  2020  ROUTINE ======================== runtime.memclrNoHeapPointers in /usr/local/go/src/runtime/memclr_amd64.s
  2021        50ms       50ms (flat, cum)  2.42% of Total
  2022           .          .     34:	PXOR	X0, X0
  2023           .          .     35:	CMPQ	BX, $32
  2024           .          .     36:	JBE	_17through32
  2025           .          .     37:	CMPQ	BX, $64
  2026           .          .     38:	JBE	_33through64
  2027        10ms       10ms     39:	CMPQ	BX, $128
  2028           .          .     40:	JBE	_65through128
  2029           .          .     41:	CMPQ	BX, $256
  2030           .          .     42:	JBE	_129through256
  2031           .          .     43:	CMPB	internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
  2032           .          .     44:	JE loop_preheader_avx2
  2033           .          .     45:	// TODO: for really big clears, use MOVNTDQ, even without AVX2.
  2034           .          .     46:
  2035           .          .     47:loop:
  2036           .          .     48:	MOVOU	X0, 0(DI)
  2037           .          .     49:	MOVOU	X0, 16(DI)
  2038           .          .     50:	MOVOU	X0, 32(DI)
  2039           .          .     51:	MOVOU	X0, 48(DI)
  2040           .          .     52:	MOVOU	X0, 64(DI)
  2041           .          .     53:	MOVOU	X0, 80(DI)
  2042           .          .     54:	MOVOU	X0, 96(DI)
  2043           .          .     55:	MOVOU	X0, 112(DI)
  2044           .          .     56:	MOVOU	X0, 128(DI)
  2045           .          .     57:	MOVOU	X0, 144(DI)
  2046           .          .     58:	MOVOU	X0, 160(DI)
  2047           .          .     59:	MOVOU	X0, 176(DI)
  2048           .          .     60:	MOVOU	X0, 192(DI)
  2049           .          .     61:	MOVOU	X0, 208(DI)
  2050           .          .     62:	MOVOU	X0, 224(DI)
  2051           .          .     63:	MOVOU	X0, 240(DI)
  2052           .          .     64:	SUBQ	$256, BX
  2053           .          .     65:	ADDQ	$256, DI
  2054           .          .     66:	CMPQ	BX, $256
  2055           .          .     67:	JAE	loop
  2056           .          .     68:	JMP	tail
  2057           .          .     69:
  2058           .          .     70:loop_preheader_avx2:
  2059           .          .     71:	VPXOR Y0, Y0, Y0
  2060           .          .     72:	// For smaller sizes MOVNTDQ may be faster or slower depending on hardware.
  2061           .          .     73:	// For larger sizes it is always faster, even on dual Xeons with 30M cache.
  2062           .          .     74:	// TODO take into account actual LLC size. E. g. glibc uses LLC size/2.
  2063           .          .     75:	CMPQ    BX, $0x2000000
  2064           .          .     76:	JAE     loop_preheader_avx2_huge
  2065           .          .     77:loop_avx2:
  2066        10ms       10ms     78:	VMOVDQU	Y0, 0(DI)
  2067        20ms       20ms     79:	VMOVDQU	Y0, 32(DI)
  2068           .          .     80:	VMOVDQU	Y0, 64(DI)
  2069        10ms       10ms     81:	VMOVDQU	Y0, 96(DI)
  2070           .          .     82:	SUBQ	$128, BX
  2071           .          .     83:	ADDQ	$128, DI
  2072           .          .     84:	CMPQ	BX, $128
  2073           .          .     85:	JAE	loop_avx2
  2074           .          .     86:	VMOVDQU  Y0, -32(DI)(BX*1)
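
memclrNoHeapPointers is where zeroing happens: mallocgc calls it for spans that need zeroing (the needzero branch listed earlier), and the compiler also lowers the common clearing loop to it. A minimal sketch of that idiom; zero is an illustrative name:

package main

import "fmt"

// For element types without pointers the compiler recognizes this
// exact loop shape and replaces it with a single call to
// runtime.memclrNoHeapPointers, the routine profiled above.
func zero(buf []byte) {
	for i := range buf {
		buf[i] = 0
	}
}

func main() {
	b := []byte("scratch")
	zero(b)
	fmt.Printf("%q\n", b) // seven NUL bytes
}
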
  2075  ROUTINE ======================== runtime.memequal in /usr/local/go/src/internal/bytealg/equal_amd64.s
  2076        10ms       10ms (flat, cum)  0.48% of Total
  2077           .          .      5:#include "go_asm.h"
  2078           .          .      6:#include "textflag.h"
  2079           .          .      7:
  2080           .          .      8:// memequal(a, b unsafe.Pointer, size uintptr) bool
  2081           .          .      9:TEXT runtime·memequal(SB),NOSPLIT,$0-25
  2082        10ms       10ms     10:	MOVQ	a+0(FP), SI
  2083           .          .     11:	MOVQ	b+8(FP), DI
  2084           .          .     12:	CMPQ	SI, DI
  2085           .          .     13:	JEQ	eq
  2086           .          .     14:	MOVQ	size+16(FP), BX
  2087           .          .     15:	LEAQ	ret+24(FP), AX
  2088  ROUTINE ======================== runtime.memmove in /usr/local/go/src/runtime/memmove_amd64.s
  2089        40ms       40ms (flat, cum)  1.93% of Total
  2090           .          .     50:	// BSR+branch table make almost all memmove/memclr benchmarks worse. Not worth doing.
  2091           .          .     51:	TESTQ	BX, BX
  2092           .          .     52:	JEQ	move_0
  2093           .          .     53:	CMPQ	BX, $2
  2094           .          .     54:	JBE	move_1or2
  2095        10ms       10ms     55:	CMPQ	BX, $4
  2096           .          .     56:	JB	move_3
  2097           .          .     57:	JBE	move_4
  2098           .          .     58:	CMPQ	BX, $8
  2099           .          .     59:	JB	move_5through7
  2100           .          .     60:	JE	move_8
  2101           .          .     61:	CMPQ	BX, $16
  2102           .          .     62:	JBE	move_9through16
  2103           .          .     63:	CMPQ	BX, $32
  2104           .          .     64:	JBE	move_17through32
  2105           .          .     65:	CMPQ	BX, $64
  2106           .          .     66:	JBE	move_33through64
  2107           .          .     67:	CMPQ	BX, $128
  2108           .          .     68:	JBE	move_65through128
  2109           .          .     69:	CMPQ	BX, $256
  2110           .          .     70:	JBE	move_129through256
  2111           .          .     71:
  2112           .          .     72:	TESTB	$1, runtime·useAVXmemmove(SB)
  2113           .          .     73:	JNZ	avxUnaligned
  2114           .          .     74:
  2115           .          .     75:/*
  2116           .          .     76: * check and set for backwards
  2117           .          .     77: */
  2118           .          .     78:	CMPQ	SI, DI
  2119           .          .     79:	JLS	back
  2120           .          .     80:
  2121           .          .     81:/*
  2122           .          .     82: * forward copy loop
  2123           .          .     83: */
  2124           .          .     84:forward:
  2125           .          .     85:	CMPQ	BX, $2048
  2126           .          .     86:	JLS	move_256through2048
  2127           .          .     87:
  2128           .          .     88:	// If REP MOVSB isn't fast, don't use it
  2129           .          .     89:	CMPB	internal∕cpu·X86+const_offsetX86HasERMS(SB), $1 // enhanced REP MOVSB/STOSB
  2130           .          .     90:	JNE	fwdBy8
  2131           .          .     91:
  2132           .          .     92:	// Check alignment
  2133           .          .     93:	MOVL	SI, AX
  2134           .          .     94:	ORL	DI, AX
  2135           .          .     95:	TESTL	$7, AX
  2136           .          .     96:	JEQ	fwdBy8
  2137           .          .     97:
  2138           .          .     98:	// Do 1 byte at a time
  2139           .          .     99:	MOVQ	BX, CX
  2140           .          .    100:	REP;	MOVSB
  2141           .          .    101:	RET
  2142           .          .    102:
  2143           .          .    103:fwdBy8:
  2144           .          .    104:	// Do 8 bytes at a time
  2145           .          .    105:	MOVQ	BX, CX
  2146           .          .    106:	SHRQ	$3, CX
  2147           .          .    107:	ANDQ	$7, BX
  2148           .          .    108:	REP;	MOVSQ
  2149           .          .    109:	JMP	tail
  2150           .          .    110:
  2151           .          .    111:back:
  2152           .          .    112:/*
  2153           .          .    113: * check overlap
  2154           .          .    114: */
  2155           .          .    115:	MOVQ	SI, CX
  2156           .          .    116:	ADDQ	BX, CX
  2157           .          .    117:	CMPQ	CX, DI
  2158           .          .    118:	JLS	forward
  2159           .          .    119:/*
  2160           .          .    120: * whole thing backwards has
  2161           .          .    121: * adjusted addresses
  2162           .          .    122: */
  2163           .          .    123:	ADDQ	BX, DI
  2164           .          .    124:	ADDQ	BX, SI
  2165           .          .    125:	STD
  2166           .          .    126:
  2167           .          .    127:/*
  2168           .          .    128: * copy
  2169           .          .    129: */
  2170           .          .    130:	MOVQ	BX, CX
  2171           .          .    131:	SHRQ	$3, CX
  2172           .          .    132:	ANDQ	$7, BX
  2173           .          .    133:
  2174           .          .    134:	SUBQ	$8, DI
  2175           .          .    135:	SUBQ	$8, SI
  2176           .          .    136:	REP;	MOVSQ
  2177           .          .    137:
  2178           .          .    138:	CLD
  2179           .          .    139:	ADDQ	$8, DI
  2180           .          .    140:	ADDQ	$8, SI
  2181           .          .    141:	SUBQ	BX, DI
  2182           .          .    142:	SUBQ	BX, SI
  2183           .          .    143:	JMP	tail
  2184           .          .    144:
  2185           .          .    145:move_1or2:
  2186           .          .    146:	MOVB	(SI), AX
  2187           .          .    147:	MOVB	-1(SI)(BX*1), CX
  2188        10ms       10ms    148:	MOVB	AX, (DI)
  2189           .          .    149:	MOVB	CX, -1(DI)(BX*1)
  2190           .          .    150:	RET
  2191           .          .    151:move_0:
  2192           .          .    152:	RET
  2193           .          .    153:move_4:
  2194           .          .    154:	MOVL	(SI), AX
  2195           .          .    155:	MOVL	AX, (DI)
  2196           .          .    156:	RET
  2197           .          .    157:move_3:
  2198           .          .    158:	MOVW	(SI), AX
  2199           .          .    159:	MOVB	2(SI), CX
  2200           .          .    160:	MOVW	AX, (DI)
  2201           .          .    161:	MOVB	CX, 2(DI)
  2202           .          .    162:	RET
  2203           .          .    163:move_5through7:
  2204           .          .    164:	MOVL	(SI), AX
  2205           .          .    165:	MOVL	-4(SI)(BX*1), CX
  2206           .          .    166:	MOVL	AX, (DI)
  2207           .          .    167:	MOVL	CX, -4(DI)(BX*1)
  2208           .          .    168:	RET
  2209           .          .    169:move_8:
  2210           .          .    170:	// We need a separate case for 8 to make sure we write pointers atomically.
  2211           .          .    171:	MOVQ	(SI), AX
  2212           .          .    172:	MOVQ	AX, (DI)
  2213           .          .    173:	RET
  2214           .          .    174:move_9through16:
  2215           .          .    175:	MOVQ	(SI), AX
  2216           .          .    176:	MOVQ	-8(SI)(BX*1), CX
  2217           .          .    177:	MOVQ	AX, (DI)
  2218           .          .    178:	MOVQ	CX, -8(DI)(BX*1)
  2219           .          .    179:	RET
  2220           .          .    180:move_17through32:
  2221           .          .    181:	MOVOU	(SI), X0
  2222           .          .    182:	MOVOU	-16(SI)(BX*1), X1
  2223           .          .    183:	MOVOU	X0, (DI)
  2224           .          .    184:	MOVOU	X1, -16(DI)(BX*1)
  2225           .          .    185:	RET
  2226           .          .    186:move_33through64:
  2227           .          .    187:	MOVOU	(SI), X0
  2228           .          .    188:	MOVOU	16(SI), X1
  2229           .          .    189:	MOVOU	-32(SI)(BX*1), X2
  2230        10ms       10ms    190:	MOVOU	-16(SI)(BX*1), X3
  2231           .          .    191:	MOVOU	X0, (DI)
  2232           .          .    192:	MOVOU	X1, 16(DI)
  2233           .          .    193:	MOVOU	X2, -32(DI)(BX*1)
  2234           .          .    194:	MOVOU	X3, -16(DI)(BX*1)
  2235           .          .    195:	RET
  2236           .          .    196:move_65through128:
  2237           .          .    197:	MOVOU	(SI), X0
  2238           .          .    198:	MOVOU	16(SI), X1
  2239           .          .    199:	MOVOU	32(SI), X2
  2240           .          .    200:	MOVOU	48(SI), X3
  2241           .          .    201:	MOVOU	-64(SI)(BX*1), X4
  2242           .          .    202:	MOVOU	-48(SI)(BX*1), X5
  2243           .          .    203:	MOVOU	-32(SI)(BX*1), X6
  2244           .          .    204:	MOVOU	-16(SI)(BX*1), X7
  2245           .          .    205:	MOVOU	X0, (DI)
  2246           .          .    206:	MOVOU	X1, 16(DI)
  2247           .          .    207:	MOVOU	X2, 32(DI)
  2248           .          .    208:	MOVOU	X3, 48(DI)
  2249           .          .    209:	MOVOU	X4, -64(DI)(BX*1)
  2250           .          .    210:	MOVOU	X5, -48(DI)(BX*1)
  2251           .          .    211:	MOVOU	X6, -32(DI)(BX*1)
  2252           .          .    212:	MOVOU	X7, -16(DI)(BX*1)
  2253           .          .    213:	RET
  2254           .          .    214:move_129through256:
  2255        10ms       10ms    215:	MOVOU	(SI), X0
  2256           .          .    216:	MOVOU	16(SI), X1
  2257           .          .    217:	MOVOU	32(SI), X2
  2258           .          .    218:	MOVOU	48(SI), X3
  2259           .          .    219:	MOVOU	64(SI), X4
  2260           .          .    220:	MOVOU	80(SI), X5
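
runtime.memmove backs copy, append growth and string/[]byte conversions; the length-dispatch labels in the listing (move_1or2 through move_129through256, then the REP MOVSB and AVX paths) show how small copies take a short branchy path instead of a loop. A tiny sketch; the variable names are illustrative:

package main

import "fmt"

func main() {
	src := []byte(`{"name":"value"}`)
	dst := make([]byte, len(src))
	n := copy(dst, src) // compiles to a runtime.memmove call
	fmt.Println(n, string(dst))
}
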
  2261  ROUTINE ======================== runtime.mstart in /usr/local/go/src/runtime/proc.go
  2262           0      230ms (flat, cum) 11.11% of Total
  2263           .          .   1241:// May run during STW (because it doesn't have a P yet), so write
  2264           .          .   1242:// barriers are not allowed.
  2265           .          .   1243://
  2266           .          .   1244://go:nosplit
  2267           .          .   1245://go:nowritebarrierrec
  2268           .      230ms   1246:func mstart() {
  2269           .          .   1247:	_g_ := getg()
  2270           .          .   1248:
  2271           .          .   1249:	osStack := _g_.stack.lo == 0
  2272           .          .   1250:	if osStack {
  2273           .          .   1251:		// Initialize stack bounds from system stack.
  2274  ROUTINE ======================== runtime.nanotime in /usr/local/go/src/runtime/time_nofake.go
  2275           0       10ms (flat, cum)  0.48% of Total
  2276           .          .     14:// Zero means not to use faketime.
  2277           .          .     15:var faketime int64
  2278           .          .     16:
  2279           .          .     17://go:nosplit
  2280           .          .     18:func nanotime() int64 {
  2281           .       10ms     19:	return nanotime1()
  2282           .          .     20:}
  2283           .          .     21:
  2284           .          .     22:func walltime() (sec int64, nsec int32) {
  2285           .          .     23:	return walltime1()
  2286           .          .     24:}
  2287  ROUTINE ======================== runtime.nanotime1 in /usr/local/go/src/runtime/sys_darwin.go
  2288        10ms       10ms (flat, cum)  0.48% of Total
  2289           .          .    242:func open_trampoline()
  2290           .          .    243:
  2291           .          .    244://go:nosplit
  2292           .          .    245://go:cgo_unsafe_args
  2293           .          .    246:func nanotime1() int64 {
  2294        10ms       10ms    247:	var r struct {
  2295           .          .    248:		t            int64  // raw timer
  2296           .          .    249:		numer, denom uint32 // conversion factors. nanoseconds = t * numer / denom.
  2297           .          .    250:	}
  2298           .          .    251:	libcCall(unsafe.Pointer(funcPC(nanotime_trampoline)), unsafe.Pointer(&r))
  2299           .          .    252:	// Note: Apple seems unconcerned about overflow here. See
  2300  ROUTINE ======================== runtime.netpoll in /usr/local/go/src/runtime/netpoll_kqueue.go
  2301           0       90ms (flat, cum)  4.35% of Total
  2302           .          .    122:		}
  2303           .          .    123:		tp = &ts
  2304           .          .    124:	}
  2305           .          .    125:	var events [64]keventt
  2306           .          .    126:retry:
  2307           .       90ms    127:	n := kevent(kq, nil, 0, &events[0], int32(len(events)), tp)
  2308           .          .    128:	if n < 0 {
  2309           .          .    129:		if n != -_EINTR {
  2310           .          .    130:			println("runtime: kevent on fd", kq, "failed with", -n)
  2311           .          .    131:			throw("runtime: netpoll failed")
  2312           .          .    132:		}
  2313  ROUTINE ======================== runtime.newobject in /usr/local/go/src/runtime/malloc.go
  2314        10ms      290ms (flat, cum) 14.01% of Total
  2315           .          .   1172:
  2316           .          .   1173:// implementation of new builtin
  2317           .          .   1174:// compiler (both frontend and SSA backend) knows the signature
  2318           .          .   1175:// of this function
  2319           .          .   1176:func newobject(typ *_type) unsafe.Pointer {
  2320        10ms      290ms   1177:	return mallocgc(typ.size, typ, true)
  2321           .          .   1178:}
  2322           .          .   1179:
  2323           .          .   1180://go:linkname reflect_unsafe_New reflect.unsafe_New
  2324           .          .   1181:func reflect_unsafe_New(typ *_type) unsafe.Pointer {
  2325           .          .   1182:	return mallocgc(typ.size, typ, true)
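
newobject is the runtime entry point behind new(T) and escaping composite literals; as the listing shows, it simply forwards to mallocgc with needzero set, which is why its cumulative cost (14.01%) closely tracks the allocator listing earlier in this profile. A minimal sketch of a value allocated through it; node and leak are made-up names:

package main

import "fmt"

type node struct {
	Key string
	Val int
}

// Returning the literal's address makes it escape, so the compiler
// emits a runtime.newobject call for node's type descriptor, which in
// turn calls mallocgc(typ.size, typ, true).
func leak() *node {
	return &node{Key: "k", Val: 1}
}

func main() {
	fmt.Println(leak())
}
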
  2326  ROUTINE ======================== runtime.nextFreeFast in /usr/local/go/src/runtime/malloc.go
  2327        30ms       30ms (flat, cum)  1.45% of Total
  2328           .          .    841:var zerobase uintptr
  2329           .          .    842:
  2330           .          .    843:// nextFreeFast returns the next free object if one is quickly available.
  2331           .          .    844:// Otherwise it returns 0.
  2332           .          .    845:func nextFreeFast(s *mspan) gclinkptr {
  2333        30ms       30ms    846:	theBit := sys.Ctz64(s.allocCache) // Is there a free object in the allocCache?
  2334           .          .    847:	if theBit < 64 {
  2335           .          .    848:		result := s.freeindex + uintptr(theBit)
  2336           .          .    849:		if result < s.nelems {
  2337           .          .    850:			freeidx := result + 1
  2338           .          .    851:			if freeidx%64 == 0 && freeidx != s.nelems {
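
nextFreeFast finds a free slot by counting the trailing zero bits of the span's allocCache bitmap, which keeps the common allocation case down to a few instructions. The same bit trick in ordinary Go, with an illustrative allocCache value rather than a real span's cache:

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	// Set bits mark free object slots relative to s.freeindex.
	var allocCache uint64 = 0b1011_0000
	theBit := bits.TrailingZeros64(allocCache) // sys.Ctz64 in the runtime
	fmt.Println("first free slot at offset", theBit) // prints 4
}
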
  2339  ROUTINE ======================== runtime.notesleep in /usr/local/go/src/runtime/lock_sema.go
  2340           0       40ms (flat, cum)  1.93% of Total
  2341           .          .    176:		return
  2342           .          .    177:	}
  2343           .          .    178:	// Queued. Sleep.
  2344           .          .    179:	gp.m.blocked = true
  2345           .          .    180:	if *cgo_yield == nil {
  2346           .       40ms    181:		semasleep(-1)
  2347           .          .    182:	} else {
  2348           .          .    183:		// Sleep for an arbitrary-but-moderate interval to poll libc interceptors.
  2349           .          .    184:		const ns = 10e6
  2350           .          .    185:		for atomic.Loaduintptr(&n.key) == 0 {
  2351           .          .    186:			semasleep(ns)
  2352  ROUTINE ======================== runtime.park_m in /usr/local/go/src/runtime/proc.go
  2353           0       70ms (flat, cum)  3.38% of Total
  2354           .          .   3302:
  2355           .          .   3303:	casgstatus(gp, _Grunning, _Gwaiting)
  2356           .          .   3304:	dropg()
  2357           .          .   3305:
  2358           .          .   3306:	if fn := _g_.m.waitunlockf; fn != nil {
  2359           .       10ms   3307:		ok := fn(gp, _g_.m.waitlock)
  2360           .          .   3308:		_g_.m.waitunlockf = nil
  2361           .          .   3309:		_g_.m.waitlock = nil
  2362           .          .   3310:		if !ok {
  2363           .          .   3311:			if trace.enabled {
  2364           .          .   3312:				traceGoUnpark(gp, 2)
  2365           .          .   3313:			}
  2366           .          .   3314:			casgstatus(gp, _Gwaiting, _Grunnable)
  2367           .          .   3315:			execute(gp, true) // Schedule it back, never returns.
  2368           .          .   3316:		}
  2369           .          .   3317:	}
  2370           .       60ms   3318:	schedule()
  2371           .          .   3319:}
  2372           .          .   3320:
  2373           .          .   3321:func goschedImpl(gp *g) {
  2374           .          .   3322:	status := readgstatus(gp)
  2375           .          .   3323:	if status&^_Gscan != _Grunning {
  2376  ROUTINE ======================== runtime.preemptM in /usr/local/go/src/runtime/signal_unix.go
  2377           0       30ms (flat, cum)  1.45% of Total
  2378           .          .    364:		// If multiple threads are preempting the same M, it may send many
  2379           .          .    365:		// signals to the same M such that it hardly make progress, causing
  2380           .          .    366:		// live-lock problem. Apparently this could happen on darwin. See
  2381           .          .    367:		// issue #37741.
  2382           .          .    368:		// Only send a signal if there isn't already one pending.
  2383           .       30ms    369:		signalM(mp, sigPreempt)
  2384           .          .    370:	}
  2385           .          .    371:
  2386           .          .    372:	if GOOS == "darwin" || GOOS == "ios" {
  2387           .          .    373:		execLock.runlock()
  2388           .          .    374:	}
  2389  ROUTINE ======================== runtime.preemptone in /usr/local/go/src/runtime/proc.go
  2390           0       30ms (flat, cum)  1.45% of Total
  2391           .          .   5420:	gp.stackguard0 = stackPreempt
  2392           .          .   5421:
  2393           .          .   5422:	// Request an async preemption of this P.
  2394           .          .   5423:	if preemptMSupported && debug.asyncpreemptoff == 0 {
  2395           .          .   5424:		_p_.preempt = true
  2396           .       30ms   5425:		preemptM(mp)
  2397           .          .   5426:	}
  2398           .          .   5427:
  2399           .          .   5428:	return true
  2400           .          .   5429:}
  2401           .          .   5430:
  2402  ROUTINE ======================== runtime.pthread_cond_wait in /usr/local/go/src/runtime/sys_darwin.go
  2403        40ms       40ms (flat, cum)  1.93% of Total
  2404           .          .    379:func pthread_cond_init_trampoline()
  2405           .          .    380:
  2406           .          .    381://go:nosplit
  2407           .          .    382://go:cgo_unsafe_args
  2408           .          .    383:func pthread_cond_wait(c *pthreadcond, m *pthreadmutex) int32 {
  2409        40ms       40ms    384:	return libcCall(unsafe.Pointer(funcPC(pthread_cond_wait_trampoline)), unsafe.Pointer(&c))
  2410           .          .    385:}
  2411           .          .    386:func pthread_cond_wait_trampoline()
  2412           .          .    387:
  2413           .          .    388://go:nosplit
  2414           .          .    389://go:cgo_unsafe_args
  2415  ROUTINE ======================== runtime.pthread_kill in /usr/local/go/src/runtime/sys_darwin.go
  2416        30ms       30ms (flat, cum)  1.45% of Total
  2417           .          .    143:func pthread_self_trampoline()
  2418           .          .    144:
  2419           .          .    145://go:nosplit
  2420           .          .    146://go:cgo_unsafe_args
  2421           .          .    147:func pthread_kill(t pthread, sig uint32) {
  2422        30ms       30ms    148:	libcCall(unsafe.Pointer(funcPC(pthread_kill_trampoline)), unsafe.Pointer(&t))
  2423           .          .    149:	return
  2424           .          .    150:}
  2425           .          .    151:func pthread_kill_trampoline()
  2426           .          .    152:
  2427           .          .    153:// mmap is used to do low-level memory allocation via mmap. Don't allow stack
  2428  ROUTINE ======================== runtime.rawstring in /usr/local/go/src/runtime/string.go
  2429           0       30ms (flat, cum)  1.45% of Total
  2430           .          .    258:// rawstring allocates storage for a new string. The returned
  2431           .          .    259:// string and byte slice both refer to the same storage.
  2432           .          .    260:// The storage is not zeroed. Callers should use
  2433           .          .    261:// b to set the string contents and then drop b.
  2434           .          .    262:func rawstring(size int) (s string, b []byte) {
  2435           .       30ms    263:	p := mallocgc(uintptr(size), nil, false)
  2436           .          .    264:
  2437           .          .    265:	stringStructOf(&s).str = p
  2438           .          .    266:	stringStructOf(&s).len = size
  2439           .          .    267:
  2440           .          .    268:	*(*slice)(unsafe.Pointer(&b)) = slice{p, size, size}
  2441  ROUTINE ======================== runtime.rawstringtmp in /usr/local/go/src/runtime/string.go
  2442        10ms       40ms (flat, cum)  1.93% of Total
  2443           .          .    122:	stk := getg().stack
  2444           .          .    123:	return stk.lo <= ptr && ptr < stk.hi
  2445           .          .    124:}
  2446           .          .    125:
  2447           .          .    126:func rawstringtmp(buf *tmpBuf, l int) (s string, b []byte) {
  2448        10ms       10ms    127:	if buf != nil && l <= len(buf) {
  2449           .          .    128:		b = buf[:l]
  2450           .          .    129:		s = slicebytetostringtmp(&b[0], len(b))
  2451           .          .    130:	} else {
  2452           .       30ms    131:		s, b = rawstring(l)
  2453           .          .    132:	}
  2454           .          .    133:	return
  2455           .          .    134:}
  2456           .          .    135:
  2457           .          .    136:// slicebytetostringtmp returns a "string" referring to the actual []byte bytes.
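
rawstring and rawstringtmp appear whenever a []byte is converted to a string that outlives the expression, costing one allocation plus a memmove per conversion. When the string is only needed transiently, for example as a map lookup key, the compiler uses the zero-copy slicebytetostringtmp path named at the end of the listing. A small sketch contrasting the two; m, key and sink are illustrative names:

package main

import (
	"fmt"
	"testing"
)

var sink string // package-level sink so the converted string is retained

func main() {
	m := map[string]int{"name": 1}
	key := []byte("name")

	// The key only lives for the lookup, so the compiler skips the copy.
	a := testing.AllocsPerRun(1000, func() { _ = m[string(key)] })
	fmt.Println("transient conversion allocs/op:", a)

	// A retained result goes through slicebytetostring -> rawstring + memmove.
	a = testing.AllocsPerRun(1000, func() { sink = string(key) })
	fmt.Println("retained conversion allocs/op: ", a)
}
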
  2458  ROUTINE ======================== runtime.releasem in /usr/local/go/src/runtime/runtime1.go
  2459        20ms       20ms (flat, cum)  0.97% of Total
  2460           .          .    471:}
  2461           .          .    472:
  2462           .          .    473://go:nosplit
  2463           .          .    474:func releasem(mp *m) {
  2464           .          .    475:	_g_ := getg()
  2465        10ms       10ms    476:	mp.locks--
  2466        10ms       10ms    477:	if mp.locks == 0 && _g_.preempt {
  2467           .          .    478:		// restore the preemption request in case we've cleared it in newstack
  2468           .          .    479:		_g_.stackguard0 = stackPreempt
  2469           .          .    480:	}
  2470           .          .    481:}
  2471           .          .    482:
  2472  ROUTINE ======================== runtime.scanblock in /usr/local/go/src/runtime/mgcmark.go
  2473        10ms       10ms (flat, cum)  0.48% of Total
  2474           .          .   1176:		bits := uint32(*addb(ptrmask, i/(sys.PtrSize*8)))
  2475           .          .   1177:		if bits == 0 {
  2476           .          .   1178:			i += sys.PtrSize * 8
  2477           .          .   1179:			continue
  2478           .          .   1180:		}
  2479        10ms       10ms   1181:		for j := 0; j < 8 && i < n; j++ {
  2480           .          .   1182:			if bits&1 != 0 {
  2481           .          .   1183:				// Same work as in scanobject; see comments there.
  2482           .          .   1184:				p := *(*uintptr)(unsafe.Pointer(b + i))
  2483           .          .   1185:				if p != 0 {
  2484           .          .   1186:					if obj, span, objIndex := findObject(p, b, i); obj != 0 {
  2485  ROUTINE ======================== runtime.scanframeworker in /usr/local/go/src/runtime/mgcmark.go
  2486           0       10ms (flat, cum)  0.48% of Total
  2487           .          .    913:	locals, args, objs := getStackMap(frame, &state.cache, false)
  2488           .          .    914:
  2489           .          .    915:	// Scan local variables if stack frame has been allocated.
  2490           .          .    916:	if locals.n > 0 {
  2491           .          .    917:		size := uintptr(locals.n) * sys.PtrSize
  2492           .       10ms    918:		scanblock(frame.varp-size, size, locals.bytedata, gcw, state)
  2493           .          .    919:	}
  2494           .          .    920:
  2495           .          .    921:	// Scan arguments.
  2496           .          .    922:	if args.n > 0 {
  2497           .          .    923:		scanblock(frame.argp, uintptr(args.n)*sys.PtrSize, args.bytedata, gcw, state)
  2498  ROUTINE ======================== runtime.scanobject in /usr/local/go/src/runtime/mgcmark.go
  2499        10ms       10ms (flat, cum)  0.48% of Total
  2500           .          .   1258:			// Avoid needless hbits.next() on last iteration.
  2501           .          .   1259:			hbits = hbits.next()
  2502           .          .   1260:		}
  2503           .          .   1261:		// Load bits once. See CL 22712 and issue 16973 for discussion.
  2504           .          .   1262:		bits := hbits.bits()
  2505        10ms       10ms   1263:		if bits&bitScan == 0 {
  2506           .          .   1264:			break // no more pointers in this object
  2507           .          .   1265:		}
  2508           .          .   1266:		if bits&bitPointer == 0 {
  2509           .          .   1267:			continue // not a pointer
  2510           .          .   1268:		}
  2511  ROUTINE ======================== runtime.scanstack in /usr/local/go/src/runtime/mgcmark.go
  2512           0       10ms (flat, cum)  0.48% of Total
  2513           .          .    744:	// Scan the stack. Accumulate a list of stack objects.
  2514           .          .    745:	scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
  2515           .          .    746:		scanframeworker(frame, &state, gcw)
  2516           .          .    747:		return true
  2517           .          .    748:	}
  2518           .       10ms    749:	gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
  2519           .          .    750:
  2520           .          .    751:	// Find additional pointers that point into the stack from the heap.
  2521           .          .    752:	// Currently this includes defers and panics. See also function copystack.
  2522           .          .    753:
  2523           .          .    754:	// Find and trace all defer arguments.
  2524  ROUTINE ======================== runtime.scanstack.func1 in /usr/local/go/src/runtime/mgcmark.go
  2525           0       10ms (flat, cum)  0.48% of Total
  2526           .          .    741:		scanblock(uintptr(unsafe.Pointer(&gp.sched.ctxt)), sys.PtrSize, &oneptrmask[0], gcw, &state)
  2527           .          .    742:	}
  2528           .          .    743:
  2529           .          .    744:	// Scan the stack. Accumulate a list of stack objects.
  2530           .          .    745:	scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
  2531           .       10ms    746:		scanframeworker(frame, &state, gcw)
  2532           .          .    747:		return true
  2533           .          .    748:	}
  2534           .          .    749:	gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
  2535           .          .    750:
  2536           .          .    751:	// Find additional pointers that point into the stack from the heap.
  2537  ROUTINE ======================== runtime.schedule in /usr/local/go/src/runtime/proc.go
  2538           0       60ms (flat, cum)  2.90% of Total
  2539           .          .   3164:		gp, inheritTime = runqget(_g_.m.p.ptr())
  2540           .          .   3165:		// We can see gp != nil here even if the M is spinning,
  2541           .          .   3166:		// if checkTimers added a local goroutine via goready.
  2542           .          .   3167:	}
  2543           .          .   3168:	if gp == nil {
  2544           .       60ms   3169:		gp, inheritTime = findrunnable() // blocks until work is available
  2545           .          .   3170:	}
  2546           .          .   3171:
  2547           .          .   3172:	// This thread is going to run a goroutine and is not spinning anymore,
  2548           .          .   3173:	// so if it was marked as spinning we need to reset it now and potentially
  2549           .          .   3174:	// start a new spinning M.
  2550  ROUTINE ======================== runtime.semasleep in /usr/local/go/src/runtime/os_darwin.go
  2551           0       40ms (flat, cum)  1.93% of Total
  2552           .          .     58:			if err == _ETIMEDOUT {
  2553           .          .     59:				pthread_mutex_unlock(&mp.mutex)
  2554           .          .     60:				return -1
  2555           .          .     61:			}
  2556           .          .     62:		} else {
  2557           .       40ms     63:			pthread_cond_wait(&mp.cond, &mp.mutex)
  2558           .          .     64:		}
  2559           .          .     65:	}
  2560           .          .     66:}
  2561           .          .     67:
  2562           .          .     68://go:nosplit
  2563  ROUTINE ======================== runtime.signalM in /usr/local/go/src/runtime/os_darwin.go
  2564           0       30ms (flat, cum)  1.45% of Total
  2565           .          .    429:		executablePath = executablePath[len(prefix):]
  2566           .          .    430:	}
  2567           .          .    431:}
  2568           .          .    432:
  2569           .          .    433:func signalM(mp *m, sig int) {
  2570           .       30ms    434:	pthread_kill(pthread(mp.procid), uint32(sig))
  2571           .          .    435:}
  2572  ROUTINE ======================== runtime.startTheWorld.func1 in /usr/local/go/src/runtime/proc.go
  2573           0       10ms (flat, cum)  0.48% of Total
  2574           .          .    998:	})
  2575           .          .    999:}
  2576           .          .   1000:
  2577           .          .   1001:// startTheWorld undoes the effects of stopTheWorld.
  2578           .          .   1002:func startTheWorld() {
  2579           .       10ms   1003:	systemstack(func() { startTheWorldWithSema(false) })
  2580           .          .   1004:
  2581           .          .   1005:	// worldsema must be held over startTheWorldWithSema to ensure
  2582           .          .   1006:	// gomaxprocs cannot change while worldsema is held.
  2583           .          .   1007:	//
  2584           .          .   1008:	// Release worldsema with direct handoff to the next waiter, but
  2585  ROUTINE ======================== runtime.startTheWorldWithSema in /usr/local/go/src/runtime/proc.go
  2586           0       80ms (flat, cum)  3.86% of Total
  2587           .          .   1151:func startTheWorldWithSema(emitTraceEvent bool) int64 {
  2588           .          .   1152:	assertWorldStopped()
  2589           .          .   1153:
  2590           .          .   1154:	mp := acquirem() // disable preemption because it can be holding p in a local var
  2591           .          .   1155:	if netpollinited() {
  2592           .       80ms   1156:		list := netpoll(0) // non-blocking
  2593           .          .   1157:		injectglist(&list)
  2594           .          .   1158:	}
  2595           .          .   1159:	lock(&sched.lock)
  2596           .          .   1160:
  2597           .          .   1161:	procs := gomaxprocs
  2598  ROUTINE ======================== runtime.stopm in /usr/local/go/src/runtime/proc.go
  2599           0       40ms (flat, cum)  1.93% of Total
  2600           .          .   2296:	}
  2601           .          .   2297:
  2602           .          .   2298:	lock(&sched.lock)
  2603           .          .   2299:	mput(_g_.m)
  2604           .          .   2300:	unlock(&sched.lock)
  2605           .       40ms   2301:	mPark()
  2606           .          .   2302:	acquirep(_g_.m.nextp.ptr())
  2607           .          .   2303:	_g_.m.nextp = 0
  2608           .          .   2304:}
  2609           .          .   2305:
  2610           .          .   2306:func mspinning() {
  2611  ROUTINE ======================== runtime.strhash in /usr/local/go/src/runtime/asm_amd64.s
  2612        10ms       10ms (flat, cum)  0.48% of Total
  2613           .          .    895:// func strhash(p unsafe.Pointer, h uintptr) uintptr
  2614           .          .    896:TEXT runtime·strhash(SB),NOSPLIT,$0-24
  2615           .          .    897:	CMPB	runtime·useAeshash(SB), $0
  2616           .          .    898:	JEQ	noaes
  2617           .          .    899:	MOVQ	p+0(FP), AX	// ptr to string struct
  2618        10ms       10ms    900:	MOVQ	8(AX), CX	// length of string
  2619           .          .    901:	MOVQ	(AX), AX	// string data
  2620           .          .    902:	LEAQ	ret+16(FP), DX
  2621           .          .    903:	JMP	aeshashbody<>(SB)
  2622           .          .    904:noaes:
  2623           .          .    905:	JMP	runtime·strhashFallback(SB)
  2624  ROUTINE ======================== runtime.sysUsed in /usr/local/go/src/runtime/mem_darwin.go
  2625           0      120ms (flat, cum)  5.80% of Total
  2626           .          .     28:
  2627           .          .     29:func sysUsed(v unsafe.Pointer, n uintptr) {
  2628           .          .     30:	// MADV_FREE_REUSE is necessary to keep the kernel's accounting
  2629           .          .     31:	// accurate. If called on any memory region that hasn't been
  2630           .          .     32:	// MADV_FREE_REUSABLE'd, it's a no-op.
  2631           .      120ms     33:	madvise(v, n, _MADV_FREE_REUSE)
  2632           .          .     34:}
  2633           .          .     35:
  2634           .          .     36:func sysHugePage(v unsafe.Pointer, n uintptr) {
  2635           .          .     37:}
  2636           .          .     38:
  2637  ROUTINE ======================== runtime.systemstack in /usr/local/go/src/runtime/asm_amd64.s
  2638           0      300ms (flat, cum) 14.49% of Total
  2639           .          .    374:	MOVQ	BX, SP
  2640           .          .    375:
  2641           .          .    376:	// call target function
  2642           .          .    377:	MOVQ	DI, DX
  2643           .          .    378:	MOVQ	0(DI), DI
  2644           .      300ms    379:	CALL	DI
  2645           .          .    380:
  2646           .          .    381:	// switch back to g
  2647           .          .    382:	get_tls(CX)
  2648           .          .    383:	MOVQ	g(CX), AX
  2649           .          .    384:	MOVQ	g_m(AX), BX
  2650  ROUTINE ======================== strconv.ParseFloat in /usr/local/go/src/strconv/atof.go
  2651           0       90ms (flat, cum)  4.35% of Total
  2652           .          .    686:// ParseFloat returns f = ±Inf, err.Err = ErrRange.
  2653           .          .    687://
  2654           .          .    688:// ParseFloat recognizes the strings "NaN", and the (possibly signed) strings "Inf" and "Infinity"
  2655           .          .    689:// as their respective special floating point values. It ignores case when matching.
  2656           .          .    690:func ParseFloat(s string, bitSize int) (float64, error) {
  2657           .       90ms    691:	f, n, err := parseFloatPrefix(s, bitSize)
  2658           .          .    692:	if err == nil && n != len(s) {
  2659           .          .    693:		return 0, syntaxError(fnParseFloat, s)
  2660           .          .    694:	}
  2661           .          .    695:	return f, err
  2662           .          .    696:}
  2663  ROUTINE ======================== strconv.atof64 in /usr/local/go/src/strconv/atof.go
  2664        10ms       80ms (flat, cum)  3.86% of Total
  2665           .          .    615:func atof64(s string) (f float64, n int, err error) {
  2666           .          .    616:	if val, n, ok := special(s); ok {
  2667           .          .    617:		return val, n, nil
  2668           .          .    618:	}
  2669           .          .    619:
  2670        10ms       60ms    620:	mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
  2671           .          .    621:	if !ok {
  2672           .          .    622:		return 0, n, syntaxError(fnParseFloat, s)
  2673           .          .    623:	}
  2674           .          .    624:
  2675           .          .    625:	if hex {
  2676           .          .    626:		f, err := atofHex(s[:n], &float64info, mantissa, exp, neg, trunc)
  2677           .          .    627:		return f, n, err
  2678           .          .    628:	}
  2679           .          .    629:
  2680           .          .    630:	if optimize {
  2681           .          .    631:		// Try pure floating-point arithmetic conversion, and if that fails,
  2682           .          .    632:		// the Eisel-Lemire algorithm.
  2683           .          .    633:		if !trunc {
  2684           .       20ms    634:			if f, ok := atof64exact(mantissa, exp, neg); ok {
  2685           .          .    635:				return f, n, nil
  2686           .          .    636:			}
  2687           .          .    637:		}
  2688           .          .    638:		f, ok := eiselLemire64(mantissa, exp, neg)
  2689           .          .    639:		if ok {
  2690  ROUTINE ======================== strconv.atof64exact in /usr/local/go/src/strconv/atof.go
  2691        20ms       20ms (flat, cum)  0.97% of Total
  2692           .          .    422:// Three common cases:
  2693           .          .    423://	value is exact integer
  2694           .          .    424://	value is exact integer * exact power of ten
  2695           .          .    425://	value is exact integer / exact power of ten
  2696           .          .    426:// These all produce potentially inexact but correctly rounded answers.
  2697        10ms       10ms    427:func atof64exact(mantissa uint64, exp int, neg bool) (f float64, ok bool) {
  2698           .          .    428:	if mantissa>>float64info.mantbits != 0 {
  2699           .          .    429:		return
  2700           .          .    430:	}
  2701           .          .    431:	f = float64(mantissa)
  2702           .          .    432:	if neg {
  2703           .          .    433:		f = -f
  2704           .          .    434:	}
  2705           .          .    435:	switch {
  2706        10ms       10ms    436:	case exp == 0:
  2707           .          .    437:		// an integer.
  2708           .          .    438:		return f, true
  2709           .          .    439:	// Exact integers are <= 10^15.
  2710           .          .    440:	// Exact powers of ten are <= 10^22.
  2711           .          .    441:	case exp > 0 && exp <= 15+22: // int * 10^k
  2712  ROUTINE ======================== strconv.parseFloatPrefix in /usr/local/go/src/strconv/atof.go
  2713        10ms       90ms (flat, cum)  4.35% of Total
  2714           .          .    693:		return 0, syntaxError(fnParseFloat, s)
  2715           .          .    694:	}
  2716           .          .    695:	return f, err
  2717           .          .    696:}
  2718           .          .    697:
  2719        10ms       10ms    698:func parseFloatPrefix(s string, bitSize int) (float64, int, error) {
  2720           .          .    699:	if bitSize == 32 {
  2721           .          .    700:		f, n, err := atof32(s)
  2722           .          .    701:		return float64(f), n, err
  2723           .          .    702:	}
  2724           .       80ms    703:	return atof64(s)
  2725           .          .    704:}
  2726  ROUTINE ======================== strconv.readFloat in /usr/local/go/src/strconv/atof.go
  2727        50ms       50ms (flat, cum)  2.42% of Total
  2728           .          .    202:	sawdigits := false
  2729           .          .    203:	nd := 0
  2730           .          .    204:	ndMant := 0
  2731           .          .    205:	dp := 0
  2732           .          .    206:loop:
  2733        10ms       10ms    207:	for ; i < len(s); i++ {
  2734           .          .    208:		switch c := s[i]; true {
  2735           .          .    209:		case c == '_':
  2736           .          .    210:			underscores = true
  2737           .          .    211:			continue
  2738           .          .    212:
  2739        10ms       10ms    213:		case c == '.':
  2740           .          .    214:			if sawdot {
  2741           .          .    215:				break loop
  2742           .          .    216:			}
  2743           .          .    217:			sawdot = true
  2744           .          .    218:			dp = nd
  2745           .          .    219:			continue
  2746           .          .    220:
  2747           .          .    221:		case '0' <= c && c <= '9':
  2748           .          .    222:			sawdigits = true
  2749           .          .    223:			if c == '0' && nd == 0 { // ignore leading zeros
  2750           .          .    224:				dp--
  2751           .          .    225:				continue
  2752           .          .    226:			}
  2753           .          .    227:			nd++
  2754           .          .    228:			if ndMant < maxMantDigits {
  2755           .          .    229:				mantissa *= base
  2756           .          .    230:				mantissa += uint64(c - '0')
  2757           .          .    231:				ndMant++
  2758           .          .    232:			} else if c != '0' {
  2759           .          .    233:				trunc = true
  2760           .          .    234:			}
  2761           .          .    235:			continue
  2762           .          .    236:
  2763           .          .    237:		case base == 16 && 'a' <= lower(c) && lower(c) <= 'f':
  2764           .          .    238:			sawdigits = true
  2765           .          .    239:			nd++
  2766           .          .    240:			if ndMant < maxMantDigits {
  2767           .          .    241:				mantissa *= 16
  2768           .          .    242:				mantissa += uint64(lower(c) - 'a' + 10)
  2769           .          .    243:				ndMant++
  2770           .          .    244:			} else {
  2771           .          .    245:				trunc = true
  2772           .          .    246:			}
  2773           .          .    247:			continue
  2774           .          .    248:		}
  2775           .          .    249:		break
  2776           .          .    250:	}
  2777           .          .    251:	if !sawdigits {
  2778           .          .    252:		return
  2779           .          .    253:	}
  2780           .          .    254:	if !sawdot {
  2781           .          .    255:		dp = nd
  2782           .          .    256:	}
  2783           .          .    257:
  2784           .          .    258:	if base == 16 {
  2785           .          .    259:		dp *= 4
  2786           .          .    260:		ndMant *= 4
  2787           .          .    261:	}
  2788           .          .    262:
  2789           .          .    263:	// optional exponent moves decimal point.
  2790           .          .    264:	// if we read a very large, very long number,
  2791           .          .    265:	// just be sure to move the decimal point by
  2792           .          .    266:	// a lot (say, 100000).  it doesn't matter if it's
  2793           .          .    267:	// not the exact number.
  2794        10ms       10ms    268:	if i < len(s) && lower(s[i]) == expChar {
  2795           .          .    269:		i++
  2796           .          .    270:		if i >= len(s) {
  2797           .          .    271:			return
  2798           .          .    272:		}
  2799           .          .    273:		esign := 1
  2800           .          .    274:		if s[i] == '+' {
  2801           .          .    275:			i++
  2802           .          .    276:		} else if s[i] == '-' {
  2803           .          .    277:			i++
  2804           .          .    278:			esign = -1
  2805           .          .    279:		}
  2806           .          .    280:		if i >= len(s) || s[i] < '0' || s[i] > '9' {
  2807           .          .    281:			return
  2808           .          .    282:		}
  2809           .          .    283:		e := 0
  2810           .          .    284:		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
  2811           .          .    285:			if s[i] == '_' {
  2812           .          .    286:				underscores = true
  2813           .          .    287:				continue
  2814           .          .    288:			}
  2815           .          .    289:			if e < 10000 {
  2816           .          .    290:				e = e*10 + int(s[i]) - '0'
  2817           .          .    291:			}
  2818           .          .    292:		}
  2819           .          .    293:		dp += e * esign
  2820           .          .    294:	} else if base == 16 {
  2821           .          .    295:		// Must have exponent.
  2822        20ms       20ms    296:		return
  2823           .          .    297:	}
  2824           .          .    298:
  2825           .          .    299:	if mantissa != 0 {
  2826           .          .    300:		exp = dp - ndMant
  2827           .          .    301:	}
  2828  ROUTINE ======================== sync.(*RWMutex).RLock in /usr/local/go/src/sync/rwmutex.go
  2829        20ms       20ms (flat, cum)  0.97% of Total
  2830           .          .     56:func (rw *RWMutex) RLock() {
  2831           .          .     57:	if race.Enabled {
  2832           .          .     58:		_ = rw.w.state
  2833           .          .     59:		race.Disable()
  2834           .          .     60:	}
  2835        20ms       20ms     61:	if atomic.AddInt32(&rw.readerCount, 1) < 0 {
  2836           .          .     62:		// A writer is pending, wait for it.
  2837           .          .     63:		runtime_SemacquireMutex(&rw.readerSem, false, 0)
  2838           .          .     64:	}
  2839           .          .     65:	if race.Enabled {
  2840           .          .     66:		race.Enable()
  2841  ROUTINE ======================== testing.(*B).launch in /usr/local/go/src/testing/benchmark.go
  2842           0      1.73s (flat, cum) 83.57% of Total
  2843           .          .    320:			n = min(n, 100*last)
  2844           .          .    321:			// Be sure to run at least one more than last time.
  2845           .          .    322:			n = max(n, last+1)
  2846           .          .    323:			// Don't run more than 1e9 times. (This also keeps n in int range on 32 bit platforms.)
  2847           .          .    324:			n = min(n, 1e9)
  2848           .      1.73s    325:			b.runN(int(n))
  2849           .          .    326:		}
  2850           .          .    327:	}
  2851           .          .    328:	b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes, b.extra}
  2852           .          .    329:}
  2853           .          .    330:
  2854  ROUTINE ======================== testing.(*B).runN in /usr/local/go/src/testing/benchmark.go
  2855           0      1.73s (flat, cum) 83.57% of Total
  2856           .          .    187:	b.raceErrors = -race.Errors()
  2857           .          .    188:	b.N = n
  2858           .          .    189:	b.parallelism = 1
  2859           .          .    190:	b.ResetTimer()
  2860           .          .    191:	b.StartTimer()
  2861           .      1.73s    192:	b.benchFunc(b)
  2862           .          .    193:	b.StopTimer()
  2863           .          .    194:	b.previousN = n
  2864           .          .    195:	b.previousDuration = b.duration
  2865           .          .    196:	b.raceErrors += race.Errors()
  2866           .          .    197:	if b.raceErrors > 0 {