// Source: github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/server/status/recorder.go

// Copyright 2015 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package status

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"math"
	"os"
	"runtime"
	"strconv"
	"sync/atomic"
	"time"

	"github.com/cockroachdb/cockroach/pkg/build"
	"github.com/cockroachdb/cockroach/pkg/gossip"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/rpc"
	"github.com/cockroachdb/cockroach/pkg/server/status/statuspb"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/ts/tspb"
	"github.com/cockroachdb/cockroach/pkg/util/cgroups"
	"github.com/cockroachdb/cockroach/pkg/util/envutil"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/metric"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/errors"
	humanize "github.com/dustin/go-humanize"
	"github.com/elastic/gosigar"
)

const (
	// storeTimeSeriesPrefix is the common prefix for time series keys which
	// record store-specific data. It is a format string; the %s verb receives
	// the metric name.
	storeTimeSeriesPrefix = "cr.store.%s"
	// nodeTimeSeriesPrefix is the common prefix for time series keys which
	// record node-specific data. It is a format string; the %s verb receives
	// the metric name.
	nodeTimeSeriesPrefix = "cr.node.%s"

	// Label keys attached to the "node-id" gauge registered by AddNode.
	advertiseAddrLabelKey = "advertise-addr"
	httpAddrLabelKey      = "http-addr"
	sqlAddrLabelKey       = "sql-addr"
)

// quantile pairs a metric-name suffix with the quantile that is recorded under
// that suffix. The quantile is expressed on a 0-100 scale (e.g. 99.9).
type quantile struct {
	suffix   string
	quantile float64
}

// recordHistogramQuantiles lists the quantiles that are recorded for every
// histogram metric. One value is emitted per entry, named by appending the
// entry's suffix to the histogram's name.
var recordHistogramQuantiles = []quantile{
	{"-max", 100},
	{"-p99.999", 99.999},
	{"-p99.99", 99.99},
	{"-p99.9", 99.9},
	{"-p99", 99},
	{"-p90", 90},
	{"-p75", 75},
	{"-p50", 50},
}

// storeMetrics is the minimum interface of the storage.Store object needed by
// MetricsRecorder to provide status summaries. This is used instead of Store
// directly in order to simplify testing.
type storeMetrics interface {
	StoreID() roachpb.StoreID
	Descriptor(bool) (*roachpb.StoreDescriptor, error)
	Registry() *metric.Registry
}

// MetricsRecorder is used to periodically record the information in a number of
// metric registries.
//
// Two types of registries are maintained: "node-level" registries, provided by
// node-level systems, and "store-level" registries which are provided by each
// store hosted by the node. There are slight differences in the way these are
// recorded, and they are thus kept separate.
type MetricsRecorder struct {
	*HealthChecker
	// gossip, nodeLiveness and rpcContext are consulted when assembling the
	// per-node network activity that is included in node status summaries.
	gossip       *gossip.Gossip
	nodeLiveness *kvserver.NodeLiveness
	rpcContext   *rpc.Context
	settings     *cluster.Settings
	// clock supplies the timestamps stamped onto recorded data.
	clock *hlc.Clock

	// Counts to help optimize slice allocation. Should only be accessed atomically.
	// Each holds the size observed on a previous recording pass and is used as
	// the initial capacity on the next one.
	lastDataCount        int64
	lastSummaryCount     int64
	lastNodeMetricCount  int64
	lastStoreMetricCount int64

	// mu synchronizes the reading of node/store registries against the adding of
	// nodes/stores. Consequently, almost all uses of it only need to take an
	// RLock on it.
	mu struct {
		syncutil.RWMutex
		// nodeRegistry contains, as subregistries, the multiple component-specific
		// registries which are recorded as "node level" metrics. It is nil until
		// AddNode has been called.
		nodeRegistry *metric.Registry
		desc         roachpb.NodeDescriptor
		startedAt    int64

		// storeRegistries contains a registry for each store on the node. These
		// are not stored as subregistries, but rather are treated as wholly
		// independent.
		storeRegistries map[roachpb.StoreID]*metric.Registry
		stores          map[roachpb.StoreID]storeMetrics
	}
	// PrometheusExporter is not thread-safe even for operations that are
	// logically read-only, but we don't want to block using it just because
	// another goroutine is reading from the registries (i.e. using
	// `mu.RLock()`), so we use a separate mutex just for prometheus.
	// NOTE: promMu should always be locked BEFORE trying to lock mu.
	promMu struct {
		syncutil.Mutex
		// prometheusExporter merges metrics into families and generates the
		// prometheus text format.
		prometheusExporter metric.PrometheusExporter
	}
	// WriteNodeStatus is a potentially long-running method (with a network
	// round-trip) that requires a mutex to be safe for concurrent usage. We
	// therefore give it its own mutex to avoid blocking other methods.
	writeSummaryMu syncutil.Mutex
}

// NewMetricsRecorder initializes a new MetricsRecorder object that uses the
// given clock. The returned recorder has empty store maps and a fresh
// prometheus exporter; the node registry is not set until AddNode is called.
143 func NewMetricsRecorder( 144 clock *hlc.Clock, 145 nodeLiveness *kvserver.NodeLiveness, 146 rpcContext *rpc.Context, 147 gossip *gossip.Gossip, 148 settings *cluster.Settings, 149 ) *MetricsRecorder { 150 mr := &MetricsRecorder{ 151 HealthChecker: NewHealthChecker(trackedMetrics), 152 nodeLiveness: nodeLiveness, 153 rpcContext: rpcContext, 154 gossip: gossip, 155 settings: settings, 156 } 157 mr.mu.storeRegistries = make(map[roachpb.StoreID]*metric.Registry) 158 mr.mu.stores = make(map[roachpb.StoreID]storeMetrics) 159 mr.promMu.prometheusExporter = metric.MakePrometheusExporter() 160 mr.clock = clock 161 return mr 162 } 163 164 // AddNode adds the Registry from an initialized node, along with its descriptor 165 // and start time. 166 func (mr *MetricsRecorder) AddNode( 167 reg *metric.Registry, 168 desc roachpb.NodeDescriptor, 169 startedAt int64, 170 advertiseAddr, httpAddr, sqlAddr string, 171 ) { 172 mr.mu.Lock() 173 defer mr.mu.Unlock() 174 mr.mu.nodeRegistry = reg 175 mr.mu.desc = desc 176 mr.mu.startedAt = startedAt 177 178 // Create node ID gauge metric with host as a label. 179 metadata := metric.Metadata{ 180 Name: "node-id", 181 Help: "node ID with labels for advertised RPC and HTTP addresses", 182 Measurement: "Node ID", 183 Unit: metric.Unit_CONST, 184 } 185 186 metadata.AddLabel(advertiseAddrLabelKey, advertiseAddr) 187 metadata.AddLabel(httpAddrLabelKey, httpAddr) 188 metadata.AddLabel(sqlAddrLabelKey, sqlAddr) 189 nodeIDGauge := metric.NewGauge(metadata) 190 nodeIDGauge.Update(int64(desc.NodeID)) 191 reg.AddMetric(nodeIDGauge) 192 } 193 194 // AddStore adds the Registry from the provided store as a store-level registry 195 // in this recorder. A reference to the store is kept for the purpose of 196 // gathering some additional information which is present in store status 197 // summaries. 198 // Stores should only be added to the registry after they have been started. 
199 func (mr *MetricsRecorder) AddStore(store storeMetrics) { 200 mr.mu.Lock() 201 defer mr.mu.Unlock() 202 storeID := store.StoreID() 203 store.Registry().AddLabel("store", strconv.Itoa(int(storeID))) 204 mr.mu.storeRegistries[storeID] = store.Registry() 205 mr.mu.stores[storeID] = store 206 } 207 208 // MarshalJSON returns an appropriate JSON representation of the current values 209 // of the metrics being tracked by this recorder. 210 func (mr *MetricsRecorder) MarshalJSON() ([]byte, error) { 211 mr.mu.RLock() 212 defer mr.mu.RUnlock() 213 if mr.mu.nodeRegistry == nil { 214 // We haven't yet processed initialization information; return an empty 215 // JSON object. 216 if log.V(1) { 217 log.Warning(context.TODO(), "MetricsRecorder.MarshalJSON() called before NodeID allocation") 218 } 219 return []byte("{}"), nil 220 } 221 topLevel := map[string]interface{}{ 222 fmt.Sprintf("node.%d", mr.mu.desc.NodeID): mr.mu.nodeRegistry, 223 } 224 // Add collection of stores to top level. JSON requires that keys be strings, 225 // so we must convert the store ID to a string. 226 storeLevel := make(map[string]interface{}) 227 for id, reg := range mr.mu.storeRegistries { 228 storeLevel[strconv.Itoa(int(id))] = reg 229 } 230 topLevel["stores"] = storeLevel 231 return json.Marshal(topLevel) 232 } 233 234 // scrapePrometheusLocked updates the prometheusExporter's metrics snapshot. 235 func (mr *MetricsRecorder) scrapePrometheusLocked() { 236 mr.scrapeIntoPrometheus(&mr.promMu.prometheusExporter) 237 } 238 239 // scrapeIntoPrometheus updates the passed-in prometheusExporter's metrics 240 // snapshot. 241 func (mr *MetricsRecorder) scrapeIntoPrometheus(pm *metric.PrometheusExporter) { 242 mr.mu.RLock() 243 defer mr.mu.RUnlock() 244 if mr.mu.nodeRegistry == nil { 245 // We haven't yet processed initialization information; output nothing. 
246 if log.V(1) { 247 log.Warning(context.TODO(), "MetricsRecorder asked to scrape metrics before NodeID allocation") 248 } 249 } 250 251 pm.ScrapeRegistry(mr.mu.nodeRegistry) 252 for _, reg := range mr.mu.storeRegistries { 253 pm.ScrapeRegistry(reg) 254 } 255 } 256 257 // PrintAsText writes the current metrics values as plain-text to the writer. 258 // We write metrics to a temporary buffer which is then copied to the writer. 259 // This is to avoid hanging requests from holding the lock. 260 func (mr *MetricsRecorder) PrintAsText(w io.Writer) error { 261 var buf bytes.Buffer 262 if err := mr.lockAndPrintAsText(&buf); err != nil { 263 return err 264 } 265 _, err := buf.WriteTo(w) 266 return err 267 } 268 269 // lockAndPrintAsText grabs the recorder lock and generates the prometheus 270 // metrics page. 271 func (mr *MetricsRecorder) lockAndPrintAsText(w io.Writer) error { 272 mr.promMu.Lock() 273 defer mr.promMu.Unlock() 274 mr.scrapePrometheusLocked() 275 return mr.promMu.prometheusExporter.PrintAsText(w) 276 } 277 278 // ExportToGraphite sends the current metric values to a Graphite server. 279 // It creates a new PrometheusExporter each time to avoid needing to worry 280 // about races with mr.promMu.prometheusExporter. We are not as worried 281 // about the extra memory allocations. 282 func (mr *MetricsRecorder) ExportToGraphite( 283 ctx context.Context, endpoint string, pm *metric.PrometheusExporter, 284 ) error { 285 mr.scrapeIntoPrometheus(pm) 286 graphiteExporter := metric.MakeGraphiteExporter(pm) 287 return graphiteExporter.Push(ctx, endpoint) 288 } 289 290 // GetTimeSeriesData serializes registered metrics for consumption by 291 // CockroachDB's time series system. 292 func (mr *MetricsRecorder) GetTimeSeriesData() []tspb.TimeSeriesData { 293 mr.mu.RLock() 294 defer mr.mu.RUnlock() 295 296 if mr.mu.nodeRegistry == nil { 297 // We haven't yet processed initialization information; do nothing. 
298 if log.V(1) { 299 log.Warning(context.TODO(), "MetricsRecorder.GetTimeSeriesData() called before NodeID allocation") 300 } 301 return nil 302 } 303 304 lastDataCount := atomic.LoadInt64(&mr.lastDataCount) 305 data := make([]tspb.TimeSeriesData, 0, lastDataCount) 306 307 // Record time series from node-level registries. 308 now := mr.clock.PhysicalNow() 309 recorder := registryRecorder{ 310 registry: mr.mu.nodeRegistry, 311 format: nodeTimeSeriesPrefix, 312 source: strconv.FormatInt(int64(mr.mu.desc.NodeID), 10), 313 timestampNanos: now, 314 } 315 recorder.record(&data) 316 317 // Record time series from store-level registries. 318 for storeID, r := range mr.mu.storeRegistries { 319 storeRecorder := registryRecorder{ 320 registry: r, 321 format: storeTimeSeriesPrefix, 322 source: strconv.FormatInt(int64(storeID), 10), 323 timestampNanos: now, 324 } 325 storeRecorder.record(&data) 326 } 327 atomic.CompareAndSwapInt64(&mr.lastDataCount, lastDataCount, int64(len(data))) 328 return data 329 } 330 331 // GetMetricsMetadata returns the metadata from all metrics tracked in the node's 332 // nodeRegistry and a randomly selected storeRegistry. 333 func (mr *MetricsRecorder) GetMetricsMetadata() map[string]metric.Metadata { 334 mr.mu.Lock() 335 defer mr.mu.Unlock() 336 337 if mr.mu.nodeRegistry == nil { 338 // We haven't yet processed initialization information; do nothing. 339 if log.V(1) { 340 log.Warning(context.TODO(), "MetricsRecorder.GetMetricsMetadata() called before NodeID allocation") 341 } 342 return nil 343 } 344 345 metrics := make(map[string]metric.Metadata) 346 347 mr.mu.nodeRegistry.WriteMetricsMetadata(metrics) 348 349 // Get a random storeID. 350 var sID roachpb.StoreID 351 352 for storeID := range mr.mu.storeRegistries { 353 sID = storeID 354 break 355 } 356 357 // Get metric metadata from that store because all stores have the same metadata. 
358 mr.mu.storeRegistries[sID].WriteMetricsMetadata(metrics) 359 360 return metrics 361 } 362 363 // getNetworkActivity produces three maps detailing information about 364 // network activity between this node and all other nodes. The maps 365 // are incoming throughput, outgoing throughput, and average 366 // latency. Throughputs are stored as bytes, and latencies as nanos. 367 func (mr *MetricsRecorder) getNetworkActivity( 368 ctx context.Context, 369 ) map[roachpb.NodeID]statuspb.NodeStatus_NetworkActivity { 370 activity := make(map[roachpb.NodeID]statuspb.NodeStatus_NetworkActivity) 371 if mr.nodeLiveness != nil && mr.gossip != nil { 372 isLiveMap := mr.nodeLiveness.GetIsLiveMap() 373 374 throughputMap := mr.rpcContext.GetStatsMap() 375 var currentAverages map[string]time.Duration 376 if mr.rpcContext.RemoteClocks != nil { 377 currentAverages = mr.rpcContext.RemoteClocks.AllLatencies() 378 } 379 for nodeID, entry := range isLiveMap { 380 address, err := mr.gossip.GetNodeIDAddress(nodeID) 381 if err != nil { 382 if entry.IsLive { 383 log.Warningf(ctx, "%v", err) 384 } 385 continue 386 } 387 na := statuspb.NodeStatus_NetworkActivity{} 388 key := address.String() 389 if tp, ok := throughputMap.Load(key); ok { 390 stats := tp.(*rpc.Stats) 391 na.Incoming = stats.Incoming() 392 na.Outgoing = stats.Outgoing() 393 } 394 if entry.IsLive { 395 if latency, ok := currentAverages[key]; ok { 396 na.Latency = latency.Nanoseconds() 397 } 398 } 399 activity[nodeID] = na 400 } 401 } 402 return activity 403 } 404 405 // GenerateNodeStatus returns a status summary message for the node. The summary 406 // includes the recent values of metrics for both the node and all of its 407 // component stores. When the node isn't initialized yet, nil is returned. 
408 func (mr *MetricsRecorder) GenerateNodeStatus(ctx context.Context) *statuspb.NodeStatus { 409 activity := mr.getNetworkActivity(ctx) 410 411 mr.mu.RLock() 412 defer mr.mu.RUnlock() 413 414 if mr.mu.nodeRegistry == nil { 415 // We haven't yet processed initialization information; do nothing. 416 if log.V(1) { 417 log.Warning(ctx, "attempt to generate status summary before NodeID allocation.") 418 } 419 return nil 420 } 421 422 now := mr.clock.PhysicalNow() 423 424 lastSummaryCount := atomic.LoadInt64(&mr.lastSummaryCount) 425 lastNodeMetricCount := atomic.LoadInt64(&mr.lastNodeMetricCount) 426 lastStoreMetricCount := atomic.LoadInt64(&mr.lastStoreMetricCount) 427 428 systemMemory, _, err := GetTotalMemoryWithoutLogging() 429 if err != nil { 430 log.Errorf(ctx, "could not get total system memory: %v", err) 431 } 432 433 // Generate a node status with no store data. 434 nodeStat := &statuspb.NodeStatus{ 435 Desc: mr.mu.desc, 436 BuildInfo: build.GetInfo(), 437 UpdatedAt: now, 438 StartedAt: mr.mu.startedAt, 439 StoreStatuses: make([]statuspb.StoreStatus, 0, lastSummaryCount), 440 Metrics: make(map[string]float64, lastNodeMetricCount), 441 Args: os.Args, 442 Env: envutil.GetEnvVarsUsed(), 443 Activity: activity, 444 NumCpus: int32(runtime.NumCPU()), 445 TotalSystemMemory: systemMemory, 446 } 447 448 eachRecordableValue(mr.mu.nodeRegistry, func(name string, val float64) { 449 nodeStat.Metrics[name] = val 450 }) 451 452 // Generate status summaries for stores. 453 for storeID, r := range mr.mu.storeRegistries { 454 storeMetrics := make(map[string]float64, lastStoreMetricCount) 455 eachRecordableValue(r, func(name string, val float64) { 456 storeMetrics[name] = val 457 }) 458 459 // Gather descriptor from store. 
460 descriptor, err := mr.mu.stores[storeID].Descriptor(false /* useCached */) 461 if err != nil { 462 log.Errorf(ctx, "Could not record status summaries: Store %d could not return descriptor, error: %s", storeID, err) 463 continue 464 } 465 466 nodeStat.StoreStatuses = append(nodeStat.StoreStatuses, statuspb.StoreStatus{ 467 Desc: *descriptor, 468 Metrics: storeMetrics, 469 }) 470 } 471 472 atomic.CompareAndSwapInt64( 473 &mr.lastSummaryCount, lastSummaryCount, int64(len(nodeStat.StoreStatuses))) 474 atomic.CompareAndSwapInt64( 475 &mr.lastNodeMetricCount, lastNodeMetricCount, int64(len(nodeStat.Metrics))) 476 if len(nodeStat.StoreStatuses) > 0 { 477 atomic.CompareAndSwapInt64( 478 &mr.lastStoreMetricCount, lastStoreMetricCount, int64(len(nodeStat.StoreStatuses[0].Metrics))) 479 } 480 481 return nodeStat 482 } 483 484 // WriteNodeStatus writes the supplied summary to the given client. 485 func (mr *MetricsRecorder) WriteNodeStatus( 486 ctx context.Context, db *kv.DB, nodeStatus statuspb.NodeStatus, 487 ) error { 488 mr.writeSummaryMu.Lock() 489 defer mr.writeSummaryMu.Unlock() 490 key := keys.NodeStatusKey(nodeStatus.Desc.NodeID) 491 // We use PutInline to store only a single version of the node status. 492 // There's not much point in keeping the historical versions as we keep 493 // all of the constituent data as timeseries. Further, due to the size 494 // of the build info in the node status, writing one of these every 10s 495 // will generate more versions than will easily fit into a range over 496 // the course of a day. 
497 if err := db.PutInline(ctx, key, &nodeStatus); err != nil { 498 return err 499 } 500 if log.V(2) { 501 statusJSON, err := json.Marshal(&nodeStatus) 502 if err != nil { 503 log.Errorf(ctx, "error marshaling nodeStatus to json: %s", err) 504 } 505 log.Infof(ctx, "node %d status: %s", nodeStatus.Desc.NodeID, statusJSON) 506 } 507 return nil 508 } 509 510 // registryRecorder is a helper class for recording time series datapoints 511 // from a metrics Registry. 512 type registryRecorder struct { 513 registry *metric.Registry 514 format string 515 source string 516 timestampNanos int64 517 } 518 519 func extractValue(mtr interface{}) (float64, error) { 520 // TODO(tschottdorf|mrtracy): consider moving this switch to an interface 521 // implemented by the individual metric types. 522 switch mtr := mtr.(type) { 523 case float64: 524 return mtr, nil 525 case *metric.Counter: 526 return float64(mtr.Count()), nil 527 case *metric.Gauge: 528 return float64(mtr.Value()), nil 529 case *metric.Rate: 530 return mtr.Value(), nil 531 case *metric.GaugeFloat64: 532 return mtr.Value(), nil 533 default: 534 return 0, errors.Errorf("cannot extract value for type %T", mtr) 535 } 536 } 537 538 // eachRecordableValue visits each metric in the registry, calling the supplied 539 // function once for each recordable value represented by that metric. This is 540 // useful to expand certain metric types (such as histograms) into multiple 541 // recordable values. 542 func eachRecordableValue(reg *metric.Registry, fn func(string, float64)) { 543 reg.Each(func(name string, mtr interface{}) { 544 if histogram, ok := mtr.(*metric.Histogram); ok { 545 // TODO(mrtracy): Where should this comment go for better 546 // visibility? 547 // 548 // Proper support of Histograms for time series is difficult and 549 // likely not worth the trouble. Instead, we aggregate a windowed 550 // histogram at fixed quantiles. 
If the scraping window and the 551 // histogram's eviction duration are similar, this should give 552 // good results; if the two durations are very different, we either 553 // report stale results or report only the more recent data. 554 // 555 // Additionally, we can only aggregate max/min of the quantiles; 556 // roll-ups don't know that and so they will return mathematically 557 // nonsensical values, but that seems acceptable for the time 558 // being. 559 curr, _ := histogram.Windowed() 560 for _, pt := range recordHistogramQuantiles { 561 fn(name+pt.suffix, float64(curr.ValueAtQuantile(pt.quantile))) 562 } 563 } else { 564 val, err := extractValue(mtr) 565 if err != nil { 566 log.Warningf(context.TODO(), "%v", err) 567 return 568 } 569 fn(name, val) 570 } 571 }) 572 } 573 574 func (rr registryRecorder) record(dest *[]tspb.TimeSeriesData) { 575 eachRecordableValue(rr.registry, func(name string, val float64) { 576 *dest = append(*dest, tspb.TimeSeriesData{ 577 Name: fmt.Sprintf(rr.format, name), 578 Source: rr.source, 579 Datapoints: []tspb.TimeSeriesDatapoint{ 580 { 581 TimestampNanos: rr.timestampNanos, 582 Value: val, 583 }, 584 }, 585 }) 586 }) 587 } 588 589 // GetTotalMemory returns either the total system memory (in bytes) or if 590 // possible the cgroups available memory. 591 func GetTotalMemory(ctx context.Context) (int64, error) { 592 memory, warning, err := GetTotalMemoryWithoutLogging() 593 if err != nil { 594 return 0, err 595 } 596 if warning != "" { 597 log.Infof(ctx, "%s", warning) 598 } 599 return memory, nil 600 } 601 602 // GetTotalMemoryWithoutLogging is the same as GetTotalMemory, but returns any warning 603 // as a string instead of logging it. 
604 func GetTotalMemoryWithoutLogging() (int64, string, error) { 605 totalMem, err := func() (int64, error) { 606 mem := gosigar.Mem{} 607 if err := mem.Get(); err != nil { 608 return 0, err 609 } 610 if mem.Total > math.MaxInt64 { 611 return 0, fmt.Errorf("inferred memory size %s exceeds maximum supported memory size %s", 612 humanize.IBytes(mem.Total), humanize.Bytes(math.MaxInt64)) 613 } 614 return int64(mem.Total), nil 615 }() 616 if err != nil { 617 return 0, "", err 618 } 619 checkTotal := func(x int64, warning string) (int64, string, error) { 620 if x <= 0 { 621 // https://github.com/elastic/gosigar/issues/72 622 return 0, warning, fmt.Errorf("inferred memory size %d is suspicious, considering invalid", x) 623 } 624 return x, warning, nil 625 } 626 if runtime.GOOS != "linux" { 627 return checkTotal(totalMem, "") 628 } 629 cgAvlMem, warning, err := cgroups.GetMemoryLimit() 630 if err != nil { 631 return checkTotal(totalMem, 632 fmt.Sprintf("available memory from cgroups is unsupported, using system memory %s instead: %v", 633 humanizeutil.IBytes(totalMem), err)) 634 } 635 if cgAvlMem == 0 || (totalMem > 0 && cgAvlMem > totalMem) { 636 return checkTotal(totalMem, 637 fmt.Sprintf("available memory from cgroups (%s) is unsupported, using system memory %s instead: %s", 638 humanize.IBytes(uint64(cgAvlMem)), humanizeutil.IBytes(totalMem), warning)) 639 } 640 return checkTotal(cgAvlMem, "") 641 }