github.com/minio/madmin-go/v2@v2.2.1/metrics.go (about)

     1  //
     2  // Copyright (c) 2015-2022 MinIO, Inc.
     3  //
     4  // This file is part of MinIO Object Storage stack
     5  //
     6  // This program is free software: you can redistribute it and/or modify
     7  // it under the terms of the GNU Affero General Public License as
     8  // published by the Free Software Foundation, either version 3 of the
     9  // License, or (at your option) any later version.
    10  //
    11  // This program is distributed in the hope that it will be useful,
    12  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    13  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14  // GNU Affero General Public License for more details.
    15  //
    16  // You should have received a copy of the GNU Affero General Public License
    17  // along with this program. If not, see <http://www.gnu.org/licenses/>.
    18  //
    19  
    20  package madmin
    21  
    22  import (
    23  	"context"
    24  	"encoding/json"
    25  	"errors"
    26  	"fmt"
    27  	"io"
    28  	"net/http"
    29  	"net/url"
    30  	"sort"
    31  	"strconv"
    32  	"strings"
    33  	"time"
    34  )
    35  
    36  // MetricType is a bitfield representation of different metric types.
    37  type MetricType uint32
    38  
    39  // MetricsNone indicates no metrics.
    40  const MetricsNone MetricType = 0
    41  
    42  const (
    43  	MetricsScanner MetricType = 1 << (iota)
    44  	MetricsDisk
    45  	MetricsOS
    46  	MetricsBatchJobs
    47  	MetricsSiteResync
    48  
    49  	// MetricsAll must be last.
    50  	// Enables all metrics.
    51  	MetricsAll = 1<<(iota) - 1
    52  )
    53  
// MetricsOptions are options provided to Metrics call.
// Every option is sent to the server as a query parameter of the request.
type MetricsOptions struct {
	Type     MetricType    // Return only these metric types. Several types can be combined using |. Leave at 0 to return all.
	N        int           // Maximum number of samples to return. 0 will return endless stream.
	Interval time.Duration // Interval between samples. Will be rounded up to 1s.
	Hosts    []string      // Leave empty for all
	ByHost   bool          // Return metrics by host.
	Disks    []string      // Sent comma-joined as the "disks" query parameter. Presumably limits results to these disks, empty meaning all - TODO confirm.
	ByDisk   bool          // Sets the "by-disk" query parameter. Presumably returns metrics by disk - TODO confirm.
	ByJobID  string        // When non-empty, sent as the "by-jobID" query parameter.
	ByDepID  string        // When non-empty, sent as the "by-depID" query parameter.
}
    66  
    67  // Metrics makes an admin call to retrieve metrics.
    68  // The provided function is called for each received entry.
    69  func (adm *AdminClient) Metrics(ctx context.Context, o MetricsOptions, out func(RealtimeMetrics)) (err error) {
    70  	path := fmt.Sprintf(adminAPIPrefix + "/metrics")
    71  	q := make(url.Values)
    72  	q.Set("types", strconv.FormatUint(uint64(o.Type), 10))
    73  	q.Set("n", strconv.Itoa(o.N))
    74  	q.Set("interval", o.Interval.String())
    75  	q.Set("hosts", strings.Join(o.Hosts, ","))
    76  	if o.ByHost {
    77  		q.Set("by-host", "true")
    78  	}
    79  	q.Set("disks", strings.Join(o.Disks, ","))
    80  	if o.ByDisk {
    81  		q.Set("by-disk", "true")
    82  	}
    83  	if o.ByJobID != "" {
    84  		q.Set("by-jobID", o.ByJobID)
    85  	}
    86  	if o.ByDepID != "" {
    87  		q.Set("by-depID", o.ByDepID)
    88  	}
    89  
    90  	resp, err := adm.executeMethod(ctx,
    91  		http.MethodGet, requestData{
    92  			relPath:     path,
    93  			queryValues: q,
    94  		},
    95  	)
    96  	if err != nil {
    97  		return err
    98  	}
    99  
   100  	if resp.StatusCode != http.StatusOK {
   101  		closeResponse(resp)
   102  		return httpRespToErrorResponse(resp)
   103  	}
   104  	defer closeResponse(resp)
   105  	dec := json.NewDecoder(resp.Body)
   106  	for {
   107  		var m RealtimeMetrics
   108  		err := dec.Decode(&m)
   109  		if err != nil {
   110  			if errors.Is(err, io.EOF) {
   111  				err = io.ErrUnexpectedEOF
   112  			}
   113  			return err
   114  		}
   115  		out(m)
   116  		if m.Final {
   117  			break
   118  		}
   119  	}
   120  	return nil
   121  }
   122  
   123  // Contains returns whether m contains all of x.
   124  func (m MetricType) Contains(x MetricType) bool {
   125  	return m&x == x
   126  }
   127  
// RealtimeMetrics provides realtime metrics.
// This is intended to be expanded over time to cover more types.
// One value is decoded per entry of the stream read by AdminClient.Metrics.
type RealtimeMetrics struct {
	// Errors holds error messages; a non-empty list indicates an error occurred.
	Errors []string `json:"errors,omitempty"`
	// Hosts indicates the scanned hosts
	Hosts      []string              `json:"hosts"`
	Aggregated Metrics               `json:"aggregated"`        // Aggregated metrics.
	ByHost     map[string]Metrics    `json:"by_host,omitempty"` // Metrics keyed by host.
	ByDisk     map[string]DiskMetric `json:"by_disk,omitempty"` // Disk metrics keyed by disk.
	// Final indicates whether this is the final packet and the receiver can exit.
	Final bool `json:"final"`
}
   141  
// Metrics contains all metric types.
// Every field is a pointer tagged omitempty, so a metric type that is nil is
// left out of the JSON encoding.
type Metrics struct {
	Scanner    *ScannerMetrics    `json:"scanner,omitempty"`
	Disk       *DiskMetric        `json:"disk,omitempty"`
	OS         *OSMetrics         `json:"os,omitempty"`
	BatchJobs  *BatchJobMetrics   `json:"batchJobs,omitempty"`
	SiteResync *SiteResyncMetrics `json:"siteResync,omitempty"`
}
   150  
   151  // Merge other into r.
   152  func (r *Metrics) Merge(other *Metrics) {
   153  	if other == nil {
   154  		return
   155  	}
   156  	if r.Scanner == nil && other.Scanner != nil {
   157  		r.Scanner = &ScannerMetrics{}
   158  	}
   159  	r.Scanner.Merge(other.Scanner)
   160  
   161  	if r.Disk == nil && other.Disk != nil {
   162  		r.Disk = &DiskMetric{}
   163  	}
   164  	r.Disk.Merge(other.Disk)
   165  
   166  	if r.OS == nil && other.OS != nil {
   167  		r.OS = &OSMetrics{}
   168  	}
   169  	r.OS.Merge(other.OS)
   170  	if r.BatchJobs == nil && other.BatchJobs != nil {
   171  		r.BatchJobs = &BatchJobMetrics{}
   172  	}
   173  	r.BatchJobs.Merge(other.BatchJobs)
   174  
   175  	if r.SiteResync == nil && other.SiteResync != nil {
   176  		r.SiteResync = &SiteResyncMetrics{}
   177  	}
   178  	r.SiteResync.Merge(other.SiteResync)
   179  }
   180  
   181  // Merge will merge other into r.
   182  func (r *RealtimeMetrics) Merge(other *RealtimeMetrics) {
   183  	if other == nil {
   184  		return
   185  	}
   186  
   187  	if len(other.Errors) > 0 {
   188  		r.Errors = append(r.Errors, other.Errors...)
   189  	}
   190  
   191  	if r.ByHost == nil && len(other.ByHost) > 0 {
   192  		r.ByHost = make(map[string]Metrics, len(other.ByHost))
   193  	}
   194  	for host, metrics := range other.ByHost {
   195  		r.ByHost[host] = metrics
   196  	}
   197  
   198  	r.Hosts = append(r.Hosts, other.Hosts...)
   199  	r.Aggregated.Merge(&other.Aggregated)
   200  	sort.Strings(r.Hosts)
   201  
   202  	// Gather per disk metrics
   203  	if r.ByDisk == nil && len(other.ByDisk) > 0 {
   204  		r.ByDisk = make(map[string]DiskMetric, len(other.ByDisk))
   205  	}
   206  	for disk, metrics := range other.ByDisk {
   207  		r.ByDisk[disk] = metrics
   208  	}
   209  }
   210  
// ScannerMetrics contains scanner information.
type ScannerMetrics struct {
	// Time these metrics were collected
	CollectedAt time.Time `json:"collected"`

	// Current scanner cycle
	CurrentCycle uint64 `json:"current_cycle"`

	// Start time of current cycle
	CurrentStarted time.Time `json:"current_started"`

	// History of when last cycles completed
	CyclesCompletedAt []time.Time `json:"cycle_complete_times"`

	// Number of accumulated operations by type since server restart.
	// The map is keyed by operation type.
	LifeTimeOps map[string]uint64 `json:"life_time_ops,omitempty"`

	// Number of accumulated ILM operations by type since server restart.
	// The map is keyed by operation type.
	LifeTimeILM map[string]uint64 `json:"ilm_ops,omitempty"`

	// Last minute operation statistics.
	LastMinute struct {
		// Scanner actions, keyed by action name.
		Actions map[string]TimedAction `json:"actions,omitempty"`
		// ILM actions, keyed by action name.
		ILM map[string]TimedAction `json:"ilm,omitempty"`
	} `json:"last_minute"`

	// Currently active path(s) being scanned.
	ActivePaths []string `json:"active,omitempty"`
}
   242  
   243  // TimedAction contains a number of actions and their accumulated duration in nanoseconds.
   244  type TimedAction struct {
   245  	Count   uint64 `json:"count"`
   246  	AccTime uint64 `json:"acc_time_ns"`
   247  	Bytes   uint64 `json:"bytes,omitempty"`
   248  }
   249  
   250  // Avg returns the average time spent on the action.
   251  func (t TimedAction) Avg() time.Duration {
   252  	if t.Count == 0 {
   253  		return 0
   254  	}
   255  	return time.Duration(t.AccTime / t.Count)
   256  }
   257  
   258  // AvgBytes returns the average time spent on the action.
   259  func (t TimedAction) AvgBytes() uint64 {
   260  	if t.Count == 0 {
   261  		return 0
   262  	}
   263  	return t.Bytes / t.Count
   264  }
   265  
   266  // Merge other into t.
   267  func (t *TimedAction) Merge(other TimedAction) {
   268  	t.Count += other.Count
   269  	t.AccTime += other.AccTime
   270  	t.Bytes += other.Bytes
   271  }
   272  
   273  // Merge other into 's'.
   274  func (s *ScannerMetrics) Merge(other *ScannerMetrics) {
   275  	if other == nil {
   276  		return
   277  	}
   278  	if s.CollectedAt.Before(other.CollectedAt) {
   279  		// Use latest timestamp
   280  		s.CollectedAt = other.CollectedAt
   281  	}
   282  	if s.CurrentCycle < other.CurrentCycle {
   283  		s.CurrentCycle = other.CurrentCycle
   284  		s.CyclesCompletedAt = other.CyclesCompletedAt
   285  		s.CurrentStarted = other.CurrentStarted
   286  	}
   287  	if len(other.CyclesCompletedAt) > len(s.CyclesCompletedAt) {
   288  		s.CyclesCompletedAt = other.CyclesCompletedAt
   289  	}
   290  
   291  	// Regular ops
   292  	if len(other.LifeTimeOps) > 0 && s.LifeTimeOps == nil {
   293  		s.LifeTimeOps = make(map[string]uint64, len(other.LifeTimeOps))
   294  	}
   295  	for k, v := range other.LifeTimeOps {
   296  		total := s.LifeTimeOps[k] + v
   297  		s.LifeTimeOps[k] = total
   298  	}
   299  	if s.LastMinute.Actions == nil && len(other.LastMinute.Actions) > 0 {
   300  		s.LastMinute.Actions = make(map[string]TimedAction, len(other.LastMinute.Actions))
   301  	}
   302  	for k, v := range other.LastMinute.Actions {
   303  		total := s.LastMinute.Actions[k]
   304  		total.Merge(v)
   305  		s.LastMinute.Actions[k] = total
   306  	}
   307  
   308  	// ILM
   309  	if len(other.LifeTimeILM) > 0 && s.LifeTimeILM == nil {
   310  		s.LifeTimeILM = make(map[string]uint64, len(other.LifeTimeILM))
   311  	}
   312  	for k, v := range other.LifeTimeILM {
   313  		total := s.LifeTimeILM[k] + v
   314  		s.LifeTimeILM[k] = total
   315  	}
   316  	if s.LastMinute.ILM == nil && len(other.LastMinute.ILM) > 0 {
   317  		s.LastMinute.ILM = make(map[string]TimedAction, len(other.LastMinute.ILM))
   318  	}
   319  	for k, v := range other.LastMinute.ILM {
   320  		total := s.LastMinute.ILM[k]
   321  		total.Merge(v)
   322  		s.LastMinute.ILM[k] = total
   323  	}
   324  	s.ActivePaths = append(s.ActivePaths, other.ActivePaths...)
   325  	sort.Strings(s.ActivePaths)
   326  }
   327  
// DiskIOStats contains IO stats of a single drive
//
// All fields are unsigned counters. The field set presumably mirrors the
// per-device block layer statistics exposed by the operating system - TODO
// confirm against the producer of these values.
//
// NOTE(review): the JSON keys "wrte_sectors" and "discard_secotrs" look like
// misspellings. They are part of the encoded format, so confirm with every
// producer and consumer before changing them.
type DiskIOStats struct {
	ReadIOs        uint64 `json:"read_ios"`
	ReadMerges     uint64 `json:"read_merges"`
	ReadSectors    uint64 `json:"read_sectors"`
	ReadTicks      uint64 `json:"read_ticks"`
	WriteIOs       uint64 `json:"write_ios"`
	WriteMerges    uint64 `json:"write_merges"`
	WriteSectors   uint64 `json:"wrte_sectors"`
	WriteTicks     uint64 `json:"write_ticks"`
	CurrentIOs     uint64 `json:"current_ios"`
	TotalTicks     uint64 `json:"total_ticks"`
	ReqTicks       uint64 `json:"req_ticks"`
	DiscardIOs     uint64 `json:"discard_ios"`
	DiscardMerges  uint64 `json:"discard_merges"`
	DiscardSectors uint64 `json:"discard_secotrs"`
	DiscardTicks   uint64 `json:"discard_ticks"`
	FlushIOs       uint64 `json:"flush_ios"`
	FlushTicks     uint64 `json:"flush_ticks"`
}
   348  
// DiskMetric contains metrics for one or more disks.
type DiskMetric struct {
	// Time these metrics were collected
	CollectedAt time.Time `json:"collected"`

	// Number of disks
	NDisks int `json:"n_disks"`

	// Offline disks
	Offline int `json:"offline,omitempty"`

	// Healing disks
	Healing int `json:"healing,omitempty"`

	// Number of accumulated operations by type since server restart.
	// The map is keyed by operation type.
	LifeTimeOps map[string]uint64 `json:"life_time_ops,omitempty"`

	// Last minute statistics.
	LastMinute struct {
		// Operations of the last minute, keyed by operation name.
		Operations map[string]TimedAction `json:"operations,omitempty"`
	} `json:"last_minute"`

	// IO statistics. Being a struct value, the field is always encoded:
	// encoding/json's omitempty has no effect on struct-typed fields.
	IOStats DiskIOStats `json:"iostats,omitempty"`
}
   373  
   374  // Merge other into 's'.
   375  func (d *DiskMetric) Merge(other *DiskMetric) {
   376  	if other == nil {
   377  		return
   378  	}
   379  	if d.CollectedAt.Before(other.CollectedAt) {
   380  		// Use latest timestamp
   381  		d.CollectedAt = other.CollectedAt
   382  	}
   383  	d.NDisks += other.NDisks
   384  	d.Offline += other.Offline
   385  	d.Healing += other.Healing
   386  
   387  	if len(other.LifeTimeOps) > 0 && d.LifeTimeOps == nil {
   388  		d.LifeTimeOps = make(map[string]uint64, len(other.LifeTimeOps))
   389  	}
   390  	for k, v := range other.LifeTimeOps {
   391  		total := d.LifeTimeOps[k] + v
   392  		d.LifeTimeOps[k] = total
   393  	}
   394  
   395  	if d.LastMinute.Operations == nil && len(other.LastMinute.Operations) > 0 {
   396  		d.LastMinute.Operations = make(map[string]TimedAction, len(other.LastMinute.Operations))
   397  	}
   398  	for k, v := range other.LastMinute.Operations {
   399  		total := d.LastMinute.Operations[k]
   400  		total.Merge(v)
   401  		d.LastMinute.Operations[k] = total
   402  	}
   403  }
   404  
// OSMetrics contains metrics for OS operations.
type OSMetrics struct {
	// Time these metrics were collected
	CollectedAt time.Time `json:"collected"`

	// Number of accumulated operations by type since server restart.
	// The map is keyed by operation type.
	LifeTimeOps map[string]uint64 `json:"life_time_ops,omitempty"`

	// Last minute statistics.
	LastMinute struct {
		// Operations of the last minute, keyed by operation name.
		Operations map[string]TimedAction `json:"operations,omitempty"`
	} `json:"last_minute"`
}
   418  
   419  // Merge other into 'o'.
   420  func (o *OSMetrics) Merge(other *OSMetrics) {
   421  	if other == nil {
   422  		return
   423  	}
   424  	if o.CollectedAt.Before(other.CollectedAt) {
   425  		// Use latest timestamp
   426  		o.CollectedAt = other.CollectedAt
   427  	}
   428  
   429  	if len(other.LifeTimeOps) > 0 && o.LifeTimeOps == nil {
   430  		o.LifeTimeOps = make(map[string]uint64, len(other.LifeTimeOps))
   431  	}
   432  	for k, v := range other.LifeTimeOps {
   433  		total := o.LifeTimeOps[k] + v
   434  		o.LifeTimeOps[k] = total
   435  	}
   436  
   437  	if o.LastMinute.Operations == nil && len(other.LastMinute.Operations) > 0 {
   438  		o.LastMinute.Operations = make(map[string]TimedAction, len(other.LastMinute.Operations))
   439  	}
   440  	for k, v := range other.LastMinute.Operations {
   441  		total := o.LastMinute.Operations[k]
   442  		total.Merge(v)
   443  		o.LastMinute.Operations[k] = total
   444  	}
   445  }
   446  
   447  // BatchJobMetrics contains metrics for batch operations
   448  type BatchJobMetrics struct {
   449  	// Time these metrics were collected
   450  	CollectedAt time.Time `json:"collected"`
   451  
   452  	// Jobs by ID.
   453  	Jobs map[string]JobMetric
   454  }
   455  
   456  type JobMetric struct {
   457  	JobID         string    `json:"jobID"`
   458  	JobType       string    `json:"jobType"`
   459  	StartTime     time.Time `json:"startTime"`
   460  	LastUpdate    time.Time `json:"lastUpdate"`
   461  	RetryAttempts int       `json:"retryAttempts"`
   462  
   463  	Complete bool `json:"complete"`
   464  	Failed   bool `json:"failed"`
   465  
   466  	// Specific job type data:
   467  	Replicate *ReplicateInfo   `json:"replicate,omitempty"`
   468  	KeyRotate *KeyRotationInfo `json:"rotation,omitempty"`
   469  }
   470  
   471  type ReplicateInfo struct {
   472  	// Last bucket/object batch replicated
   473  	Bucket string `json:"lastBucket"`
   474  	Object string `json:"lastObject"`
   475  
   476  	// Verbose information
   477  	Objects          int64 `json:"objects"`
   478  	ObjectsFailed    int64 `json:"objectsFailed"`
   479  	BytesTransferred int64 `json:"bytesTransferred"`
   480  	BytesFailed      int64 `json:"bytesFailed"`
   481  }
   482  
   483  type KeyRotationInfo struct {
   484  	// Last bucket/object key rotated
   485  	Bucket string `json:"lastBucket"`
   486  	Object string `json:"lastObject"`
   487  
   488  	// Verbose information
   489  	Objects       int64 `json:"objects"`
   490  	ObjectsFailed int64 `json:"objectsFailed"`
   491  }
   492  
   493  // Merge other into 'o'.
   494  func (o *BatchJobMetrics) Merge(other *BatchJobMetrics) {
   495  	if other == nil || len(other.Jobs) == 0 {
   496  		return
   497  	}
   498  	if o.CollectedAt.Before(other.CollectedAt) {
   499  		// Use latest timestamp
   500  		o.CollectedAt = other.CollectedAt
   501  	}
   502  	if o.Jobs == nil {
   503  		o.Jobs = make(map[string]JobMetric, len(other.Jobs))
   504  	}
   505  	// Job
   506  	for k, v := range other.Jobs {
   507  		o.Jobs[k] = v
   508  	}
   509  }
   510  
   511  // SiteResyncMetrics contains metrics for site resync operation
   512  type SiteResyncMetrics struct {
   513  	// Time these metrics were collected
   514  	CollectedAt time.Time `json:"collected"`
   515  	// Status of resync operation
   516  	ResyncStatus string    `json:"resyncStatus,omitempty"`
   517  	StartTime    time.Time `json:"startTime"`
   518  	LastUpdate   time.Time `json:"lastUpdate"`
   519  	NumBuckets   int64     `json:"numBuckets"`
   520  	ResyncID     string    `json:"resyncID"`
   521  	DeplID       string    `json:"deplID"`
   522  
   523  	// Completed size in bytes
   524  	ReplicatedSize int64 `json:"completedReplicationSize"`
   525  	// Total number of objects replicated
   526  	ReplicatedCount int64 `json:"replicationCount"`
   527  	// Failed size in bytes
   528  	FailedSize int64 `json:"failedReplicationSize"`
   529  	// Total number of failed operations
   530  	FailedCount int64 `json:"failedReplicationCount"`
   531  	// Buckets that could not be synced
   532  	FailedBuckets []string `json:"failedBuckets"`
   533  	// Last bucket/object replicated.
   534  	Bucket string `json:"bucket,omitempty"`
   535  	Object string `json:"object,omitempty"`
   536  }
   537  
   538  func (o SiteResyncMetrics) Complete() bool {
   539  	return strings.ToLower(o.ResyncStatus) == "completed"
   540  }
   541  
   542  // Merge other into 'o'.
   543  func (o *SiteResyncMetrics) Merge(other *SiteResyncMetrics) {
   544  	if other == nil {
   545  		return
   546  	}
   547  	if o.CollectedAt.Before(other.CollectedAt) {
   548  		// Use latest
   549  		*o = *other
   550  	}
   551  }