github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ec/stats.go (about)

     1  // Package ec provides erasure coding (EC) based data protection for AIStore.
     2  /*
     3   * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package ec
     6  
     7  import (
     8  	"fmt"
     9  	"strings"
    10  	"time"
    11  
    12  	"github.com/NVIDIA/aistore/cmn"
    13  	"github.com/NVIDIA/aistore/cmn/atomic"
    14  )
    15  
    16  // stats holds the internal EC stats in raw format: only counters (running sums and event counts); stats() turns them into averages
    17  type stats struct {
    18  	bck        cmn.Bck      // bucket; copied as-is into Stats.Bck by stats()
    19  	queueLen   atomic.Int64 // sum of the queue lengths passed to updateQueue
    20  	queueCnt   atomic.Int64 // number of updateQueue calls (divisor for queueLen)
    21  	waitTime   atomic.Int64 // sum of the durations passed to updateWaitTime, stored as int64(time.Duration)
    22  	waitCnt    atomic.Int64 // number of updateWaitTime calls (divisor for waitTime)
    23  	encodeReq  atomic.Int64 // number of updateEncode calls (divisor for encodeTime and encodeSize)
    24  	encodeTime atomic.Int64 // sum of the durations passed to updateEncodeTime
    25  	encodeSize atomic.Int64 // sum of the sizes passed to updateEncode
    26  	encodeErr  atomic.Int64 // number of updateEncodeTime calls with failed == true
    27  	decodeReq  atomic.Int64 // number of updateDecode calls (divisor for decodeTime)
    28  	decodeErr  atomic.Int64 // number of updateDecodeTime calls with failed == true
    29  	decodeTime atomic.Int64 // sum of the durations passed to updateDecodeTime
    30  	deleteReq  atomic.Int64 // number of updateDelete calls (divisor for deleteTime)
    31  	deleteTime atomic.Int64 // sum of the durations passed to updateDeleteTime
    32  	deleteErr  atomic.Int64 // number of updateDeleteTime calls with failed == true
    33  	objTime    atomic.Int64 // sum of the durations passed to updateObjTime
    34  	objCnt     atomic.Int64 // number of updateObjTime calls (divisor for objTime)
    35  }
    36  
    37  // Stats are EC-specific stats for client-side apps, calculated from the raw counters.
    38  // All values except the numbers of errors and requests are averages.
    39  type Stats struct {
    40  	// average mpathrunner (not ecrunner) queue length
    41  	QueueLen float64
    42  	// average time between ecrunner receiving an object and mpathrunner starting to process it
    43  	WaitTime time.Duration
    44  	// average EC encoding time (for both EC'ed and replicated objects)
    45  	EncodeTime time.Duration
    46  	// average size of a file put into the encode queue
    47  	EncodeSize int64
    48  	// total number of errors while encoding objects
    49  	EncodeErr int64
    50  	// total number of errors while restoring objects
    51  	DecodeErr int64
    52  	// average time to restore an object (for both EC'ed and replicated objects)
    53  	DecodeTime time.Duration
    54  	// average time to clean up an object's slices (for both EC'ed and replicated objects)
    55  	DeleteTime time.Duration
    56  	// total number of errors while cleaning up object slices
    57  	DeleteErr int64
    58  	// average object processing time: from putting it into the ecrunner queue to
    59  	// completing the request by mpathrunner
    60  	ObjTime time.Duration
    61  	// total number of cleanup requests
    62  	DelReq int64
    63  	// total number of restore requests
    64  	GetReq int64
    65  	// total number of encode requests
    66  	PutReq int64
    67  	// name of the bucket
    68  	Bck cmn.Bck
    69  	// xaction state: working or waiting for commands
    70  	IsIdle bool
    71  }
    72  
    73  func (s *stats) updateQueue(l int) {
    74  	s.queueLen.Add(int64(l))
    75  	s.queueCnt.Inc()
    76  }
    77  
    78  func (s *stats) updateEncode(size int64) { // registers one encode request of the given size
    79  	s.encodeSize.Add(size) // running sum of sizes; stats() divides it by encodeReq
    80  	s.encodeReq.Inc()      // also the divisor for encodeTime in stats()
    81  }
    82  
    83  func (s *stats) updateEncodeTime(d time.Duration, failed bool) {
    84  	s.encodeTime.Add(int64(d))
    85  	if failed {
    86  		s.encodeErr.Inc()
    87  	}
    88  }
    89  
    90  func (s *stats) updateDecode() { // registers one restore (decode) request
    91  	s.decodeReq.Inc() // stats() reports this total as GetReq and divides decodeTime by it
    92  }
    93  
    94  func (s *stats) updateDecodeTime(d time.Duration, failed bool) {
    95  	s.decodeTime.Add(int64(d))
    96  	if failed {
    97  		s.decodeErr.Inc()
    98  	}
    99  }
   100  
   101  func (s *stats) updateDelete() { // registers one cleanup (delete) request
   102  	s.deleteReq.Inc() // stats() reports this total as DelReq and divides deleteTime by it
   103  }
   104  
   105  func (s *stats) updateDeleteTime(d time.Duration, failed bool) {
   106  	s.deleteTime.Add(int64(d))
   107  	if failed {
   108  		s.deleteErr.Inc()
   109  	}
   110  }
   111  
   112  func (s *stats) updateWaitTime(d time.Duration) {
   113  	s.waitTime.Add(int64(d))
   114  	s.waitCnt.Inc()
   115  }
   116  
   117  func (s *stats) updateObjTime(d time.Duration) {
   118  	s.objTime.Add(int64(d))
   119  	s.objCnt.Inc()
   120  }
   121  
   122  func (s *stats) stats() *Stats {
   123  	st := &Stats{Bck: s.bck}
   124  
   125  	val := s.queueLen.Load()
   126  	cnt := s.queueCnt.Load()
   127  	if cnt > 0 {
   128  		st.QueueLen = float64(val) / float64(cnt)
   129  	}
   130  
   131  	val = s.waitTime.Load()
   132  	cnt = s.waitCnt.Load()
   133  	if cnt > 0 {
   134  		st.WaitTime = time.Duration(val / cnt)
   135  	}
   136  
   137  	val = s.encodeTime.Load()
   138  	cnt = s.encodeReq.Load()
   139  	sz := s.encodeSize.Load()
   140  	if cnt > 0 {
   141  		st.EncodeTime = time.Duration(val / cnt)
   142  		st.EncodeSize = sz / cnt
   143  		st.PutReq = cnt
   144  	}
   145  
   146  	val = s.decodeTime.Load()
   147  	cnt = s.decodeReq.Load()
   148  	if cnt > 0 {
   149  		st.DecodeTime = time.Duration(val / cnt)
   150  		st.GetReq = cnt
   151  	}
   152  
   153  	val = s.deleteTime.Load()
   154  	cnt = s.deleteReq.Load()
   155  	if cnt > 0 {
   156  		st.DeleteTime = time.Duration(val / cnt)
   157  		st.DelReq = cnt
   158  	}
   159  
   160  	val = s.objTime.Load()
   161  	cnt = s.objCnt.Load()
   162  	if cnt > 0 {
   163  		st.ObjTime = time.Duration(val / cnt)
   164  	}
   165  
   166  	st.EncodeErr = s.encodeErr.Load()
   167  	st.DecodeErr = s.decodeErr.Load()
   168  	st.DeleteErr = s.deleteErr.Load()
   169  
   170  	return st
   171  }
   172  
   173  func (s *Stats) String() string {
   174  	if s.ObjTime == 0 {
   175  		return ""
   176  	}
   177  
   178  	lines := make([]string, 0, 8)
   179  	lines = append(lines,
   180  		fmt.Sprintf("EC stats for bucket %s", s.Bck),
   181  		fmt.Sprintf("Queue avg len: %.4f, avg wait time: %v", s.QueueLen, s.WaitTime),
   182  		fmt.Sprintf("Avg object processing time: %v", s.ObjTime),
   183  	)
   184  
   185  	if s.EncodeTime != 0 {
   186  		lines = append(lines, fmt.Sprintf("Encode avg time: %v, errors: %d, avg size: %d", s.EncodeTime, s.EncodeErr, s.EncodeSize))
   187  	}
   188  
   189  	if s.DecodeTime != 0 {
   190  		lines = append(lines, fmt.Sprintf("Decode avg time: %v, errors: %d", s.DecodeTime, s.DecodeErr))
   191  	}
   192  
   193  	if s.DeleteTime != 0 {
   194  		lines = append(lines, fmt.Sprintf("Delete avg time: %v, errors: %d", s.DeleteTime, s.DeleteErr))
   195  	}
   196  
   197  	lines = append(lines, fmt.Sprintf("Requests count: encode %d, restore %d, delete %d", s.PutReq, s.GetReq, s.DelReq))
   198  
   199  	return strings.Join(lines, "\n")
   200  }