go.dedis.ch/onet/v4@v4.0.0-pre1/simul/monitor/stats.go (about)

     1  package monitor
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"math"
     7  	"regexp"
     8  	"sort"
     9  	"strconv"
    10  	"strings"
    11  	"sync"
    12  
    13  	"github.com/montanaflynn/stats"
    14  	"go.dedis.ch/onet/v4/log"
    15  	"golang.org/x/xerrors"
    16  )
    17  
// This file contains all the structures related to the computation of
// statistics: Value (computes the mean/min/max/...), Measurements (an
// aggregation of Values), Stats (a collection of measurements) and DataFilter,
// which is used to apply some filtering before any statistics are computed.
    22  
    23  // Stats holds the different measurements done
    24  type Stats struct {
    25  	// The static fields are created when creating the stats out of a
    26  	// running config.
    27  	static     map[string]string
    28  	staticKeys []string
    29  
    30  	// The received measures we have and the keys ordered
    31  	values map[string]*Value
    32  	keys   []string
    33  
    34  	// The filter used to filter out abberant data
    35  	filter DataFilter
    36  	sync.Mutex
    37  }
    38  
    39  // NewStats return a NewStats with some fields extracted from the platform run config
    40  // It enforces the default set of measure to have if you pass that as
    41  // defaults.
    42  func NewStats(rc map[string]string, defaults ...string) *Stats {
    43  	s := new(Stats).init()
    44  	s.readRunConfig(rc, defaults...)
    45  	return s
    46  }
    47  
    48  func (s *Stats) init() *Stats {
    49  	s.values = make(map[string]*Value)
    50  	s.keys = make([]string, 0)
    51  	s.static = make(map[string]string)
    52  	s.staticKeys = make([]string, 0)
    53  	return s
    54  }
    55  
    56  // Update will update the Stats with this given measure
    57  func (s *Stats) Update(m *singleMeasure) {
    58  	s.Lock()
    59  	defer s.Unlock()
    60  	var value *Value
    61  	var ok bool
    62  	value, ok = s.values[m.Name]
    63  	if !ok {
    64  		value = NewValue(m.Name)
    65  		s.values[m.Name] = value
    66  		s.keys = append(s.keys, m.Name)
    67  		sort.Strings(s.keys)
    68  	}
    69  	value.Store(m.Value)
    70  }
    71  
    72  // WriteHeader will write the header to the writer
    73  func (s *Stats) WriteHeader(w io.Writer) {
    74  	s.Lock()
    75  	defer s.Unlock()
    76  	// write static  fields
    77  	var fields []string
    78  	for _, k := range s.staticKeys {
    79  		// verify if we wellhave a value for it
    80  		if _, ok := s.static[k]; ok {
    81  			fields = append(fields, k)
    82  		}
    83  	}
    84  	// Write the values header
    85  	for _, k := range s.keys {
    86  		v := s.values[k]
    87  		fields = append(fields, v.HeaderFields()...)
    88  	}
    89  	fmt.Fprintf(w, "%s", strings.Join(fields, ","))
    90  	fmt.Fprintf(w, "\n")
    91  }
    92  
    93  // WriteValues will write the values to the specified writer
    94  func (s *Stats) WriteValues(w io.Writer) {
    95  	// by default
    96  	s.Collect()
    97  	s.Lock()
    98  	defer s.Unlock()
    99  	// write static fields
   100  	var values []string
   101  	for _, k := range s.staticKeys {
   102  		if v, ok := s.static[k]; ok {
   103  			values = append(values, v)
   104  		}
   105  	}
   106  	// write the values
   107  	for _, k := range s.keys {
   108  		v := s.values[k]
   109  		values = append(values, v.Values()...)
   110  	}
   111  	fmt.Fprintf(w, "%s", strings.Join(values, ","))
   112  	fmt.Fprintf(w, "\n")
   113  }
   114  
   115  // WriteIndividualStats will write the values to the specified writer but without
   116  // making averages. Each value should either be:
   117  //   - represented once - then it'll be copied to all runs
   118  //   - have the same frequency as the other non-once values
   119  func (s *Stats) WriteIndividualStats(w io.Writer) error {
   120  	// by default
   121  	s.Lock()
   122  	defer s.Unlock()
   123  
   124  	// Verify we have either one or n values, where n >= 1 but constant
   125  	// over all values
   126  	n := 1
   127  	for _, k := range s.keys {
   128  		if newN := len(s.values[k].store); newN > 1 {
   129  			if n == 1 {
   130  				n = newN
   131  			} else if n != newN {
   132  				return xerrors.New("Found inconsistencies in values")
   133  			}
   134  		}
   135  	}
   136  
   137  	// store static fields
   138  	var static []string
   139  	for _, k := range s.staticKeys {
   140  		if v, ok := s.static[k]; ok {
   141  			static = append(static, v)
   142  		}
   143  	}
   144  
   145  	// add all values
   146  	for entry := 0; entry < n; entry++ {
   147  		var values []string
   148  		// write the values
   149  		for _, k := range s.keys {
   150  			v := s.values[k]
   151  			values = append(values, v.SingleValues(entry)...)
   152  		}
   153  
   154  		all := append(static, values...)
   155  		_, err := fmt.Fprintf(w, "%s", strings.Join(all, ","))
   156  		if err != nil {
   157  			return xerrors.Errorf("formatting: %v", err)
   158  		}
   159  		_, err = fmt.Fprintf(w, "\n")
   160  		if err != nil {
   161  			return xerrors.Errorf("formatting: %v", err)
   162  		}
   163  
   164  	}
   165  	return nil
   166  }
   167  
   168  // AverageStats will make an average of the given stats
   169  func AverageStats(stats []*Stats) *Stats {
   170  	if len(stats) < 1 {
   171  		return new(Stats)
   172  	}
   173  	s := new(Stats).init()
   174  	stats[0].Lock()
   175  	s.filter = stats[0].filter
   176  	s.static = stats[0].static
   177  	s.staticKeys = stats[0].staticKeys
   178  	s.keys = stats[0].keys
   179  	stats[0].Unlock()
   180  	// Average
   181  	for _, k := range s.keys {
   182  		var values []*Value
   183  		for _, stat := range stats {
   184  			stat.Lock()
   185  			value, ok := stat.values[k]
   186  			if !ok {
   187  				continue
   188  			}
   189  			values = append(values, value)
   190  			stat.Unlock()
   191  		}
   192  		// make the average
   193  		avg := AverageValue(values...)
   194  		// dont have to necessary collect or filters here. Collect() must be called only
   195  		// when we want the final results (writing or by calling Value(name)
   196  		s.values[k] = avg
   197  	}
   198  	return s
   199  }
   200  
   201  // DataFilter is used to process data before making any statistics about them
   202  type DataFilter struct {
   203  	// percentiles maps the measurements name to the percentile we need to take
   204  	// to filter thoses measuremements with the percentile
   205  	percentiles map[string]float64
   206  }
   207  
   208  // NewDataFilter returns a new data filter initialized with the rights values
   209  // taken out from the run config. If absent, will take defaults values.
   210  // Keys expected are:
   211  // discard_measurementname = perc => will take the lower and upper percentile =
   212  // perc
   213  // discard_measurementname = lower,upper => will take different percentiles
   214  func NewDataFilter(config map[string]string) DataFilter {
   215  	df := DataFilter{
   216  		percentiles: make(map[string]float64),
   217  	}
   218  	reg, err := regexp.Compile("filter_(\\w+)")
   219  	if err != nil {
   220  		log.Lvl1("DataFilter: Error compiling regexp:", err)
   221  		return df
   222  	}
   223  	// analyse the each entry
   224  	for k, v := range config {
   225  		if measure := reg.FindString(k); measure == "" {
   226  			continue
   227  		} else {
   228  			// this value must be filtered by how many ?
   229  			perc, err := strconv.ParseFloat(v, 64)
   230  			if err != nil {
   231  				log.Lvl1("DataFilter: Cannot parse value for filter measure:", measure)
   232  				continue
   233  			}
   234  			measure = strings.Replace(measure, "filter_", "", -1)
   235  			df.percentiles[measure] = perc
   236  		}
   237  	}
   238  	log.Lvl3("Filtering:", df.percentiles)
   239  	return df
   240  }
   241  
   242  // Filter out a serie of values
   243  func (df *DataFilter) Filter(measure string, values []float64) []float64 {
   244  	// do we have a filter for this measure ?
   245  	if _, ok := df.percentiles[measure]; !ok {
   246  		return values
   247  	}
   248  	// Compute the percentile value
   249  	max, err := stats.PercentileNearestRank(values, df.percentiles[measure])
   250  	if err != nil {
   251  		log.Lvl2("Monitor: Error filtering data(", values, "):", err)
   252  		return values
   253  	}
   254  
   255  	// Find the index from where to filter
   256  	maxIndex := -1
   257  	for i, v := range values {
   258  		if v > max {
   259  			maxIndex = i
   260  		}
   261  	}
   262  	// check if we foud something to filter out
   263  	if maxIndex == -1 {
   264  		log.Lvl3("Filtering: nothing to filter for", measure)
   265  		return values
   266  	}
   267  	// return the values below the percentile
   268  	log.Lvl3("Filtering: filters out", measure, ":", maxIndex, "/", len(values))
   269  	return values[:maxIndex]
   270  }
   271  
   272  // Collect make the final computations before stringing or writing.
   273  // Automatically done in other methods anyway.
   274  func (s *Stats) Collect() {
   275  	s.Lock()
   276  	defer s.Unlock()
   277  	for _, v := range s.values {
   278  		v.Filter(s.filter)
   279  		v.Collect()
   280  	}
   281  }
   282  
   283  // Value returns the value object corresponding to this name in this Stats
   284  func (s *Stats) Value(name string) *Value {
   285  	s.Lock()
   286  	defer s.Unlock()
   287  	if val, ok := s.values[name]; ok {
   288  		return val
   289  	}
   290  	return nil
   291  }
   292  
   293  // Returns an overview of the stats - not complete data returned!
   294  func (s *Stats) String() string {
   295  	s.Collect()
   296  	s.Lock()
   297  	defer s.Unlock()
   298  	var str string
   299  	for _, k := range s.staticKeys {
   300  		str += fmt.Sprintf("%s = %v ", k, s.static[k])
   301  	}
   302  	for _, v := range s.values {
   303  		str += fmt.Sprintf("%v ", v.Values())
   304  	}
   305  	return fmt.Sprintf("{Stats: %s}", str)
   306  }
   307  
   308  // Read a config file and fills up some fields for Stats struct
   309  func (s *Stats) readRunConfig(rc map[string]string, defaults ...string) {
   310  	// First find the defaults keys
   311  	for _, def := range defaults {
   312  		valStr, ok := rc[def]
   313  		if !ok {
   314  			log.Fatal("Could not find the default value", def, "in the RunConfig")
   315  		}
   316  		// registers the static value
   317  		s.static[def] = valStr
   318  		s.staticKeys = append(s.staticKeys, def)
   319  	}
   320  	// Then parse the others keys
   321  	var statics []string
   322  	for k, v := range rc {
   323  		// pass the ones we already registered
   324  		var alreadyRegistered bool
   325  		for _, def := range defaults {
   326  			if k == def {
   327  				alreadyRegistered = true
   328  				break
   329  			}
   330  		}
   331  		if alreadyRegistered {
   332  			continue
   333  		}
   334  		s.static[k] = v
   335  		statics = append(statics, k)
   336  	}
   337  	// sort them so it's always the same order
   338  	sort.Strings(statics)
   339  	// append them to the defaults one
   340  	s.staticKeys = append(s.staticKeys, statics...)
   341  
   342  	// let the filter figure out itself what it is supposed to be doing
   343  	s.filter = NewDataFilter(rc)
   344  }
   345  
   346  // Value is used to compute the statistics
   347  // it reprensent the time to an action (setup, shamir round, coll round etc)
   348  // use it to compute streaming mean + dev
   349  type Value struct {
   350  	name string
   351  	min  float64
   352  	max  float64
   353  	sum  float64
   354  	n    int
   355  	oldM float64
   356  	newM float64
   357  	oldS float64
   358  	newS float64
   359  	dev  float64
   360  
   361  	// Store where are kept the values
   362  	store []float64
   363  	sync.Mutex
   364  }
   365  
   366  // NewValue returns a new value object with this name
   367  func NewValue(name string) *Value {
   368  	return &Value{name: name, store: make([]float64, 0)}
   369  }
   370  
   371  // Store takes this new time and stores it for later analysis
   372  // Since we might want to do percentile sorting, we need to have all the Values
   373  // For the moment, we do a simple store of the Value, but note that some
   374  // streaming percentile algorithm exists in case the number of messages is
   375  // growing to big.
   376  func (t *Value) Store(newTime float64) {
   377  	t.Lock()
   378  	defer t.Unlock()
   379  	t.store = append(t.store, newTime)
   380  }
   381  
   382  // Collect will collect all float64 stored in the store's Value and will compute
   383  // the basic statistics about them such as min, max, dev and avg.
   384  func (t *Value) Collect() {
   385  	t.Lock()
   386  	defer t.Unlock()
   387  	// It is kept as a streaming average / dev processus for the moment (not the most
   388  	// optimized).
   389  	// streaming dev algo taken from http://www.johndcook.com/blog/standard_deviation/
   390  	t.sum = 0
   391  	for _, newTime := range t.store {
   392  		// nothings takes 0 ms to complete, so we know it's the first time
   393  		if t.min > newTime || t.n == 0 {
   394  			t.min = newTime
   395  		}
   396  		if t.max < newTime {
   397  			t.max = newTime
   398  		}
   399  
   400  		t.n++
   401  		if t.n == 1 {
   402  			t.oldM = newTime
   403  			t.newM = newTime
   404  			t.oldS = 0.0
   405  		} else {
   406  			t.newM = t.oldM + (newTime-t.oldM)/float64(t.n)
   407  			t.newS = t.oldS + (newTime-t.oldM)*(newTime-t.newM)
   408  			t.oldM = t.newM
   409  			t.oldS = t.newS
   410  		}
   411  		t.dev = math.Sqrt(t.newS / float64(t.n-1))
   412  		t.sum += newTime
   413  	}
   414  }
   415  
   416  // Filter outs its Values
   417  func (t *Value) Filter(filt DataFilter) {
   418  	t.Lock()
   419  	defer t.Unlock()
   420  	t.store = filt.Filter(t.name, t.store)
   421  }
   422  
   423  // AverageValue will create a Value averaging all Values given
   424  func AverageValue(st ...*Value) *Value {
   425  	if len(st) < 1 {
   426  		return new(Value)
   427  	}
   428  	var t Value
   429  	name := st[0].name
   430  	for _, s := range st {
   431  		if s.name != name {
   432  			log.Error("Averaging not the sames Values ...?")
   433  			return new(Value)
   434  		}
   435  		s.Lock()
   436  		t.store = append(t.store, s.store...)
   437  		s.Unlock()
   438  	}
   439  	t.name = name
   440  	return &t
   441  }
   442  
   443  // Min returns the minimum of all stored float64
   444  func (t *Value) Min() float64 {
   445  	t.Lock()
   446  	defer t.Unlock()
   447  	return t.min
   448  }
   449  
   450  // Max returns the maximum of all stored float64
   451  func (t *Value) Max() float64 {
   452  	t.Lock()
   453  	defer t.Unlock()
   454  	return t.max
   455  }
   456  
   457  // Sum returns the sum of all stored float64
   458  func (t *Value) Sum() float64 {
   459  	t.Lock()
   460  	defer t.Unlock()
   461  	return t.sum
   462  }
   463  
   464  // NumValue returns the number of Value added
   465  func (t *Value) NumValue() int {
   466  	t.Lock()
   467  	defer t.Unlock()
   468  	return t.n
   469  }
   470  
   471  // Avg returns the average (mean) of the Values
   472  func (t *Value) Avg() float64 {
   473  	t.Lock()
   474  	defer t.Unlock()
   475  	return t.newM
   476  }
   477  
   478  // Dev returns the standard deviation of the Values
   479  func (t *Value) Dev() float64 {
   480  	t.Lock()
   481  	defer t.Unlock()
   482  	return t.dev
   483  }
   484  
   485  // HeaderFields returns the first line of the CSV-file
   486  func (t *Value) HeaderFields() []string {
   487  	return []string{t.name + "_min", t.name + "_max", t.name + "_avg", t.name + "_sum", t.name + "_dev"}
   488  }
   489  
   490  // Values returns the string representation of a Value
   491  func (t *Value) Values() []string {
   492  	return []string{fmt.Sprintf("%f", t.Min()), fmt.Sprintf("%f", t.Max()), fmt.Sprintf("%f", t.Avg()), fmt.Sprintf("%f", t.Sum()), fmt.Sprintf("%f", t.Dev())}
   493  }
   494  
   495  // SingleValues returns the string representation of an entry in the value
   496  func (t *Value) SingleValues(i int) []string {
   497  	v := fmt.Sprintf("%f", t.store[0])
   498  	if i < len(t.store) {
   499  		v = fmt.Sprintf("%f", t.store[i])
   500  	}
   501  	return []string{v, v, v, v, "NaN"}
   502  }