go.dedis.ch/onet/v4@v4.0.0-pre1/simul/monitor/stats.go

package monitor

import (
	"fmt"
	"io"
	"math"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"sync"

	"github.com/montanaflynn/stats"
	"go.dedis.ch/onet/v4/log"
	"golang.org/x/xerrors"
)

// This file contains the structures related to the computation of
// statistics: Value (computes the mean/min/max/...), Stats (a collection of
// Values) and DataFilter, which is used to apply some filtering before any
// statistics are computed.

// Stats holds the different measurements done
type Stats struct {
	// The static fields are created when creating the stats out of a
	// running config.
	static     map[string]string
	staticKeys []string

	// The received measures and their keys, ordered
	values map[string]*Value
	keys   []string

	// The filter used to filter out aberrant data
	filter DataFilter
	sync.Mutex
}

// NewStats returns a new Stats with some fields extracted from the platform
// run config. It enforces the presence of the keys given as defaults in the
// run config.
func NewStats(rc map[string]string, defaults ...string) *Stats {
	s := new(Stats).init()
	s.readRunConfig(rc, defaults...)
	return s
}

func (s *Stats) init() *Stats {
	s.values = make(map[string]*Value)
	s.keys = make([]string, 0)
	s.static = make(map[string]string)
	s.staticKeys = make([]string, 0)
	return s
}

// Update updates the Stats with the given measure
func (s *Stats) Update(m *singleMeasure) {
	s.Lock()
	defer s.Unlock()
	value, ok := s.values[m.Name]
	if !ok {
		value = NewValue(m.Name)
		s.values[m.Name] = value
		s.keys = append(s.keys, m.Name)
		sort.Strings(s.keys)
	}
	value.Store(m.Value)
}

// WriteHeader writes the CSV header to the writer
func (s *Stats) WriteHeader(w io.Writer) {
	s.Lock()
	defer s.Unlock()
	// write the static fields
	var fields []string
	for _, k := range s.staticKeys {
		// verify that we indeed have a value for this key
		if _, ok := s.static[k]; ok {
			fields = append(fields, k)
		}
	}
	// write the values header
	for _, k := range s.keys {
		v := s.values[k]
		fields = append(fields, v.HeaderFields()...)
	}
	fmt.Fprintf(w, "%s\n", strings.Join(fields, ","))
}

// WriteValues writes the aggregated values to the specified writer
func (s *Stats) WriteValues(w io.Writer) {
	// collect the statistics before writing
	s.Collect()
	s.Lock()
	defer s.Unlock()
	// write the static fields
	var values []string
	for _, k := range s.staticKeys {
		if v, ok := s.static[k]; ok {
			values = append(values, v)
		}
	}
	// write the values
	for _, k := range s.keys {
		v := s.values[k]
		values = append(values, v.Values()...)
	}
	fmt.Fprintf(w, "%s\n", strings.Join(values, ","))
}
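// The typical flow is to build the Stats from the run config, feed it with
// single measures and then write the CSV. A minimal sketch, assuming a run
// config with a "hosts" key; the measure name "round" and the values are
// illustrative, and the singleMeasure is built with a struct literal for
// brevity:
//
//	rc := map[string]string{"hosts": "4", "filter_round": "75"}
//	s := NewStats(rc, "hosts")
//	s.Update(&singleMeasure{Name: "round", Value: 1.2})
//	s.Update(&singleMeasure{Name: "round", Value: 1.4})
//	s.WriteHeader(os.Stdout)
//	s.WriteValues(os.Stdout)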
// WriteIndividualStats writes the values to the specified writer, without
// making averages. Each value must either be:
//   - represented once - then it is copied to all rows
//   - have the same number of entries as the other non-once values
func (s *Stats) WriteIndividualStats(w io.Writer) error {
	s.Lock()
	defer s.Unlock()

	// Verify that we have either one or n entries per value, where n >= 1
	// is constant over all values
	n := 1
	for _, k := range s.keys {
		if newN := len(s.values[k].store); newN > 1 {
			if n == 1 {
				n = newN
			} else if n != newN {
				return xerrors.New("found inconsistencies in values")
			}
		}
	}

	// store the static fields
	var static []string
	for _, k := range s.staticKeys {
		if v, ok := s.static[k]; ok {
			static = append(static, v)
		}
	}

	// write one row per entry
	for entry := 0; entry < n; entry++ {
		var values []string
		for _, k := range s.keys {
			v := s.values[k]
			values = append(values, v.SingleValues(entry)...)
		}

		// copy instead of appending to static directly, to avoid writing
		// into static's backing array across iterations
		all := make([]string, 0, len(static)+len(values))
		all = append(all, static...)
		all = append(all, values...)
		_, err := fmt.Fprintf(w, "%s\n", strings.Join(all, ","))
		if err != nil {
			return xerrors.Errorf("formatting: %v", err)
		}
	}
	return nil
}

// AverageStats returns a new Stats averaging all the given stats
func AverageStats(stats []*Stats) *Stats {
	if len(stats) < 1 {
		return new(Stats)
	}
	s := new(Stats).init()
	stats[0].Lock()
	s.filter = stats[0].filter
	s.static = stats[0].static
	s.staticKeys = stats[0].staticKeys
	s.keys = stats[0].keys
	stats[0].Unlock()
	// average each value over all the stats
	for _, k := range s.keys {
		var values []*Value
		for _, stat := range stats {
			stat.Lock()
			value, ok := stat.values[k]
			// unlock before a possible continue so the lock is not leaked
			stat.Unlock()
			if !ok {
				continue
			}
			values = append(values, value)
		}
		// make the average; no need to collect or filter here. Collect()
		// must be called only when we want the final results (when writing,
		// or when calling Value(name)).
		s.values[k] = AverageValue(values...)
	}
	return s
}

// DataFilter is used to process data before making any statistics about them
type DataFilter struct {
	// percentiles maps a measurement name to the percentile used to filter
	// that measurement's values
	percentiles map[string]float64
}
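// For instance, the following sketch drops the outlier of the "round"
// series: with four values, the 75th percentile (nearest rank) is the
// third-smallest value, so everything above it is filtered out (the key
// name and the values are illustrative):
//
//	df := NewDataFilter(map[string]string{"filter_round": "75"})
//	kept := df.Filter("round", []float64{1.0, 1.1, 0.9, 42.0})
//	// kept contains 1.0, 1.1 and 0.9; the outlier 42.0 is dropped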
// NewDataFilter returns a new data filter initialized with the values taken
// out of the run config. Expected keys are of the form:
//
//	filter_measurementname = perc
//
// where perc is the percentile above which the values of that measurement
// are discarded. Measurements without such a key are left unfiltered.
func NewDataFilter(config map[string]string) DataFilter {
	df := DataFilter{
		percentiles: make(map[string]float64),
	}
	reg, err := regexp.Compile(`filter_(\w+)`)
	if err != nil {
		log.Lvl1("DataFilter: Error compiling regexp:", err)
		return df
	}
	// analyse each entry
	for k, v := range config {
		measure := reg.FindString(k)
		if measure == "" {
			continue
		}
		// parse the percentile used to filter this measure
		perc, err := strconv.ParseFloat(v, 64)
		if err != nil {
			log.Lvl1("DataFilter: Cannot parse value for filter measure:", measure)
			continue
		}
		measure = strings.Replace(measure, "filter_", "", -1)
		df.percentiles[measure] = perc
	}
	log.Lvl3("Filtering:", df.percentiles)
	return df
}

// Filter filters out a series of values, dropping the ones above the
// configured percentile for this measure
func (df *DataFilter) Filter(measure string, values []float64) []float64 {
	// do we have a filter for this measure?
	perc, ok := df.percentiles[measure]
	if !ok {
		return values
	}
	// compute the percentile value
	threshold, err := stats.PercentileNearestRank(values, perc)
	if err != nil {
		log.Lvl2("Monitor: Error filtering data(", values, "):", err)
		return values
	}

	// keep only the values at or below the percentile; the store is not
	// sorted, so we cannot simply slice it
	var filtered []float64
	for _, v := range values {
		if v <= threshold {
			filtered = append(filtered, v)
		}
	}
	// check if we found something to filter out
	if len(filtered) == len(values) {
		log.Lvl3("Filtering: nothing to filter for", measure)
		return values
	}
	log.Lvl3("Filtering: filters out", measure, ":", len(values)-len(filtered), "/", len(values))
	return filtered
}

// Collect makes the final computations before stringing or writing.
// It is called automatically by the other methods anyway.
func (s *Stats) Collect() {
	s.Lock()
	defer s.Unlock()
	for _, v := range s.values {
		v.Filter(s.filter)
		v.Collect()
	}
}

// Value returns the Value object corresponding to this name in this Stats,
// or nil if there is none
func (s *Stats) Value(name string) *Value {
	s.Lock()
	defer s.Unlock()
	if val, ok := s.values[name]; ok {
		return val
	}
	return nil
}

// String returns an overview of the stats - not the complete data!
func (s *Stats) String() string {
	s.Collect()
	s.Lock()
	defer s.Unlock()
	var str string
	for _, k := range s.staticKeys {
		str += fmt.Sprintf("%s = %v ", k, s.static[k])
	}
	for _, v := range s.values {
		str += fmt.Sprintf("%v ", v.Values())
	}
	return fmt.Sprintf("{Stats: %s}", str)
}

// readRunConfig reads the run config and fills in the static fields of the
// Stats struct
func (s *Stats) readRunConfig(rc map[string]string, defaults ...string) {
	// first register the default keys
	for _, def := range defaults {
		valStr, ok := rc[def]
		if !ok {
			log.Fatal("Could not find the default value", def, "in the RunConfig")
		}
		// register the static value
		s.static[def] = valStr
		s.staticKeys = append(s.staticKeys, def)
	}
	// then parse the other keys
	var statics []string
	for k, v := range rc {
		// skip the ones we already registered
		var alreadyRegistered bool
		for _, def := range defaults {
			if k == def {
				alreadyRegistered = true
				break
			}
		}
		if alreadyRegistered {
			continue
		}
		s.static[k] = v
		statics = append(statics, k)
	}
	// sort them so the order is always the same
	sort.Strings(statics)
	// append them to the default ones
	s.staticKeys = append(s.staticKeys, statics...)

	// let the filter figure out by itself what it is supposed to do
	s.filter = NewDataFilter(rc)
}
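// With defaults = []string{"hosts"} and a single measurement named "round",
// the CSV produced by WriteHeader followed by WriteValues looks like this
// (the numbers are illustrative, for the series {0.9, 1.2, 1.5}):
//
//	hosts,round_min,round_max,round_avg,round_sum,round_dev
//	4,0.900000,1.500000,1.200000,3.600000,0.300000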
// Value is used to compute the statistics of one measurement.
// It represents the time of an action (setup, shamir round, coll round,
// etc.) and computes a streaming mean + dev.
type Value struct {
	name string
	min  float64
	max  float64
	sum  float64
	n    int
	oldM float64
	newM float64
	oldS float64
	newS float64
	dev  float64

	// store keeps all the values
	store []float64
	sync.Mutex
}

// NewValue returns a new Value object with this name
func NewValue(name string) *Value {
	return &Value{name: name, store: make([]float64, 0)}
}

// Store takes this new value and keeps it for later analysis.
// Since we might want to do percentile sorting, we need to keep all the
// values. For the moment we simply store them, but note that streaming
// percentile algorithms exist in case the number of messages grows too big.
func (t *Value) Store(newTime float64) {
	t.Lock()
	defer t.Unlock()
	t.store = append(t.store, newTime)
}

// Collect collects all float64 kept in the store and computes the basic
// statistics about them, such as min, max, dev and avg.
func (t *Value) Collect() {
	t.Lock()
	defer t.Unlock()
	// It is kept as a streaming average / dev process for the moment (not
	// the most optimized). The streaming dev algorithm is taken from
	// http://www.johndcook.com/blog/standard_deviation/
	// Reset the accumulators first, so that calling Collect several times
	// does not count the stored values more than once.
	t.n = 0
	t.sum = 0
	t.min = 0
	t.max = 0
	t.dev = 0
	for _, newTime := range t.store {
		// t.n == 0 means this is the first value of this pass
		if t.n == 0 || newTime < t.min {
			t.min = newTime
		}
		if t.n == 0 || newTime > t.max {
			t.max = newTime
		}

		t.n++
		if t.n == 1 {
			t.oldM = newTime
			t.newM = newTime
			t.oldS = 0.0
		} else {
			t.newM = t.oldM + (newTime-t.oldM)/float64(t.n)
			t.newS = t.oldS + (newTime-t.oldM)*(newTime-t.newM)
			t.oldM = t.newM
			t.oldS = t.newS
			t.dev = math.Sqrt(t.newS / float64(t.n-1))
		}
		t.sum += newTime
	}
}
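// As a quick worked check of the recurrence above (Welford's method), the
// series {1, 2, 3} gives newM = 1, 1.5, 2 and newS = 0, 0.5, 2 over the
// three iterations, so the reported deviation is sqrt(2/(3-1)) = 1, which
// matches the sample standard deviation computed directly.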
// Filter filters out the stored values according to the given DataFilter
func (t *Value) Filter(filt DataFilter) {
	t.Lock()
	defer t.Unlock()
	t.store = filt.Filter(t.name, t.store)
}

// AverageValue returns a Value merging all the values stored in the given
// Values
func AverageValue(st ...*Value) *Value {
	if len(st) < 1 {
		return new(Value)
	}
	var t Value
	name := st[0].name
	for _, s := range st {
		if s.name != name {
			log.Error("Cannot average Values with different names")
			return new(Value)
		}
		s.Lock()
		t.store = append(t.store, s.store...)
		s.Unlock()
	}
	t.name = name
	return &t
}

// Min returns the minimum of all stored float64
func (t *Value) Min() float64 {
	t.Lock()
	defer t.Unlock()
	return t.min
}

// Max returns the maximum of all stored float64
func (t *Value) Max() float64 {
	t.Lock()
	defer t.Unlock()
	return t.max
}

// Sum returns the sum of all stored float64
func (t *Value) Sum() float64 {
	t.Lock()
	defer t.Unlock()
	return t.sum
}

// NumValue returns the number of values added
func (t *Value) NumValue() int {
	t.Lock()
	defer t.Unlock()
	return t.n
}

// Avg returns the average (mean) of the values
func (t *Value) Avg() float64 {
	t.Lock()
	defer t.Unlock()
	return t.newM
}

// Dev returns the standard deviation of the values
func (t *Value) Dev() float64 {
	t.Lock()
	defer t.Unlock()
	return t.dev
}

// HeaderFields returns the first line of the CSV file
func (t *Value) HeaderFields() []string {
	return []string{
		t.name + "_min",
		t.name + "_max",
		t.name + "_avg",
		t.name + "_sum",
		t.name + "_dev",
	}
}

// Values returns the string representation of the aggregated Value
func (t *Value) Values() []string {
	return []string{
		fmt.Sprintf("%f", t.Min()),
		fmt.Sprintf("%f", t.Max()),
		fmt.Sprintf("%f", t.Avg()),
		fmt.Sprintf("%f", t.Sum()),
		fmt.Sprintf("%f", t.Dev()),
	}
}

// SingleValues returns the string representation of the i-th entry of the
// value. A value represented only once is repeated for every i.
func (t *Value) SingleValues(i int) []string {
	v := fmt.Sprintf("%f", t.store[0])
	if i < len(t.store) {
		v = fmt.Sprintf("%f", t.store[i])
	}
	return []string{v, v, v, v, "NaN"}
}
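// A minimal sketch of how several runs are typically combined (the variable
// and measure names are illustrative):
//
//	perRun := []*Stats{run1, run2}
//	avg := AverageStats(perRun)
//	avg.Collect()
//	if v := avg.Value("round"); v != nil {
//		fmt.Println("mean round time:", v.Avg(), "+/-", v.Dev())
//	}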