github.com/swiftstack/ProxyFS@v0.0.0-20210203235616-4017c267d62f/bucketstats/impl.go (about)

     1  // Copyright (c) 2015-2021, NVIDIA CORPORATION.
     2  // SPDX-License-Identifier: Apache-2.0
     3  
     4  // The bucketstats Package implements convenient, easy to use, bucketized
     5  // statistics.
     6  
     7  package bucketstats
     8  
import (
	"fmt"
	"math/big"
	"math/bits"
	"reflect"
	"sort"
	"strings"
	"sync"
	"unicode"
)
    18  
    19  var (
    20  	pkgNameToGroupName map[string]map[string]interface{}
    21  	statsNameMapLock   sync.Mutex
    22  )
    23  
    24  // Register a set of statistics, where the statistics are one or more fields in
    25  // the passed structure.
    26  //
    27  func register(pkgName string, statsGroupName string, statsStruct interface{}) {
    28  
    29  	var ok bool
    30  
    31  	if pkgName == "" && statsGroupName == "" {
    32  		panic(fmt.Sprintf("statistics group must have non-empty pkgName or statsGroupName"))
    33  	}
    34  
    35  	// let us reflect upon any statistics fields in statsStruct ...
    36  	//
    37  	// but first verify this is a pointer to a struct
    38  	if reflect.TypeOf(statsStruct).Kind() != reflect.Ptr ||
    39  		reflect.ValueOf(statsStruct).Elem().Type().Kind() != reflect.Struct {
    40  		panic(fmt.Sprintf("statsStruct for statistics group '%s' is (%s), should be (*struct)",
    41  			statsGroupName, reflect.TypeOf(statsStruct)))
    42  	}
    43  
    44  	structAsValue := reflect.ValueOf(statsStruct).Elem()
    45  	structAsType := structAsValue.Type()
    46  
    47  	// find all the statistics fields and init them;
    48  	// assign them a name if they don't have one;
    49  	// verify each name is only used once
    50  	names := make(map[string]struct{})
    51  
    52  	for i := 0; i < structAsType.NumField(); i++ {
    53  		fieldName := structAsType.Field(i).Name
    54  		fieldAsType := structAsType.Field(i).Type
    55  		fieldAsValue := structAsValue.Field(i)
    56  
    57  		// ignore fields that are not a BucketStats type
    58  		var (
    59  			countStat          Total
    60  			averageStat        Average
    61  			bucketLog2Stat     BucketLog2Round
    62  			bucketLogRoot2Stat BucketLogRoot2Round
    63  		)
    64  		if fieldAsType != reflect.TypeOf(countStat) &&
    65  			fieldAsType != reflect.TypeOf(averageStat) &&
    66  			fieldAsType != reflect.TypeOf(bucketLog2Stat) &&
    67  			fieldAsType != reflect.TypeOf(bucketLogRoot2Stat) {
    68  			continue
    69  		}
    70  
    71  		// verify BucketStats fields are setable (exported)
    72  		if !fieldAsValue.CanSet() {
    73  			panic(fmt.Sprintf("statistics group '%s' field %s must be exported to be usable by bucketstats",
    74  				statsGroupName, fieldName))
    75  		}
    76  
    77  		// get the statistic name and insure its initialized;
    78  		// then verify its unique
    79  		statNameValue := fieldAsValue.FieldByName("Name")
    80  		if !statNameValue.IsValid() {
    81  			panic(fmt.Sprintf("statistics Group '%s' field %s does not does not contain a 'Name' field",
    82  				statsGroupName, fieldName))
    83  		}
    84  		if statNameValue.String() == "" {
    85  			statNameValue.SetString(fieldName)
    86  		} else {
    87  			statNameValue.SetString(statNameValue.String())
    88  		}
    89  		_, ok = names[statNameValue.String()]
    90  		if ok {
    91  			panic(fmt.Sprintf("stats '%s' field %s Name '%s' is already in use",
    92  				statsGroupName, fieldName, statNameValue))
    93  		}
    94  		names[statNameValue.String()] = struct{}{}
    95  
    96  		// initialize the statistic (all fields are already zero - unless relaunched in test sequence)
    97  		switch v := (fieldAsValue.Addr().Interface()).(type) {
    98  		case *Total:
    99  		case *Average:
   100  		case *BucketLog2Round:
   101  			if v.NBucket == 0 || v.NBucket > uint(len(v.statBuckets)) {
   102  				v.NBucket = uint(len(v.statBuckets))
   103  			} else if v.NBucket < 10 {
   104  				v.NBucket = 10
   105  			}
   106  		case *BucketLogRoot2Round:
   107  			if v.NBucket == 0 || v.NBucket > uint(len(v.statBuckets)) {
   108  				v.NBucket = uint(len(v.statBuckets))
   109  			} else if v.NBucket < 17 {
   110  				v.NBucket = 17
   111  			}
   112  		default:
   113  			panic(fmt.Sprintf("statistics Group '%s' field %s type '%v' unknown: internal error",
   114  				statsGroupName, fieldName, fieldAsType))
   115  		}
   116  
   117  	}
   118  
   119  	// add statsGroupName to the list of statistics
   120  	statsNameMapLock.Lock()
   121  	defer statsNameMapLock.Unlock()
   122  
   123  	if pkgNameToGroupName == nil {
   124  		pkgNameToGroupName = make(map[string]map[string]interface{})
   125  	}
   126  	if pkgNameToGroupName[pkgName] == nil {
   127  		pkgNameToGroupName[pkgName] = make(map[string]interface{})
   128  	}
   129  
   130  	// check for pre-existence
   131  	if pkgNameToGroupName[pkgName][statsGroupName] != nil {
   132  		panic(fmt.Sprintf("pkgName '%s' with statsGroupName '%s' is already registered",
   133  			pkgName, statsGroupName))
   134  	}
   135  
   136  	pkgNameToGroupName[pkgName][statsGroupName] = statsStruct
   137  
   138  	return
   139  }
   140  
   141  func unRegister(pkgName string, statsGroupName string) {
   142  
   143  	statsNameMapLock.Lock()
   144  	defer statsNameMapLock.Unlock()
   145  
   146  	// remove statsGroupName from the list of statistics (silently ignore it
   147  	// if it doesn't exist)
   148  	if pkgNameToGroupName[pkgName] != nil {
   149  		delete(pkgNameToGroupName[pkgName], statsGroupName)
   150  
   151  		if len(pkgNameToGroupName[pkgName]) == 0 {
   152  			delete(pkgNameToGroupName, pkgName)
   153  		}
   154  	}
   155  
   156  	return
   157  }
   158  
   159  // Return the selected group(s) of statistics as a string.
   160  //
   161  func sprintStats(statFmt StatStringFormat, pkgName string, statsGroupName string) (statValues string) {
   162  
   163  	statsNameMapLock.Lock()
   164  	defer statsNameMapLock.Unlock()
   165  
   166  	var (
   167  		pkgNameMap   map[string]map[string]interface{}
   168  		groupNameMap map[string]interface{}
   169  	)
   170  	if pkgName == "*" {
   171  		pkgNameMap = pkgNameToGroupName
   172  	} else {
   173  		// make a map with a single entry for the pkgName
   174  		pkgNameMap = map[string]map[string]interface{}{pkgName: nil}
   175  	}
   176  
   177  	for pkg := range pkgNameMap {
   178  		if statsGroupName == "*" {
   179  			groupNameMap = pkgNameToGroupName[pkg]
   180  		} else {
   181  			// make a map with a single entry for the statsGroupName
   182  			groupNameMap = map[string]interface{}{statsGroupName: nil}
   183  		}
   184  
   185  		for group := range groupNameMap {
   186  			_, ok := pkgNameToGroupName[pkg][group]
   187  			if !ok {
   188  				panic(fmt.Sprintf(
   189  					"bucketstats.sprintStats(): statistics group '%s.%s' is not registered",
   190  					pkg, group))
   191  			}
   192  			statValues += sprintStatsStruct(statFmt, pkg, group, pkgNameToGroupName[pkg][group])
   193  		}
   194  	}
   195  	return
   196  }
   197  
   198  func sprintStatsStruct(statFmt StatStringFormat, pkgName string, statsGroupName string,
   199  	statsStruct interface{}) (statValues string) {
   200  
   201  	// let us reflect upon any statistic fields in statsStruct ...
   202  	//
   203  	// but first verify this is a pointer to a struct
   204  	if reflect.TypeOf(statsStruct).Kind() != reflect.Ptr ||
   205  		reflect.ValueOf(statsStruct).Elem().Type().Kind() != reflect.Struct {
   206  		panic(fmt.Sprintf("statsStruct for statistics group '%s' is (%s), should be (*struct)",
   207  			statsGroupName, reflect.TypeOf(statsStruct)))
   208  	}
   209  
   210  	structAsValue := reflect.ValueOf(statsStruct).Elem()
   211  	structAsType := structAsValue.Type()
   212  
   213  	// find all the statistics fields and sprint them
   214  	for i := 0; i < structAsType.NumField(); i++ {
   215  		fieldAsType := structAsType.Field(i).Type
   216  		fieldAsValue := structAsValue.Field(i)
   217  
   218  		// ignore fields that are not a BucketStats type
   219  		var (
   220  			countStat          Total
   221  			averageStat        Average
   222  			bucketLog2Stat     BucketLog2Round
   223  			bucketLogRoot2Stat BucketLogRoot2Round
   224  		)
   225  		if fieldAsType != reflect.TypeOf(countStat) &&
   226  			fieldAsType != reflect.TypeOf(averageStat) &&
   227  			fieldAsType != reflect.TypeOf(bucketLog2Stat) &&
   228  			fieldAsType != reflect.TypeOf(bucketLogRoot2Stat) {
   229  			continue
   230  		}
   231  
   232  		switch v := (fieldAsValue.Addr().Interface()).(type) {
   233  		case *Total:
   234  			statValues += v.Sprint(statFmt, pkgName, statsGroupName)
   235  		case *Average:
   236  			statValues += v.Sprint(statFmt, pkgName, statsGroupName)
   237  		case *BucketLog2Round:
   238  			statValues += v.Sprint(statFmt, pkgName, statsGroupName)
   239  		case *BucketLogRoot2Round:
   240  			statValues += v.Sprint(statFmt, pkgName, statsGroupName)
   241  		default:
   242  			panic(fmt.Sprintf("Unknown type in struct: %s", fieldAsType.Name()))
   243  		}
   244  	}
   245  	return
   246  }
   247  
   248  // Construct and return a statistics name (fully qualified field name) in the specified format.
   249  //
   250  func statisticName(statFmt StatStringFormat, pkgName string, statsGroupName string, fieldName string) string {
   251  
   252  	switch statFmt {
   253  
   254  	default:
   255  		panic(fmt.Sprintf("Unknown StatStringFormat '%v'", statFmt))
   256  
   257  	case StatFormatParsable1:
   258  		pkgName = scrubName(pkgName)
   259  		statsGroupName = scrubName(statsGroupName)
   260  		fieldName = scrubName(fieldName)
   261  
   262  		if pkgName == "" && statsGroupName == "" {
   263  			return fieldName
   264  		}
   265  		if pkgName == "" {
   266  			return statsGroupName + "." + fieldName
   267  		}
   268  		if statsGroupName == "" {
   269  			return pkgName + "." + fieldName
   270  		}
   271  		return pkgName + "." + statsGroupName + "." + fieldName
   272  	}
   273  }
   274  
   275  // Return the "name" of the bucket that would hold 'n' as the string "2^x".
   276  //
   277  func bucketNameLog2(value uint64) string {
   278  
   279  	var idx uint
   280  	if value < 256 {
   281  		idx = uint(log2RoundIdxTable[value])
   282  	} else {
   283  		bits := uint(bits.Len64(value))
   284  		baseIdx := uint(log2RoundIdxTable[value>>(bits-8)])
   285  		idx = baseIdx + bits - 8
   286  	}
   287  	return fmt.Sprintf("2^%d", idx-1)
   288  }
   289  
   290  // Return the "name" of the bucket that would hold 'n' as the string "2^x",
   291  // where x can have the suffix ".5" as in "2^7.5".
   292  //
   293  func bucketNameLogRoot2(value uint64) string {
   294  
   295  	var idx uint
   296  	if value < 256 {
   297  		idx = uint(log2RoundIdxTable[value])
   298  	} else {
   299  		bits := uint(bits.Len64(value))
   300  		baseIdx := uint(logRoot2RoundIdxTable[value>>(bits-8)])
   301  		idx = baseIdx + (bits-8)*2
   302  	}
   303  	if idx%2 == 1 {
   304  		return fmt.Sprintf("2^%1.0f", float32(idx-1)/2)
   305  	}
   306  	return fmt.Sprintf("2^%1.1f", float32(idx-1)/2)
   307  }
   308  
   309  // Return a string with the statistic's value in the specified format.
   310  //
   311  func (this *Total) sprint(statFmt StatStringFormat, pkgName string, statsGroupName string) string {
   312  
   313  	statName := statisticName(statFmt, pkgName, statsGroupName, this.Name)
   314  
   315  	switch statFmt {
   316  	case StatFormatParsable1:
   317  		return fmt.Sprintf("%s total:%d\n", statName, this.total)
   318  	}
   319  
   320  	return fmt.Sprintf("statName '%s': Unknown StatStringFormat: '%v'\n", statName, statFmt)
   321  }
   322  
   323  // Return a string with the statistic's value in the specified format.
   324  //
   325  func (this *Average) sprint(statFmt StatStringFormat, pkgName string, statsGroupName string) string {
   326  
   327  	statName := statisticName(statFmt, pkgName, statsGroupName, this.Name)
   328  	var avg uint64
   329  	if this.count > 0 {
   330  		avg = this.total / this.count
   331  	}
   332  
   333  	switch statFmt {
   334  	case StatFormatParsable1:
   335  		return fmt.Sprintf("%s avg:%d count:%d total:%d\n",
   336  			statName, avg, this.count, this.total)
   337  	}
   338  
   339  	return fmt.Sprintf("statName '%s': Unknown StatStringFormat: '%v'\n", statName, statFmt)
   340  }
   341  
   342  // The canonical distribution for a bucketized statistic is an array of BucketInfo.
   343  // Create one based on the information for this bucketstat .
   344  //
   345  func bucketDistMake(nBucket uint, statBuckets []uint32, bucketInfoBase []BucketInfo) []BucketInfo {
   346  
   347  	// copy the base []BucketInfo before modifying it
   348  	bucketInfo := make([]BucketInfo, nBucket, nBucket)
   349  	copy(bucketInfo, bucketInfoBase[0:nBucket])
   350  	for i := uint(0); i < nBucket; i += 1 {
   351  		bucketInfo[i].Count = uint64(statBuckets[i])
   352  	}
   353  
   354  	// if nBucket is less then len(bucketInfo) then update the range and
   355  	// average for the last bucket that's used
   356  	if nBucket < uint(len(bucketInfoBase)) {
   357  		bucketInfo[nBucket-1].RangeHigh = bucketInfo[len(bucketInfo)-1].RangeHigh
   358  
   359  		mean := bucketInfo[nBucket-1].RangeLow / 2
   360  		mean += bucketInfo[nBucket-1].RangeHigh / 2
   361  		bothOdd := bucketInfo[nBucket-1].RangeLow & bucketInfo[nBucket-1].RangeHigh & 0x1
   362  		if bothOdd == 1 {
   363  			mean += 1
   364  		}
   365  		bucketInfo[nBucket-1].MeanVal = mean
   366  	}
   367  	return bucketInfo
   368  }
   369  
   370  // Given the distribution ([]BucketInfo) for a bucketized statistic, calculate:
   371  //
   372  // o the index of the first entry with a non-zero count
   373  // o the index + 1 of the last entry with a non-zero count, or zero if no such
   374  //   bucket exists
   375  // o the count (number things in buckets)
   376  // o sum of counts * count_meanVal, and
   377  // o mean (average)
   378  //
   379  func bucketCalcStat(bucketInfo []BucketInfo) (firstIdx int, maxIdx int, count uint64, sum uint64, mean uint64) {
   380  
   381  	var (
   382  		bigSum     big.Int
   383  		bigMean    big.Int
   384  		bigTmp     big.Int
   385  		bigProduct big.Int
   386  	)
   387  
   388  	// firstIdx is the index of the first bucket with a non-zero count
   389  	// maxIdx is the index + 1 of the last bucket with a non-zero count, or zero
   390  	// bigSum is the running total of count * bucket_meanval
   391  	firstIdx = 0
   392  	maxIdx = 0
   393  	for i := 0; i < len(bucketInfo); i += 1 {
   394  		if bucketInfo[i].Count > 0 {
   395  			firstIdx = i
   396  			break
   397  		}
   398  	}
   399  	for i := firstIdx; i < len(bucketInfo); i += 1 {
   400  		count += bucketInfo[i].Count
   401  
   402  		bigTmp.SetUint64(bucketInfo[i].Count)
   403  		bigProduct.SetUint64(bucketInfo[i].MeanVal)
   404  		bigProduct.Mul(&bigProduct, &bigTmp)
   405  		bigSum.Add(&bigSum, &bigProduct)
   406  
   407  		if bucketInfo[i].Count > 0 {
   408  			maxIdx = i + 1
   409  		}
   410  	}
   411  	if count > 0 {
   412  		bigTmp.SetUint64(count)
   413  		bigMean.Div(&bigSum, &bigTmp)
   414  	}
   415  
   416  	// sum will be set to math.MaxUint64 if bigSum overflows
   417  	mean = bigMean.Uint64()
   418  	sum = bigSum.Uint64()
   419  
   420  	return
   421  }
   422  
   423  // Return a string with the bucketized statistic content in the specified format.
   424  //
   425  func bucketSprint(statFmt StatStringFormat, pkgName string, statsGroupName string, fieldName string,
   426  	bucketInfo []BucketInfo) string {
   427  
   428  	var (
   429  		idx        int
   430  		statName   string
   431  		bucketName string
   432  	)
   433  	firstIdx, maxIdx, count, sum, mean := bucketCalcStat(bucketInfo)
   434  	statName = statisticName(statFmt, pkgName, statsGroupName, fieldName)
   435  
   436  	switch statFmt {
   437  
   438  	case StatFormatParsable1:
   439  		line := fmt.Sprintf("%s avg:%d count:%d total:%d", statName, mean, count, sum)
   440  
   441  		// bucket names are printed as a number upto 3 digits long and
   442  		// as a power of 2 after that
   443  		for idx = firstIdx; idx < maxIdx && bucketInfo[idx].NominalVal < 1024; idx += 1 {
   444  			line += fmt.Sprintf(" %d:%d", bucketInfo[idx].NominalVal, bucketInfo[idx].Count)
   445  		}
   446  		for ; idx < maxIdx; idx += 1 {
   447  			// bucketInfo[3] must exist and its value depends on the base
   448  			if bucketInfo[3].NominalVal == 3 {
   449  				bucketName = bucketNameLogRoot2(bucketInfo[idx].NominalVal)
   450  			} else {
   451  				bucketName = bucketNameLog2(bucketInfo[idx].NominalVal)
   452  			}
   453  			line += fmt.Sprintf(" %s:%d", bucketName, bucketInfo[idx].Count)
   454  		}
   455  		return line + "\n"
   456  	}
   457  
   458  	return fmt.Sprintf("StatisticName '%s': Unknown StatStringFormat: '%v'\n", statName, statFmt)
   459  }
   460  
   461  // Replace illegal characters in names with underbar (`_`)
   462  //
   463  func scrubName(name string) string {
   464  
   465  	// Names should include only pritable characters that are not
   466  	// whitespace.  Also disallow splat ('*') (used for wildcard for
   467  	// statistic group names), sharp ('#') (used for comments in output) and
   468  	// colon (':') (used as a delimiter in "key:value" output).
   469  	replaceChar := func(r rune) rune {
   470  		switch {
   471  		case unicode.IsSpace(r):
   472  			return '_'
   473  		case !unicode.IsPrint(r):
   474  			return '_'
   475  		case r == '*':
   476  			return '_'
   477  		case r == ':':
   478  			return '_'
   479  		case r == '#':
   480  			return '_'
   481  		}
   482  		return r
   483  	}
   484  
   485  	return strings.Map(replaceChar, name)
   486  }