github.com/swiftstack/proxyfs@v0.0.0-20201223034610-5434d919416e/bucketstats/impl.go (about)

     1  // The bucketstats Package implements convenient, easy to use, bucketized
     2  // statistics.
     3  
     4  package bucketstats
     5  
     6  import (
     7  	"fmt"
     8  	"math/big"
     9  	"math/bits"
    10  	"reflect"
    11  	"strings"
    12  	"sync"
    13  	"unicode"
    14  )
    15  
    16  var (
    17  	pkgNameToGroupName map[string]map[string]interface{}
    18  	statsNameMapLock   sync.Mutex
    19  )
    20  
    21  // Register a set of statistics, where the statistics are one or more fields in
    22  // the passed structure.
    23  //
    24  func register(pkgName string, statsGroupName string, statsStruct interface{}) {
    25  
    26  	var ok bool
    27  
    28  	if pkgName == "" && statsGroupName == "" {
    29  		panic(fmt.Sprintf("statistics group must have non-empty pkgName or statsGroupName"))
    30  	}
    31  
    32  	// let us reflect upon any statistics fields in statsStruct ...
    33  	//
    34  	// but first verify this is a pointer to a struct
    35  	if reflect.TypeOf(statsStruct).Kind() != reflect.Ptr ||
    36  		reflect.ValueOf(statsStruct).Elem().Type().Kind() != reflect.Struct {
    37  		panic(fmt.Sprintf("statsStruct for statistics group '%s' is (%s), should be (*struct)",
    38  			statsGroupName, reflect.TypeOf(statsStruct)))
    39  	}
    40  
    41  	structAsValue := reflect.ValueOf(statsStruct).Elem()
    42  	structAsType := structAsValue.Type()
    43  
    44  	// find all the statistics fields and init them;
    45  	// assign them a name if they don't have one;
    46  	// verify each name is only used once
    47  	names := make(map[string]struct{})
    48  
    49  	for i := 0; i < structAsType.NumField(); i++ {
    50  		fieldName := structAsType.Field(i).Name
    51  		fieldAsType := structAsType.Field(i).Type
    52  		fieldAsValue := structAsValue.Field(i)
    53  
    54  		// ignore fields that are not a BucketStats type
    55  		var (
    56  			countStat          Total
    57  			averageStat        Average
    58  			bucketLog2Stat     BucketLog2Round
    59  			bucketLogRoot2Stat BucketLogRoot2Round
    60  		)
    61  		if fieldAsType != reflect.TypeOf(countStat) &&
    62  			fieldAsType != reflect.TypeOf(averageStat) &&
    63  			fieldAsType != reflect.TypeOf(bucketLog2Stat) &&
    64  			fieldAsType != reflect.TypeOf(bucketLogRoot2Stat) {
    65  			continue
    66  		}
    67  
    68  		// verify BucketStats fields are setable (exported)
    69  		if !fieldAsValue.CanSet() {
    70  			panic(fmt.Sprintf("statistics group '%s' field %s must be exported to be usable by bucketstats",
    71  				statsGroupName, fieldName))
    72  		}
    73  
    74  		// get the statistic name and insure its initialized;
    75  		// then verify its unique
    76  		statNameValue := fieldAsValue.FieldByName("Name")
    77  		if !statNameValue.IsValid() {
    78  			panic(fmt.Sprintf("statistics Group '%s' field %s does not does not contain a 'Name' field",
    79  				statsGroupName, fieldName))
    80  		}
    81  		if statNameValue.String() == "" {
    82  			statNameValue.SetString(fieldName)
    83  		} else {
    84  			statNameValue.SetString(statNameValue.String())
    85  		}
    86  		_, ok = names[statNameValue.String()]
    87  		if ok {
    88  			panic(fmt.Sprintf("stats '%s' field %s Name '%s' is already in use",
    89  				statsGroupName, fieldName, statNameValue))
    90  		}
    91  		names[statNameValue.String()] = struct{}{}
    92  
    93  		// initialize the statistic (all fields are already zero - unless relaunched in test sequence)
    94  		switch v := (fieldAsValue.Addr().Interface()).(type) {
    95  		case *Total:
    96  		case *Average:
    97  		case *BucketLog2Round:
    98  			if v.NBucket == 0 || v.NBucket > uint(len(v.statBuckets)) {
    99  				v.NBucket = uint(len(v.statBuckets))
   100  			} else if v.NBucket < 10 {
   101  				v.NBucket = 10
   102  			}
   103  		case *BucketLogRoot2Round:
   104  			if v.NBucket == 0 || v.NBucket > uint(len(v.statBuckets)) {
   105  				v.NBucket = uint(len(v.statBuckets))
   106  			} else if v.NBucket < 17 {
   107  				v.NBucket = 17
   108  			}
   109  		default:
   110  			panic(fmt.Sprintf("statistics Group '%s' field %s type '%v' unknown: internal error",
   111  				statsGroupName, fieldName, fieldAsType))
   112  		}
   113  
   114  	}
   115  
   116  	// add statsGroupName to the list of statistics
   117  	statsNameMapLock.Lock()
   118  	defer statsNameMapLock.Unlock()
   119  
   120  	if pkgNameToGroupName == nil {
   121  		pkgNameToGroupName = make(map[string]map[string]interface{})
   122  	}
   123  	if pkgNameToGroupName[pkgName] == nil {
   124  		pkgNameToGroupName[pkgName] = make(map[string]interface{})
   125  	}
   126  
   127  	// check for pre-existence
   128  	if pkgNameToGroupName[pkgName][statsGroupName] != nil {
   129  		panic(fmt.Sprintf("pkgName '%s' with statsGroupName '%s' is already registered",
   130  			pkgName, statsGroupName))
   131  	}
   132  
   133  	pkgNameToGroupName[pkgName][statsGroupName] = statsStruct
   134  
   135  	return
   136  }
   137  
   138  func unRegister(pkgName string, statsGroupName string) {
   139  
   140  	statsNameMapLock.Lock()
   141  	defer statsNameMapLock.Unlock()
   142  
   143  	// remove statsGroupName from the list of statistics (silently ignore it
   144  	// if it doesn't exist)
   145  	if pkgNameToGroupName[pkgName] != nil {
   146  		delete(pkgNameToGroupName[pkgName], statsGroupName)
   147  
   148  		if len(pkgNameToGroupName[pkgName]) == 0 {
   149  			delete(pkgNameToGroupName, pkgName)
   150  		}
   151  	}
   152  
   153  	return
   154  }
   155  
   156  // Return the selected group(s) of statistics as a string.
   157  //
   158  func sprintStats(statFmt StatStringFormat, pkgName string, statsGroupName string) (statValues string) {
   159  
   160  	statsNameMapLock.Lock()
   161  	defer statsNameMapLock.Unlock()
   162  
   163  	var (
   164  		pkgNameMap   map[string]map[string]interface{}
   165  		groupNameMap map[string]interface{}
   166  	)
   167  	if pkgName == "*" {
   168  		pkgNameMap = pkgNameToGroupName
   169  	} else {
   170  		// make a map with a single entry for the pkgName
   171  		pkgNameMap = map[string]map[string]interface{}{pkgName: nil}
   172  	}
   173  
   174  	for pkg := range pkgNameMap {
   175  		if statsGroupName == "*" {
   176  			groupNameMap = pkgNameToGroupName[pkg]
   177  		} else {
   178  			// make a map with a single entry for the statsGroupName
   179  			groupNameMap = map[string]interface{}{statsGroupName: nil}
   180  		}
   181  
   182  		for group := range groupNameMap {
   183  			_, ok := pkgNameToGroupName[pkg][group]
   184  			if !ok {
   185  				panic(fmt.Sprintf(
   186  					"bucketstats.sprintStats(): statistics group '%s.%s' is not registered",
   187  					pkg, group))
   188  			}
   189  			statValues += sprintStatsStruct(statFmt, pkg, group, pkgNameToGroupName[pkg][group])
   190  		}
   191  	}
   192  	return
   193  }
   194  
   195  func sprintStatsStruct(statFmt StatStringFormat, pkgName string, statsGroupName string,
   196  	statsStruct interface{}) (statValues string) {
   197  
   198  	// let us reflect upon any statistic fields in statsStruct ...
   199  	//
   200  	// but first verify this is a pointer to a struct
   201  	if reflect.TypeOf(statsStruct).Kind() != reflect.Ptr ||
   202  		reflect.ValueOf(statsStruct).Elem().Type().Kind() != reflect.Struct {
   203  		panic(fmt.Sprintf("statsStruct for statistics group '%s' is (%s), should be (*struct)",
   204  			statsGroupName, reflect.TypeOf(statsStruct)))
   205  	}
   206  
   207  	structAsValue := reflect.ValueOf(statsStruct).Elem()
   208  	structAsType := structAsValue.Type()
   209  
   210  	// find all the statistics fields and sprint them
   211  	for i := 0; i < structAsType.NumField(); i++ {
   212  		fieldAsType := structAsType.Field(i).Type
   213  		fieldAsValue := structAsValue.Field(i)
   214  
   215  		// ignore fields that are not a BucketStats type
   216  		var (
   217  			countStat          Total
   218  			averageStat        Average
   219  			bucketLog2Stat     BucketLog2Round
   220  			bucketLogRoot2Stat BucketLogRoot2Round
   221  		)
   222  		if fieldAsType != reflect.TypeOf(countStat) &&
   223  			fieldAsType != reflect.TypeOf(averageStat) &&
   224  			fieldAsType != reflect.TypeOf(bucketLog2Stat) &&
   225  			fieldAsType != reflect.TypeOf(bucketLogRoot2Stat) {
   226  			continue
   227  		}
   228  
   229  		switch v := (fieldAsValue.Addr().Interface()).(type) {
   230  		case *Total:
   231  			statValues += v.Sprint(statFmt, pkgName, statsGroupName)
   232  		case *Average:
   233  			statValues += v.Sprint(statFmt, pkgName, statsGroupName)
   234  		case *BucketLog2Round:
   235  			statValues += v.Sprint(statFmt, pkgName, statsGroupName)
   236  		case *BucketLogRoot2Round:
   237  			statValues += v.Sprint(statFmt, pkgName, statsGroupName)
   238  		default:
   239  			panic(fmt.Sprintf("Unknown type in struct: %s", fieldAsType.Name()))
   240  		}
   241  	}
   242  	return
   243  }
   244  
   245  // Construct and return a statistics name (fully qualified field name) in the specified format.
   246  //
   247  func statisticName(statFmt StatStringFormat, pkgName string, statsGroupName string, fieldName string) string {
   248  
   249  	switch statFmt {
   250  
   251  	default:
   252  		panic(fmt.Sprintf("Unknown StatStringFormat '%v'", statFmt))
   253  
   254  	case StatFormatParsable1:
   255  		pkgName = scrubName(pkgName)
   256  		statsGroupName = scrubName(statsGroupName)
   257  		fieldName = scrubName(fieldName)
   258  
   259  		if pkgName == "" && statsGroupName == "" {
   260  			return fieldName
   261  		}
   262  		if pkgName == "" {
   263  			return statsGroupName + "." + fieldName
   264  		}
   265  		if statsGroupName == "" {
   266  			return pkgName + "." + fieldName
   267  		}
   268  		return pkgName + "." + statsGroupName + "." + fieldName
   269  	}
   270  }
   271  
   272  // Return the "name" of the bucket that would hold 'n' as the string "2^x".
   273  //
   274  func bucketNameLog2(value uint64) string {
   275  
   276  	var idx uint
   277  	if value < 256 {
   278  		idx = uint(log2RoundIdxTable[value])
   279  	} else {
   280  		bits := uint(bits.Len64(value))
   281  		baseIdx := uint(log2RoundIdxTable[value>>(bits-8)])
   282  		idx = baseIdx + bits - 8
   283  	}
   284  	return fmt.Sprintf("2^%d", idx-1)
   285  }
   286  
   287  // Return the "name" of the bucket that would hold 'n' as the string "2^x",
   288  // where x can have the suffix ".5" as in "2^7.5".
   289  //
   290  func bucketNameLogRoot2(value uint64) string {
   291  
   292  	var idx uint
   293  	if value < 256 {
   294  		idx = uint(log2RoundIdxTable[value])
   295  	} else {
   296  		bits := uint(bits.Len64(value))
   297  		baseIdx := uint(logRoot2RoundIdxTable[value>>(bits-8)])
   298  		idx = baseIdx + (bits-8)*2
   299  	}
   300  	if idx%2 == 1 {
   301  		return fmt.Sprintf("2^%1.0f", float32(idx-1)/2)
   302  	}
   303  	return fmt.Sprintf("2^%1.1f", float32(idx-1)/2)
   304  }
   305  
   306  // Return a string with the statistic's value in the specified format.
   307  //
   308  func (this *Total) sprint(statFmt StatStringFormat, pkgName string, statsGroupName string) string {
   309  
   310  	statName := statisticName(statFmt, pkgName, statsGroupName, this.Name)
   311  
   312  	switch statFmt {
   313  	case StatFormatParsable1:
   314  		return fmt.Sprintf("%s total:%d\n", statName, this.total)
   315  	}
   316  
   317  	return fmt.Sprintf("statName '%s': Unknown StatStringFormat: '%v'\n", statName, statFmt)
   318  }
   319  
   320  // Return a string with the statistic's value in the specified format.
   321  //
   322  func (this *Average) sprint(statFmt StatStringFormat, pkgName string, statsGroupName string) string {
   323  
   324  	statName := statisticName(statFmt, pkgName, statsGroupName, this.Name)
   325  	var avg uint64
   326  	if this.count > 0 {
   327  		avg = this.total / this.count
   328  	}
   329  
   330  	switch statFmt {
   331  	case StatFormatParsable1:
   332  		return fmt.Sprintf("%s avg:%d count:%d total:%d\n",
   333  			statName, avg, this.count, this.total)
   334  	}
   335  
   336  	return fmt.Sprintf("statName '%s': Unknown StatStringFormat: '%v'\n", statName, statFmt)
   337  }
   338  
   339  // The canonical distribution for a bucketized statistic is an array of BucketInfo.
   340  // Create one based on the information for this bucketstat .
   341  //
   342  func bucketDistMake(nBucket uint, statBuckets []uint32, bucketInfoBase []BucketInfo) []BucketInfo {
   343  
   344  	// copy the base []BucketInfo before modifying it
   345  	bucketInfo := make([]BucketInfo, nBucket, nBucket)
   346  	copy(bucketInfo, bucketInfoBase[0:nBucket])
   347  	for i := uint(0); i < nBucket; i += 1 {
   348  		bucketInfo[i].Count = uint64(statBuckets[i])
   349  	}
   350  
   351  	// if nBucket is less then len(bucketInfo) then update the range and
   352  	// average for the last bucket that's used
   353  	if nBucket < uint(len(bucketInfoBase)) {
   354  		bucketInfo[nBucket-1].RangeHigh = bucketInfo[len(bucketInfo)-1].RangeHigh
   355  
   356  		mean := bucketInfo[nBucket-1].RangeLow / 2
   357  		mean += bucketInfo[nBucket-1].RangeHigh / 2
   358  		bothOdd := bucketInfo[nBucket-1].RangeLow & bucketInfo[nBucket-1].RangeHigh & 0x1
   359  		if bothOdd == 1 {
   360  			mean += 1
   361  		}
   362  		bucketInfo[nBucket-1].MeanVal = mean
   363  	}
   364  	return bucketInfo
   365  }
   366  
   367  // Given the distribution ([]BucketInfo) for a bucketized statistic, calculate:
   368  //
   369  // o the index of the first entry with a non-zero count
   370  // o the index + 1 of the last entry with a non-zero count, or zero if no such
   371  //   bucket exists
   372  // o the count (number things in buckets)
   373  // o sum of counts * count_meanVal, and
   374  // o mean (average)
   375  //
   376  func bucketCalcStat(bucketInfo []BucketInfo) (firstIdx int, maxIdx int, count uint64, sum uint64, mean uint64) {
   377  
   378  	var (
   379  		bigSum     big.Int
   380  		bigMean    big.Int
   381  		bigTmp     big.Int
   382  		bigProduct big.Int
   383  	)
   384  
   385  	// firstIdx is the index of the first bucket with a non-zero count
   386  	// maxIdx is the index + 1 of the last bucket with a non-zero count, or zero
   387  	// bigSum is the running total of count * bucket_meanval
   388  	firstIdx = 0
   389  	maxIdx = 0
   390  	for i := 0; i < len(bucketInfo); i += 1 {
   391  		if bucketInfo[i].Count > 0 {
   392  			firstIdx = i
   393  			break
   394  		}
   395  	}
   396  	for i := firstIdx; i < len(bucketInfo); i += 1 {
   397  		count += bucketInfo[i].Count
   398  
   399  		bigTmp.SetUint64(bucketInfo[i].Count)
   400  		bigProduct.SetUint64(bucketInfo[i].MeanVal)
   401  		bigProduct.Mul(&bigProduct, &bigTmp)
   402  		bigSum.Add(&bigSum, &bigProduct)
   403  
   404  		if bucketInfo[i].Count > 0 {
   405  			maxIdx = i + 1
   406  		}
   407  	}
   408  	if count > 0 {
   409  		bigTmp.SetUint64(count)
   410  		bigMean.Div(&bigSum, &bigTmp)
   411  	}
   412  
   413  	// sum will be set to math.MaxUint64 if bigSum overflows
   414  	mean = bigMean.Uint64()
   415  	sum = bigSum.Uint64()
   416  
   417  	return
   418  }
   419  
   420  // Return a string with the bucketized statistic content in the specified format.
   421  //
   422  func bucketSprint(statFmt StatStringFormat, pkgName string, statsGroupName string, fieldName string,
   423  	bucketInfo []BucketInfo) string {
   424  
   425  	var (
   426  		idx        int
   427  		statName   string
   428  		bucketName string
   429  	)
   430  	firstIdx, maxIdx, count, sum, mean := bucketCalcStat(bucketInfo)
   431  	statName = statisticName(statFmt, pkgName, statsGroupName, fieldName)
   432  
   433  	switch statFmt {
   434  
   435  	case StatFormatParsable1:
   436  		line := fmt.Sprintf("%s avg:%d count:%d total:%d", statName, mean, count, sum)
   437  
   438  		// bucket names are printed as a number upto 3 digits long and
   439  		// as a power of 2 after that
   440  		for idx = firstIdx; idx < maxIdx && bucketInfo[idx].NominalVal < 1024; idx += 1 {
   441  			line += fmt.Sprintf(" %d:%d", bucketInfo[idx].NominalVal, bucketInfo[idx].Count)
   442  		}
   443  		for ; idx < maxIdx; idx += 1 {
   444  			// bucketInfo[3] must exist and its value depends on the base
   445  			if bucketInfo[3].NominalVal == 3 {
   446  				bucketName = bucketNameLogRoot2(bucketInfo[idx].NominalVal)
   447  			} else {
   448  				bucketName = bucketNameLog2(bucketInfo[idx].NominalVal)
   449  			}
   450  			line += fmt.Sprintf(" %s:%d", bucketName, bucketInfo[idx].Count)
   451  		}
   452  		return line + "\n"
   453  	}
   454  
   455  	return fmt.Sprintf("StatisticName '%s': Unknown StatStringFormat: '%v'\n", statName, statFmt)
   456  }
   457  
   458  // Replace illegal characters in names with underbar (`_`)
   459  //
   460  func scrubName(name string) string {
   461  
   462  	// Names should include only pritable characters that are not
   463  	// whitespace.  Also disallow splat ('*') (used for wildcard for
   464  	// statistic group names), sharp ('#') (used for comments in output) and
   465  	// colon (':') (used as a delimiter in "key:value" output).
   466  	replaceChar := func(r rune) rune {
   467  		switch {
   468  		case unicode.IsSpace(r):
   469  			return '_'
   470  		case !unicode.IsPrint(r):
   471  			return '_'
   472  		case r == '*':
   473  			return '_'
   474  		case r == ':':
   475  			return '_'
   476  		case r == '#':
   477  			return '_'
   478  		}
   479  		return r
   480  	}
   481  
   482  	return strings.Map(replaceChar, name)
   483  }