github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/aggregator/sharding/hash.go (about)

     1  // Copyright (c) 2017 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package sharding
    22  
    23  import (
    24  	"fmt"
    25  	"strings"
    26  
    27  	"github.com/m3db/m3/src/metrics/metric/id"
    28  
    29  	murmur3 "github.com/m3db/stackmurmur3/v2"
    30  )
    31  
    32  const (
    33  	initialChunkedIDSize = 512
    34  )
    35  
// ShardFn maps an id to a shard: it takes the raw id bytes together with the
// total number of shards and returns the shard for that id.
type ShardFn func(id []byte, numShards uint32) uint32
    38  
// AggregatedShardFn maps a chunked id to a shard: it takes the chunked id
// together with the total number of shards and returns the shard for that id.
type AggregatedShardFn func(chunkedID id.ChunkedID, numShards int) uint32
    41  
// HashType is the hashing type. It is a string-backed name for a hashing
// scheme: it can be decoded from YAML (see UnmarshalYAML) and converted into
// the matching shard functions (see ShardFn and AggregatedShardFn).
type HashType string
    44  
    45  // List of supported hashing types.
    46  const (
    47  	// Murmur32Hash represents the murmur3 hash.
    48  	Murmur32Hash HashType = "murmur32"
    49  
    50  	// zeroHash always returns 0 as the hash. It is used when sharding is disabled.
    51  	zeroHash HashType = "zero"
    52  
    53  	DefaultHash = Murmur32Hash
    54  )
    55  
    56  var (
    57  	validHashTypes = []HashType{
    58  		Murmur32Hash,
    59  	}
    60  )
    61  
    62  // UnmarshalYAML unmarshals YAML object into a hash type.
    63  func (t *HashType) UnmarshalYAML(unmarshal func(interface{}) error) error {
    64  	var str string
    65  	if err := unmarshal(&str); err != nil {
    66  		return err
    67  	}
    68  	if str == "" {
    69  		*t = DefaultHash
    70  		return nil
    71  	}
    72  	validTypes := make([]string, 0, len(validHashTypes))
    73  	for _, valid := range validHashTypes {
    74  		if str == string(valid) {
    75  			*t = valid
    76  			return nil
    77  		}
    78  		validTypes = append(validTypes, string(valid))
    79  	}
    80  	return fmt.Errorf("invalid hash type '%s' valid types are: %s",
    81  		str, strings.Join(validTypes, ", "))
    82  }
    83  
    84  // ShardFn returns the sharding function.
    85  func (t HashType) ShardFn() (ShardFn, error) {
    86  	switch t {
    87  	case Murmur32Hash:
    88  		return func(id []byte, numShards uint32) uint32 {
    89  			return murmur3.Sum32(id) % numShards
    90  		}, nil
    91  	default:
    92  		return nil, fmt.Errorf("unrecognized hashing type %v", t)
    93  	}
    94  }
    95  
    96  // MustShardFn returns the sharding function, or panics if an error is encountered.
    97  func (t HashType) MustShardFn() ShardFn {
    98  	fn, err := t.ShardFn()
    99  	if err != nil {
   100  		panic(fmt.Errorf("error creating shard fn: %v", err))
   101  	}
   102  	return fn
   103  }
   104  
   105  // AggregatedShardFn returns the sharding function for computing aggregated shards.
   106  func (t HashType) AggregatedShardFn() (AggregatedShardFn, error) {
   107  	switch t {
   108  	case Murmur32Hash:
   109  		// NB(xichen): This function only allocates when the id of the aggregated metric
   110  		// is more than initialChunkedIDSize in size and requires zero allocation otherwise.
   111  		// If this turns out to be still too CPU intensive due to byte copies, can rewrite
   112  		// it to compute murmur3 hashes with zero byte copies.
   113  		return func(chunkedID id.ChunkedID, numShards int) uint32 {
   114  			var b [initialChunkedIDSize]byte
   115  			buf := b[:0]
   116  			buf = append(buf, chunkedID.Prefix...)
   117  			buf = append(buf, chunkedID.Data...)
   118  			buf = append(buf, chunkedID.Suffix...)
   119  			return murmur3.Sum32(buf) % uint32(numShards)
   120  		}, nil
   121  	case zeroHash:
   122  		return func(chunkedID id.ChunkedID, numShards int) uint32 {
   123  			return 0
   124  		}, nil
   125  	default:
   126  		return nil, fmt.Errorf("unrecognized hashing type %v", t)
   127  	}
   128  }