github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/model/flow/identifier.go (about)

     1  package flow
     2  
     3  import (
     4  	"encoding/binary"
     5  	"encoding/hex"
     6  	"fmt"
     7  	"reflect"
     8  
     9  	"github.com/ipfs/go-cid"
    10  	mh "github.com/multiformats/go-multihash"
    11  
    12  	"github.com/onflow/crypto"
    13  	"github.com/onflow/crypto/hash"
    14  
    15  	"github.com/onflow/flow-go/model/fingerprint"
    16  	"github.com/onflow/flow-go/storage/merkle"
    17  	"github.com/onflow/flow-go/utils/rand"
    18  )
    19  
    20  const IdentifierLen = 32
    21  
    22  // Identifier represents a 32-byte unique identifier for an entity.
    23  type Identifier [IdentifierLen]byte
    24  
    25  // IdentifierFilter is a filter on identifiers.
    26  type IdentifierFilter func(Identifier) bool
    27  
    28  // IdentifierOrder is an order function for identifiers.
    29  //
    30  // It defines a strict weak ordering between identifiers.
    31  // It returns a negative number if the first identifier is "strictly less" than the second,
    32  // a positive number if the second identifier is "strictly less" than the first,
    33  // and zero if the two identifiers are equal.
    34  //
    35  // `IdentifierOrder` can be used to sort identifiers with
    36  // https://pkg.go.dev/golang.org/x/exp/slices#SortFunc.
    37  type IdentifierOrder func(Identifier, Identifier) int
    38  
    39  var (
    40  	// ZeroID is the lowest value in the 32-byte ID space.
    41  	ZeroID = Identifier{}
    42  )
    43  
    44  // HexStringToIdentifier converts a hex string to an identifier. The input
    45  // must be 64 characters long and contain only valid hex characters.
    46  func HexStringToIdentifier(hexString string) (Identifier, error) {
    47  	var identifier Identifier
    48  	i, err := hex.Decode(identifier[:], []byte(hexString))
    49  	if err != nil {
    50  		return identifier, err
    51  	}
    52  	if i != 32 {
    53  		return identifier, fmt.Errorf("malformed input, expected 32 bytes (64 characters), decoded %d", i)
    54  	}
    55  	return identifier, nil
    56  }
    57  
    58  func MustHexStringToIdentifier(hexString string) Identifier {
    59  	id, err := HexStringToIdentifier(hexString)
    60  	if err != nil {
    61  		panic(err)
    62  	}
    63  	return id
    64  }
    65  
    66  // String returns the hex string representation of the identifier.
    67  func (id Identifier) String() string {
    68  	return hex.EncodeToString(id[:])
    69  }
    70  
    71  // Format handles formatting of id for different verbs. This is called when
    72  // formatting an identifier with fmt.
    73  func (id Identifier) Format(state fmt.State, verb rune) {
    74  	switch verb {
    75  	case 'x', 's', 'v':
    76  		_, _ = state.Write([]byte(id.String()))
    77  	default:
    78  		_, _ = state.Write([]byte(fmt.Sprintf("%%!%c(%s=%s)", verb, reflect.TypeOf(id), id)))
    79  	}
    80  }
    81  
    82  // IsSampled is a utility method to sample entities based on their ids
    83  // the range is from [0, 64].
    84  // 0 is 100% (all data will be collected)
    85  // 1 is ~50%
    86  // 2 is ~25%
    87  // 3 is ~12.5%
    88  // ...
    89  // >64 is 0% (no data will be collected)
    90  func (id Identifier) IsSampled(sensitivity uint) bool {
    91  	if sensitivity > 64 {
    92  		return false
    93  	}
    94  	// take the first 8 bytes and check the first few bits based on sensitivity
    95  	// higher sensitivity means more bits has to be zero, means less number of samples
    96  	// sensitivity of zero, means everything is sampled
    97  	return binary.BigEndian.Uint64(id[:8])>>uint64(64-sensitivity) == 0
    98  }
    99  
   100  func (id Identifier) MarshalText() ([]byte, error) {
   101  	return []byte(id.String()), nil
   102  }
   103  
   104  func (id *Identifier) UnmarshalText(text []byte) error {
   105  	var err error
   106  	*id, err = HexStringToIdentifier(string(text))
   107  	return err
   108  }
   109  
   110  func HashToID(hash []byte) Identifier {
   111  	var id Identifier
   112  	copy(id[:], hash)
   113  	return id
   114  }
   115  
   116  // MakeID creates an ID from a hash of encoded data. MakeID uses `model.Fingerprint() []byte` to get the byte
   117  // representation of the entity, which uses RLP to encode the data. If the input defines its own canonical encoding by
   118  // implementing Fingerprinter, it uses that instead. That allows removal of non-unique fields from structs or
   119  // overwriting of the used encoder. We are using Fingerprint instead of the default encoding for two reasons: a) JSON
   120  // (the default encoding) does not specify an order for the elements of arrays and objects, which could lead to
   121  // different hashes depending on the JSON implementation and b) the Fingerprinter interface allows to exclude fields not
   122  // needed in the pre-image of the hash that comprises the Identifier, which could be different from the encoding for
   123  // sending entities in messages or for storing them.
   124  func MakeID(entity interface{}) Identifier {
   125  	// collect fingerprint of the entity
   126  	data := fingerprint.Fingerprint(entity)
   127  	// make ID from fingerprint
   128  	return MakeIDFromFingerPrint(data)
   129  }
   130  
   131  // MakeIDFromFingerPrint is similar to MakeID but skipping fingerprinting step.
   132  func MakeIDFromFingerPrint(fingerPrint []byte) Identifier {
   133  	var id Identifier
   134  	hash.ComputeSHA3_256((*[hash.HashLenSHA3_256]byte)(&id), fingerPrint)
   135  	return id
   136  }
   137  
   138  // PublicKeyToID creates an ID from a public key.
   139  func PublicKeyToID(pk crypto.PublicKey) (Identifier, error) {
   140  	var id Identifier
   141  	pkBytes := pk.Encode()
   142  	hash.ComputeSHA3_256((*[32]byte)(&id), pkBytes)
   143  	return id, nil
   144  }
   145  
   146  // GetIDs gets the IDs for a slice of entities.
   147  func GetIDs[T Entity](entities []T) IdentifierList {
   148  	ids := make([]Identifier, 0, len(entities))
   149  	for _, entity := range entities {
   150  		ids = append(ids, entity.ID())
   151  	}
   152  	return ids
   153  }
   154  
   155  func MerkleRoot(ids ...Identifier) Identifier {
   156  	var root Identifier
   157  	tree, _ := merkle.NewTree(IdentifierLen) // we verify in a unit test that constructor does not error for this paramter
   158  	for i, id := range ids {
   159  		val := make([]byte, 8)
   160  		binary.BigEndian.PutUint64(val, uint64(i))
   161  		_, _ = tree.Put(id[:], val) // Tree copies keys and values internally
   162  		// `Put` only errors for keys whose length does not conform to the pre-configured length. As
   163  		// Identifiers are fixed-sized arrays, errors are impossible here, which we also verify in a unit test.
   164  	}
   165  	hash := tree.Hash()
   166  	copy(root[:], hash)
   167  	return root
   168  }
   169  
   170  func CheckMerkleRoot(root Identifier, ids ...Identifier) bool {
   171  	computed := MerkleRoot(ids...)
   172  	return root == computed
   173  }
   174  
   175  func ConcatSum(ids ...Identifier) Identifier {
   176  	hasher := hash.NewSHA3_256()
   177  	for _, id := range ids {
   178  		_, _ = hasher.Write(id[:])
   179  	}
   180  	hash := hasher.SumHash()
   181  	return HashToID(hash)
   182  }
   183  
   184  func CheckConcatSum(sum Identifier, fps ...Identifier) bool {
   185  	computed := ConcatSum(fps...)
   186  	return sum == computed
   187  }
   188  
   189  // Sample returns non-deterministic random sample of length 'size' of the ids
   190  func Sample(size uint, ids ...Identifier) ([]Identifier, error) {
   191  	n := uint(len(ids))
   192  	dup := make([]Identifier, 0, n)
   193  	dup = append(dup, ids...)
   194  	// if sample size is greater than total size, return all the elements
   195  	if n <= size {
   196  		return dup, nil
   197  	}
   198  	swap := func(i, j uint) {
   199  		dup[i], dup[j] = dup[j], dup[i]
   200  	}
   201  
   202  	err := rand.Samples(n, size, swap)
   203  	if err != nil {
   204  		return nil, fmt.Errorf("generating randoms failed: %w", err)
   205  	}
   206  	return dup[:size], nil
   207  }
   208  
   209  func CidToId(c cid.Cid) (Identifier, error) {
   210  	decoded, err := mh.Decode(c.Hash())
   211  	if err != nil {
   212  		return ZeroID, fmt.Errorf("failed to decode CID: %w", err)
   213  	}
   214  
   215  	if decoded.Code != mh.SHA2_256 {
   216  		return ZeroID, fmt.Errorf("unsupported CID hash function: %v", decoded.Name)
   217  	}
   218  	if decoded.Length != IdentifierLen {
   219  		return ZeroID, fmt.Errorf("invalid CID length: %d", decoded.Length)
   220  	}
   221  
   222  	return HashToID(decoded.Digest), nil
   223  }
   224  
   225  func IdToCid(f Identifier) cid.Cid {
   226  	hash, _ := mh.Encode(f[:], mh.SHA2_256)
   227  	return cid.NewCidV0(hash)
   228  }
   229  
   230  func ByteSliceToId(b []byte) (Identifier, error) {
   231  	var id Identifier
   232  	if len(b) != IdentifierLen {
   233  		return id, fmt.Errorf("illegal length for a flow identifier %x: got: %d, expected: %d", b, len(b), IdentifierLen)
   234  	}
   235  
   236  	copy(id[:], b[:])
   237  
   238  	return id, nil
   239  }
   240  
   241  func ByteSlicesToIds(b [][]byte) (IdentifierList, error) {
   242  	total := len(b)
   243  	ids := make(IdentifierList, total)
   244  
   245  	for i := 0; i < total; i++ {
   246  		id, err := ByteSliceToId(b[i])
   247  		if err != nil {
   248  			return nil, err
   249  		}
   250  
   251  		ids[i] = id
   252  	}
   253  
   254  	return ids, nil
   255  }
   256  
   257  func IdsToBytes(identifiers []Identifier) [][]byte {
   258  	var byteIds [][]byte
   259  	for _, id := range identifiers {
   260  		tempID := id // avoid capturing loop variable
   261  		byteIds = append(byteIds, tempID[:])
   262  	}
   263  
   264  	return byteIds
   265  }