github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/cmn/cos/cksum.go (about)

     1  // Package cos provides common low-level types and utilities for all aistore projects
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package cos
     6  
import (
	"crypto/md5"
	"crypto/sha256"
	"crypto/sha512"
	"encoding"
	"encoding/hex"
	"errors"
	"fmt"
	"hash"
	"hash/crc32"
	"io"
	"sort"

	"github.com/OneOfOne/xxhash"
	jsoniter "github.com/json-iterator/go"
)
    22  
    23  // NOTE: not supporting SHA-3 family as its current golang.org/x/crypto/sha3 source
    24  //       doesn't implement BinaryMarshaler & BinaryUnmarshaler interfaces
    25  //       (see also https://golang.org/pkg/encoding)
    26  
    27  // checksums
    28  const (
    29  	ChecksumNone   = "none"
    30  	ChecksumXXHash = "xxhash"
    31  	ChecksumMD5    = "md5"
    32  	ChecksumCRC32C = "crc32c"
    33  	ChecksumSHA256 = "sha256" // crypto.SHA512_256 (SHA-2)
    34  	ChecksumSHA512 = "sha512" // crypto.SHA512 (SHA-2)
    35  )
    36  
    37  const (
    38  	badDataCksumPrefix = "BAD DATA CHECKSUM:"
    39  	badMetaCksumPrefix = "BAD META CHECKSUM:"
    40  )
    41  
    42  type (
    43  	noopHash struct{}
    44  
    45  	ErrBadCksum struct {
    46  		prefix  string
    47  		a, b    any
    48  		context string
    49  	}
    50  	Cksum struct {
    51  		ty    string `json:"-"` // Without "json" tag, IterFields function panics
    52  		value string `json:"-"`
    53  	}
    54  	CksumHash struct {
    55  		Cksum
    56  		H   hash.Hash
    57  		sum []byte
    58  	}
    59  	CksumHashSize struct {
    60  		CksumHash
    61  		Size int64
    62  	}
    63  )
    64  
    65  var checksums = StrSet{
    66  	ChecksumNone:   {},
    67  	ChecksumXXHash: {},
    68  	ChecksumMD5:    {},
    69  	ChecksumCRC32C: {},
    70  	ChecksumSHA256: {},
    71  	ChecksumSHA512: {},
    72  }
    73  
    74  // interface guard
    75  var (
    76  	_ hash.Hash                  = (*noopHash)(nil)
    77  	_ encoding.BinaryUnmarshaler = (*noopHash)(nil)
    78  	_ encoding.BinaryUnmarshaler = (*noopHash)(nil)
    79  
    80  	_ io.Writer = (*CksumHashSize)(nil)
    81  )
    82  
    83  var NoneCksum = NewCksum(ChecksumNone, "")
    84  
    85  ///////////////
    86  // CksumHash //
    87  ///////////////
    88  
    89  func NewCksumHash(ty string) (ck *CksumHash) {
    90  	ck = &CksumHash{}
    91  	ck.Init(ty)
    92  	return
    93  }
    94  
    95  func (ck *Cksum) MarshalJSON() ([]byte, error) {
    96  	if ck == nil {
    97  		return nil, nil
    98  	}
    99  	return jsoniter.Marshal(struct {
   100  		Type  string `json:"type"`
   101  		Value string `json:"value"`
   102  	}{Type: ck.ty, Value: ck.value})
   103  }
   104  
   105  func (ck *CksumHash) Init(ty string) {
   106  	Assert(ck.H == nil)
   107  	ck.ty = ty
   108  	switch ty {
   109  	case ChecksumNone, "":
   110  		ck.ty, ck.H = ChecksumNone, newNoopHash()
   111  	case ChecksumXXHash:
   112  		ck.H = xxhash.New64()
   113  	case ChecksumMD5:
   114  		ck.H = md5.New()
   115  	case ChecksumCRC32C:
   116  		ck.H = NewCRC32C()
   117  	case ChecksumSHA256:
   118  		ck.H = sha256.New()
   119  	case ChecksumSHA512:
   120  		ck.H = sha512.New()
   121  	default:
   122  		AssertMsg(false, "unknown checksum type: "+ty)
   123  	}
   124  }
   125  
   126  func (ck *CksumHash) Equal(to *Cksum) bool { return ck.Cksum.Equal(to) }
   127  func (ck *CksumHash) Sum() []byte          { return ck.sum }
   128  
   129  func (ck *CksumHash) Finalize() {
   130  	ck.sum = ck.H.Sum(nil)
   131  	ck.value = hex.EncodeToString(ck.sum)
   132  }
   133  
   134  ///////////////////
   135  // CksumHashSize //
   136  ///////////////////
   137  
   138  func (ck *CksumHashSize) Write(b []byte) (n int, err error) {
   139  	n, err = ck.H.Write(b)
   140  	ck.Size += int64(n)
   141  	return
   142  }
   143  
   144  ///////////
   145  // Cksum //
   146  ///////////
   147  
   148  func (ck *Cksum) IsEmpty() bool { return ck == nil || ck.ty == "" || ck.ty == ChecksumNone }
   149  
   150  func NewCksum(ty, value string) *Cksum {
   151  	if err := ValidateCksumType(ty, true /*empty OK*/); err != nil {
   152  		AssertMsg(false, err.Error())
   153  	}
   154  	if ty == "" {
   155  		Assert(value == "")
   156  	}
   157  	return &Cksum{ty, value}
   158  }
   159  
   160  func (ck *Cksum) Equal(to *Cksum) bool {
   161  	if ck.IsEmpty() || to.IsEmpty() {
   162  		return false
   163  	}
   164  	return ck.ty == to.ty && ck.value == to.value
   165  }
   166  
   167  func (ck *Cksum) Get() (string, string) {
   168  	if ck == nil {
   169  		return ChecksumNone, ""
   170  	}
   171  	return ck.ty, ck.value
   172  }
   173  
   174  func (ck *Cksum) Ty() string { return ck.ty }
   175  
   176  func (ck *Cksum) Type() string {
   177  	if ck == nil {
   178  		return ChecksumNone
   179  	}
   180  	return ck.ty
   181  }
   182  
   183  func (ck *Cksum) Val() string { return ck.value }
   184  
   185  func (ck *Cksum) Value() string {
   186  	if ck == nil {
   187  		return ""
   188  	}
   189  	return ck.value
   190  }
   191  
   192  func (ck *Cksum) Clone() *Cksum {
   193  	return &Cksum{value: ck.Value(), ty: ck.Type()}
   194  }
   195  
   196  func (ck *Cksum) String() string {
   197  	if ck == nil {
   198  		return "checksum <nil>"
   199  	}
   200  	if ck.ty == "" || ck.ty == ChecksumNone {
   201  		return "checksum <none>"
   202  	}
   203  	return ck.ty + "[" + SHead(ck.value) + "]"
   204  }
   205  
   206  //
   207  // helpers
   208  //
   209  
   210  func NewCRC32C() hash.Hash {
   211  	return crc32.New(crc32.MakeTable(crc32.Castagnoli))
   212  }
   213  
   214  func SupportedChecksums() (types []string) {
   215  	types = make([]string, 0, len(checksums))
   216  	for ty := range checksums {
   217  		types = append(types, ty)
   218  	}
   219  	sort.Strings(types)
   220  	for i := range types {
   221  		if types[i] == ChecksumNone {
   222  			copy(types[i:], types[i+1:])
   223  			types[len(types)-1] = ChecksumNone
   224  		}
   225  	}
   226  	return
   227  }
   228  
   229  func ValidateCksumType(ty string, emptyOK ...bool) (err error) {
   230  	if ty == "" && len(emptyOK) > 0 && emptyOK[0] {
   231  		return
   232  	}
   233  	if !checksums.Contains(ty) {
   234  		err = fmt.Errorf("invalid checksum type %q (expecting %v)", ty, SupportedChecksums())
   235  	}
   236  	return
   237  }
   238  
   239  //
   240  // noopHash
   241  //
   242  
   243  func newNoopHash() hash.Hash                     { return &noopHash{} }
   244  func (*noopHash) Write(b []byte) (int, error)    { return len(b), nil }
   245  func (*noopHash) Sum([]byte) []byte              { return nil }
   246  func (*noopHash) Reset()                         {}
   247  func (*noopHash) Size() int                      { return 0 }
   248  func (*noopHash) BlockSize() int                 { return KiB }
   249  func (*noopHash) MarshalBinary() ([]byte, error) { return nil, nil }
   250  func (*noopHash) UnmarshalBinary([]byte) error   { return nil }
   251  
   252  //
   253  // errors
   254  //
   255  
   256  func NewErrDataCksum(a, b *Cksum, context ...string) error {
   257  	ctx := ""
   258  	if len(context) > 0 {
   259  		ctx = context[0]
   260  	}
   261  	return &ErrBadCksum{prefix: badDataCksumPrefix, a: a, b: b, context: ctx}
   262  }
   263  
   264  func NewErrMetaCksum(a, b uint64, context ...string) error {
   265  	ctx := ""
   266  	if len(context) > 0 {
   267  		ctx = context[0]
   268  	}
   269  	return &ErrBadCksum{prefix: badMetaCksumPrefix, a: a, b: b, context: ctx}
   270  }
   271  
   272  func (e *ErrBadCksum) Error() string {
   273  	var context string
   274  	if e.context != "" {
   275  		context = " (context: " + e.context + ")"
   276  	}
   277  	cka, ok1 := e.a.(*Cksum)
   278  	ckb, ok2 := e.b.(*Cksum)
   279  	if ok1 && ok2 {
   280  		if cka != nil && ckb == nil {
   281  			return fmt.Sprintf("%s (%s != %v)%s", e.prefix, cka, ckb, context)
   282  		} else if cka == nil && ckb != nil {
   283  			return fmt.Sprintf("%s (%v != %s)%s", e.prefix, cka, ckb, context)
   284  		} else if cka == nil && ckb == nil {
   285  			return fmt.Sprintf("%s (nil != nil)%s", e.prefix, context)
   286  		}
   287  		t1, v1 := cka.Get()
   288  		t2, v2 := ckb.Get()
   289  		if t1 == t2 {
   290  			return fmt.Sprintf("%s %s(%s != %s)%s", e.prefix, t1, v1, v2, context)
   291  		}
   292  	}
   293  	return fmt.Sprintf("%s (%v != %v)%s", e.prefix, e.a, e.b, context)
   294  }
   295  
   296  func IsErrBadCksum(err error) bool {
   297  	_, ok := err.(*ErrBadCksum)
   298  	return ok
   299  }