github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ext/dsort/shard/key.go (about) 1 // Package shard provides Extract(shard), Create(shard), and associated methods 2 // across all suppported archival formats (see cmn/archive/mime.go) 3 /* 4 * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 5 */ 6 package shard 7 8 import ( 9 "bytes" 10 "crypto/md5" 11 "encoding/hex" 12 "fmt" 13 "hash" 14 "io" 15 "strconv" 16 17 "github.com/NVIDIA/aistore/cmn/cos" 18 ) 19 20 const ( 21 ContentKeyInt = "int" 22 ContentKeyFloat = "float" 23 ContentKeyString = "string" 24 ) 25 26 type ( 27 SingleKeyExtractor struct { 28 name string 29 buf *bytes.Buffer 30 } 31 32 KeyExtractor interface { 33 PrepareExtractor(name string, r cos.ReadSizer, ext string) (cos.ReadSizer, *SingleKeyExtractor, bool) 34 35 // ExtractKey extracts key from either name or reader (file/sgl) 36 ExtractKey(ske *SingleKeyExtractor) (any, error) 37 } 38 39 md5KeyExtractor struct { 40 h hash.Hash 41 } 42 43 nameKeyExtractor struct{} 44 contentKeyExtractor struct { 45 ty string // one of contentKeyTypes: {"int", "string", ... } - see above 46 ext string // file with this extension provides sorting key (of the type `ty`) 47 } 48 49 ErrSortingKeyType struct { 50 ty string 51 } 52 ) 53 54 ///////////////////// 55 // md5KeyExtractor // 56 ///////////////////// 57 58 func NewMD5KeyExtractor() (KeyExtractor, error) { 59 return &md5KeyExtractor{h: md5.New()}, nil 60 } 61 62 func (ke *md5KeyExtractor) ExtractKey(ske *SingleKeyExtractor) (any, error) { 63 s := hex.EncodeToString(ke.h.Sum([]byte(ske.name))) 64 ke.h.Reset() 65 return s, nil 66 } 67 68 func (*md5KeyExtractor) PrepareExtractor(name string, r cos.ReadSizer, _ string) (cos.ReadSizer, *SingleKeyExtractor, bool) { 69 return r, &SingleKeyExtractor{name: name}, false 70 } 71 72 ////////////////////// 73 // nameKeyExtractor // 74 ////////////////////// 75 76 func NewNameKeyExtractor() (KeyExtractor, error) { 77 return &nameKeyExtractor{}, nil 78 } 79 80 func (*nameKeyExtractor) PrepareExtractor(name string, r cos.ReadSizer, _ string) (cos.ReadSizer, *SingleKeyExtractor, bool) { 81 return r, &SingleKeyExtractor{name: name}, false 82 } 83 84 func (*nameKeyExtractor) ExtractKey(ske *SingleKeyExtractor) (any, error) { 85 return ske.name, nil 86 } 87 88 ///////////////////////// 89 // contentKeyExtractor // 90 ///////////////////////// 91 92 func NewContentKeyExtractor(ty, ext string) (KeyExtractor, error) { 93 if err := ValidateContentKeyTy(ty); err != nil { 94 return nil, err 95 } 96 return &contentKeyExtractor{ty: ty, ext: ext}, nil 97 } 98 99 func (ke *contentKeyExtractor) PrepareExtractor(name string, r cos.ReadSizer, ext string) (cos.ReadSizer, *SingleKeyExtractor, bool) { 100 if ke.ext != ext { 101 return r, nil, false 102 } 103 buf := &bytes.Buffer{} 104 tee := cos.NewSizedReader(io.TeeReader(r, buf), r.Size()) 105 return tee, &SingleKeyExtractor{name: name, buf: buf}, true 106 } 107 108 func (ke *contentKeyExtractor) ExtractKey(ske *SingleKeyExtractor) (any, error) { 109 if ske == nil { 110 return nil, nil 111 } 112 b, err := io.ReadAll(ske.buf) 113 ske.buf = nil 114 if err != nil { 115 return nil, err 116 } 117 key := string(b) 118 switch ke.ty { 119 case ContentKeyInt: 120 return strconv.ParseInt(key, 10, 64) 121 case ContentKeyFloat: 122 return strconv.ParseFloat(key, 64) 123 case ContentKeyString: 124 return key, nil 125 default: 126 return nil, &ErrSortingKeyType{ke.ty} 127 } 128 } 129 130 func ValidateContentKeyTy(ty string) error { 131 switch ty { 132 case ContentKeyInt, ContentKeyFloat, ContentKeyString: 133 return nil 134 default: 135 return &ErrSortingKeyType{ty} 136 } 137 } 138 139 func (e *ErrSortingKeyType) Error() string { 140 return fmt.Sprintf("invalid content sorting key %q, expecting one of: 'int', 'float', 'string'", e.ty) 141 }