github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ext/dsort/shard/key.go (about)

     1  // Package shard provides Extract(shard), Create(shard), and associated methods
     2  // across all supported archival formats (see cmn/archive/mime.go)
     3  /*
     4   * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
     5   */
     6  package shard
     7  
     8  import (
     9  	"bytes"
    10  	"crypto/md5"
    11  	"encoding/hex"
    12  	"fmt"
    13  	"hash"
    14  	"io"
    15  	"strconv"
    16  
    17  	"github.com/NVIDIA/aistore/cmn/cos"
    18  )
    19  
// Supported content key types. The type selects how the content of the
// key-providing file is converted into a sorting key by
// contentKeyExtractor.ExtractKey; any other value is rejected by
// ValidateContentKeyTy.
const (
	// ContentKeyInt: content is parsed as a base-10, 64-bit signed integer.
	ContentKeyInt    = "int"
	// ContentKeyFloat: content is parsed as a 64-bit floating-point number.
	ContentKeyFloat  = "float"
	// ContentKeyString: content is used as the key as is.
	ContentKeyString = "string"
)
    25  
type (
	// SingleKeyExtractor holds the per-record state produced by
	// KeyExtractor.PrepareExtractor and consumed by KeyExtractor.ExtractKey.
	SingleKeyExtractor struct {
		// record name, as passed to PrepareExtractor
		name string
		// captured record content; populated only by contentKeyExtractor
		// (the name- and MD5-based extractors leave it nil)
		buf  *bytes.Buffer
	}

	// KeyExtractor produces a sorting key for a record in two steps:
	// PrepareExtractor, followed by ExtractKey on the state it returned.
	KeyExtractor interface {
		// PrepareExtractor returns the reader to use for the record, the
		// per-record extraction state (may be nil), and a flag.
		// Among the implementations in this file the flag is true only when
		// the returned reader wraps `r` to capture its content
		// (see contentKeyExtractor); otherwise `r` is returned unchanged.
		PrepareExtractor(name string, r cos.ReadSizer, ext string) (cos.ReadSizer, *SingleKeyExtractor, bool)

		// ExtractKey extracts key from either name or reader (file/sgl)
		ExtractKey(ske *SingleKeyExtractor) (any, error)
	}

	// md5KeyExtractor derives the key from the record name using an MD5 hasher.
	md5KeyExtractor struct {
		// hasher created once by NewMD5KeyExtractor and reused (and reset)
		// by every ExtractKey call
		h hash.Hash
	}

	// nameKeyExtractor uses the record name itself as the key.
	nameKeyExtractor    struct{}
	// contentKeyExtractor derives the key from the content of the record's
	// file that has the configured extension.
	contentKeyExtractor struct {
		ty  string // one of contentKeyTypes: {"int", "string", ... } - see above
		ext string // file with this extension provides sorting key (of the type `ty`)
	}

	// ErrSortingKeyType is the error returned for an unsupported content key type.
	ErrSortingKeyType struct {
		// the rejected type value
		ty string
	}
)
    53  
    54  /////////////////////
    55  // md5KeyExtractor //
    56  /////////////////////
    57  
    58  func NewMD5KeyExtractor() (KeyExtractor, error) {
    59  	return &md5KeyExtractor{h: md5.New()}, nil
    60  }
    61  
    62  func (ke *md5KeyExtractor) ExtractKey(ske *SingleKeyExtractor) (any, error) {
    63  	s := hex.EncodeToString(ke.h.Sum([]byte(ske.name)))
    64  	ke.h.Reset()
    65  	return s, nil
    66  }
    67  
    68  func (*md5KeyExtractor) PrepareExtractor(name string, r cos.ReadSizer, _ string) (cos.ReadSizer, *SingleKeyExtractor, bool) {
    69  	return r, &SingleKeyExtractor{name: name}, false
    70  }
    71  
    72  //////////////////////
    73  // nameKeyExtractor //
    74  //////////////////////
    75  
    76  func NewNameKeyExtractor() (KeyExtractor, error) {
    77  	return &nameKeyExtractor{}, nil
    78  }
    79  
    80  func (*nameKeyExtractor) PrepareExtractor(name string, r cos.ReadSizer, _ string) (cos.ReadSizer, *SingleKeyExtractor, bool) {
    81  	return r, &SingleKeyExtractor{name: name}, false
    82  }
    83  
    84  func (*nameKeyExtractor) ExtractKey(ske *SingleKeyExtractor) (any, error) {
    85  	return ske.name, nil
    86  }
    87  
    88  /////////////////////////
    89  // contentKeyExtractor //
    90  /////////////////////////
    91  
    92  func NewContentKeyExtractor(ty, ext string) (KeyExtractor, error) {
    93  	if err := ValidateContentKeyTy(ty); err != nil {
    94  		return nil, err
    95  	}
    96  	return &contentKeyExtractor{ty: ty, ext: ext}, nil
    97  }
    98  
    99  func (ke *contentKeyExtractor) PrepareExtractor(name string, r cos.ReadSizer, ext string) (cos.ReadSizer, *SingleKeyExtractor, bool) {
   100  	if ke.ext != ext {
   101  		return r, nil, false
   102  	}
   103  	buf := &bytes.Buffer{}
   104  	tee := cos.NewSizedReader(io.TeeReader(r, buf), r.Size())
   105  	return tee, &SingleKeyExtractor{name: name, buf: buf}, true
   106  }
   107  
   108  func (ke *contentKeyExtractor) ExtractKey(ske *SingleKeyExtractor) (any, error) {
   109  	if ske == nil {
   110  		return nil, nil
   111  	}
   112  	b, err := io.ReadAll(ske.buf)
   113  	ske.buf = nil
   114  	if err != nil {
   115  		return nil, err
   116  	}
   117  	key := string(b)
   118  	switch ke.ty {
   119  	case ContentKeyInt:
   120  		return strconv.ParseInt(key, 10, 64)
   121  	case ContentKeyFloat:
   122  		return strconv.ParseFloat(key, 64)
   123  	case ContentKeyString:
   124  		return key, nil
   125  	default:
   126  		return nil, &ErrSortingKeyType{ke.ty}
   127  	}
   128  }
   129  
   130  func ValidateContentKeyTy(ty string) error {
   131  	switch ty {
   132  	case ContentKeyInt, ContentKeyFloat, ContentKeyString:
   133  		return nil
   134  	default:
   135  		return &ErrSortingKeyType{ty}
   136  	}
   137  }
   138  
   139  func (e *ErrSortingKeyType) Error() string {
   140  	return fmt.Sprintf("invalid content sorting key %q, expecting one of: 'int', 'float', 'string'", e.ty)
   141  }