git.frostfs.info/TrueCloudLab/frostfs-sdk-go@v0.0.0-20241022124111-5361f0ecebd3/object/transformer/transformer.go (about)

     1  package transformer
     2  
     3  import (
     4  	"context"
     5  	"crypto/ecdsa"
     6  	"crypto/sha256"
     7  	"fmt"
     8  
     9  	buffPool "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/util/pool"
    10  	"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/checksum"
    11  	"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
    12  	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
    13  	"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/session"
    14  	"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/version"
    15  	"git.frostfs.info/TrueCloudLab/tzhash/tz"
    16  )
    17  
    18  type payloadSizeLimiter struct {
    19  	Params
    20  
    21  	written, writtenCurrent uint64
    22  
    23  	current, parent *object.Object
    24  	payload         []byte
    25  
    26  	currentHashers, parentHashers []payloadChecksumHasher
    27  
    28  	previous []oid.ID
    29  
    30  	splitID *object.SplitID
    31  
    32  	parAttrs []object.Attribute
    33  
    34  	nextTarget ObjectWriter
    35  }
    36  
    37  type Params struct {
    38  	Key                    *ecdsa.PrivateKey
    39  	NextTargetInit         TargetInitializer
    40  	SessionToken           *session.Object
    41  	NetworkState           EpochSource
    42  	MaxSize                uint64
    43  	WithoutHomomorphicHash bool
    44  	// SizeHint is a hint for the total payload size to be processed.
    45  	// It is used primarily to optimize allocations and doesn't affect
    46  	// functionality. Primary usecases are providing file size when putting an object
    47  	// with the frostfs-cli or using Content-Length header in gateways.
    48  	SizeHint uint64
    49  	Pool     *buffPool.BufferPool
    50  }
    51  
    52  // NewPayloadSizeLimiter returns ObjectTarget instance that restricts payload length
    53  // of the writing object and writes generated objects to targets from initializer.
    54  //
    55  // Calculates and adds homomorphic hash to resulting objects only if withoutHomomorphicHash
    56  // is false.
    57  //
    58  // Objects w/ payload size less or equal than max size remain untouched.
    59  func NewPayloadSizeLimiter(p Params) ChunkedObjectWriter {
    60  	return &payloadSizeLimiter{
    61  		Params:  p,
    62  		splitID: object.NewSplitID(),
    63  	}
    64  }
    65  
    66  func (s *payloadSizeLimiter) WriteHeader(_ context.Context, hdr *object.Object) error {
    67  	s.current = fromObject(hdr)
    68  
    69  	s.initialize()
    70  
    71  	return nil
    72  }
    73  
    74  func (s *payloadSizeLimiter) Write(ctx context.Context, p []byte) (int, error) {
    75  	if err := s.writeChunk(ctx, p); err != nil {
    76  		return 0, err
    77  	}
    78  
    79  	return len(p), nil
    80  }
    81  
    82  func (s *payloadSizeLimiter) Close(ctx context.Context) (*AccessIdentifiers, error) {
    83  	return s.release(ctx, true)
    84  }
    85  
    86  func (s *payloadSizeLimiter) initialize() {
    87  	s.current = fromObject(s.current)
    88  
    89  	// if it is an object after the 1st
    90  	if ln := len(s.previous); ln > 0 {
    91  		// initialize parent object once (after 1st object)
    92  		if ln == 1 {
    93  			ver := version.Current()
    94  			s.parent = fromObject(s.current)
    95  			s.parent.ResetRelations()
    96  			s.parent.SetSignature(nil)
    97  			s.parent.SetAttributes(s.parAttrs...)
    98  			s.parent.SetVersion(&ver)
    99  			s.parentHashers = append(s.parentHashers[:0], s.currentHashers...)
   100  		}
   101  
   102  		// set previous object to the last previous identifier
   103  		s.current.SetPreviousID(s.previous[ln-1])
   104  	}
   105  
   106  	s.initializeCurrent()
   107  }
   108  
   109  func fromObject(obj *object.Object) *object.Object {
   110  	cnr, _ := obj.ContainerID()
   111  
   112  	res := object.New()
   113  	res.SetContainerID(cnr)
   114  	res.SetOwnerID(obj.OwnerID())
   115  	res.SetAttributes(obj.Attributes()...)
   116  	res.SetType(obj.Type())
   117  
   118  	// obj.SetSplitID creates splitHeader but we don't need to do it in case
   119  	// of small objects, so we should make nil check.
   120  	if obj.SplitID() != nil {
   121  		res.SetSplitID(obj.SplitID())
   122  	}
   123  
   124  	return res
   125  }
   126  
   127  func (s *payloadSizeLimiter) initializeCurrent() {
   128  	s.nextTarget = s.NextTargetInit()
   129  	s.writtenCurrent = 0
   130  	s.initPayloadHashers()
   131  
   132  	var payloadSize uint64
   133  
   134  	// Check whether SizeHint is valid.
   135  	if remaining := s.SizeHint - s.written; remaining <= s.SizeHint {
   136  		if remaining >= s.MaxSize {
   137  			payloadSize = s.MaxSize
   138  		} else {
   139  			payloadSize = remaining % s.MaxSize
   140  		}
   141  	}
   142  
   143  	if s.Pool == nil {
   144  		s.payload = make([]byte, 0, payloadSize)
   145  	} else {
   146  		buffer := s.Pool.Get(uint32(payloadSize))
   147  		s.payload = buffer.Data[:0]
   148  	}
   149  }
   150  
   151  func (s *payloadSizeLimiter) initPayloadHashers() {
   152  	s.currentHashers = append(s.currentHashers[:0], payloadChecksumHasher{
   153  		hasher: sha256.New(),
   154  		typ:    checksum.SHA256,
   155  	})
   156  
   157  	if !s.WithoutHomomorphicHash {
   158  		s.currentHashers = append(s.currentHashers, payloadChecksumHasher{
   159  			hasher: tz.New(),
   160  			typ:    checksum.TZ,
   161  		})
   162  	}
   163  }
   164  
   165  func (s *payloadSizeLimiter) release(ctx context.Context, finalize bool) (*AccessIdentifiers, error) {
   166  	// Arg finalize is true only when called from Close method.
   167  	// We finalize parent and generate linking objects only if it is more
   168  	// than 1 object in split-chain.
   169  	withParent := finalize && len(s.previous) > 0
   170  
   171  	if withParent {
   172  		for i := range s.parentHashers {
   173  			s.parentHashers[i].writeChecksum(s.parent)
   174  		}
   175  		s.parent.SetPayloadSize(s.written)
   176  		s.current.SetParent(s.parent)
   177  	}
   178  
   179  	// release current object
   180  	for i := range s.currentHashers {
   181  		s.currentHashers[i].writeChecksum(s.current)
   182  	}
   183  
   184  	ids, err := s.fillHeader()
   185  	if err != nil {
   186  		return nil, fmt.Errorf("fillHeader: %w", err)
   187  	}
   188  
   189  	s.current.SetPayload(s.payload)
   190  	if err := s.nextTarget.WriteObject(ctx, s.current); err != nil {
   191  		return nil, fmt.Errorf("could not write to next target: %w", err)
   192  	}
   193  
   194  	// save identifier of the released object
   195  	s.previous = append(s.previous, ids.SelfID)
   196  
   197  	if withParent {
   198  		// generate and release linking object
   199  		s.initializeLinking(ids.ParentHeader)
   200  		s.initializeCurrent()
   201  
   202  		if _, err := s.release(ctx, false); err != nil {
   203  			return nil, fmt.Errorf("could not release linking object: %w", err)
   204  		}
   205  	}
   206  
   207  	return ids, nil
   208  }
   209  
   210  func (s *payloadSizeLimiter) fillHeader() (*AccessIdentifiers, error) {
   211  	curEpoch := s.NetworkState.CurrentEpoch()
   212  	ver := version.Current()
   213  
   214  	s.current.SetVersion(&ver)
   215  	s.current.SetPayloadSize(s.writtenCurrent)
   216  	s.current.SetSessionToken(s.SessionToken)
   217  	s.current.SetCreationEpoch(curEpoch)
   218  
   219  	var (
   220  		parID  *oid.ID
   221  		parHdr *object.Object
   222  	)
   223  
   224  	if par := s.current.Parent(); par != nil && par.Signature() == nil {
   225  		rawPar := object.NewFromV2(par.ToV2())
   226  
   227  		rawPar.SetSessionToken(s.SessionToken)
   228  		rawPar.SetCreationEpoch(curEpoch)
   229  
   230  		if err := object.SetIDWithSignature(*s.Key, rawPar); err != nil {
   231  			return nil, fmt.Errorf("could not finalize parent object: %w", err)
   232  		}
   233  
   234  		id, _ := rawPar.ID()
   235  		parID = &id
   236  		parHdr = rawPar
   237  
   238  		s.current.SetParent(parHdr)
   239  	}
   240  
   241  	if err := object.SetIDWithSignature(*s.Key, s.current); err != nil {
   242  		return nil, fmt.Errorf("could not finalize object: %w", err)
   243  	}
   244  
   245  	id, _ := s.current.ID()
   246  	return &AccessIdentifiers{
   247  		ParentID:     parID,
   248  		SelfID:       id,
   249  		ParentHeader: parHdr,
   250  		Epoch:        curEpoch,
   251  	}, nil
   252  }
   253  
   254  func (s *payloadSizeLimiter) initializeLinking(parHdr *object.Object) {
   255  	s.current = fromObject(s.current)
   256  	s.current.SetParent(parHdr)
   257  	s.current.SetChildren(s.previous...)
   258  	s.current.SetSplitID(s.splitID)
   259  }
   260  
   261  func (s *payloadSizeLimiter) writeChunk(ctx context.Context, chunk []byte) error {
   262  	for {
   263  		// statement is true if the previous write of bytes reached exactly the boundary.
   264  		if s.written > 0 && s.written%s.MaxSize == 0 {
   265  			if s.written == s.MaxSize {
   266  				s.prepareFirstChild()
   267  			}
   268  
   269  			// we need to release current object
   270  			if _, err := s.release(ctx, false); err != nil {
   271  				return fmt.Errorf("could not release object: %w", err)
   272  			}
   273  
   274  			// initialize another object
   275  			s.initialize()
   276  		}
   277  
   278  		var (
   279  			ln         = uint64(len(chunk))
   280  			cut        = ln
   281  			leftToEdge = s.MaxSize - s.written%s.MaxSize
   282  		)
   283  
   284  		// write bytes no further than the boundary of the current object
   285  		if ln > leftToEdge {
   286  			cut = leftToEdge
   287  		}
   288  
   289  		if err := s.writeHashes(chunk[:cut]); err != nil {
   290  			return fmt.Errorf("could not write chunk to target: %w", err)
   291  		}
   292  
   293  		// increase written bytes counter
   294  		s.writtenCurrent += cut
   295  		s.written += cut
   296  
   297  		if cut == ln {
   298  			return nil
   299  		}
   300  		// if there are more bytes in buffer we call method again to start filling another object
   301  		chunk = chunk[cut:]
   302  	}
   303  }
   304  
   305  func (s *payloadSizeLimiter) writeHashes(chunk []byte) error {
   306  	s.payload = append(s.payload, chunk...)
   307  
   308  	// The `Write` method of `hash.Hash` never returns an error.
   309  	for i := range s.currentHashers {
   310  		_, _ = s.currentHashers[i].hasher.Write(chunk)
   311  	}
   312  
   313  	for i := range s.parentHashers {
   314  		_, _ = s.parentHashers[i].hasher.Write(chunk)
   315  	}
   316  
   317  	return nil
   318  }
   319  
   320  func (s *payloadSizeLimiter) prepareFirstChild() {
   321  	// initialize split header with split ID on first object in chain
   322  	s.current.InitRelations()
   323  	s.current.SetSplitID(s.splitID)
   324  
   325  	// cut source attributes
   326  	s.parAttrs = s.current.Attributes()
   327  	s.current.SetAttributes()
   328  
   329  	// attributes will be added to parent in detachParent
   330  }