github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/bitrot-streaming.go (about)

     1  // Copyright (c) 2015-2021 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package cmd
    19  
    20  import (
    21  	"bytes"
    22  	"context"
    23  	"hash"
    24  	"io"
    25  	"sync"
    26  
    27  	xhttp "github.com/minio/minio/internal/http"
    28  	"github.com/minio/minio/internal/ioutil"
    29  )
    30  
// Calculates bitrot in chunks and writes the hash into the stream.
// Each Write(p) emits h.Sum(p) followed by p itself, producing an
// interleaved checksum+shard stream on iow.
type streamingBitrotWriter struct {
	iow          io.WriteCloser        // destination for the interleaved checksum+shard stream
	closeWithErr func(err error) error // aborts the stream on write failure (pipe CloseWithError, or a no-op for plain writers)
	h            hash.Hash             // checksum algorithm; reset before hashing every shard
	shardSize    int64                 // shard size this writer was created for — TODO(review): not read by Write itself, presumably consumed by callers/creators
	canClose     *sync.WaitGroup       // when non-nil, Close() waits on it so background writes finish before returning
}
    39  
    40  func (b *streamingBitrotWriter) Write(p []byte) (int, error) {
    41  	if len(p) == 0 {
    42  		return 0, nil
    43  	}
    44  	b.h.Reset()
    45  	b.h.Write(p)
    46  	hashBytes := b.h.Sum(nil)
    47  	_, err := b.iow.Write(hashBytes)
    48  	if err != nil {
    49  		b.closeWithErr(err)
    50  		return 0, err
    51  	}
    52  	n, err := b.iow.Write(p)
    53  	if err != nil {
    54  		b.closeWithErr(err)
    55  		return n, err
    56  	}
    57  	if n != len(p) {
    58  		err = io.ErrShortWrite
    59  		b.closeWithErr(err)
    60  	}
    61  	return n, err
    62  }
    63  
    64  func (b *streamingBitrotWriter) Close() error {
    65  	err := b.iow.Close()
    66  	// Wait for all data to be written before returning else it causes race conditions.
    67  	// Race condition is because of io.PipeWriter implementation. i.e consider the following
    68  	// sequent of operations:
    69  	// 1) pipe.Write()
    70  	// 2) pipe.Close()
    71  	// Now pipe.Close() can return before the data is read on the other end of the pipe and written to the disk
    72  	// Hence an immediate Read() on the file can return incorrect data.
    73  	if b.canClose != nil {
    74  		b.canClose.Wait()
    75  	}
    76  	return err
    77  }
    78  
    79  // newStreamingBitrotWriterBuffer returns streaming bitrot writer implementation.
    80  // The output is written to the supplied writer w.
    81  func newStreamingBitrotWriterBuffer(w io.Writer, algo BitrotAlgorithm, shardSize int64) io.Writer {
    82  	return &streamingBitrotWriter{iow: ioutil.NopCloser(w), h: algo.New(), shardSize: shardSize, canClose: nil, closeWithErr: func(err error) error {
    83  		// Similar to CloseWithError on pipes we always return nil.
    84  		return nil
    85  	}}
    86  }
    87  
// Returns streaming bitrot writer implementation.
// The returned writer feeds the interleaved checksum+shard stream through a
// pipe into disk.CreateFile for volume/filePath. length is the payload size
// in bytes, or -1 when unknown (e.g. compressed objects).
func newStreamingBitrotWriter(disk StorageAPI, origvolume, volume, filePath string, length int64, algo BitrotAlgorithm, shardSize int64) io.Writer {
	r, w := io.Pipe()
	h := algo.New()

	bw := &streamingBitrotWriter{
		iow:          ioutil.NewDeadlineWriter(w, globalDriveConfig.GetMaxTimeout()),
		closeWithErr: w.CloseWithError,
		h:            h,
		shardSize:    shardSize,
		canClose:     &sync.WaitGroup{},
	}
	// Close() waits on canClose, so callers cannot observe the file before
	// the goroutine below has consumed the whole pipe (see Close for the
	// race this prevents).
	bw.canClose.Add(1)
	go func() {
		defer bw.canClose.Done()

		totalFileSize := int64(-1) // For compressed objects length will be unknown (represented by length=-1)
		if length != -1 {
			bitrotSumsTotalSize := ceilFrac(length, shardSize) * int64(h.Size()) // Size used for storing bitrot checksums.
			totalFileSize = bitrotSumsTotalSize + length
		}
		// CreateFile drains r; closing r with its result propagates any
		// failure back to the writing side of the pipe.
		r.CloseWithError(disk.CreateFile(context.TODO(), origvolume, volume, filePath, totalFileSize, r))
	}()
	return bw
}
   113  
// ReadAt() implementation which verifies the bitrot hash available as part of the stream.
type streamingBitrotReader struct {
	disk       StorageAPI // backing disk; used only when data is empty
	data       []byte     // in-memory copy of the stream; served directly when non-empty
	rc         io.Reader  // underlying stream, opened lazily on the first ReadAt
	volume     string     // volume holding the file on disk
	filePath   string     // path of the file within the volume
	tillOffset int64      // end offset in the stream, checksum bytes included
	currOffset int64      // next expected payload offset; enforces sequential ReadAt calls
	h          hash.Hash  // checksum algorithm used to verify each shard
	shardSize  int64      // shard size; ReadAt offsets must be multiples of it
	hashBytes  []byte     // scratch buffer (h.Size() bytes) for the stored checksum
}
   127  
   128  func (b *streamingBitrotReader) Close() error {
   129  	if b.rc == nil {
   130  		return nil
   131  	}
   132  	if closer, ok := b.rc.(io.Closer); ok {
   133  		// drain the body for connection reuse at network layer.
   134  		xhttp.DrainBody(struct {
   135  			io.Reader
   136  			io.Closer
   137  		}{
   138  			Reader: b.rc,
   139  			Closer: closeWrapper(func() error { return nil }),
   140  		})
   141  		return closer.Close()
   142  	}
   143  	return nil
   144  }
   145  
// ReadAt reads one shard plus its preceding checksum from the stream and
// verifies it. Calls must be strictly sequential: offset must be aligned to
// b.shardSize and equal to the offset following the previous call.
func (b *streamingBitrotReader) ReadAt(buf []byte, offset int64) (int, error) {
	var err error
	if offset%b.shardSize != 0 {
		// Offset should always be aligned to b.shardSize
		// Can never happen unless there are programmer bugs
		return 0, errUnexpected
	}
	if b.rc == nil {
		// For the first ReadAt() call we need to open the stream for reading.
		b.currOffset = offset
		// Map the payload offset to the stream offset: every full shard
		// before it is preceded by one h.Size()-byte checksum.
		streamOffset := (offset/b.shardSize)*int64(b.h.Size()) + offset
		if len(b.data) == 0 && b.tillOffset != streamOffset {
			b.rc, err = b.disk.ReadFileStream(context.TODO(), b.volume, b.filePath, streamOffset, b.tillOffset-streamOffset)
		} else {
			// Serve from the in-memory copy (or an empty section when
			// streamOffset already equals tillOffset).
			b.rc = io.NewSectionReader(bytes.NewReader(b.data), streamOffset, b.tillOffset-streamOffset)
		}
		if err != nil {
			return 0, err
		}
	}
	if offset != b.currOffset {
		// Can never happen unless there are programmer bugs
		return 0, errUnexpected
	}
	b.h.Reset()
	// Stream layout is checksum-then-shard: read the stored checksum first,
	// then exactly len(buf) payload bytes.
	_, err = io.ReadFull(b.rc, b.hashBytes)
	if err != nil {
		return 0, err
	}
	_, err = io.ReadFull(b.rc, buf)
	if err != nil {
		return 0, err
	}
	// Recompute the checksum over the payload; a mismatch means the data
	// rotted on disk.
	b.h.Write(buf)
	if !bytes.Equal(b.h.Sum(nil), b.hashBytes) {
		return 0, errFileCorrupt
	}
	b.currOffset += int64(len(buf))
	return len(buf), nil
}
   186  
   187  // Returns streaming bitrot reader implementation.
   188  func newStreamingBitrotReader(disk StorageAPI, data []byte, volume, filePath string, tillOffset int64, algo BitrotAlgorithm, shardSize int64) *streamingBitrotReader {
   189  	h := algo.New()
   190  	return &streamingBitrotReader{
   191  		disk:       disk,
   192  		data:       data,
   193  		volume:     volume,
   194  		filePath:   filePath,
   195  		tillOffset: ceilFrac(tillOffset, shardSize)*int64(h.Size()) + tillOffset,
   196  		h:          h,
   197  		shardSize:  shardSize,
   198  		hashBytes:  make([]byte, h.Size()),
   199  	}
   200  }