storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/bitrot-streaming.go (about)

     1  /*
     2   * MinIO Cloud Storage, (C) 2019 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package cmd
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"encoding/hex"
    23  	"fmt"
    24  	"hash"
    25  	"io"
    26  
    27  	"storj.io/minio/cmd/logger"
    28  	"storj.io/minio/pkg/ioutil"
    29  )
    30  
    31  type errHashMismatch struct {
    32  	message string
    33  }
    34  
    35  func (err *errHashMismatch) Error() string {
    36  	return err.message
    37  }
    38  
    39  // Calculates bitrot in chunks and writes the hash into the stream.
    40  type streamingBitrotWriter struct {
    41  	iow          io.WriteCloser
    42  	closeWithErr func(err error) error
    43  	h            hash.Hash
    44  	shardSize    int64
    45  	canClose     chan struct{} // Needed to avoid race explained in Close() call.
    46  }
    47  
    48  func (b *streamingBitrotWriter) Write(p []byte) (int, error) {
    49  	if len(p) == 0 {
    50  		return 0, nil
    51  	}
    52  	b.h.Reset()
    53  	b.h.Write(p)
    54  	hashBytes := b.h.Sum(nil)
    55  	_, err := b.iow.Write(hashBytes)
    56  	if err != nil {
    57  		return 0, err
    58  	}
    59  	return b.iow.Write(p)
    60  }
    61  
    62  func (b *streamingBitrotWriter) Close() error {
    63  	err := b.iow.Close()
    64  	// Wait for all data to be written before returning else it causes race conditions.
    65  	// Race condition is because of io.PipeWriter implementation. i.e consider the following
    66  	// sequent of operations:
    67  	// 1) pipe.Write()
    68  	// 2) pipe.Close()
    69  	// Now pipe.Close() can return before the data is read on the other end of the pipe and written to the disk
    70  	// Hence an immediate Read() on the file can return incorrect data.
    71  	if b.canClose != nil {
    72  		<-b.canClose
    73  	}
    74  	return err
    75  }
    76  
    77  // Returns streaming bitrot writer implementation.
    78  func newStreamingBitrotWriterBuffer(w io.Writer, algo BitrotAlgorithm, shardSize int64) io.WriteCloser {
    79  	return &streamingBitrotWriter{iow: ioutil.NopCloser(w), h: algo.New(), shardSize: shardSize, canClose: nil}
    80  }
    81  
    82  // Returns streaming bitrot writer implementation.
    83  func newStreamingBitrotWriter(disk StorageAPI, volume, filePath string, length int64, algo BitrotAlgorithm, shardSize int64, heal bool) io.Writer {
    84  	r, w := io.Pipe()
    85  	h := algo.New()
    86  
    87  	bw := &streamingBitrotWriter{iow: w, closeWithErr: w.CloseWithError, h: h, shardSize: shardSize, canClose: make(chan struct{})}
    88  
    89  	go func() {
    90  		totalFileSize := int64(-1) // For compressed objects length will be unknown (represented by length=-1)
    91  		if length != -1 {
    92  			bitrotSumsTotalSize := ceilFrac(length, shardSize) * int64(h.Size()) // Size used for storing bitrot checksums.
    93  			totalFileSize = bitrotSumsTotalSize + length
    94  		}
    95  		r.CloseWithError(disk.CreateFile(context.TODO(), volume, filePath, totalFileSize, r))
    96  		close(bw.canClose)
    97  	}()
    98  	return bw
    99  }
   100  
// streamingBitrotReader provides a ReadAt() implementation which verifies the
// bitrot hash available as part of the stream: every chunk of data is read
// together with the hash stored directly ahead of it and checked against it.
type streamingBitrotReader struct {
	// disk is used to open the stream when no in-memory data is available.
	disk       StorageAPI
	// data, when non-empty, is read instead of opening a stream on disk.
	data       []byte
	// rc is the stream being read; nil until the first ReadAt() call opens it.
	rc         io.Reader
	// volume and filePath identify the file passed to disk.ReadFileStream.
	volume     string
	filePath   string
	// tillOffset is the end position within the stream, hashes included.
	tillOffset int64
	// currOffset is the data offset (hashes excluded) the next ReadAt() must use.
	currOffset int64
	// h computes the hash of each chunk for comparison with the stored hash.
	h          hash.Hash
	// shardSize is the alignment every ReadAt() offset must satisfy.
	shardSize  int64
	// hashBytes is a scratch buffer holding the stored hash of the current chunk.
	hashBytes  []byte
}
   114  
   115  func (b *streamingBitrotReader) Close() error {
   116  	if b.rc == nil {
   117  		return nil
   118  	}
   119  	if closer, ok := b.rc.(io.Closer); ok {
   120  		return closer.Close()
   121  	}
   122  	return nil
   123  }
   124  
   125  func (b *streamingBitrotReader) ReadAt(buf []byte, offset int64) (int, error) {
   126  	var err error
   127  	if offset%b.shardSize != 0 {
   128  		// Offset should always be aligned to b.shardSize
   129  		// Can never happen unless there are programmer bugs
   130  		return 0, errUnexpected
   131  	}
   132  	if b.rc == nil {
   133  		// For the first ReadAt() call we need to open the stream for reading.
   134  		b.currOffset = offset
   135  		streamOffset := (offset/b.shardSize)*int64(b.h.Size()) + offset
   136  		if len(b.data) == 0 && b.tillOffset != streamOffset {
   137  			b.rc, err = b.disk.ReadFileStream(context.TODO(), b.volume, b.filePath, streamOffset, b.tillOffset-streamOffset)
   138  		} else {
   139  			b.rc = io.NewSectionReader(bytes.NewReader(b.data), streamOffset, b.tillOffset-streamOffset)
   140  		}
   141  		if err != nil {
   142  			return 0, err
   143  		}
   144  	}
   145  
   146  	if offset != b.currOffset {
   147  		// Can never happen unless there are programmer bugs
   148  		return 0, errUnexpected
   149  	}
   150  	b.h.Reset()
   151  	_, err = io.ReadFull(b.rc, b.hashBytes)
   152  	if err != nil {
   153  		return 0, err
   154  	}
   155  	_, err = io.ReadFull(b.rc, buf)
   156  	if err != nil {
   157  		return 0, err
   158  	}
   159  	b.h.Write(buf)
   160  
   161  	if !bytes.Equal(b.h.Sum(nil), b.hashBytes) {
   162  		logger.LogIf(GlobalContext, fmt.Errorf("Disk: %s  -> %s/%s - content hash does not match - expected %s, got %s",
   163  			b.disk, b.volume, b.filePath, hex.EncodeToString(b.hashBytes), hex.EncodeToString(b.h.Sum(nil))))
   164  		return 0, errFileCorrupt
   165  	}
   166  	b.currOffset += int64(len(buf))
   167  	return len(buf), nil
   168  }
   169  
   170  // Returns streaming bitrot reader implementation.
   171  func newStreamingBitrotReader(disk StorageAPI, data []byte, volume, filePath string, tillOffset int64, algo BitrotAlgorithm, shardSize int64) *streamingBitrotReader {
   172  	h := algo.New()
   173  	return &streamingBitrotReader{
   174  		disk:       disk,
   175  		data:       data,
   176  		volume:     volume,
   177  		filePath:   filePath,
   178  		tillOffset: ceilFrac(tillOffset, shardSize)*int64(h.Size()) + tillOffset,
   179  		h:          h,
   180  		shardSize:  shardSize,
   181  		hashBytes:  make([]byte, h.Size()),
   182  	}
   183  }