github.com/ledgerwatch/erigon-lib@v1.0.0/bptree/binary_file.go (about)

     1  /*
     2     Copyright 2022 Erigon contributors
     3  
     4     Licensed under the Apache License, Version 2.0 (the "License");
     5     you may not use this file except in compliance with the License.
     6     You may obtain a copy of the License at
     7  
     8         http://www.apache.org/licenses/LICENSE-2.0
     9  
    10     Unless required by applicable law or agreed to in writing, software
    11     distributed under the License is distributed on an "AS IS" BASIS,
    12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13     See the License for the specific language governing permissions and
    14     limitations under the License.
    15  */
    16  
    17  package bptree
    18  
    19  import (
    20  	"bufio"
    21  	"crypto/rand"
    22  	"fmt"
    23  	"io"
    24  	"math/big"
    25  	"os"
    26  	"strconv"
    27  )
    28  
    29  // Size in bytes of data blocks read/written from/to the file system.
    30  const BLOCKSIZE int64 = 4096
    31  
    32  // BinaryFile type represents an open binary file.
    33  type BinaryFile struct {
    34  	file      *os.File
    35  	path      string
    36  	blockSize int64
    37  	size      int64
    38  	opened    bool
    39  }
    40  
    41  // RandomBinaryReader reads data chuncks randomly from a binary file.
    42  type RandomBinaryReader struct {
    43  	sourceFile *BinaryFile
    44  	chunckSize int
    45  }
    46  
    47  func (r RandomBinaryReader) Read(b []byte) (n int, err error) {
    48  	numKeys := len(b) / r.chunckSize
    49  	for i := 0; i < numKeys; i++ {
    50  		bytesRead, err := r.readAtRandomOffset(b[i*r.chunckSize : i*r.chunckSize+r.chunckSize])
    51  		if err != nil {
    52  			return i*r.chunckSize + bytesRead, fmt.Errorf("cannot random read at iteration %d: %w", i, err)
    53  		}
    54  		n += bytesRead
    55  	}
    56  	remainderSize := len(b) % r.chunckSize
    57  	bytesRead, err := r.readAtRandomOffset(b[numKeys*r.chunckSize : numKeys*r.chunckSize+remainderSize])
    58  	if err != nil {
    59  		return numKeys*r.chunckSize + bytesRead, fmt.Errorf("cannot random read remainder %d: %w", remainderSize, err)
    60  	}
    61  	n += bytesRead
    62  	return n, nil
    63  }
    64  
    65  func (r RandomBinaryReader) readAtRandomOffset(b []byte) (n int, err error) {
    66  	randomValue, err := rand.Int(rand.Reader, big.NewInt(r.sourceFile.size-int64(len(b))))
    67  	if err != nil {
    68  		return 0, fmt.Errorf("cannot generate random offset: %w", err)
    69  	}
    70  	randomOffset := randomValue.Int64()
    71  	_, err = r.sourceFile.file.Seek(randomOffset, io.SeekStart)
    72  	if err != nil {
    73  		return 0, fmt.Errorf("cannot seek to offset %d: %w", randomOffset, err)
    74  	}
    75  	bytesRead, err := r.sourceFile.file.Read(b)
    76  	if err != nil {
    77  		return 0, fmt.Errorf("cannot read from source file: %w", err)
    78  	}
    79  	return bytesRead, nil
    80  }
    81  
    82  func CreateBinaryFileByRandomSampling(path string, size int64, sourceFile *BinaryFile, keySize int) *BinaryFile {
    83  	return CreateBinaryFileFromReader(path, "_onlyexisting", size, RandomBinaryReader{sourceFile, keySize})
    84  }
    85  
    86  func CreateBinaryFileByPRNG(path string, size int64) *BinaryFile {
    87  	return CreateBinaryFileFromReader(path, "", size, rand.Reader)
    88  }
    89  
    90  func CreateBinaryFileFromReader(path, suffix string, size int64, reader io.Reader) *BinaryFile {
    91  	file, err := os.OpenFile(path+strconv.FormatInt(size, 10)+suffix, os.O_RDWR|os.O_CREATE, 0644)
    92  	ensure(err == nil, fmt.Sprintf("CreateBinaryFileFromReader: cannot create file %s, error %s\n", file.Name(), err))
    93  
    94  	err = file.Truncate(size)
    95  	ensure(err == nil, fmt.Sprintf("CreateBinaryFileFromReader: cannot truncate file %s to %d, error %s\n", file.Name(), size, err))
    96  
    97  	bufferedFile := bufio.NewWriter(file)
    98  	numBlocks := size / BLOCKSIZE
    99  	remainderSize := size % BLOCKSIZE
   100  	buffer := make([]byte, BLOCKSIZE)
   101  	for i := int64(0); i <= numBlocks; i++ {
   102  		if i == numBlocks {
   103  			buffer = make([]byte, remainderSize)
   104  		}
   105  		bytesRead, err := io.ReadFull(reader, buffer)
   106  		ensure(bytesRead == len(buffer), fmt.Sprintf("CreateBinaryFileFromReader: insufficient bytes read %d, error %s\n", bytesRead, err))
   107  		bytesWritten, err := bufferedFile.Write(buffer)
   108  		ensure(bytesWritten == len(buffer), fmt.Sprintf("CreateBinaryFileFromReader: insufficient bytes written %d, error %s\n", bytesWritten, err))
   109  	}
   110  
   111  	err = bufferedFile.Flush()
   112  	ensure(err == nil, fmt.Sprintf("CreateBinaryFileFromReader: error during flushing %s\n", err))
   113  
   114  	binaryFile := &BinaryFile{path: file.Name(), blockSize: BLOCKSIZE, size: size, file: file, opened: true}
   115  	binaryFile.rewind()
   116  	return binaryFile
   117  }
   118  
   119  func OpenBinaryFile(path string) *BinaryFile {
   120  	file, err := os.Open(path)
   121  	ensure(err == nil, fmt.Sprintf("OpenBinaryFile: cannot open file %s, error %s\n", path, err))
   122  
   123  	info, err := file.Stat()
   124  	ensure(err == nil, fmt.Sprintf("OpenBinaryFile: cannot stat file %s error %s\n", path, err))
   125  	ensure(info.Size() >= 0, fmt.Sprintf("OpenBinaryFile: negative size %d file %s\n", info.Size(), path))
   126  
   127  	binaryFile := &BinaryFile{path: path, blockSize: BLOCKSIZE, size: info.Size(), file: file, opened: true}
   128  	return binaryFile
   129  }
   130  
   131  func (f *BinaryFile) rewind() {
   132  	offset, err := f.file.Seek(0, io.SeekStart)
   133  	ensure(err == nil, fmt.Sprintf("rewind: error during seeking %s\n", err))
   134  	ensure(offset == 0, fmt.Sprintf("rewind: unexpected offset after seeking: %d\n", offset))
   135  }
   136  
   137  func (f *BinaryFile) Name() string {
   138  	return f.path
   139  }
   140  
   141  func (f *BinaryFile) Size() int64 {
   142  	return f.size
   143  }
   144  
   145  func (f *BinaryFile) NewReader() *bufio.Reader {
   146  	ensure(f.opened, fmt.Sprintf("NewReader: file %s is not opened\n", f.path))
   147  	f.rewind()
   148  	return bufio.NewReader(f.file)
   149  }
   150  
   151  func (f *BinaryFile) Close() {
   152  	ensure(f.opened, fmt.Sprintf("Close: file %s is not opened\n", f.path))
   153  	err := f.file.Close()
   154  	ensure(err == nil, fmt.Sprintf("Close: cannot close file %s, error %s\n", f.path, err))
   155  	f.opened = false
   156  }