github.com/ledgerwatch/erigon-lib@v1.0.0/bptree/binary_file.go (about) 1 /* 2 Copyright 2022 Erigon contributors 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package bptree 18 19 import ( 20 "bufio" 21 "crypto/rand" 22 "fmt" 23 "io" 24 "math/big" 25 "os" 26 "strconv" 27 ) 28 29 // Size in bytes of data blocks read/written from/to the file system. 30 const BLOCKSIZE int64 = 4096 31 32 // BinaryFile type represents an open binary file. 33 type BinaryFile struct { 34 file *os.File 35 path string 36 blockSize int64 37 size int64 38 opened bool 39 } 40 41 // RandomBinaryReader reads data chuncks randomly from a binary file. 42 type RandomBinaryReader struct { 43 sourceFile *BinaryFile 44 chunckSize int 45 } 46 47 func (r RandomBinaryReader) Read(b []byte) (n int, err error) { 48 numKeys := len(b) / r.chunckSize 49 for i := 0; i < numKeys; i++ { 50 bytesRead, err := r.readAtRandomOffset(b[i*r.chunckSize : i*r.chunckSize+r.chunckSize]) 51 if err != nil { 52 return i*r.chunckSize + bytesRead, fmt.Errorf("cannot random read at iteration %d: %w", i, err) 53 } 54 n += bytesRead 55 } 56 remainderSize := len(b) % r.chunckSize 57 bytesRead, err := r.readAtRandomOffset(b[numKeys*r.chunckSize : numKeys*r.chunckSize+remainderSize]) 58 if err != nil { 59 return numKeys*r.chunckSize + bytesRead, fmt.Errorf("cannot random read remainder %d: %w", remainderSize, err) 60 } 61 n += bytesRead 62 return n, nil 63 } 64 65 func (r RandomBinaryReader) readAtRandomOffset(b []byte) (n int, err error) { 66 randomValue, err := rand.Int(rand.Reader, big.NewInt(r.sourceFile.size-int64(len(b)))) 67 if err != nil { 68 return 0, fmt.Errorf("cannot generate random offset: %w", err) 69 } 70 randomOffset := randomValue.Int64() 71 _, err = r.sourceFile.file.Seek(randomOffset, io.SeekStart) 72 if err != nil { 73 return 0, fmt.Errorf("cannot seek to offset %d: %w", randomOffset, err) 74 } 75 bytesRead, err := r.sourceFile.file.Read(b) 76 if err != nil { 77 return 0, fmt.Errorf("cannot read from source file: %w", err) 78 } 79 return bytesRead, nil 80 } 81 82 func CreateBinaryFileByRandomSampling(path string, size int64, sourceFile *BinaryFile, keySize int) *BinaryFile { 83 return CreateBinaryFileFromReader(path, "_onlyexisting", size, RandomBinaryReader{sourceFile, keySize}) 84 } 85 86 func CreateBinaryFileByPRNG(path string, size int64) *BinaryFile { 87 return CreateBinaryFileFromReader(path, "", size, rand.Reader) 88 } 89 90 func CreateBinaryFileFromReader(path, suffix string, size int64, reader io.Reader) *BinaryFile { 91 file, err := os.OpenFile(path+strconv.FormatInt(size, 10)+suffix, os.O_RDWR|os.O_CREATE, 0644) 92 ensure(err == nil, fmt.Sprintf("CreateBinaryFileFromReader: cannot create file %s, error %s\n", file.Name(), err)) 93 94 err = file.Truncate(size) 95 ensure(err == nil, fmt.Sprintf("CreateBinaryFileFromReader: cannot truncate file %s to %d, error %s\n", file.Name(), size, err)) 96 97 bufferedFile := bufio.NewWriter(file) 98 numBlocks := size / BLOCKSIZE 99 remainderSize := size % BLOCKSIZE 100 buffer := make([]byte, BLOCKSIZE) 101 for i := int64(0); i <= numBlocks; i++ { 102 if i == numBlocks { 103 buffer = make([]byte, remainderSize) 104 } 105 bytesRead, err := io.ReadFull(reader, buffer) 106 ensure(bytesRead == len(buffer), fmt.Sprintf("CreateBinaryFileFromReader: insufficient bytes read %d, error %s\n", bytesRead, err)) 107 bytesWritten, err := bufferedFile.Write(buffer) 108 ensure(bytesWritten == len(buffer), fmt.Sprintf("CreateBinaryFileFromReader: insufficient bytes written %d, error %s\n", bytesWritten, err)) 109 } 110 111 err = bufferedFile.Flush() 112 ensure(err == nil, fmt.Sprintf("CreateBinaryFileFromReader: error during flushing %s\n", err)) 113 114 binaryFile := &BinaryFile{path: file.Name(), blockSize: BLOCKSIZE, size: size, file: file, opened: true} 115 binaryFile.rewind() 116 return binaryFile 117 } 118 119 func OpenBinaryFile(path string) *BinaryFile { 120 file, err := os.Open(path) 121 ensure(err == nil, fmt.Sprintf("OpenBinaryFile: cannot open file %s, error %s\n", path, err)) 122 123 info, err := file.Stat() 124 ensure(err == nil, fmt.Sprintf("OpenBinaryFile: cannot stat file %s error %s\n", path, err)) 125 ensure(info.Size() >= 0, fmt.Sprintf("OpenBinaryFile: negative size %d file %s\n", info.Size(), path)) 126 127 binaryFile := &BinaryFile{path: path, blockSize: BLOCKSIZE, size: info.Size(), file: file, opened: true} 128 return binaryFile 129 } 130 131 func (f *BinaryFile) rewind() { 132 offset, err := f.file.Seek(0, io.SeekStart) 133 ensure(err == nil, fmt.Sprintf("rewind: error during seeking %s\n", err)) 134 ensure(offset == 0, fmt.Sprintf("rewind: unexpected offset after seeking: %d\n", offset)) 135 } 136 137 func (f *BinaryFile) Name() string { 138 return f.path 139 } 140 141 func (f *BinaryFile) Size() int64 { 142 return f.size 143 } 144 145 func (f *BinaryFile) NewReader() *bufio.Reader { 146 ensure(f.opened, fmt.Sprintf("NewReader: file %s is not opened\n", f.path)) 147 f.rewind() 148 return bufio.NewReader(f.file) 149 } 150 151 func (f *BinaryFile) Close() { 152 ensure(f.opened, fmt.Sprintf("Close: file %s is not opened\n", f.path)) 153 err := f.file.Close() 154 ensure(err == nil, fmt.Sprintf("Close: cannot close file %s, error %s\n", f.path, err)) 155 f.opened = false 156 }