github.com/ledgerwatch/erigon-lib@v1.0.0/etl/dataprovider.go (about) 1 /* 2 Copyright 2021 Erigon contributors 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package etl 18 19 import ( 20 "bufio" 21 "encoding/binary" 22 "fmt" 23 "io" 24 "os" 25 26 "github.com/ledgerwatch/log/v3" 27 "golang.org/x/sync/errgroup" 28 ) 29 30 type dataProvider interface { 31 Next(keyBuf, valBuf []byte) ([]byte, []byte, error) 32 Dispose() // Safe for repeated call, doesn't return error - means defer-friendly 33 Wait() error // join point for async providers 34 } 35 36 type fileDataProvider struct { 37 file *os.File 38 reader io.Reader 39 byteReader io.ByteReader // Different interface to the same object as reader 40 wg *errgroup.Group 41 } 42 43 // FlushToDisk - `doFsync` is true only for 'critical' collectors (which should not loose). 44 func FlushToDisk(logPrefix string, b Buffer, tmpdir string, doFsync bool, lvl log.Lvl) (dataProvider, error) { 45 if b.Len() == 0 { 46 return nil, nil 47 } 48 49 provider := &fileDataProvider{reader: nil, wg: &errgroup.Group{}} 50 provider.wg.Go(func() error { 51 b.Sort() 52 53 // if we are going to create files in the system temp dir, we don't need any 54 // subfolders. 55 if tmpdir != "" { 56 if err := os.MkdirAll(tmpdir, 0755); err != nil { 57 return err 58 } 59 } 60 61 bufferFile, err := os.CreateTemp(tmpdir, "erigon-sortable-buf-") 62 if err != nil { 63 return err 64 } 65 provider.file = bufferFile 66 67 if doFsync { 68 defer bufferFile.Sync() //nolint:errcheck 69 } 70 71 w := bufio.NewWriterSize(bufferFile, BufIOSize) 72 defer w.Flush() //nolint:errcheck 73 74 if err = b.Write(w); err != nil { 75 return fmt.Errorf("error writing entries to disk: %w", err) 76 } 77 log.Log(lvl, fmt.Sprintf("[%s] Flushed buffer file", logPrefix), "name", bufferFile.Name()) 78 return nil 79 }) 80 81 return provider, nil 82 } 83 84 func (p *fileDataProvider) Next(keyBuf, valBuf []byte) ([]byte, []byte, error) { 85 if p.reader == nil { 86 _, err := p.file.Seek(0, 0) 87 if err != nil { 88 return nil, nil, err 89 } 90 r := bufio.NewReaderSize(p.file, BufIOSize) 91 p.reader = r 92 p.byteReader = r 93 94 } 95 return readElementFromDisk(p.reader, p.byteReader, keyBuf, valBuf) 96 } 97 98 func (p *fileDataProvider) Wait() error { return p.wg.Wait() } 99 func (p *fileDataProvider) Dispose() { 100 if p.file != nil { //invariant: safe to call multiple time 101 p.Wait() 102 _ = p.file.Close() 103 _ = os.Remove(p.file.Name()) 104 p.file = nil 105 } 106 } 107 108 func (p *fileDataProvider) String() string { 109 return fmt.Sprintf("%T(file: %s)", p, p.file.Name()) 110 } 111 112 func readElementFromDisk(r io.Reader, br io.ByteReader, keyBuf, valBuf []byte) ([]byte, []byte, error) { 113 n, err := binary.ReadVarint(br) 114 if err != nil { 115 return nil, nil, err 116 } 117 if n >= 0 { 118 // Reallocate the slice or extend it if there is enough capacity 119 if keyBuf == nil || len(keyBuf)+int(n) > cap(keyBuf) { 120 newKeyBuf := make([]byte, len(keyBuf)+int(n)) 121 copy(newKeyBuf, keyBuf) 122 keyBuf = newKeyBuf 123 } else { 124 keyBuf = keyBuf[:len(keyBuf)+int(n)] 125 } 126 if _, err = io.ReadFull(r, keyBuf[len(keyBuf)-int(n):]); err != nil { 127 return nil, nil, err 128 } 129 } else { 130 keyBuf = nil 131 } 132 if n, err = binary.ReadVarint(br); err != nil { 133 return nil, nil, err 134 } 135 if n >= 0 { 136 // Reallocate the slice or extend it if there is enough capacity 137 if valBuf == nil || len(valBuf)+int(n) > cap(valBuf) { 138 newValBuf := make([]byte, len(valBuf)+int(n)) 139 copy(newValBuf, valBuf) 140 valBuf = newValBuf 141 } else { 142 valBuf = valBuf[:len(valBuf)+int(n)] 143 } 144 if _, err = io.ReadFull(r, valBuf[len(valBuf)-int(n):]); err != nil { 145 return nil, nil, err 146 } 147 } else { 148 valBuf = nil 149 } 150 return keyBuf, valBuf, err 151 } 152 153 type memoryDataProvider struct { 154 buffer Buffer 155 currentIndex int 156 } 157 158 func KeepInRAM(buffer Buffer) dataProvider { 159 return &memoryDataProvider{buffer, 0} 160 } 161 162 func (p *memoryDataProvider) Next(keyBuf, valBuf []byte) ([]byte, []byte, error) { 163 if p.currentIndex >= p.buffer.Len() { 164 return nil, nil, io.EOF 165 } 166 key, value := p.buffer.Get(p.currentIndex, keyBuf, valBuf) 167 p.currentIndex++ 168 return key, value, nil 169 } 170 171 func (p *memoryDataProvider) Wait() error { return nil } 172 func (p *memoryDataProvider) Dispose() {} 173 174 func (p *memoryDataProvider) String() string { 175 return fmt.Sprintf("%T(buffer.Len: %d)", p, p.buffer.Len()) 176 }