github.com/TrueBlocks/trueblocks-core/src/apps/chifra@v0.0.0-20241022031540-b362680128f7/pkg/index/chunk.go (about) 1 // Package index provides tools needed to acquire, read, write and test for set inclusion in an index chunk. 2 // 3 // An index chunk is a data structure with three parts. A FileRange which indicates the first block 4 // and last block of the chunk (inclusive), the Index which carries the list of address appearances 5 // in the given block range, and a Bloom which allows for rapid queries to determine if a given address 6 // appears in the Index without having to read the data from disc. 7 // 8 // The bloom filter returns true or false indicating either that the address MAY appear in the index or 9 // that it definitely does not. (In other words, there are false positives but no false negatives.) 10 // 11 // We do not read the actual data into memory, choosing instead to Seek the data directly from disc. Experimentation 12 // teaches us that this is faster given due to the nature of the data. 13 14 package index 15 16 import ( 17 "io" 18 "os" 19 20 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/base" 21 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/config" 22 shell "github.com/ipfs/go-ipfs-api" 23 ) 24 25 // The Chunk data structure consists of three parts. A FileRange, a Index structure, and a Bloom that 26 // carries set membership information for the Index. 27 type Chunk struct { 28 Range base.FileRange 29 Index Index 30 Bloom Bloom 31 } 32 33 // versions returns the version strings found in both the bloom and the index file. 34 func versions(fileName string) (string, string, error) { 35 chunk, err := OpenChunk(fileName, false) 36 if err != nil { 37 return "", "", err 38 } 39 defer chunk.Close() 40 indexVersion := config.VersionTags[chunk.Index.Header.Hash.Hex()] 41 bloomVersion := config.VersionTags[chunk.Bloom.Header.Hash.Hex()] 42 return bloomVersion, indexVersion, nil 43 } 44 45 // OpenChunk returns a fully initialized index chunk. The path argument may point to either a bloom filter file or the 46 // index data file. Either will work. The bloom filter file must exist and will be opened for reading and its header 47 // will be read into memory, but the filter itself is not. The index data file need not exist (it will be downloaded 48 // later if the bloom indicates that its needed). If the index file does exist, however, it will be opened for reading 49 // and its header will be read into memory, but the index data itself will not be. 50 func OpenChunk(path string, check bool) (chunk Chunk, err error) { 51 chunk.Range, err = base.RangeFromFilenameE(path) 52 if err != nil { 53 return 54 } 55 56 chunk.Bloom, err = OpenBloom(ToBloomPath(path), check /* check */) 57 if err != nil { 58 return 59 } 60 61 chunk.Index, err = OpenIndex(ToIndexPath(path), check /* check */) 62 return 63 } 64 65 // Close closes both the bloom filter file pointer and the index data file pointer (if they are open) 66 func (chunk *Chunk) Close() { 67 if chunk.Bloom.File != nil { 68 chunk.Bloom.File.Close() 69 chunk.Bloom.File = nil 70 } 71 72 if chunk.Index.File != nil { 73 chunk.Index.File.Close() 74 chunk.Index.File = nil 75 } 76 } 77 78 // ChunkCid returns IPFS CID for the chunk without uploading it 79 func ChunkCid(path string) (chunkCid string, err error) { 80 file, err := os.OpenFile(path, os.O_RDONLY, 0) 81 if err != nil { 82 return 83 } 84 defer file.Close() 85 return calculateCid(file) 86 } 87 88 func calculateCid(r io.Reader) (chunkCid string, err error) { 89 sh := shell.NewShell(config.GetPinning().LocalPinUrl) 90 return sh.AddNoPin(r) 91 }