github.com/TrueBlocks/trueblocks-core/src/apps/chifra@v0.0.0-20241022031540-b362680128f7/internal/init/handle_init_prepare.go (about) 1 // Copyright 2021 The TrueBlocks Authors. All rights reserved. 2 // Use of this source code is governed by a license that can 3 // be found in the LICENSE file. 4 5 package initPkg 6 7 import ( 8 "encoding/binary" 9 "fmt" 10 "os" 11 "sort" 12 "strings" 13 14 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/base" 15 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/colors" 16 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/config" 17 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/file" 18 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/index" 19 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/logger" 20 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/manifest" 21 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/types" 22 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/walk" 23 ) 24 25 // prepareDownloadList returns a list of chunks that need to be modified in some way. There are three cases: 26 // 27 // 1. The chunk is on disc and agrees with the manifest in fileSize, magic number, and hash. In this 28 // case, we do nothing. 29 // 2. The chunk is on disc but does not agree with the manifest for one of the above reasons. In this 30 // case, we delete the chunk from disc and add it to the download list. 31 // 3. The chunk is not on disc. In this case, we add it to the download list. 32 // 33 // Note that in some cases, one part of a chunk may be valid while another part is not. For example, 34 // the index portion of a chunk may be valid, but the bloom filter may not be. In this case, we delete 35 // the entire chunk from disc and add it to the download list. 36 // 37 // If DryRun is true, then we do not delete anything from disc, nor do we add anything to the download list, 38 // but we do report what would have happened. 39 // 40 // Upon return, if a chunk is in the download list, then either its indexHash, its bloomHash, or both contains 41 // the IPFS hash that needs to be downloaded. Any chunks that are not in the download list are valid and remain 42 // on disc. 43 func (opts *InitOptions) prepareDownloadList(chain string, man *manifest.Manifest, blockNums []base.Blknum) ([]types.ChunkRecord, int, int, error) { 44 // The list of files on disc that need to be removed because they are invalid in some way or not in the manifest 45 deleteMap := make(map[base.FileRange]InitReason, len(man.Chunks)) 46 47 // The list of files in the manifest but not on disc so they need to be downloaded 48 downloadMap := make(map[base.FileRange]InitReason, len(man.Chunks)) 49 50 // The list of files that are on disc and later than the latest entry in the manifest. These are 51 // okay and should not be deleted. 52 afterMap := make(map[base.FileRange]InitReason, len(man.Chunks)) 53 54 // We assume we're going to have download everything... 55 for _, chunk := range man.Chunks { 56 downloadMap[base.RangeFromRangeString(chunk.Range)] = FILE_MISSING 57 } 58 59 // Visit each chunk on disc. If the chunk belongs and is of the right size and shape, mark it as OKAY, 60 // otherwise mark it with its reason for being invalid. 61 cleanIndex := func(walker *walk.CacheWalker, path string, first bool) (bool, error) { 62 // sanity... 63 if path != index.ToBloomPath(path) { 64 logger.Fatal("should not happen ==> we're spinning through the bloom filters") 65 } 66 67 // Is the on-disc chunk in the manifest? 68 rng := base.RangeFromFilename(path) 69 chunk := man.ChunkMap[rng.String()] 70 71 if chunk != nil { 72 // Is it valid? 73 bloomStatus, indexStatus, err := isValidChunk(path, chunk.BloomSize, chunk.IndexSize, opts.All) 74 if err != nil { 75 if bloomStatus != FILE_ERROR && indexStatus != FILE_ERROR { 76 logger.Fatal("should not happen ==> implementation error in cleanIndex") 77 } 78 return false, err // bubble the error up 79 } 80 81 if bloomStatus == OKAY && indexStatus == OKAY { 82 // The chunk is valid. We don't need to download it or delete it 83 downloadMap[rng] = OKAY 84 return true, nil 85 } else { 86 // one or the other of them is invalid. We need to delete it and download it 87 // Note: we don't need to delete it, it will get downloaded and overwritten 88 if bloomStatus != OKAY { 89 deleteMap[rng] = bloomStatus 90 downloadMap[rng] = bloomStatus 91 } else { 92 deleteMap[rng] = indexStatus 93 downloadMap[rng] = indexStatus 94 } 95 } 96 97 return true, nil 98 99 } else { 100 lastInManifest := base.FileRange{} 101 if len(man.Chunks) > 0 { 102 lastChunk := man.Chunks[len(man.Chunks)-1] 103 lastInManifest = base.RangeFromRangeString(lastChunk.Range) 104 } 105 106 // The chunk is on disc but not in the manifest. We need to delete it 107 // unless it's after the latest chunk in the manifest, in which case 108 // the user has presembled scraped it and we should leave it alone. 109 if !rng.LaterThan(lastInManifest) { 110 deleteMap[rng] = NOT_IN_MANIFEST 111 } else { 112 afterMap[rng] = AFTER_MANIFEST 113 } 114 return true, nil 115 } 116 } 117 118 walker := walk.NewCacheWalker( 119 chain, 120 opts.Globals.TestMode, 121 10, /* maxTests */ 122 cleanIndex, 123 ) 124 125 if err := walker.WalkBloomFilters(blockNums); err != nil { 126 return nil, 0, 0, err 127 } 128 129 nDeleted := 0 130 for rng, reason := range deleteMap { 131 indexPath := rng.RangeToFilename(chain) 132 bloomPath := index.ToBloomPath(indexPath) 133 indexExists := file.FileExists(indexPath) 134 bloomExists := file.FileExists(bloomPath) 135 if !opts.DryRun { 136 if indexExists { 137 logger.Info("Removing", indexPath) 138 if err := os.Remove(indexPath); err != nil { 139 return nil, 0, nDeleted, err 140 } 141 nDeleted++ 142 } 143 if bloomExists { 144 logger.Info("Removing", bloomPath) 145 if err := os.Remove(bloomPath); err != nil { 146 return nil, 0, nDeleted, err 147 } 148 nDeleted++ 149 } 150 } 151 if bloomExists || indexExists { 152 opts.reportReason("chunk deleted", reason, rng.String()) 153 } 154 } 155 156 downloadList := make([]types.ChunkRecord, 0, len(man.ChunkMap)) 157 nToDownload := 0 158 for _, chunk := range man.ChunkMap { 159 rng := base.RangeFromRangeString(chunk.Range) 160 if downloadMap[rng] == OKAY || rng.Last < opts.FirstBlock { 161 continue 162 } 163 indexPath := rng.RangeToFilename(chain) 164 bloomStatus, indexStatus, err := isValidChunk(index.ToBloomPath(indexPath), chunk.BloomSize, chunk.IndexSize, opts.All) 165 if err != nil { 166 return nil, 0, nDeleted, err 167 } 168 if bloomStatus == OKAY { 169 // if its okay, we don't need to download it 170 chunk.BloomHash = "" 171 chunk.BloomSize = 0 172 } else { 173 nToDownload++ 174 } 175 if indexStatus == OKAY { 176 // if its okay, we don't need to download it 177 chunk.IndexHash = "" 178 chunk.IndexSize = 0 179 } else { 180 nToDownload++ 181 } 182 downloadList = append(downloadList, *chunk) 183 opts.reportReason("chunk downloaded", downloadMap[rng], rng.String()) 184 } 185 186 for rng, reason := range afterMap { 187 opts.reportReason("chunk scraped", reason, rng.String()) 188 } 189 190 sort.Slice(downloadList, func(i, j int) bool { 191 return downloadList[i].Range > downloadList[j].Range 192 }) 193 194 return downloadList, nToDownload, nDeleted, nil 195 } 196 197 func (opts *InitOptions) reportReason(prefix string, status InitReason, path string) { 198 verbose := opts.Globals.Verbose || opts.DryRun 199 if !verbose { 200 return 201 } 202 203 if status == OKAY || status == AFTER_MANIFEST { 204 col := colors.BrightGreen 205 rng := base.RangeFromFilename(path) 206 msg := fmt.Sprintf("%schunk %s%s %s", col, Reasons[status], colors.Off, rng) 207 logger.Info(msg) 208 } else { 209 col := colors.BrightMagenta 210 if status == FILE_ERROR || status == NOT_IN_MANIFEST { 211 col = colors.BrightRed 212 } else if strings.Contains(path, string(os.PathSeparator) + "blooms" + string(os.PathSeparator)) { 213 col = colors.BrightYellow 214 } 215 rng := base.RangeFromFilename(path) 216 msg := fmt.Sprintf("%s%s [%s]%s %s", col, prefix, Reasons[status], colors.Off, rng) 217 logger.Warn(msg) 218 } 219 } 220 221 // isValidChunk validates the bloom file's header and the index if told to do so. Note that in all cases, it resolves both. 222 func isValidChunk(path string, bloomSize, indexSize int64, indexRequired bool) (InitReason, InitReason, error) { 223 if path != index.ToBloomPath(path) { 224 logger.Fatal("should not happen ==> only process bloom folder paths in isValidChunk") 225 } 226 227 var err error 228 indexPath := index.ToIndexPath(path) 229 230 // Resolve the status of the Bloom file first 231 bloom := FILE_MISSING 232 if file.FileExists(path) { 233 bloom = checkSize(path, bloomSize) 234 if bloom == OKAY { 235 bloom, err = checkHeader(path) 236 } 237 } 238 // The bloom filter is resolved. 239 240 // Determine the status of the index (if it exists) 241 idx := OKAY 242 if !file.FileExists(indexPath) { 243 if indexRequired || strings.Contains(indexPath, "000000000-000000000") { 244 idx = FILE_MISSING 245 } 246 } else { 247 idx = checkSize(indexPath, indexSize) 248 if idx == OKAY { 249 idx, err = checkHeader(indexPath) 250 } 251 } 252 253 return bloom, idx, err 254 } 255 256 func checkSize(path string, expected int64) InitReason { 257 if !file.FileExists(path) { 258 logger.Fatal("should not happen ==> file existence already checked") 259 } 260 261 if file.FileSize(path) != expected { 262 return WRONG_SIZE 263 } 264 265 return OKAY 266 } 267 268 func checkHeader(path string) (InitReason, error) { 269 if !file.FileExists(path) { 270 logger.Fatal("should not happen ==> file existence already checked") 271 } 272 273 ff, err := os.OpenFile(path, os.O_RDONLY, 0644) 274 if err != nil { 275 return FILE_ERROR, err 276 } 277 defer ff.Close() 278 279 if path == index.ToBloomPath(path) { 280 var magic uint16 281 err = binary.Read(ff, binary.LittleEndian, &magic) 282 if err != nil { 283 return FILE_ERROR, err 284 } 285 if magic != file.SmallMagicNumber { 286 return WRONG_MAGIC, nil 287 } 288 289 var hash base.Hash 290 err = binary.Read(ff, binary.LittleEndian, &hash) 291 if err != nil { 292 return FILE_ERROR, err 293 } 294 if hash != base.BytesToHash(config.HeaderHash(config.ExpectedVersion())) { 295 return WRONG_HASH, nil 296 } 297 298 return OKAY, nil 299 300 } else if path == index.ToIndexPath(path) { 301 var magic uint32 302 err = binary.Read(ff, binary.LittleEndian, &magic) 303 if err != nil { 304 return FILE_ERROR, err 305 } 306 if magic != file.MagicNumber { 307 return WRONG_MAGIC, nil 308 } 309 310 var hash base.Hash 311 err = binary.Read(ff, binary.LittleEndian, &hash) 312 if err != nil { 313 return FILE_ERROR, err 314 } 315 if hash != base.BytesToHash(config.HeaderHash(config.ExpectedVersion())) { 316 return WRONG_HASH, nil 317 } 318 319 return OKAY, nil 320 321 } else { 322 logger.Fatal("should not happen ==> unknown type in hasValidHeader") 323 return OKAY, nil 324 } 325 } 326 327 type InitReason int 328 329 const ( 330 OKAY InitReason = iota 331 FILE_MISSING 332 WRONG_SIZE 333 WRONG_MAGIC 334 WRONG_HASH 335 FILE_ERROR 336 NOT_IN_MANIFEST 337 AFTER_MANIFEST 338 ) 339 340 var Reasons = map[InitReason]string{ 341 OKAY: "okay", 342 FILE_ERROR: "file error", 343 FILE_MISSING: "file missing", 344 WRONG_SIZE: "wrong size", 345 WRONG_MAGIC: "wrong magic number", 346 WRONG_HASH: "wrong header hash", 347 NOT_IN_MANIFEST: "not in manifest", 348 AFTER_MANIFEST: "range after manifest", 349 }