github.com/TrueBlocks/trueblocks-core/src/apps/chifra@v0.0.0-20241022031540-b362680128f7/internal/init/handle_init_prepare.go (about)

     1  // Copyright 2021 The TrueBlocks Authors. All rights reserved.
     2  // Use of this source code is governed by a license that can
     3  // be found in the LICENSE file.
     4  
     5  package initPkg
     6  
     7  import (
     8  	"encoding/binary"
     9  	"fmt"
    10  	"os"
    11  	"sort"
    12  	"strings"
    13  
    14  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/base"
    15  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/colors"
    16  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/config"
    17  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/file"
    18  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/index"
    19  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/logger"
    20  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/manifest"
    21  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/types"
    22  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/walk"
    23  )
    24  
    25  // prepareDownloadList returns a list of chunks that need to be modified in some way. There are three cases:
    26  //
    27  //  1. The chunk is on disc and agrees with the manifest in fileSize, magic number, and hash. In this
    28  //     case, we do nothing.
    29  //  2. The chunk is on disc but does not agree with the manifest for one of the above reasons. In this
    30  //     case, we delete the chunk from disc and add it to the download list.
    31  //  3. The chunk is not on disc. In this case, we add it to the download list.
    32  //
    33  // Note that in some cases, one part of a chunk may be valid while another part is not. For example,
    34  // the index portion of a chunk may be valid, but the bloom filter may not be. In this case, we delete
    35  // the entire chunk from disc and add it to the download list.
    36  //
    37  // If DryRun is true, then we do not delete anything from disc, nor do we add anything to the download list,
    38  // but we do report what would have happened.
    39  //
    40  // Upon return, if a chunk is in the download list, then either its indexHash, its bloomHash, or both contains
    41  // the IPFS hash that needs to be downloaded. Any chunks that are not in the download list are valid and remain
    42  // on disc.
    43  func (opts *InitOptions) prepareDownloadList(chain string, man *manifest.Manifest, blockNums []base.Blknum) ([]types.ChunkRecord, int, int, error) {
    44  	// The list of files on disc that need to be removed because they are invalid in some way or not in the manifest
    45  	deleteMap := make(map[base.FileRange]InitReason, len(man.Chunks))
    46  
    47  	// The list of files in the manifest but not on disc so they need to be downloaded
    48  	downloadMap := make(map[base.FileRange]InitReason, len(man.Chunks))
    49  
    50  	// The list of files that are on disc and later than the latest entry in the manifest. These are
    51  	// okay and should not be deleted.
    52  	afterMap := make(map[base.FileRange]InitReason, len(man.Chunks))
    53  
    54  	// We assume we're going to have download everything...
    55  	for _, chunk := range man.Chunks {
    56  		downloadMap[base.RangeFromRangeString(chunk.Range)] = FILE_MISSING
    57  	}
    58  
    59  	// Visit each chunk on disc. If the chunk belongs and is of the right size and shape, mark it as OKAY,
    60  	// otherwise mark it with its reason for being invalid.
    61  	cleanIndex := func(walker *walk.CacheWalker, path string, first bool) (bool, error) {
    62  		// sanity...
    63  		if path != index.ToBloomPath(path) {
    64  			logger.Fatal("should not happen ==> we're spinning through the bloom filters")
    65  		}
    66  
    67  		// Is the on-disc chunk in the manifest?
    68  		rng := base.RangeFromFilename(path)
    69  		chunk := man.ChunkMap[rng.String()]
    70  
    71  		if chunk != nil {
    72  			// Is it valid?
    73  			bloomStatus, indexStatus, err := isValidChunk(path, chunk.BloomSize, chunk.IndexSize, opts.All)
    74  			if err != nil {
    75  				if bloomStatus != FILE_ERROR && indexStatus != FILE_ERROR {
    76  					logger.Fatal("should not happen ==> implementation error in cleanIndex")
    77  				}
    78  				return false, err // bubble the error up
    79  			}
    80  
    81  			if bloomStatus == OKAY && indexStatus == OKAY {
    82  				// The chunk is valid. We don't need to download it or delete it
    83  				downloadMap[rng] = OKAY
    84  				return true, nil
    85  			} else {
    86  				// one or the other of them is invalid. We need to delete it and download it
    87  				// Note: we don't need to delete it, it will get downloaded and overwritten
    88  				if bloomStatus != OKAY {
    89  					deleteMap[rng] = bloomStatus
    90  					downloadMap[rng] = bloomStatus
    91  				} else {
    92  					deleteMap[rng] = indexStatus
    93  					downloadMap[rng] = indexStatus
    94  				}
    95  			}
    96  
    97  			return true, nil
    98  
    99  		} else {
   100  			lastInManifest := base.FileRange{}
   101  			if len(man.Chunks) > 0 {
   102  				lastChunk := man.Chunks[len(man.Chunks)-1]
   103  				lastInManifest = base.RangeFromRangeString(lastChunk.Range)
   104  			}
   105  
   106  			// The chunk is on disc but not in the manifest. We need to delete it
   107  			// unless it's after the latest chunk in the manifest, in which case
   108  			// the user has presembled scraped it and we should leave it alone.
   109  			if !rng.LaterThan(lastInManifest) {
   110  				deleteMap[rng] = NOT_IN_MANIFEST
   111  			} else {
   112  				afterMap[rng] = AFTER_MANIFEST
   113  			}
   114  			return true, nil
   115  		}
   116  	}
   117  
   118  	walker := walk.NewCacheWalker(
   119  		chain,
   120  		opts.Globals.TestMode,
   121  		10, /* maxTests */
   122  		cleanIndex,
   123  	)
   124  
   125  	if err := walker.WalkBloomFilters(blockNums); err != nil {
   126  		return nil, 0, 0, err
   127  	}
   128  
   129  	nDeleted := 0
   130  	for rng, reason := range deleteMap {
   131  		indexPath := rng.RangeToFilename(chain)
   132  		bloomPath := index.ToBloomPath(indexPath)
   133  		indexExists := file.FileExists(indexPath)
   134  		bloomExists := file.FileExists(bloomPath)
   135  		if !opts.DryRun {
   136  			if indexExists {
   137  				logger.Info("Removing", indexPath)
   138  				if err := os.Remove(indexPath); err != nil {
   139  					return nil, 0, nDeleted, err
   140  				}
   141  				nDeleted++
   142  			}
   143  			if bloomExists {
   144  				logger.Info("Removing", bloomPath)
   145  				if err := os.Remove(bloomPath); err != nil {
   146  					return nil, 0, nDeleted, err
   147  				}
   148  				nDeleted++
   149  			}
   150  		}
   151  		if bloomExists || indexExists {
   152  			opts.reportReason("chunk deleted", reason, rng.String())
   153  		}
   154  	}
   155  
   156  	downloadList := make([]types.ChunkRecord, 0, len(man.ChunkMap))
   157  	nToDownload := 0
   158  	for _, chunk := range man.ChunkMap {
   159  		rng := base.RangeFromRangeString(chunk.Range)
   160  		if downloadMap[rng] == OKAY || rng.Last < opts.FirstBlock {
   161  			continue
   162  		}
   163  		indexPath := rng.RangeToFilename(chain)
   164  		bloomStatus, indexStatus, err := isValidChunk(index.ToBloomPath(indexPath), chunk.BloomSize, chunk.IndexSize, opts.All)
   165  		if err != nil {
   166  			return nil, 0, nDeleted, err
   167  		}
   168  		if bloomStatus == OKAY {
   169  			// if its okay, we don't need to download it
   170  			chunk.BloomHash = ""
   171  			chunk.BloomSize = 0
   172  		} else {
   173  			nToDownload++
   174  		}
   175  		if indexStatus == OKAY {
   176  			// if its okay, we don't need to download it
   177  			chunk.IndexHash = ""
   178  			chunk.IndexSize = 0
   179  		} else {
   180  			nToDownload++
   181  		}
   182  		downloadList = append(downloadList, *chunk)
   183  		opts.reportReason("chunk downloaded", downloadMap[rng], rng.String())
   184  	}
   185  
   186  	for rng, reason := range afterMap {
   187  		opts.reportReason("chunk scraped", reason, rng.String())
   188  	}
   189  
   190  	sort.Slice(downloadList, func(i, j int) bool {
   191  		return downloadList[i].Range > downloadList[j].Range
   192  	})
   193  
   194  	return downloadList, nToDownload, nDeleted, nil
   195  }
   196  
   197  func (opts *InitOptions) reportReason(prefix string, status InitReason, path string) {
   198  	verbose := opts.Globals.Verbose || opts.DryRun
   199  	if !verbose {
   200  		return
   201  	}
   202  
   203  	if status == OKAY || status == AFTER_MANIFEST {
   204  		col := colors.BrightGreen
   205  		rng := base.RangeFromFilename(path)
   206  		msg := fmt.Sprintf("%schunk %s%s %s", col, Reasons[status], colors.Off, rng)
   207  		logger.Info(msg)
   208  	} else {
   209  		col := colors.BrightMagenta
   210  		if status == FILE_ERROR || status == NOT_IN_MANIFEST {
   211  			col = colors.BrightRed
   212  		} else if strings.Contains(path, string(os.PathSeparator) + "blooms" + string(os.PathSeparator)) {
   213  			col = colors.BrightYellow
   214  		}
   215  		rng := base.RangeFromFilename(path)
   216  		msg := fmt.Sprintf("%s%s [%s]%s %s", col, prefix, Reasons[status], colors.Off, rng)
   217  		logger.Warn(msg)
   218  	}
   219  }
   220  
   221  // isValidChunk validates the bloom file's header and the index if told to do so. Note that in all cases, it resolves both.
   222  func isValidChunk(path string, bloomSize, indexSize int64, indexRequired bool) (InitReason, InitReason, error) {
   223  	if path != index.ToBloomPath(path) {
   224  		logger.Fatal("should not happen ==> only process bloom folder paths in isValidChunk")
   225  	}
   226  
   227  	var err error
   228  	indexPath := index.ToIndexPath(path)
   229  
   230  	// Resolve the status of the Bloom file first
   231  	bloom := FILE_MISSING
   232  	if file.FileExists(path) {
   233  		bloom = checkSize(path, bloomSize)
   234  		if bloom == OKAY {
   235  			bloom, err = checkHeader(path)
   236  		}
   237  	}
   238  	// The bloom filter is resolved.
   239  
   240  	// Determine the status of the index (if it exists)
   241  	idx := OKAY
   242  	if !file.FileExists(indexPath) {
   243  		if indexRequired || strings.Contains(indexPath, "000000000-000000000") {
   244  			idx = FILE_MISSING
   245  		}
   246  	} else {
   247  		idx = checkSize(indexPath, indexSize)
   248  		if idx == OKAY {
   249  			idx, err = checkHeader(indexPath)
   250  		}
   251  	}
   252  
   253  	return bloom, idx, err
   254  }
   255  
   256  func checkSize(path string, expected int64) InitReason {
   257  	if !file.FileExists(path) {
   258  		logger.Fatal("should not happen ==> file existence already checked")
   259  	}
   260  
   261  	if file.FileSize(path) != expected {
   262  		return WRONG_SIZE
   263  	}
   264  
   265  	return OKAY
   266  }
   267  
   268  func checkHeader(path string) (InitReason, error) {
   269  	if !file.FileExists(path) {
   270  		logger.Fatal("should not happen ==> file existence already checked")
   271  	}
   272  
   273  	ff, err := os.OpenFile(path, os.O_RDONLY, 0644)
   274  	if err != nil {
   275  		return FILE_ERROR, err
   276  	}
   277  	defer ff.Close()
   278  
   279  	if path == index.ToBloomPath(path) {
   280  		var magic uint16
   281  		err = binary.Read(ff, binary.LittleEndian, &magic)
   282  		if err != nil {
   283  			return FILE_ERROR, err
   284  		}
   285  		if magic != file.SmallMagicNumber {
   286  			return WRONG_MAGIC, nil
   287  		}
   288  
   289  		var hash base.Hash
   290  		err = binary.Read(ff, binary.LittleEndian, &hash)
   291  		if err != nil {
   292  			return FILE_ERROR, err
   293  		}
   294  		if hash != base.BytesToHash(config.HeaderHash(config.ExpectedVersion())) {
   295  			return WRONG_HASH, nil
   296  		}
   297  
   298  		return OKAY, nil
   299  
   300  	} else if path == index.ToIndexPath(path) {
   301  		var magic uint32
   302  		err = binary.Read(ff, binary.LittleEndian, &magic)
   303  		if err != nil {
   304  			return FILE_ERROR, err
   305  		}
   306  		if magic != file.MagicNumber {
   307  			return WRONG_MAGIC, nil
   308  		}
   309  
   310  		var hash base.Hash
   311  		err = binary.Read(ff, binary.LittleEndian, &hash)
   312  		if err != nil {
   313  			return FILE_ERROR, err
   314  		}
   315  		if hash != base.BytesToHash(config.HeaderHash(config.ExpectedVersion())) {
   316  			return WRONG_HASH, nil
   317  		}
   318  
   319  		return OKAY, nil
   320  
   321  	} else {
   322  		logger.Fatal("should not happen ==> unknown type in hasValidHeader")
   323  		return OKAY, nil
   324  	}
   325  }
   326  
   327  type InitReason int
   328  
   329  const (
   330  	OKAY InitReason = iota
   331  	FILE_MISSING
   332  	WRONG_SIZE
   333  	WRONG_MAGIC
   334  	WRONG_HASH
   335  	FILE_ERROR
   336  	NOT_IN_MANIFEST
   337  	AFTER_MANIFEST
   338  )
   339  
   340  var Reasons = map[InitReason]string{
   341  	OKAY:            "okay",
   342  	FILE_ERROR:      "file error",
   343  	FILE_MISSING:    "file missing",
   344  	WRONG_SIZE:      "wrong size",
   345  	WRONG_MAGIC:     "wrong magic number",
   346  	WRONG_HASH:      "wrong header hash",
   347  	NOT_IN_MANIFEST: "not in manifest",
   348  	AFTER_MANIFEST:  "range after manifest",
   349  }