code.vegaprotocol.io/vega@v0.79.0/datanode/networkhistory/store/store.go (about)

     1  // Copyright (C) 2023 Gobalsky Labs Limited
     2  //
     3  // This program is free software: you can redistribute it and/or modify
     4  // it under the terms of the GNU Affero General Public License as
     5  // published by the Free Software Foundation, either version 3 of the
     6  // License, or (at your option) any later version.
     7  //
     8  // This program is distributed in the hope that it will be useful,
     9  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    10  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    11  // GNU Affero General Public License for more details.
    12  //
    13  // You should have received a copy of the GNU Affero General Public License
    14  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    15  
    16  package store
    17  
    18  import (
    19  	"archive/zip"
    20  	"context"
    21  	"encoding/hex"
    22  	"encoding/json"
    23  	"errors"
    24  	"fmt"
    25  	"io"
    26  	"os"
    27  	"path/filepath"
    28  	"sort"
    29  	"strconv"
    30  	"strings"
    31  	"time"
    32  
    33  	"code.vegaprotocol.io/vega/datanode/metrics"
    34  	"code.vegaprotocol.io/vega/datanode/networkhistory/segment"
    35  	"code.vegaprotocol.io/vega/libs/memory"
    36  	"code.vegaprotocol.io/vega/logging"
    37  
    38  	"github.com/dustin/go-humanize"
    39  	icore "github.com/ipfs/boxo/coreiface"
    40  	"github.com/ipfs/go-cid"
    41  	files "github.com/ipfs/go-libipfs/files"
    42  	ipfslogging "github.com/ipfs/go-log"
    43  	"github.com/ipfs/interface-go-ipfs-core/path"
    44  	"github.com/ipfs/kubo/config"
    45  	serialize "github.com/ipfs/kubo/config/serialize"
    46  	"github.com/ipfs/kubo/core"
    47  	"github.com/ipfs/kubo/core/bootstrap"
    48  	"github.com/ipfs/kubo/core/coreapi"
    49  	"github.com/ipfs/kubo/core/corehttp"
    50  	"github.com/ipfs/kubo/core/corerepo"
    51  	"github.com/ipfs/kubo/core/node/libp2p"
    52  	"github.com/ipfs/kubo/core/node/libp2p/fd"
    53  	"github.com/ipfs/kubo/plugin/loader"
    54  	"github.com/ipfs/kubo/repo"
    55  	"github.com/ipfs/kubo/repo/fsrepo"
    56  	"github.com/libp2p/go-libp2p/core/peer"
    57  	ma "github.com/multiformats/go-multiaddr"
    58  	"github.com/prometheus/client_golang/prometheus"
    59  )
    60  
// segmentMetaDataFile is the name of the JSON metadata entry embedded in every
// history segment zip; it is read back in FetchHistorySegment.
const segmentMetaDataFile = "metadata.json"

// ErrSegmentNotFound is returned when the index holds no segment for the
// requested height (or no segments at all).
var ErrSegmentNotFound = errors.New("segment not found")
    64  
// index abstracts the persistent mapping between block heights and history
// segments (LevelDB-backed in practice — see NewIndex and the close logging in Stop).
type index interface {
	// Get returns the segment entry for the given height (callers pass a segment's to-height).
	Get(height int64) (segment.Full, error)
	// Add inserts or overwrites the entry for the given segment.
	Add(metaData segment.Full) error
	// Remove deletes the entry for the given segment.
	Remove(indexEntry segment.Full) error
	// ListAllEntriesOldestFirst returns every entry ordered by ascending height.
	ListAllEntriesOldestFirst() (segment.Segments[segment.Full], error)
	// ListAllEntriesMostRecentFirst returns every entry ordered by descending height.
	ListAllEntriesMostRecentFirst() (segment.Segments[segment.Full], error)
	// GetHighestBlockHeightEntry returns the entry with the greatest block height.
	GetHighestBlockHeightEntry() (segment.Full, error)
	// Close releases the underlying store.
	Close() error
}
    74  
// IpfsNode identifies a single IPFS peer: its peer ID plus the transport
// multiaddress it is reachable on.
type IpfsNode struct {
	IpfsId peer.ID
	Addr   ma.Multiaddr
}
    79  
    80  func (i IpfsNode) IpfsAddress() (ma.Multiaddr, error) {
    81  	ipfsProtocol, err := ma.NewMultiaddr(fmt.Sprintf("/p2p/%s", i.IpfsId))
    82  	if err != nil {
    83  		return nil, fmt.Errorf("failed to create new p2p multi address: %w", err)
    84  	}
    85  
    86  	return i.Addr.Encapsulate(ipfsProtocol), nil
    87  }
    88  
// PeerConnection describes one live connection between this node (Local) and
// a remote IPFS peer (Remote).
type PeerConnection struct {
	Local  IpfsNode
	Remote IpfsNode
}
    93  
// Store manages network history segments: an embedded private-network IPFS
// node for publishing and fetching segment zips, plus a local index mapping
// block heights to segment IDs.
type Store struct {
	log          *logging.Logger
	cfg          Config
	identity     config.Identity // IPFS peer identity (peer ID + private key) from config
	ipfsAPI      icore.CoreAPI
	ipfsNode     *core.IpfsNode
	ipfsRepo     repo.Repo
	index        index     // height -> segment index
	swarmKeySeed string    // seed from which the swarm key is derived
	swarmKey     string    // private-network swarm key written to the repo
	lastGC       time.Time // last time segment garbage collection ran (rate limits GC)

	indexPath  string // on-disk location of the index
	stagingDir string // scratch area for fetched/staged segment zips
	ipfsPath   string // IPFS repo path
}
   110  
// plugins is the process-wide IPFS plugin loader. It is global to prevent IPFS
// plugins being loaded twice: IPFS uses a dependency injection framework with
// global state that errors on a second load. In practice this only matters in
// tests, as a running datanode has exactly one IPFS node instance.
var plugins *loader.PluginLoader
   115  
   116  func New(ctx context.Context, log *logging.Logger, chainID string, cfg Config, networkHistoryHome string, maxMemoryPercent uint8,
   117  ) (*Store, error) {
   118  	if log.IsDebug() {
   119  		ipfslogging.SetDebugLogging()
   120  	}
   121  
   122  	storePath := filepath.Join(networkHistoryHome, "store")
   123  
   124  	p := &Store{
   125  		log:        log.Named("store"),
   126  		cfg:        cfg,
   127  		indexPath:  filepath.Join(storePath, "index"),
   128  		stagingDir: filepath.Join(storePath, "staging"),
   129  		ipfsPath:   filepath.Join(storePath, "ipfs"),
   130  	}
   131  
   132  	err := os.MkdirAll(p.indexPath, os.ModePerm)
   133  	if err != nil {
   134  		return nil, fmt.Errorf("failed to create index path:%w", err)
   135  	}
   136  
   137  	err = os.MkdirAll(p.stagingDir, os.ModePerm)
   138  	if err != nil {
   139  		return nil, fmt.Errorf("failed to create staging path:%w", err)
   140  	}
   141  
   142  	idxLog := log.With(logging.String("component", "index"))
   143  	p.index, err = NewIndex(p.indexPath, idxLog)
   144  	if err != nil {
   145  		return nil, fmt.Errorf("failed to create index:%w", err)
   146  	}
   147  
   148  	if len(chainID) == 0 {
   149  		return nil, fmt.Errorf("chain ID must be set")
   150  	}
   151  
   152  	if len(cfg.PeerID) == 0 || len(cfg.PrivKey) == 0 {
   153  		return nil, fmt.Errorf("the ipfs peer id and priv key must be set")
   154  	}
   155  
   156  	p.identity = config.Identity{
   157  		PeerID:  cfg.PeerID,
   158  		PrivKey: cfg.PrivKey,
   159  	}
   160  
   161  	p.log.Infof("starting network history store with ipfs Peer Id:%s", p.identity.PeerID)
   162  
   163  	if plugins == nil {
   164  		plugins, err = loadPlugins(p.ipfsPath)
   165  		if err != nil {
   166  			return nil, fmt.Errorf("failed to load ipfs plugins:%w", err)
   167  		}
   168  	}
   169  
   170  	p.log.Debugf("ipfs swarm port:%d", cfg.SwarmPort)
   171  	ipfsCfg, err := createIpfsNodeConfiguration(p.log, p.identity, cfg.SwarmPort)
   172  
   173  	p.log.Debugf("ipfs bootstrap peers:%v", ipfsCfg.Bootstrap)
   174  
   175  	if err != nil {
   176  		return nil, fmt.Errorf("failed to create ipfs node configuration:%w", err)
   177  	}
   178  
   179  	p.swarmKeySeed = cfg.GetSwarmKeySeed(log, chainID)
   180  
   181  	p.ipfsNode, p.ipfsRepo, p.swarmKey, err = createIpfsNode(ctx, log, p.ipfsPath, ipfsCfg, p.swarmKeySeed, maxMemoryPercent)
   182  	if err != nil {
   183  		return nil, fmt.Errorf("failed to create ipfs node:%w", err)
   184  	}
   185  
   186  	if p.ipfsNode.PNetFingerprint != nil {
   187  		log.Infof("Swarm is limited to private network of peers with the fingerprint %x", p.ipfsNode.PNetFingerprint)
   188  	}
   189  
   190  	p.ipfsAPI, err = coreapi.NewCoreAPI(p.ipfsNode)
   191  	if err != nil {
   192  		return nil, fmt.Errorf("failed to create ipfs api:%w", err)
   193  	}
   194  
   195  	peers, err := config.ParseBootstrapPeers(cfg.BootstrapPeers)
   196  	if err != nil {
   197  		return nil, fmt.Errorf("failed to parse bootstrap peers: %w", err)
   198  	}
   199  
   200  	if err = p.ipfsNode.Bootstrap(bootstrap.BootstrapConfigWithPeers(peers)); err != nil {
   201  		return nil, fmt.Errorf("failed to bootstrap peers: %w", err)
   202  	}
   203  
   204  	if err = setupMetrics(p.ipfsNode); err != nil {
   205  		return nil, fmt.Errorf("failed to setup metrics: %w", err)
   206  	}
   207  
   208  	return p, nil
   209  }
   210  
   211  func (p *Store) Stop() {
   212  	p.log.Info("Cleaning up network history store")
   213  	if p.ipfsNode != nil {
   214  		p.log.Info("Closing IPFS node")
   215  		if err := p.ipfsNode.Close(); err != nil {
   216  			p.log.Error("Failed to close IPFS node", logging.Error(err))
   217  		}
   218  	}
   219  
   220  	if p.index != nil {
   221  		if err := p.index.Close(); err != nil {
   222  			p.log.Error("Failed to close LevelDB:%s", logging.Error(err))
   223  		}
   224  		p.log.Info("LevelDB closed")
   225  	}
   226  }
   227  
// GetSwarmKey returns the private-network swarm key the IPFS node was started with.
func (p *Store) GetSwarmKey() string {
	return p.swarmKey
}

// GetSwarmKeySeed returns the seed used to derive the swarm key.
func (p *Store) GetSwarmKeySeed() string {
	return p.swarmKeySeed
}
   235  
   236  func (p *Store) GetLocalNode() (IpfsNode, error) {
   237  	localNodeMultiAddress, err := ma.NewMultiaddr(fmt.Sprintf("/ip4/0.0.0.0/tcp/%d", p.cfg.SwarmPort))
   238  	if err != nil {
   239  		return IpfsNode{}, fmt.Errorf("failed to create default multi addr: %w", err)
   240  	}
   241  
   242  	localNode := IpfsNode{
   243  		IpfsId: p.ipfsNode.PeerHost.Network().LocalPeer(),
   244  		Addr:   localNodeMultiAddress,
   245  	}
   246  
   247  	connectedPeers := p.GetConnectedPeers()
   248  	if err != nil {
   249  		return IpfsNode{}, fmt.Errorf("failed to get connected peers: %w", err)
   250  	}
   251  
   252  	tcpProtocol := ma.ProtocolWithName("tcp")
   253  	for _, cp := range connectedPeers {
   254  		port, err := cp.Local.Addr.ValueForProtocol(tcpProtocol.Code)
   255  		if err == nil {
   256  			if port == strconv.Itoa(p.cfg.SwarmPort) {
   257  				localNode.Addr = cp.Local.Addr
   258  				break
   259  			}
   260  		}
   261  	}
   262  
   263  	return localNode, nil
   264  }
   265  
   266  func (p *Store) GetConnectedPeers() []PeerConnection {
   267  	peerConnections := make([]PeerConnection, 0, 10)
   268  
   269  	thisNode := p.ipfsNode.PeerHost.Network().LocalPeer()
   270  	peers := p.ipfsNode.PeerHost.Network().Peers()
   271  
   272  	for _, peer := range peers {
   273  		if peer == thisNode {
   274  			continue
   275  		}
   276  
   277  		connections := p.ipfsNode.PeerHost.Network().ConnsToPeer(peer)
   278  		for _, conn := range connections {
   279  			peerConnections = append(peerConnections, PeerConnection{
   280  				Local: IpfsNode{
   281  					IpfsId: conn.LocalPeer(),
   282  					Addr:   conn.LocalMultiaddr(),
   283  				},
   284  				Remote: IpfsNode{
   285  					IpfsId: conn.RemotePeer(),
   286  					Addr:   conn.RemoteMultiaddr(),
   287  				},
   288  			})
   289  		}
   290  	}
   291  
   292  	return peerConnections
   293  }
   294  
   295  func (p *Store) ResetIndex() error {
   296  	err := os.RemoveAll(p.indexPath)
   297  	if err != nil {
   298  		return fmt.Errorf("failed to remove index path:%w", err)
   299  	}
   300  
   301  	err = os.MkdirAll(p.indexPath, os.ModePerm)
   302  	if err != nil {
   303  		return fmt.Errorf("failed to create index path:%w", err)
   304  	}
   305  
   306  	idxLog := p.log.With(logging.String("component", "index"))
   307  	p.index, err = NewIndex(p.indexPath, idxLog)
   308  	if err != nil {
   309  		return fmt.Errorf("failed to create index:%w", err)
   310  	}
   311  
   312  	return nil
   313  }
   314  
// GetPeerID returns the configured IPFS peer ID of this node.
func (p *Store) GetPeerID() string {
	return p.identity.PeerID
}
   318  
   319  func (p *Store) ConnectedToPeer(peerIDStr string) (bool, error) {
   320  	p.ipfsNode.PeerHost.Network().Conns()
   321  
   322  	for _, pr := range p.ipfsNode.PeerHost.Network().Peers() {
   323  		if pr.String() == peerIDStr {
   324  			return true, nil
   325  		}
   326  	}
   327  	return false, nil
   328  }
   329  
// AddSnapshotData publishes an unpublished snapshot segment: it zips the
// segment data together with a metadata.json linking it to the previous
// segment, adds the zip to IPFS and records it in the index. The unpublished
// source directory is always removed afterwards, even on failure. Old-segment
// garbage collection runs opportunistically once the configured interval has
// elapsed, and the IPFS storage-usage metric is refreshed on success.
func (p *Store) AddSnapshotData(ctx context.Context, s segment.Unpublished) (err error) {
	historyID := fmt.Sprintf("%s-%d-%d", s.ChainID, s.HeightFrom, s.HeightTo)

	p.log.Infof("adding history %s", historyID)

	// Best-effort clean-up of the source data whether or not publishing succeeds.
	defer func() {
		_ = os.RemoveAll(s.UnpublishedSnapshotDataDirectory())
	}()

	// ErrSegmentNotFound is expected for the very first segment (no
	// predecessor); any other error is fatal.
	previousHistorySegmentID, err := p.GetPreviousHistorySegmentID(s.HeightFrom)
	if err != nil {
		if !errors.Is(err, ErrSegmentNotFound) {
			return fmt.Errorf("failed to get previous history segment id:%w", err)
		}
	}

	metaData := segment.MetaData{
		Base:                     s.Base,
		PreviousHistorySegmentID: previousHistorySegmentID,
	}

	contentID, err := p.addHistorySegment(ctx, s.UnpublishedSnapshotDataDirectory(), metaData)
	if err != nil {
		return fmt.Errorf("failed to add file:%w", err)
	}

	p.log.Info("finished adding history to network history store",
		logging.String("history segment id", contentID.String()),
		logging.String("chain id", s.ChainID),
		logging.Int64("from height", s.HeightFrom),
		logging.Int64("to height", s.HeightTo),
		logging.String("previous history segment id", previousHistorySegmentID),
	)

	// Rate-limited garbage collection of segments outside the retention span.
	if time.Now().After(p.lastGC.Add(p.cfg.GarbageCollectionInterval.Duration)) {
		if err := p.CollectGarbage(ctx); err != nil {
			return err
		}
	}

	ipfsSize, err := p.ipfsRepo.GetStorageUsage(ctx)
	if err != nil {
		return fmt.Errorf("failed to get the ipfs storage usage: %w", err)
	}
	metrics.SetNetworkHistoryIpfsStoreBytes(float64(ipfsSize))

	return nil
}
   378  
   379  func (p *Store) CollectGarbage(ctx context.Context) (err error) {
   380  	p.lastGC = time.Now()
   381  	p.log.Debug("AddSnapshotData: removing old history segments")
   382  	segments, err := p.garbageCollectOldHistorySegments(ctx)
   383  	if err != nil {
   384  		return fmt.Errorf("failed to remove old history segments:%w", err)
   385  	}
   386  	p.log.Infof("removed %d old history segments", len(segments))
   387  	return nil
   388  }
   389  
   390  func (p *Store) GetHighestBlockHeightEntry() (segment.Full, error) {
   391  	entry, err := p.index.GetHighestBlockHeightEntry()
   392  	if err != nil {
   393  		if errors.Is(err, ErrIndexEntryNotFound) {
   394  			return segment.Full{}, ErrSegmentNotFound
   395  		}
   396  
   397  		return segment.Full{}, fmt.Errorf("failed to get highest block height entry from index:%w", err)
   398  	}
   399  
   400  	return entry, nil
   401  }
   402  
// ListAllIndexEntriesOldestFirst returns every indexed segment ordered by
// ascending block height.
func (p *Store) ListAllIndexEntriesOldestFirst() (segment.Segments[segment.Full], error) {
	return p.index.ListAllEntriesOldestFirst()
}

// ListAllIndexEntriesMostRecentFirst returns every indexed segment ordered by
// descending block height.
func (p *Store) ListAllIndexEntriesMostRecentFirst() (segment.Segments[segment.Full], error) {
	return p.index.ListAllEntriesMostRecentFirst()
}
   410  
   411  func setupMetrics(ipfsNode *core.IpfsNode) error {
   412  	err := prometheus.Register(&corehttp.IpfsNodeCollector{Node: ipfsNode})
   413  	if err != nil {
   414  		if _, ok := err.(prometheus.AlreadyRegisteredError); !ok {
   415  			return fmt.Errorf("failed to initialise IPFS metrics:%w", err)
   416  		}
   417  	}
   418  
   419  	return nil
   420  }
   421  
   422  func (p *Store) GetPreviousHistorySegmentID(fromHeight int64) (string, error) {
   423  	var err error
   424  	var previousHistorySegment segment.Full
   425  	if fromHeight > 0 {
   426  		height := fromHeight - 1
   427  		previousHistorySegment, err = p.index.Get(height)
   428  		if errors.Is(err, ErrIndexEntryNotFound) {
   429  			return "", ErrSegmentNotFound
   430  		}
   431  
   432  		if err != nil {
   433  			return "", fmt.Errorf("failed to get index entry for height:%w", err)
   434  		}
   435  	}
   436  	return previousHistorySegment.HistorySegmentID, nil
   437  }
   438  
   439  func (p *Store) addHistorySegment(ctx context.Context, zipFilePath string, metadata segment.MetaData) (cid.Cid, error) {
   440  	newZipFile, err := p.zipSegmentDataWithMetadata(zipFilePath, metadata)
   441  	defer os.Remove(newZipFile)
   442  	if err != nil {
   443  		return cid.Cid{}, fmt.Errorf("rewriting zip to include metadata:%w", err)
   444  	}
   445  
   446  	contentID, err := p.addFileToIpfs(ctx, newZipFile)
   447  	if err != nil {
   448  		return cid.Cid{}, fmt.Errorf("failed to add history segement %s to ipfs:%w", zipFilePath, err)
   449  	}
   450  
   451  	if err = p.index.Add(segment.Full{
   452  		MetaData:         metadata,
   453  		HistorySegmentID: contentID.String(),
   454  	}); err != nil {
   455  		return cid.Cid{}, fmt.Errorf("failed to update meta data store:%w", err)
   456  	}
   457  	return contentID, nil
   458  }
   459  
   460  func (p *Store) zipSegmentDataWithMetadata(segmentDataDir string, metadata segment.MetaData) (string, error) {
   461  	// Create a temporary zip file for including the metadata JSON file
   462  	tmpfile, err := os.CreateTemp("", metadata.ZipFileName())
   463  	if err != nil {
   464  		return "", fmt.Errorf("failed add history segment; unable to create temp file:%w", err)
   465  	}
   466  
   467  	defer tmpfile.Close()
   468  
   469  	zipWriter := zip.NewWriter(tmpfile)
   470  	defer zipWriter.Close()
   471  
   472  	metaDataBytes, err := json.Marshal(metadata)
   473  	if err != nil {
   474  		return "", fmt.Errorf("failed to marshal meta data:%w", err)
   475  	}
   476  
   477  	metaDataWriter, err := zipWriter.Create("metadata.json")
   478  	if err != nil {
   479  		return "", fmt.Errorf("failed to create metadata.json:%w", err)
   480  	}
   481  
   482  	_, err = metaDataWriter.Write(metaDataBytes)
   483  	if err != nil {
   484  		return "", fmt.Errorf("failed to write metadata.json:%w", err)
   485  	}
   486  
   487  	zipSegmentData(segmentDataDir, zipWriter)
   488  
   489  	return tmpfile.Name(), nil
   490  }
   491  
   492  func zipSegmentData(segmentDataDir string, zipWriter *zip.Writer) error {
   493  	// See comment below about why we use this time
   494  	modifiedTime, err := time.Parse(time.DateTime, "1979-11-30 00:00:00")
   495  	if err != nil {
   496  		return err
   497  	}
   498  
   499  	return filepath.Walk(segmentDataDir, func(path string, info os.FileInfo, _ error) error {
   500  		if info.IsDir() {
   501  			return nil
   502  		}
   503  
   504  		zipBasePath, err := filepath.Rel(segmentDataDir, path)
   505  		if err != nil {
   506  			return err
   507  		}
   508  
   509  		// The previous method of rewriting the zip file to include the metadata.json resulted in the Modified and
   510  		// ModifiedDate header fields being set to the values below due to code in the archive/zip/Reader.go and
   511  		// archive/zip/Writer.go which relates to legacy ms dos fields.	To ensure that segments have the same IPFS
   512  		// content ID as they would have had when the zip was rewritten it is necessary to set these fields on the
   513  		// zip headers created directly from the uncompressed files.
   514  		header := zip.FileHeader{
   515  			Name:         zipBasePath,
   516  			Method:       zip.Deflate,
   517  			Modified:     modifiedTime,
   518  			ModifiedDate: 65406,
   519  		}
   520  		fw, err := zipWriter.CreateHeader(&header)
   521  		if err != nil {
   522  			return fmt.Errorf("error creating file header: %w", err)
   523  		}
   524  
   525  		file, err := os.Open(path)
   526  		if err != nil {
   527  			return fmt.Errorf("failed to open segment data file: %w", err)
   528  		}
   529  		defer file.Close()
   530  
   531  		_, err = io.Copy(fw, file)
   532  		if err != nil {
   533  			return err
   534  		}
   535  
   536  		return nil
   537  	})
   538  }
   539  
// GetSegmentForHeight returns the index entry for the segment ending at
// toHeight. Errors from the underlying index (including ErrIndexEntryNotFound)
// are returned unchanged.
func (p *Store) GetSegmentForHeight(toHeight int64) (segment.Full, error) {
	return p.index.Get(toHeight)
}
   543  
   544  func (p *Store) GetHistorySegmentReader(ctx context.Context, historySegmentID string) (io.ReadSeekCloser, int64, error) {
   545  	ipfsCid, err := cid.Parse(historySegmentID)
   546  	if err != nil {
   547  		return nil, 0, fmt.Errorf("failed to parse history segment id:%w", err)
   548  	}
   549  
   550  	ipfsFile, err := p.ipfsAPI.Unixfs().Get(ctx, path.IpfsPath(ipfsCid))
   551  	if err != nil {
   552  		return nil, 0, fmt.Errorf("failed to get ipfs file:%w", err)
   553  	}
   554  
   555  	fileSize, err := ipfsFile.Size()
   556  	if err != nil {
   557  		return nil, 0, fmt.Errorf("failed to get ipfs file size:%w", err)
   558  	}
   559  
   560  	return files.ToFile(ipfsFile), fileSize, nil
   561  }
   562  
   563  func (p *Store) CopyHistorySegmentToFile(ctx context.Context, historySegmentID string, targetFile string) error {
   564  	ipfsCid, err := cid.Parse(historySegmentID)
   565  	if err != nil {
   566  		return fmt.Errorf("failed to parse history segment id:%w", err)
   567  	}
   568  
   569  	ipfsFile, err := p.ipfsAPI.Unixfs().Get(ctx, path.IpfsPath(ipfsCid))
   570  	if err != nil {
   571  		return fmt.Errorf("failed to get ipfs file:%w", err)
   572  	}
   573  
   574  	if err = files.WriteTo(ipfsFile, targetFile); err != nil {
   575  		return fmt.Errorf("failed to write to staging file:%w", err)
   576  	}
   577  	return nil
   578  }
   579  
   580  func (p *Store) garbageCollectOldHistorySegments(ctx context.Context) ([]segment.Full, error) {
   581  	latestSegment, err := p.index.GetHighestBlockHeightEntry()
   582  	if err != nil {
   583  		return nil, fmt.Errorf("failed to get latest segment:%w", err)
   584  	}
   585  
   586  	entries, err := p.index.ListAllEntriesOldestFirst()
   587  	if err != nil {
   588  		return nil, fmt.Errorf("failed to list all entries:%w", err)
   589  	}
   590  
   591  	var segmentsToRemove []segment.Full
   592  	for _, segment := range entries {
   593  		if segment.HeightTo < (latestSegment.HeightTo - p.cfg.HistoryRetentionBlockSpan) {
   594  			segmentsToRemove = append(segmentsToRemove, segment)
   595  		} else {
   596  			break
   597  		}
   598  	}
   599  
   600  	if err = p.RemoveSegments(ctx, segmentsToRemove); err != nil {
   601  		return nil, fmt.Errorf("failed to remove segments: %w", err)
   602  	}
   603  
   604  	if len(segmentsToRemove) > 0 {
   605  		// The GarbageCollect method is async
   606  		err = corerepo.GarbageCollect(p.ipfsNode, ctx)
   607  
   608  		// Do not want to return before the GC is done as adding new data to the node whilst GC is running is not permitted
   609  		unlocker := p.ipfsNode.GCLocker.GCLock(ctx)
   610  		defer unlocker.Unlock(ctx)
   611  
   612  		if err != nil {
   613  			return nil, fmt.Errorf("failed to garbage collect ipfs repo")
   614  		}
   615  	}
   616  
   617  	return segmentsToRemove, nil
   618  }
   619  
   620  func (p *Store) RemoveSegments(ctx context.Context, segmentsToRemove []segment.Full) error {
   621  	for _, segment := range segmentsToRemove {
   622  		err := p.unpinSegment(ctx, segment)
   623  		if err != nil {
   624  			return fmt.Errorf("failed to unpin segment:%w", err)
   625  		}
   626  
   627  		err = p.index.Remove(segment)
   628  		if err != nil {
   629  			return fmt.Errorf("failed to remove segment from index: %w", err)
   630  		}
   631  	}
   632  	return nil
   633  }
   634  
   635  func (p *Store) FetchHistorySegment(ctx context.Context, historySegmentID string) (segment.Full, error) {
   636  	// We don't know what the filename is yet as that gets lost in IPFS - so we just use a generic name
   637  	// until we peek at the metadata.json file inside to figure out the proper name and rename it.
   638  
   639  	historySegment := filepath.Join(p.stagingDir, "segment.zip")
   640  
   641  	err := os.RemoveAll(historySegment)
   642  	if err != nil {
   643  		return segment.Full{}, fmt.Errorf("failed to remove existing history segment zip: %w", err)
   644  	}
   645  
   646  	contentID, err := cid.Parse(historySegmentID)
   647  	if err != nil {
   648  		return segment.Full{}, fmt.Errorf("failed to parse snapshotId into CID:%w", err)
   649  	}
   650  
   651  	rootNodeFile, err := p.ipfsAPI.Unixfs().Get(ctx, path.IpfsPath(contentID))
   652  	if err != nil {
   653  		connInfo, swarmError := p.ipfsAPI.Swarm().Peers(ctx)
   654  		if swarmError != nil {
   655  			return segment.Full{}, fmt.Errorf("failed to get peers: %w", err)
   656  		}
   657  
   658  		peerAddrs := ""
   659  		for _, peer := range connInfo {
   660  			peerAddrs += fmt.Sprintf(",%s", peer.Address())
   661  		}
   662  
   663  		return segment.Full{}, fmt.Errorf("could not get file with CID, connected peer addresses %s: %w", peerAddrs, err)
   664  	}
   665  
   666  	err = files.WriteTo(rootNodeFile, historySegment)
   667  	if err != nil {
   668  		// check if the file exists and if so, remove it
   669  		_, statErr := os.Stat(historySegment)
   670  		if statErr == nil {
   671  			remErr := os.Remove(historySegment)
   672  			if remErr != nil {
   673  				return segment.Full{}, fmt.Errorf("could not write out the fetched history segment: %w, and could not remove existing history segment: %v", err, remErr)
   674  			}
   675  		}
   676  
   677  		return segment.Full{}, fmt.Errorf("could not write out the fetched history segment: %w", err)
   678  	}
   679  
   680  	zipReader, err := zip.OpenReader(historySegment)
   681  	if err != nil {
   682  		return segment.Full{}, fmt.Errorf("failed to open history segment: %w", err)
   683  	}
   684  	defer func() { _ = zipReader.Close() }()
   685  
   686  	metaFile, err := zipReader.Open(segmentMetaDataFile)
   687  	if err != nil {
   688  		return segment.Full{}, fmt.Errorf("failed to open history segment metadata file: %w", err)
   689  	}
   690  
   691  	metaBytes, err := io.ReadAll(metaFile)
   692  	if err != nil {
   693  		return segment.Full{}, fmt.Errorf("failed to read index entry:%w", err)
   694  	}
   695  
   696  	var metaData segment.MetaData
   697  	if err = json.Unmarshal(metaBytes, &metaData); err != nil {
   698  		return segment.Full{}, fmt.Errorf("failed to unmarshal index entry:%w", err)
   699  	}
   700  
   701  	renamedSegmentPath := filepath.Join(p.stagingDir, metaData.ZipFileName())
   702  	err = os.Rename(historySegment, renamedSegmentPath)
   703  	if err != nil {
   704  		return segment.Full{}, fmt.Errorf("failed to rename history segment: %w", err)
   705  	}
   706  
   707  	indexEntry := segment.Full{
   708  		MetaData:         metaData,
   709  		HistorySegmentID: historySegmentID,
   710  	}
   711  
   712  	err = p.ipfsAPI.Pin().Add(ctx, path.IpfsPath(contentID))
   713  	if err != nil {
   714  		return segment.Full{}, fmt.Errorf("failed to pin fetched segment: %w", err)
   715  	}
   716  
   717  	if err = p.index.Add(indexEntry); err != nil {
   718  		return segment.Full{}, fmt.Errorf("failed to add index entry:%w", err)
   719  	}
   720  
   721  	return indexEntry, nil
   722  }
   723  
   724  func (p *Store) StagedSegment(ctx context.Context, s segment.Full) (segment.Staged, error) {
   725  	p.log.Info("staging full-segment", logging.String("segment", s.ZipFileName()))
   726  	ss := segment.Staged{
   727  		Full:      s,
   728  		Directory: p.stagingDir,
   729  	}
   730  	if _, err := os.Stat(ss.ZipFilePath()); err != nil {
   731  		_, err = p.FetchHistorySegment(ctx, ss.HistorySegmentID)
   732  		if err != nil {
   733  			return segment.Staged{}, fmt.Errorf("failed to fetch history segment into staging area:%w", err)
   734  		}
   735  	}
   736  	return ss, nil
   737  }
   738  
   739  func (p *Store) StagedContiguousHistory(ctx context.Context, chunk segment.ContiguousHistory[segment.Full]) (segment.ContiguousHistory[segment.Staged], error) {
   740  	staged := segment.ContiguousHistory[segment.Staged]{}
   741  
   742  	for _, s := range chunk.Segments {
   743  		ss, err := p.StagedSegment(ctx, s)
   744  		if err != nil {
   745  			return segment.ContiguousHistory[segment.Staged]{}, err
   746  		}
   747  		if ok := staged.Add(ss); !ok {
   748  			return segment.ContiguousHistory[segment.Staged]{}, fmt.Errorf("failed to build staged chunk; input chunk not contiguous")
   749  		}
   750  	}
   751  
   752  	return staged, nil
   753  }
   754  
   755  func createIpfsNodeConfiguration(log *logging.Logger, identity config.Identity, swarmPort int) (*config.Config, error) {
   756  	cfg, err := config.InitWithIdentity(identity)
   757  
   758  	// Don't try and do local node discovery with mDNS; we're probably on the internet if running
   759  	// for real, and in tests we explicitly want to set up our network by specifying bootstrap peers
   760  	cfg.Discovery.MDNS.Enabled = false
   761  
   762  	if err != nil {
   763  		return nil, fmt.Errorf("failed to initialise ipfs config:%w", err)
   764  	}
   765  
   766  	const ipfsConfigDefaultSwarmPort = "4001"
   767  	updatedSwarmAddrs := make([]string, 0, 10)
   768  	for _, addr := range cfg.Addresses.Swarm {
   769  		// Exclude ip6 addresses cause hang on lookup
   770  		if !strings.Contains(addr, "/ip6/") {
   771  			updatedSwarmAddrs = append(updatedSwarmAddrs, strings.ReplaceAll(addr, ipfsConfigDefaultSwarmPort, strconv.Itoa(swarmPort)))
   772  		}
   773  	}
   774  
   775  	cfg.Addresses.Swarm = updatedSwarmAddrs
   776  	cfg.Bootstrap = []string{} // we'll provide these later, but we empty them here so we don't get the default set
   777  	prettyCfgJSON, _ := json.MarshalIndent(cfg, "", "  ")
   778  	log.Debugf("IPFS Node Config:\n%s", prettyCfgJSON)
   779  
   780  	return cfg, nil
   781  }
   782  
   783  func updateRepoConfig(path string, conf *config.Config) error {
   784  	configFilename, err := config.Filename(path, "")
   785  	if err != nil {
   786  		return fmt.Errorf("failed to get the configuration file path:%w", err)
   787  	}
   788  
   789  	if err = serialize.WriteConfigFile(configFilename, conf); err != nil {
   790  		return fmt.Errorf("failed to write the config file:%w", err)
   791  	}
   792  
   793  	return nil
   794  }
   795  
   796  func loadPlugins(externalPluginsPath string) (*loader.PluginLoader, error) {
   797  	// Load any external plugins if available on externalPluginsPath
   798  	plugins, err := loader.NewPluginLoader(filepath.Join(externalPluginsPath, "plugins"))
   799  	if err != nil {
   800  		return nil, fmt.Errorf("error loading plugins: %s", err)
   801  	}
   802  
   803  	// Load preloaded and external plugins
   804  	if err := plugins.Initialize(); err != nil {
   805  		return nil, fmt.Errorf("error initializing plugins: %s", err)
   806  	}
   807  
   808  	if err := plugins.Inject(); err != nil {
   809  		return nil, fmt.Errorf("error injecting plugins: %s", err)
   810  	}
   811  
   812  	return plugins, nil
   813  }
   814  
   815  func generateSwarmKeyFile(swarmKeySeed string, repoPath string) (string, error) {
   816  	file, err := os.Create(filepath.Join(repoPath, "swarm.key"))
   817  	defer func() { _ = file.Close() }()
   818  	if err != nil {
   819  		return "", fmt.Errorf("failed to create swarm key file:%w", err)
   820  	}
   821  
   822  	key := make([]byte, 32)
   823  
   824  	copy(key, swarmKeySeed)
   825  	hx := hex.EncodeToString(key)
   826  
   827  	swarmKey := fmt.Sprintf("/key/swarm/psk/1.0.0/\n/base16/\n%s", hx)
   828  	_, err = io.WriteString(file, swarmKey)
   829  
   830  	if err != nil {
   831  		return "", fmt.Errorf("failed to write to file:%w", err)
   832  	}
   833  
   834  	return swarmKey, nil
   835  }
   836  
   837  func createNode(ctx context.Context, log *logging.Logger, repoPath string, maxMemoryPercent uint8) (*core.IpfsNode, repo.Repo, error) {
   838  	repo, err := fsrepo.Open(repoPath)
   839  	if err != nil {
   840  		return nil, nil, fmt.Errorf("failed to open ipfs repo:%w", err)
   841  	}
   842  
   843  	// Construct the node
   844  	nodeOptions := &core.BuildCfg{
   845  		Online:    true,
   846  		Permanent: true,
   847  		Routing:   libp2p.DHTOption, // This option sets the node to be a full DHT node (both fetching and storing DHT Records)
   848  		Repo:      repo,
   849  	}
   850  
   851  	err = setLibP2PResourceManagerLimits(repo, maxMemoryPercent)
   852  	if err != nil {
   853  		return nil, nil, fmt.Errorf("failed to set libp2p resource manager limits:%w", err)
   854  	}
   855  
   856  	node, err := core.NewNode(ctx, nodeOptions)
   857  	if err != nil {
   858  		return nil, nil, fmt.Errorf("failed to create new node:%w", err)
   859  	}
   860  
   861  	printSwarmAddrs(node, log)
   862  
   863  	// Attach the Core API to the constructed node
   864  	return node, repo, nil
   865  }
   866  
   867  // The LibP2P Resource manager protects the IPFS node from malicious and non-malicious attacks, the limits used to enforce
   868  // these protections are based on the max memory and max file descriptor limits set in the swarms resource manager config.
   869  // This method overrides the defaults and sets limits that we consider sensible in the context of a data-node.
   870  func setLibP2PResourceManagerLimits(repo repo.Repo, maxMemoryPercent uint8) error {
   871  	cfg, err := repo.Config()
   872  	if err != nil {
   873  		return fmt.Errorf("failed to get repo config:%w", err)
   874  	}
   875  
   876  	// Use max memory percent if set, otherwise use libP2P defaults
   877  	if maxMemoryPercent > 0 {
   878  		totalMem, err := memory.TotalMemory()
   879  		if err != nil {
   880  			return fmt.Errorf("failed to get total memory: %w", err)
   881  		}
   882  
   883  		// Set the maximum to a quarter of the data-nodes max memory
   884  		maxMemoryString := humanize.Bytes(uint64(float64(totalMem) * (float64(maxMemoryPercent) / (4 * 100))))
   885  		cfg.Swarm.ResourceMgr.MaxMemory = config.NewOptionalString(maxMemoryString)
   886  	}
   887  
   888  	// Set the maximum to a quarter of the systems available file descriptors
   889  	maxFileDescriptors := int64(fd.GetNumFDs()) / 4
   890  	fdBytes, err := json.Marshal(&maxFileDescriptors)
   891  	if err != nil {
   892  		return fmt.Errorf("failed to marshal max file descriptors:%w", err)
   893  	}
   894  
   895  	fdOptionalInteger := config.OptionalInteger{}
   896  	err = fdOptionalInteger.UnmarshalJSON(fdBytes)
   897  	if err != nil {
   898  		return fmt.Errorf("failed to unmarshal max file descriptors:%w", err)
   899  	}
   900  
   901  	cfg.Swarm.ResourceMgr.MaxFileDescriptors = &fdOptionalInteger
   902  
   903  	return nil
   904  }
   905  
   906  func printSwarmAddrs(node *core.IpfsNode, log *logging.Logger) {
   907  	if !node.IsOnline {
   908  		log.Debugf("Swarm not listening, running in offline mode.")
   909  		return
   910  	}
   911  
   912  	ifaceAddrs, err := node.PeerHost.Network().InterfaceListenAddresses()
   913  	if err != nil {
   914  		log.Debugf("failed to read listening addresses: %s", err)
   915  	}
   916  	lisAddrs := make([]string, len(ifaceAddrs))
   917  	for i, addr := range ifaceAddrs {
   918  		lisAddrs[i] = addr.String()
   919  	}
   920  	sort.Strings(lisAddrs)
   921  	for _, addr := range lisAddrs {
   922  		log.Debugf("Swarm listening on %s\n", addr)
   923  	}
   924  
   925  	nodePhostAddrs := node.PeerHost.Addrs()
   926  	addrs := make([]string, len(nodePhostAddrs))
   927  	for i, addr := range nodePhostAddrs {
   928  		addrs[i] = addr.String()
   929  	}
   930  	sort.Strings(addrs)
   931  	for _, addr := range addrs {
   932  		log.Debugf("Swarm announcing %s\n", addr)
   933  	}
   934  }
   935  
   936  func createIpfsNode(ctx context.Context, log *logging.Logger, repoPath string,
   937  	cfg *config.Config, swarmKeySeed string, maxMemoryPercent uint8,
   938  ) (*core.IpfsNode, repo.Repo, string, error) {
   939  	// Only inits the repo if it does not already exist
   940  	err := fsrepo.Init(repoPath, cfg)
   941  	if err != nil {
   942  		return nil, nil, "", fmt.Errorf("failed to initialise ipfs configuration:%w", err)
   943  	}
   944  
   945  	// Update to take account of any new bootstrap nodes
   946  	err = updateRepoConfig(repoPath, cfg)
   947  	if err != nil {
   948  		return nil, nil, "", fmt.Errorf("failed to update ipfs configuration:%w", err)
   949  	}
   950  
   951  	swarmKey, err := generateSwarmKeyFile(swarmKeySeed, repoPath)
   952  	if err != nil {
   953  		return nil, nil, "", fmt.Errorf("failed to generate swarm key file:%w", err)
   954  	}
   955  
   956  	node, repo, err := createNode(ctx, log, repoPath, maxMemoryPercent)
   957  	if err != nil {
   958  		return nil, nil, "", fmt.Errorf("failed to create node: %w", err)
   959  	}
   960  
   961  	return node, repo, swarmKey, nil
   962  }
   963  
   964  func (p *Store) addFileToIpfs(ctx context.Context, path string) (cid.Cid, error) {
   965  	file, err := os.Open(path)
   966  	if err != nil {
   967  		return cid.Cid{}, err
   968  	}
   969  	defer func() { _ = file.Close() }()
   970  
   971  	st, err := file.Stat()
   972  	if err != nil {
   973  		return cid.Cid{}, err
   974  	}
   975  
   976  	f, err := files.NewReaderPathFile(path, file, st)
   977  	if err != nil {
   978  		return cid.Cid{}, err
   979  	}
   980  
   981  	fileCid, err := p.ipfsAPI.Unixfs().Add(ctx, f)
   982  	if err != nil {
   983  		return cid.Cid{}, fmt.Errorf("failed to add file: %s", err)
   984  	}
   985  
   986  	err = p.ipfsAPI.Pin().Add(ctx, fileCid)
   987  	if err != nil {
   988  		return cid.Cid{}, fmt.Errorf("failed to pin file: %s", err)
   989  	}
   990  	return fileCid.Cid(), nil
   991  }
   992  
   993  func (p *Store) unpinSegment(ctx context.Context, segment segment.Full) error {
   994  	contentID, err := cid.Decode(segment.HistorySegmentID)
   995  	if err != nil {
   996  		return fmt.Errorf("failed to decode history segment id:%w", err)
   997  	}
   998  
   999  	path := path.IpfsPath(contentID)
  1000  
  1001  	if err = p.ipfsAPI.Pin().Rm(ctx, path); err != nil {
  1002  		return fmt.Errorf("failed to unpin segment:%w", err)
  1003  	}
  1004  
  1005  	return nil
  1006  }