github.com/ipld/go-ipld-prime@v0.21.0/traversal/fns.go (about)

     1  package traversal
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  
     7  	"github.com/ipld/go-ipld-prime/datamodel"
     8  	"github.com/ipld/go-ipld-prime/linking"
     9  	"github.com/ipld/go-ipld-prime/linking/preload"
    10  )
    11  
    12  // This file defines interfaces for things users provide,
    13  //  plus a few of the parameters they'll need to receive.
    14  //--------------------------------------------------------
    15  
    16  // VisitFn is a read-only visitor.
    17  type VisitFn func(Progress, datamodel.Node) error
    18  
    19  // TransformFn is like a visitor that can also return a new Node to replace the visited one.
    20  type TransformFn func(Progress, datamodel.Node) (datamodel.Node, error)
    21  
    22  // AdvVisitFn is like VisitFn, but for use with AdvTraversal: it gets additional arguments describing *why* this node is visited.
    23  type AdvVisitFn func(Progress, datamodel.Node, VisitReason) error
    24  
    25  // VisitReason provides additional information to traversals using AdvVisitFn.
    26  type VisitReason byte
    27  
    28  const (
    29  	// VisitReason_SelectionMatch tells AdvVisitFn that this node was explicitly selected.  (This is the set of nodes that VisitFn is called for.)
    30  	VisitReason_SelectionMatch VisitReason = 'm'
    31  	// VisitReason_SelectionParent tells AdvVisitFn that this node is a parent of one that will be explicitly selected.  (These calls only happen if the feature is enabled -- enabling parent detection requires a different algorithm and adds some overhead.)
    32  	VisitReason_SelectionParent VisitReason = 'p'
    33  	// VisitReason_SelectionCandidate tells AdvVisitFn that this node was visited while searching for selection matches.  It is not necessarily implied that any explicit match will be a child of this node; only that we had to consider it.  (Merkle-proofs generally need to include any node in this group.)
    34  	VisitReason_SelectionCandidate VisitReason = 'x'
    35  )
    36  
    37  // Progress tracks a traversal as it proceeds. It is used initially to begin a traversal, and it is then passed to the visit function as the traversal proceeds.
    38  //
    39  // As the traversal descends into the graph, new Progress values are created and passed to the visit function with updated properties representing the current state of the traversal.
    40  //
    41  // Most customization of a traversal is done by setting a Cfg property on a Progress before beginning the traversal.
    42  // Typical customization involves setting a LinkSystem for link loading and/or tracking.
    43  //
    44  // Advanced traversal control options, such as LinkVisitOnlyOnce and StartAtPath, are also available in the Cfg but may have surprising effects on traversal behavior; be careful when using them.
    45  //
    46  // Budgets are set on the Progress option because a Budget, while set at the beginning of a traversal, is also updated as the traversal proceeds, with its fields being monotonically decremented.
    47  // Beware of using Budgets in tandem with a Preloader! The preloader discovers links in a lateral scan of a whole block, before rewinding for a depth-first walk for traversal-proper.
    48  // Budgets are intended to be used for the depth-first walk, and there is no way to know ahead of time how the budget may impact the lateral parts of the graph that the preloader encounters.
    49  // Currently a best-guess approach is used to try and have the preloader adhere to the budget, but with typical real-world graphs, this is likely to be inaccurate.
    50  // In the case of inaccuracies, the budget will be properly applied to the traversal-proper, but the preloader may receive a different set of links than the traversal-proper will.
    51  type Progress struct {
    52  	// Cfg is the configuration for the traversal, set by user.
    53  	Cfg *Config
    54  
    55  	// Budget, if present, tracks "budgets" for how many more steps we're willing to take before we should halt.
    56  	// Budget is initially set by user, but is then updated as the traversal proceeds.
    57  	Budget *Budget
    58  
    59  	// Path is how we reached the current point in the traversal.
    60  	Path datamodel.Path
    61  
    62  	// LastBlock stores the Path and Link of the last block edge we had to load.  (It will always be zero in traversals with no linkloader.)
    63  	LastBlock struct {
    64  		Path datamodel.Path
    65  		Link datamodel.Link
    66  	}
    67  
    68  	// PastStartAtPath indicates whether the traversal has progressed passed the StartAtPath in the config -- use to avoid path checks when inside a sub portion of a DAG that is entirely inside the "not-skipped" portion of a traversal
    69  	PastStartAtPath bool
    70  
    71  	// SeenLinks is a set used to remember which links have been visited before, if Cfg.LinkVisitOnlyOnce is true.
    72  	SeenLinks map[datamodel.Link]struct{}
    73  }
    74  
    75  // Config is a set of options for a traversal. Set a Config on a Progress to customize the traversal.
    76  type Config struct {
    77  	// Ctx is the context carried through a traversal.
    78  	// Optional; use it if you need cancellation.
    79  	Ctx context.Context
    80  
    81  	// LinkSystem is used for automatic link loading, and also any storing if mutation features (e.g. traversal.Transform) are used.
    82  	LinkSystem linking.LinkSystem
    83  
    84  	// LinkTargetNodePrototypeChooser is a chooser for Node implementations to produce during automatic link traversal.
    85  	LinkTargetNodePrototypeChooser LinkTargetNodePrototypeChooser
    86  
    87  	// LinkVisitOnlyOnce controls repeat-link visitation.
    88  	// By default, we visit across links wherever we see them again, even if we've visited them before, because the reason for visiting might be different than it was before since we got to it via a different path.
    89  	// If set to true, track links we've seen before in Progress.SeenLinks and do not visit them again.
    90  	// Note that sufficiently complex selectors may require valid revisiting of some links, so setting this to true can change behavior noticably and should be done with care.
    91  	LinkVisitOnlyOnce bool
    92  
    93  	// StartAtPath, if set, causes a traversal to skip forward until passing this path, and only then begins calling visit functions.
    94  	// Block loads will also be skipped wherever possible.
    95  	StartAtPath datamodel.Path
    96  
    97  	// Preloader receives links within each block prior to traversal-proper by performing a lateral scan of a block without descending into links themselves before backing up and doing a traversal-proper.
    98  	// This can be used to asynchronously load blocks that will be required at a later phase of the retrieval, or even to load blocks in a different order than the traversal would otherwise do.
    99  	// Preload calls are not de-duplicated, it is up to the receiver to do so if desired.
   100  	// Beware of using both Budget and Preloader!  See the documentation on Progress for more information on this usage and the likely surprising effects.
   101  	Preloader preload.Loader
   102  }
   103  
   104  // Budget is a set of monotonically-decrementing "budgets" for how many more steps we're willing to take before we should halt.
   105  //
   106  // The fields of Budget are described as "monotonically-decrementing", because that's what the traversal library will do with them,
   107  // but they are user-accessable and can be reset to higher numbers again by code in the visitor callbacks.  This is not recommended (why?), but possible.
   108  
   109  // If you set any budgets (by having a non-nil Progress.Budget field), you must set some value for all of them.
   110  // Traversal halts when _any_ of the budgets reaches zero.
   111  // The max value of an int (math.MaxInt64) is acceptable for any budget you don't care about.
   112  //
   113  // Beware of using both Budget and Preloader!  See the documentation on Progress for more information on this usage and the likely surprising effects.
   114  type Budget struct {
   115  	// NodeBudget is a monotonically-decrementing "budget" for how many more nodes we're willing to visit before halting.
   116  	NodeBudget int64
   117  	// LinkBudget is a monotonically-decrementing "budget" for how many more links we're willing to load before halting.
   118  	// (This is not aware of any caching; it's purely in terms of links encountered and traversed.)
   119  	LinkBudget int64
   120  }
   121  
   122  // Clone returns a copy of the budget.
   123  func (b *Budget) Clone() *Budget {
   124  	if b == nil {
   125  		return nil
   126  	}
   127  	return &Budget{
   128  		NodeBudget: b.NodeBudget,
   129  		LinkBudget: b.LinkBudget,
   130  	}
   131  }
   132  
   133  // LinkTargetNodePrototypeChooser is a function that returns a NodePrototype based on
   134  // the information in a Link and/or its LinkContext.
   135  //
   136  // A LinkTargetNodePrototypeChooser can be used in a traversal.Config to be clear about
   137  // what kind of Node implementation to use when loading a Link.
   138  // In a simple example, it could constantly return a `basicnode.Prototype.Any`.
   139  // In a more complex example, a program using `bind` over native Go types
   140  // could decide what kind of native type is expected, and return a
   141  // `bind.NodeBuilder` for that specific concrete native type.
   142  type LinkTargetNodePrototypeChooser func(datamodel.Link, linking.LinkContext) (datamodel.NodePrototype, error)
   143  
   144  // SkipMe is a signalling "error" which can be used to tell traverse to skip some data.
   145  //
   146  // SkipMe can be returned by the Config.LinkLoader to skip entire blocks without aborting the walk.
   147  // (This can be useful if you know you don't have data on hand,
   148  // but want to continue the walk in other areas anyway;
   149  // or, if you're doing a way where you know that it's valid to memoize seen
   150  // areas based on Link alone.)
   151  type SkipMe struct{}
   152  
   153  func (SkipMe) Error() string {
   154  	return "skip"
   155  }
   156  
   157  type ErrBudgetExceeded struct {
   158  	BudgetKind string // "node"|"link"
   159  	Path       datamodel.Path
   160  	Link       datamodel.Link // only present if BudgetKind=="link"
   161  }
   162  
   163  func (e *ErrBudgetExceeded) Error() string {
   164  	msg := fmt.Sprintf("traversal budget exceeded: budget for %ss reached zero while on path %q", e.BudgetKind, e.Path)
   165  	if e.Link != nil {
   166  		msg += fmt.Sprintf(" (link: %q)", e.Link)
   167  	}
   168  	return msg
   169  }
   170  
   171  func (e *ErrBudgetExceeded) Is(target error) bool {
   172  	_, ok := target.(*ErrBudgetExceeded)
   173  	return ok
   174  }