github.com/ipld/go-ipld-prime@v0.21.0/traversal/fns.go (about) 1 package traversal 2 3 import ( 4 "context" 5 "fmt" 6 7 "github.com/ipld/go-ipld-prime/datamodel" 8 "github.com/ipld/go-ipld-prime/linking" 9 "github.com/ipld/go-ipld-prime/linking/preload" 10 ) 11 12 // This file defines interfaces for things users provide, 13 // plus a few of the parameters they'll need to receieve. 14 //-------------------------------------------------------- 15 16 // VisitFn is a read-only visitor. 17 type VisitFn func(Progress, datamodel.Node) error 18 19 // TransformFn is like a visitor that can also return a new Node to replace the visited one. 20 type TransformFn func(Progress, datamodel.Node) (datamodel.Node, error) 21 22 // AdvVisitFn is like VisitFn, but for use with AdvTraversal: it gets additional arguments describing *why* this node is visited. 23 type AdvVisitFn func(Progress, datamodel.Node, VisitReason) error 24 25 // VisitReason provides additional information to traversals using AdvVisitFn. 26 type VisitReason byte 27 28 const ( 29 // VisitReason_SelectionMatch tells AdvVisitFn that this node was explicitly selected. (This is the set of nodes that VisitFn is called for.) 30 VisitReason_SelectionMatch VisitReason = 'm' 31 // VisitReason_SelectionParent tells AdvVisitFn that this node is a parent of one that will be explicitly selected. (These calls only happen if the feature is enabled -- enabling parent detection requires a different algorithm and adds some overhead.) 32 VisitReason_SelectionParent VisitReason = 'p' 33 // VisitReason_SelectionCandidate tells AdvVisitFn that this node was visited while searching for selection matches. It is not necessarily implied that any explicit match will be a child of this node; only that we had to consider it. (Merkle-proofs generally need to include any node in this group.) 34 VisitReason_SelectionCandidate VisitReason = 'x' 35 ) 36 37 // Progress tracks a traversal as it proceeds. It is used initially to begin a traversal, and it is then passed to the visit function as the traversal proceeds. 38 // 39 // As the traversal descends into the graph, new Progress values are created and passed to the visit function with updated properties representing the current state of the traversal. 40 // 41 // Most customization of a traversal is done by setting a Cfg property on a Progress before beginning the traversal. 42 // Typical customization involves setting a LinkSystem for link loading and/or tracking. 43 // 44 // Advanced traversal control options, such as LinkVisitOnlyOnce and StartAtPath, are also available in the Cfg but may have surprising effects on traversal behavior; be careful when using them. 45 // 46 // Budgets are set on the Progress option because a Budget, while set at the beginning of a traversal, is also updated as the traversal proceeds, with its fields being monotonically decremented. 47 // Beware of using Budgets in tandem with a Preloader! The preloader discovers links in a lateral scan of a whole block, before rewinding for a depth-first walk for traversal-proper. 48 // Budgets are intended to be used for the depth-first walk, and there is no way to know ahead of time how the budget may impact the lateral parts of the graph that the preloader encounters. 49 // Currently a best-guess approach is used to try and have the preloader adhere to the budget, but with typical real-world graphs, this is likely to be inaccurate. 50 // In the case of inaccuracies, the budget will be properly applied to the traversal-proper, but the preloader may receive a different set of links than the traversal-proper will. 51 type Progress struct { 52 // Cfg is the configuration for the traversal, set by user. 53 Cfg *Config 54 55 // Budget, if present, tracks "budgets" for how many more steps we're willing to take before we should halt. 56 // Budget is initially set by user, but is then updated as the traversal proceeds. 57 Budget *Budget 58 59 // Path is how we reached the current point in the traversal. 60 Path datamodel.Path 61 62 // LastBlock stores the Path and Link of the last block edge we had to load. (It will always be zero in traversals with no linkloader.) 63 LastBlock struct { 64 Path datamodel.Path 65 Link datamodel.Link 66 } 67 68 // PastStartAtPath indicates whether the traversal has progressed passed the StartAtPath in the config -- use to avoid path checks when inside a sub portion of a DAG that is entirely inside the "not-skipped" portion of a traversal 69 PastStartAtPath bool 70 71 // SeenLinks is a set used to remember which links have been visited before, if Cfg.LinkVisitOnlyOnce is true. 72 SeenLinks map[datamodel.Link]struct{} 73 } 74 75 // Config is a set of options for a traversal. Set a Config on a Progress to customize the traversal. 76 type Config struct { 77 // Ctx is the context carried through a traversal. 78 // Optional; use it if you need cancellation. 79 Ctx context.Context 80 81 // LinkSystem is used for automatic link loading, and also any storing if mutation features (e.g. traversal.Transform) are used. 82 LinkSystem linking.LinkSystem 83 84 // LinkTargetNodePrototypeChooser is a chooser for Node implementations to produce during automatic link traversal. 85 LinkTargetNodePrototypeChooser LinkTargetNodePrototypeChooser 86 87 // LinkVisitOnlyOnce controls repeat-link visitation. 88 // By default, we visit across links wherever we see them again, even if we've visited them before, because the reason for visiting might be different than it was before since we got to it via a different path. 89 // If set to true, track links we've seen before in Progress.SeenLinks and do not visit them again. 90 // Note that sufficiently complex selectors may require valid revisiting of some links, so setting this to true can change behavior noticably and should be done with care. 91 LinkVisitOnlyOnce bool 92 93 // StartAtPath, if set, causes a traversal to skip forward until passing this path, and only then begins calling visit functions. 94 // Block loads will also be skipped wherever possible. 95 StartAtPath datamodel.Path 96 97 // Preloader receives links within each block prior to traversal-proper by performing a lateral scan of a block without descending into links themselves before backing up and doing a traversal-proper. 98 // This can be used to asynchronously load blocks that will be required at a later phase of the retrieval, or even to load blocks in a different order than the traversal would otherwise do. 99 // Preload calls are not de-duplicated, it is up to the receiver to do so if desired. 100 // Beware of using both Budget and Preloader! See the documentation on Progress for more information on this usage and the likely surprising effects. 101 Preloader preload.Loader 102 } 103 104 // Budget is a set of monotonically-decrementing "budgets" for how many more steps we're willing to take before we should halt. 105 // 106 // The fields of Budget are described as "monotonically-decrementing", because that's what the traversal library will do with them, 107 // but they are user-accessable and can be reset to higher numbers again by code in the visitor callbacks. This is not recommended (why?), but possible. 108 109 // If you set any budgets (by having a non-nil Progress.Budget field), you must set some value for all of them. 110 // Traversal halts when _any_ of the budgets reaches zero. 111 // The max value of an int (math.MaxInt64) is acceptable for any budget you don't care about. 112 // 113 // Beware of using both Budget and Preloader! See the documentation on Progress for more information on this usage and the likely surprising effects. 114 type Budget struct { 115 // NodeBudget is a monotonically-decrementing "budget" for how many more nodes we're willing to visit before halting. 116 NodeBudget int64 117 // LinkBudget is a monotonically-decrementing "budget" for how many more links we're willing to load before halting. 118 // (This is not aware of any caching; it's purely in terms of links encountered and traversed.) 119 LinkBudget int64 120 } 121 122 // Clone returns a copy of the budget. 123 func (b *Budget) Clone() *Budget { 124 if b == nil { 125 return nil 126 } 127 return &Budget{ 128 NodeBudget: b.NodeBudget, 129 LinkBudget: b.LinkBudget, 130 } 131 } 132 133 // LinkTargetNodePrototypeChooser is a function that returns a NodePrototype based on 134 // the information in a Link and/or its LinkContext. 135 // 136 // A LinkTargetNodePrototypeChooser can be used in a traversal.Config to be clear about 137 // what kind of Node implementation to use when loading a Link. 138 // In a simple example, it could constantly return a `basicnode.Prototype.Any`. 139 // In a more complex example, a program using `bind` over native Go types 140 // could decide what kind of native type is expected, and return a 141 // `bind.NodeBuilder` for that specific concrete native type. 142 type LinkTargetNodePrototypeChooser func(datamodel.Link, linking.LinkContext) (datamodel.NodePrototype, error) 143 144 // SkipMe is a signalling "error" which can be used to tell traverse to skip some data. 145 // 146 // SkipMe can be returned by the Config.LinkLoader to skip entire blocks without aborting the walk. 147 // (This can be useful if you know you don't have data on hand, 148 // but want to continue the walk in other areas anyway; 149 // or, if you're doing a way where you know that it's valid to memoize seen 150 // areas based on Link alone.) 151 type SkipMe struct{} 152 153 func (SkipMe) Error() string { 154 return "skip" 155 } 156 157 type ErrBudgetExceeded struct { 158 BudgetKind string // "node"|"link" 159 Path datamodel.Path 160 Link datamodel.Link // only present if BudgetKind=="link" 161 } 162 163 func (e *ErrBudgetExceeded) Error() string { 164 msg := fmt.Sprintf("traversal budget exceeded: budget for %ss reached zero while on path %q", e.BudgetKind, e.Path) 165 if e.Link != nil { 166 msg += fmt.Sprintf(" (link: %q)", e.Link) 167 } 168 return msg 169 } 170 171 func (e *ErrBudgetExceeded) Is(target error) bool { 172 _, ok := target.(*ErrBudgetExceeded) 173 return ok 174 }