github.com/ipld/go-ipld-prime@v0.21.0/linking/functions.go (about) 1 package linking 2 3 import ( 4 "bytes" 5 "context" 6 "io" 7 8 "github.com/ipld/go-ipld-prime/datamodel" 9 ) 10 11 // This file contains all the functions on LinkSystem. 12 // These are the helpful, user-facing functions we expect folks to use "most of the time" when loading and storing data. 13 14 // Variations: 15 // - Load vs Store vs ComputeLink 16 // - Load vs LoadPlusRaw 17 // - With or without LinkContext? 18 // - Brevity would be nice but I can't think of what to name the functions, so: everything takes LinkContext. Zero value is fine though. 19 // - [for load direction only]: Prototype (and return Node|error) or Assembler (and just return error)? 20 // - naming: Load vs Fill. 21 // - 'Must' variants. 22 23 // Can we get as far as a `QuickLoad(lnk Link) (Node, error)` function, which doesn't even ask you for a NodePrototype? 24 // No, not quite. (Alas.) If we tried to do so, and make it use `basicnode.Prototype`, we'd have import cycles; ded. 25 26 // Load looks up some data identified by a Link, and does everything necessary to turn it into usable data. 27 // In detail, that means it: 28 // brings that data into memory, 29 // verifies the hash, 30 // parses it into the Data Model using a codec, 31 // and returns an IPLD Node. 32 // 33 // Where the data will be loaded from is determined by the configuration of the LinkSystem 34 // (namely, the StorageReadOpener callback, which can either be set directly, 35 // or configured via the SetReadStorage function). 36 // 37 // The in-memory form used for the returned Node is determined by the given NodePrototype parameter. 38 // A new builder and a new node will be allocated, via NodePrototype.NewBuilder. 39 // (If you'd like more control over memory allocation, you may wish to see the Fill function instead.) 40 // 41 // A schema may also be used, and apply additional data validation during loading, 42 // by using a schema.TypedNodePrototype as the NodePrototype argument. 43 // 44 // The LinkContext parameter may be used to pass contextual information down to the loading layer. 45 // 46 // Which hashing function is used to validate the loaded data is determined by LinkSystem.HasherChooser. 47 // Which codec is used to parse the loaded data into the Data Model is determined by LinkSystem.DecoderChooser. 48 // 49 // The LinkSystem.NodeReifier callback is also applied before returning the Node, 50 // and so Load may also thereby return an ADL. 51 func (lsys *LinkSystem) Load(lnkCtx LinkContext, lnk datamodel.Link, np datamodel.NodePrototype) (datamodel.Node, error) { 52 nb := np.NewBuilder() 53 if err := lsys.Fill(lnkCtx, lnk, nb); err != nil { 54 return nil, err 55 } 56 nd := nb.Build() 57 if lsys.NodeReifier == nil { 58 return nd, nil 59 } 60 return lsys.NodeReifier(lnkCtx, nd, lsys) 61 } 62 63 // MustLoad is identical to Load, but panics in the case of errors. 64 // 65 // This function is meant for convenience of use in test and demo code, but should otherwise probably be avoided. 66 func (lsys *LinkSystem) MustLoad(lnkCtx LinkContext, lnk datamodel.Link, np datamodel.NodePrototype) datamodel.Node { 67 if n, err := lsys.Load(lnkCtx, lnk, np); err != nil { 68 panic(err) 69 } else { 70 return n 71 } 72 } 73 74 // LoadPlusRaw is similar to Load, but additionally retains and returns the byte slice of the raw data parsed. 75 // 76 // Be wary of using this with large data, since it will hold all data in memory at once. 77 // For more control over streaming, you may want to construct a LinkSystem where you wrap the storage opener callbacks, 78 // and thus can access the streams (and tee them, or whatever you need to do) as they're opened. 79 // This function is meant for convenience when data sizes are small enough that fitting them into memory at once is not a problem. 80 func (lsys *LinkSystem) LoadPlusRaw(lnkCtx LinkContext, lnk datamodel.Link, np datamodel.NodePrototype) (datamodel.Node, []byte, error) { 81 // Choose all the parts. 82 decoder, err := lsys.DecoderChooser(lnk) 83 if err != nil { 84 return nil, nil, ErrLinkingSetup{"could not choose a decoder", err} 85 } 86 // Use LoadRaw to get the data. 87 // If we're going to have everything in memory at once, we might as well do that first, and then give the codec and the hasher the whole thing at once. 88 block, err := lsys.LoadRaw(lnkCtx, lnk) 89 if err != nil { 90 return nil, block, err 91 } 92 // Create a NodeBuilder. 93 // Deploy the codec. 94 // Build the node. 95 nb := np.NewBuilder() 96 if err := decoder(nb, bytes.NewBuffer(block)); err != nil { 97 return nil, block, err 98 } 99 nd := nb.Build() 100 // Consider applying NodeReifier, if applicable. 101 if lsys.NodeReifier == nil { 102 return nd, block, nil 103 } 104 nd, err = lsys.NodeReifier(lnkCtx, nd, lsys) 105 return nd, block, err 106 } 107 108 // LoadRaw looks up some data identified by a Link, brings that data into memory, 109 // verifies the hash, and returns it directly as a byte slice. 110 // 111 // LoadRaw does not return a data model view of the data, 112 // nor does it verify that a codec can parse the data at all! 113 // Use this function at your own risk; it does not provide the same guarantees as the Load or Fill functions do. 114 func (lsys *LinkSystem) LoadRaw(lnkCtx LinkContext, lnk datamodel.Link) ([]byte, error) { 115 if lnkCtx.Ctx == nil { 116 lnkCtx.Ctx = context.Background() 117 } 118 // Choose all the parts. 119 hasher, err := lsys.HasherChooser(lnk.Prototype()) 120 if err != nil { 121 return nil, ErrLinkingSetup{"could not choose a hasher", err} 122 } 123 if lsys.StorageReadOpener == nil { 124 return nil, ErrLinkingSetup{"no storage configured for reading", io.ErrClosedPipe} // REVIEW: better cause? 125 } 126 // Open storage: get the data. 127 // FUTURE: this could probably use storage.ReadableStorage.Get instead of streaming and a buffer, if we refactored LinkSystem to carry that interface through. 128 reader, err := lsys.StorageReadOpener(lnkCtx, lnk) 129 if err != nil { 130 return nil, err 131 } 132 if closer, ok := reader.(io.Closer); ok { 133 defer closer.Close() 134 } 135 var buf bytes.Buffer 136 if _, err := io.Copy(&buf, reader); err != nil { 137 return nil, err 138 } 139 // Compute the hash. 140 // (Then do a bit of a jig to build a link out of it -- because that's what we do the actual hash equality check on.) 141 hasher.Write(buf.Bytes()) 142 hash := hasher.Sum(nil) 143 lnk2 := lnk.Prototype().BuildLink(hash) 144 if lnk2.Binary() != lnk.Binary() { 145 return nil, ErrHashMismatch{Actual: lnk2, Expected: lnk} 146 } 147 // No codec to deploy; this is the raw load function. 148 // So we're done. 149 return buf.Bytes(), nil 150 } 151 152 // Fill is similar to Load, but allows more control over memory allocations. 153 // Instead of taking a NodePrototype parameter, Fill takes a NodeAssembler parameter: 154 // this allows you to use your own NodeBuilder (and reset it, etc, thus controlling allocations), 155 // or, to fill in some part of a larger structure. 156 // 157 // Note that Fill does not regard NodeReifier, even if one has been configured. 158 // (This is in contrast to Load, which does regard a NodeReifier if one is configured, and thus may return an ADL node). 159 func (lsys *LinkSystem) Fill(lnkCtx LinkContext, lnk datamodel.Link, na datamodel.NodeAssembler) error { 160 if lnkCtx.Ctx == nil { 161 lnkCtx.Ctx = context.Background() 162 } 163 // Choose all the parts. 164 decoder, err := lsys.DecoderChooser(lnk) 165 if err != nil { 166 return ErrLinkingSetup{"could not choose a decoder", err} 167 } 168 hasher, err := lsys.HasherChooser(lnk.Prototype()) 169 if err != nil { 170 return ErrLinkingSetup{"could not choose a hasher", err} 171 } 172 if lsys.StorageReadOpener == nil { 173 return ErrLinkingSetup{"no storage configured for reading", io.ErrClosedPipe} // REVIEW: better cause? 174 } 175 // Open storage; get a reader stream. 176 reader, err := lsys.StorageReadOpener(lnkCtx, lnk) 177 if err != nil { 178 return err 179 } 180 if closer, ok := reader.(io.Closer); ok { 181 defer closer.Close() 182 } 183 // TrustedStorage indicates the data coming out of this reader has already been hashed and verified earlier. 184 // As a result, we can skip rehashing it 185 if lsys.TrustedStorage { 186 return decoder(na, reader) 187 } 188 // Tee the stream so that the hasher is fed as the unmarshal progresses through the stream. 189 tee := io.TeeReader(reader, hasher) 190 // The actual read is then dragged forward by the codec. 191 decodeErr := decoder(na, tee) 192 if decodeErr != nil { 193 // It is important to security to check the hash before returning any other observation about the content, 194 // so, if the decode process returns any error, we have several steps to take before potentially returning it. 195 // First, we try to copy any data remaining that wasn't already pulled through the TeeReader by the decoder, 196 // so that the hasher can reach the end of the stream. 197 // If _that_ errors, return the I/O level error. 198 // We hang onto decodeErr for a while: we can't return that until all the way after we check the hash equality. 199 _, err := io.Copy(hasher, reader) 200 if err != nil { 201 return err 202 } 203 } 204 // Compute the hash. 205 // (Then do a bit of a jig to build a link out of it -- because that's what we do the actual hash equality check on.) 206 hash := hasher.Sum(nil) 207 lnk2 := lnk.Prototype().BuildLink(hash) 208 if lnk2.Binary() != lnk.Binary() { 209 return ErrHashMismatch{Actual: lnk2, Expected: lnk} 210 } 211 // If we got all the way through IO and through the hash check: 212 // now, finally, if we did get an error from the codec, we can admit to that. 213 if decodeErr != nil { 214 return decodeErr 215 } 216 return nil 217 } 218 219 // MustFill is identical to Fill, but panics in the case of errors. 220 // 221 // This function is meant for convenience of use in test and demo code, but should otherwise probably be avoided. 222 func (lsys *LinkSystem) MustFill(lnkCtx LinkContext, lnk datamodel.Link, na datamodel.NodeAssembler) { 223 if err := lsys.Fill(lnkCtx, lnk, na); err != nil { 224 panic(err) 225 } 226 } 227 228 func (lsys *LinkSystem) Store(lnkCtx LinkContext, lp datamodel.LinkPrototype, n datamodel.Node) (datamodel.Link, error) { 229 if lnkCtx.Ctx == nil { 230 lnkCtx.Ctx = context.Background() 231 } 232 // Choose all the parts. 233 encoder, err := lsys.EncoderChooser(lp) 234 if err != nil { 235 return nil, ErrLinkingSetup{"could not choose an encoder", err} 236 } 237 hasher, err := lsys.HasherChooser(lp) 238 if err != nil { 239 return nil, ErrLinkingSetup{"could not choose a hasher", err} 240 } 241 if lsys.StorageWriteOpener == nil { 242 return nil, ErrLinkingSetup{"no storage configured for writing", io.ErrClosedPipe} // REVIEW: better cause? 243 } 244 // Open storage write stream, feed serial data to the storage and the hasher, and funnel the codec output into both. 245 writer, commitFn, err := lsys.StorageWriteOpener(lnkCtx) 246 if err != nil { 247 return nil, err 248 } 249 tee := io.MultiWriter(writer, hasher) 250 err = encoder(n, tee) 251 if err != nil { 252 return nil, err 253 } 254 lnk := lp.BuildLink(hasher.Sum(nil)) 255 return lnk, commitFn(lnk) 256 } 257 258 func (lsys *LinkSystem) MustStore(lnkCtx LinkContext, lp datamodel.LinkPrototype, n datamodel.Node) datamodel.Link { 259 if lnk, err := lsys.Store(lnkCtx, lp, n); err != nil { 260 panic(err) 261 } else { 262 return lnk 263 } 264 } 265 266 // ComputeLink returns a Link for the given data, but doesn't do anything else 267 // (e.g. it doesn't try to store any of the serial-form data anywhere else). 268 func (lsys *LinkSystem) ComputeLink(lp datamodel.LinkPrototype, n datamodel.Node) (datamodel.Link, error) { 269 encoder, err := lsys.EncoderChooser(lp) 270 if err != nil { 271 return nil, ErrLinkingSetup{"could not choose an encoder", err} 272 } 273 hasher, err := lsys.HasherChooser(lp) 274 if err != nil { 275 return nil, ErrLinkingSetup{"could not choose a hasher", err} 276 } 277 err = encoder(n, hasher) 278 if err != nil { 279 return nil, err 280 } 281 return lp.BuildLink(hasher.Sum(nil)), nil 282 } 283 284 func (lsys *LinkSystem) MustComputeLink(lp datamodel.LinkPrototype, n datamodel.Node) datamodel.Link { 285 if lnk, err := lsys.ComputeLink(lp, n); err != nil { 286 panic(err) 287 } else { 288 return lnk 289 } 290 }