github.com/ipld/go-ipld-prime@v0.21.0/linking/functions.go

package linking

import (
	"bytes"
	"context"
	"io"

	"github.com/ipld/go-ipld-prime/datamodel"
)

// This file contains all the functions on LinkSystem.
// These are the helpful, user-facing functions we expect folks to use "most of the time" when loading and storing data.

// Variations:
// - Load vs Store vs ComputeLink
// - Load vs LoadPlusRaw
// - With or without LinkContext?
//   - Brevity would be nice but I can't think of what to name the functions, so: everything takes LinkContext.  Zero value is fine though.
// - [for load direction only]: Prototype (and return Node|error) or Assembler (and just return error)?
//   - naming: Load vs Fill.
// - 'Must' variants.

// Can we get as far as a `QuickLoad(lnk Link) (Node, error)` function, which doesn't even ask you for a NodePrototype?
//  No, not quite.  (Alas.)  If we tried to do so, and make it use `basicnode.Prototype`, we'd have import cycles; ded.

// Load looks up some data identified by a Link, and does everything necessary to turn it into usable data.
// In detail, that means it:
// brings that data into memory,
// verifies the hash,
// parses it into the Data Model using a codec,
// and returns an IPLD Node.
//
// Where the data will be loaded from is determined by the configuration of the LinkSystem
// (namely, the StorageReadOpener callback, which can either be set directly,
// or configured via the SetReadStorage function).
//
// The in-memory form used for the returned Node is determined by the given NodePrototype parameter.
// A new builder and a new node will be allocated, via NodePrototype.NewBuilder.
// (If you'd like more control over memory allocation, you may wish to see the Fill function instead.)
//
// A schema may also be used to apply additional data validation during loading,
// by using a schema.TypedNodePrototype as the NodePrototype argument.
//
// The LinkContext parameter may be used to pass contextual information down to the loading layer.
//
// Which hashing function is used to validate the loaded data is determined by LinkSystem.HasherChooser.
// Which codec is used to parse the loaded data into the Data Model is determined by LinkSystem.DecoderChooser.
//
// The LinkSystem.NodeReifier callback is also applied before returning the Node,
// and so Load may also thereby return an ADL.
func (lsys *LinkSystem) Load(lnkCtx LinkContext, lnk datamodel.Link, np datamodel.NodePrototype) (datamodel.Node, error) {
	nb := np.NewBuilder()
	if err := lsys.Fill(lnkCtx, lnk, nb); err != nil {
		return nil, err
	}
	nd := nb.Build()
	if lsys.NodeReifier == nil {
		return nd, nil
	}
	return lsys.NodeReifier(lnkCtx, nd, lsys)
}
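
// An illustrative sketch of typical Load usage follows.  It assumes the cidlink,
// basicnode, and memstore packages from this module (which this package cannot
// import itself, per the note about import cycles above), a `ctx` context.Context,
// and a `lnk` obtained from an earlier Store call; a codec such as dag-cbor must
// also be registered (e.g. by importing the codec/dagcbor package).
//
//	lsys := cidlink.DefaultLinkSystem()
//	store := memstore.Store{}
//	lsys.SetReadStorage(&store)
//	n, err := lsys.Load(
//		linking.LinkContext{Ctx: ctx}, // LinkContext carries a context.Context and other optional info.
//		lnk,                           // The Link naming the data to load.
//		basicnode.Prototype.Any,       // The NodePrototype deciding the in-memory form.
//	)
//	if err != nil {
//		// Handle the error: missing block, hash mismatch, codec failure, etc.
//	}
//	fmt.Println(n.Kind())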

// MustLoad is identical to Load, but panics in the case of errors.
//
// This function is meant for convenience of use in test and demo code, but should otherwise probably be avoided.
func (lsys *LinkSystem) MustLoad(lnkCtx LinkContext, lnk datamodel.Link, np datamodel.NodePrototype) datamodel.Node {
	if n, err := lsys.Load(lnkCtx, lnk, np); err != nil {
		panic(err)
	} else {
		return n
	}
}

// LoadPlusRaw is similar to Load, but additionally retains and returns the byte slice of the raw data parsed.
//
// Be wary of using this with large data, since it will hold all data in memory at once.
// For more control over streaming, you may want to construct a LinkSystem where you wrap the storage opener callbacks,
// and thus can access the streams (and tee them, or whatever you need to do) as they're opened.
// This function is meant for convenience when data sizes are small enough that fitting them into memory at once is not a problem.
func (lsys *LinkSystem) LoadPlusRaw(lnkCtx LinkContext, lnk datamodel.Link, np datamodel.NodePrototype) (datamodel.Node, []byte, error) {
	// Choose all the parts.
	decoder, err := lsys.DecoderChooser(lnk)
	if err != nil {
		return nil, nil, ErrLinkingSetup{"could not choose a decoder", err}
	}
	// Use LoadRaw to get the data.
	//  If we're going to have everything in memory at once, we might as well do that first, and then give the codec and the hasher the whole thing at once.
	block, err := lsys.LoadRaw(lnkCtx, lnk)
	if err != nil {
		return nil, block, err
	}
	// Create a NodeBuilder.
	// Deploy the codec.
	// Build the node.
	nb := np.NewBuilder()
	if err := decoder(nb, bytes.NewBuffer(block)); err != nil {
		return nil, block, err
	}
	nd := nb.Build()
	// Consider applying NodeReifier, if applicable.
	if lsys.NodeReifier == nil {
		return nd, block, nil
	}
	nd, err = lsys.NodeReifier(lnkCtx, nd, lsys)
	return nd, block, err
}
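
// The doc comment above suggests wrapping the storage opener callbacks when you
// need streaming access to the raw data for larger blocks.  A minimal sketch of
// that idea (the `captured` buffer name is just for illustration):
//
//	base := lsys.StorageReadOpener
//	var captured bytes.Buffer
//	lsys.StorageReadOpener = func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) {
//		r, err := base(lc, l)
//		if err != nil {
//			return nil, err
//		}
//		captured.Reset()
//		// Everything the codec reads also lands in `captured`.
//		return io.TeeReader(r, &captured), nil
//	}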

// LoadRaw looks up some data identified by a Link, brings that data into memory,
// verifies the hash, and returns it directly as a byte slice.
//
// LoadRaw does not return a data model view of the data,
// nor does it verify that a codec can parse the data at all!
// Use this function at your own risk; it does not provide the same guarantees as the Load or Fill functions do.
func (lsys *LinkSystem) LoadRaw(lnkCtx LinkContext, lnk datamodel.Link) ([]byte, error) {
	if lnkCtx.Ctx == nil {
		lnkCtx.Ctx = context.Background()
	}
	// Choose all the parts.
	hasher, err := lsys.HasherChooser(lnk.Prototype())
	if err != nil {
		return nil, ErrLinkingSetup{"could not choose a hasher", err}
	}
	if lsys.StorageReadOpener == nil {
		return nil, ErrLinkingSetup{"no storage configured for reading", io.ErrClosedPipe} // REVIEW: better cause?
	}
	// Open storage: get the data.
	// FUTURE: this could probably use storage.ReadableStorage.Get instead of streaming and a buffer, if we refactored LinkSystem to carry that interface through.
	reader, err := lsys.StorageReadOpener(lnkCtx, lnk)
	if err != nil {
		return nil, err
	}
	if closer, ok := reader.(io.Closer); ok {
		defer closer.Close()
	}
	var buf bytes.Buffer
	if _, err := io.Copy(&buf, reader); err != nil {
		return nil, err
	}
	// Compute the hash.
	// (Then do a bit of a jig to build a link out of it -- because that's what we do the actual hash equality check on.)
	hasher.Write(buf.Bytes())
	hash := hasher.Sum(nil)
	lnk2 := lnk.Prototype().BuildLink(hash)
	if lnk2.Binary() != lnk.Binary() {
		return nil, ErrHashMismatch{Actual: lnk2, Expected: lnk}
	}
	// No codec to deploy; this is the raw load function.
	// So we're done.
	return buf.Bytes(), nil
}
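
// A minimal sketch of using LoadRaw to relay verified bytes without decoding them
// (the `w io.Writer` destination, `ctx`, and `lnk` values here are just for illustration):
//
//	raw, err := lsys.LoadRaw(linking.LinkContext{Ctx: ctx}, lnk)
//	if err != nil {
//		return err
//	}
//	// raw has been hash-verified, but not parsed by any codec.
//	_, err = w.Write(raw)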

// Fill is similar to Load, but allows more control over memory allocations.
// Instead of taking a NodePrototype parameter, Fill takes a NodeAssembler parameter:
// this allows you to use your own NodeBuilder (and reset it, etc., thus controlling allocations),
// or to fill in some part of a larger structure.
//
// Note that Fill does not regard NodeReifier, even if one has been configured.
// (This is in contrast to Load, which does regard a NodeReifier if one is configured, and thus may return an ADL node.)
func (lsys *LinkSystem) Fill(lnkCtx LinkContext, lnk datamodel.Link, na datamodel.NodeAssembler) error {
	if lnkCtx.Ctx == nil {
		lnkCtx.Ctx = context.Background()
	}
	// Choose all the parts.
	decoder, err := lsys.DecoderChooser(lnk)
	if err != nil {
		return ErrLinkingSetup{"could not choose a decoder", err}
	}
	hasher, err := lsys.HasherChooser(lnk.Prototype())
	if err != nil {
		return ErrLinkingSetup{"could not choose a hasher", err}
	}
	if lsys.StorageReadOpener == nil {
		return ErrLinkingSetup{"no storage configured for reading", io.ErrClosedPipe} // REVIEW: better cause?
	}
	// Open storage; get a reader stream.
	reader, err := lsys.StorageReadOpener(lnkCtx, lnk)
	if err != nil {
		return err
	}
	if closer, ok := reader.(io.Closer); ok {
		defer closer.Close()
	}
	// TrustedStorage indicates that the data coming out of this reader has already been hashed and verified earlier.
	// As a result, we can skip rehashing it.
	if lsys.TrustedStorage {
		return decoder(na, reader)
	}
	// Tee the stream so that the hasher is fed as the unmarshal progresses through the stream.
	tee := io.TeeReader(reader, hasher)
	// The actual read is then dragged forward by the codec.
	decodeErr := decoder(na, tee)
	if decodeErr != nil {
		// It is important for security to check the hash before returning any other observation about the content,
		// so, if the decode process returns any error, we have several steps to take before potentially returning it.
		// First, we try to copy any remaining data that wasn't already pulled through the TeeReader by the decoder,
		// so that the hasher can reach the end of the stream.
		// If _that_ errors, return the I/O-level error.
		// We hang onto decodeErr for a while: we can't return it until all the way after we check the hash equality.
		_, err := io.Copy(hasher, reader)
		if err != nil {
			return err
		}
	}
	// Compute the hash.
	// (Then do a bit of a jig to build a link out of it -- because that's what we do the actual hash equality check on.)
	hash := hasher.Sum(nil)
	lnk2 := lnk.Prototype().BuildLink(hash)
	if lnk2.Binary() != lnk.Binary() {
		return ErrHashMismatch{Actual: lnk2, Expected: lnk}
	}
	// If we got all the way through IO and through the hash check:
	// now, finally, if we did get an error from the codec, we can admit to that.
	if decodeErr != nil {
		return decodeErr
	}
	return nil
}
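
// An illustrative sketch of using Fill to reuse one builder across many loads
// (it assumes the basicnode package, plus `lsys`, `ctx`, and a `links` slice
// from the surrounding program):
//
//	nb := basicnode.Prototype.Any.NewBuilder()
//	for _, lnk := range links {
//		if err := lsys.Fill(linking.LinkContext{Ctx: ctx}, lnk, nb); err != nil {
//			return err
//		}
//		n := nb.Build()
//		// ... use n before the next iteration ...
//		nb.Reset() // Reset lets the builder reuse its internal memory where possible.
//	}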

// MustFill is identical to Fill, but panics in the case of errors.
//
// This function is meant for convenience of use in test and demo code, but should otherwise probably be avoided.
func (lsys *LinkSystem) MustFill(lnkCtx LinkContext, lnk datamodel.Link, na datamodel.NodeAssembler) {
	if err := lsys.Fill(lnkCtx, lnk, na); err != nil {
		panic(err)
	}
}

// Store encodes the given Node using a codec chosen by the LinkPrototype,
// computes its hash, writes the serial data to storage
// (via the configured StorageWriteOpener callback),
// and returns a Link identifying the stored data.
//
// Which codec and hasher are used is determined by LinkSystem.EncoderChooser
// and LinkSystem.HasherChooser, based on the given LinkPrototype.
func (lsys *LinkSystem) Store(lnkCtx LinkContext, lp datamodel.LinkPrototype, n datamodel.Node) (datamodel.Link, error) {
	if lnkCtx.Ctx == nil {
		lnkCtx.Ctx = context.Background()
	}
	// Choose all the parts.
	encoder, err := lsys.EncoderChooser(lp)
	if err != nil {
		return nil, ErrLinkingSetup{"could not choose an encoder", err}
	}
	hasher, err := lsys.HasherChooser(lp)
	if err != nil {
		return nil, ErrLinkingSetup{"could not choose a hasher", err}
	}
	if lsys.StorageWriteOpener == nil {
		return nil, ErrLinkingSetup{"no storage configured for writing", io.ErrClosedPipe} // REVIEW: better cause?
	}
	// Open storage write stream, feed serial data to the storage and the hasher, and funnel the codec output into both.
	writer, commitFn, err := lsys.StorageWriteOpener(lnkCtx)
	if err != nil {
		return nil, err
	}
	tee := io.MultiWriter(writer, hasher)
	err = encoder(n, tee)
	if err != nil {
		return nil, err
	}
	lnk := lp.BuildLink(hasher.Sum(nil))
	return lnk, commitFn(lnk)
}
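
// An illustrative sketch of typical Store usage.  It assumes the cidlink,
// basicnode, and memstore packages from this module, plus the go-cid package
// for the CID prefix; a codec matching the prefix must be registered
// (e.g. by importing the codec/dagcbor package).
//
//	lsys := cidlink.DefaultLinkSystem()
//	store := memstore.Store{}
//	lsys.SetWriteStorage(&store)
//	lp := cidlink.LinkPrototype{Prefix: cid.Prefix{
//		Version:  1,    // Usually 1.
//		Codec:    0x71, // 0x71 means "dag-cbor" in the multicodec table.
//		MhType:   0x12, // 0x12 means "sha2-256" in the multicodec table.
//		MhLength: 32,   // sha2-256 hashes are 32 bytes long.
//	}}
//	lnk, err := lsys.Store(linking.LinkContext{}, lp, basicnode.NewString("hello"))
//	if err != nil {
//		return err
//	}
//	fmt.Println(lnk) // Prints the CID of the stored block.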

// MustStore is identical to Store, but panics in the case of errors.
//
// This function is meant for convenience of use in test and demo code, but should otherwise probably be avoided.
func (lsys *LinkSystem) MustStore(lnkCtx LinkContext, lp datamodel.LinkPrototype, n datamodel.Node) datamodel.Link {
	if lnk, err := lsys.Store(lnkCtx, lp, n); err != nil {
		panic(err)
	} else {
		return lnk
	}
}

// ComputeLink returns a Link for the given data, but doesn't do anything else
// (e.g. it doesn't try to store any of the serial-form data anywhere).
func (lsys *LinkSystem) ComputeLink(lp datamodel.LinkPrototype, n datamodel.Node) (datamodel.Link, error) {
	encoder, err := lsys.EncoderChooser(lp)
	if err != nil {
		return nil, ErrLinkingSetup{"could not choose an encoder", err}
	}
	hasher, err := lsys.HasherChooser(lp)
	if err != nil {
		return nil, ErrLinkingSetup{"could not choose a hasher", err}
	}
	err = encoder(n, hasher)
	if err != nil {
		return nil, err
	}
	return lp.BuildLink(hasher.Sum(nil)), nil
}
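
// A minimal sketch of using ComputeLink to learn the link a node would have,
// without writing anything to storage (it assumes the same `lp` LinkPrototype
// shown in the Store sketch above, and some node `n`):
//
//	lnk, err := lsys.ComputeLink(lp, n)
//	if err != nil {
//		return err
//	}
//	// lnk identifies n, but no block has been written anywhere.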

// MustComputeLink is identical to ComputeLink, but panics in the case of errors.
//
// This function is meant for convenience of use in test and demo code, but should otherwise probably be avoided.
func (lsys *LinkSystem) MustComputeLink(lp datamodel.LinkPrototype, n datamodel.Node) datamodel.Link {
	if lnk, err := lsys.ComputeLink(lp, n); err != nil {
		panic(err)
	} else {
		return lnk
	}
}