github.com/ipld/go-ipld-prime@v0.21.0/codec/dagcbor/unmarshal.go (about)

     1  package dagcbor
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"io"
     7  	"math"
     8  
     9  	cid "github.com/ipfs/go-cid"
    10  	"github.com/polydawn/refmt/cbor"
    11  	"github.com/polydawn/refmt/shared"
    12  	"github.com/polydawn/refmt/tok"
    13  
    14  	"github.com/ipld/go-ipld-prime/datamodel"
    15  	cidlink "github.com/ipld/go-ipld-prime/linking/cid"
    16  	"github.com/ipld/go-ipld-prime/node/basicnode"
    17  )
    18  
    19  var (
    20  	ErrInvalidMultibase         = errors.New("invalid multibase on IPLD link")
    21  	ErrAllocationBudgetExceeded = errors.New("message structure demanded too many resources to process")
    22  	ErrTrailingBytes            = errors.New("unexpected content after end of cbor object")
    23  )
    24  
    25  const (
    26  	mapEntryGasScore  = 8
    27  	listEntryGasScore = 4
    28  )
    29  
    30  // This file should be identical to the general feature in the parent package,
    31  // except for the `case tok.TBytes` block,
    32  // which has dag-cbor's special sauce for detecting schemafree links.
    33  
    34  // DecodeOptions can be used to customize the behavior of a decoding function.
    35  // The Decode method on this struct fits the codec.Decoder function interface.
    36  type DecodeOptions struct {
    37  	// If true, parse DAG-CBOR tag(42) as Link nodes, otherwise reject them
    38  	AllowLinks bool
    39  
    40  	// TODO: ExperimentalDeterminism enforces map key order, but not the other parts
    41  	// of the spec such as integers or floats. See the fuzz failures spotted in
    42  	// https://github.com/ipld/go-ipld-prime/pull/389.
    43  	// When we're done implementing strictness, deprecate the option in favor of
    44  	// StrictDeterminism, but keep accepting both for backwards compatibility.
    45  
    46  	// ExperimentalDeterminism requires decoded DAG-CBOR bytes to be canonical as per
    47  	// the spec. For example, this means that integers and floats be encoded in
    48  	// a particular way, and map keys be sorted.
    49  	//
    50  	// The decoder does not enforce this requirement by default, as the codec
    51  	// was originally implemented without these rules. Because of that, there's
    52  	// a significant amount of published data that isn't canonical but should
    53  	// still decode with the default settings for backwards compatibility.
    54  	//
    55  	// Note that this option is experimental as it only implements partial strictness.
    56  	ExperimentalDeterminism bool
    57  
    58  	// If true, the decoder stops reading from the stream at the end of a full,
    59  	// valid CBOR object. This may be useful for parsing a stream of undelimited
    60  	// CBOR objects.
    61  	// As per standard IPLD behavior, in the default mode the parser considers the
    62  	// entire block to be part of the CBOR object and will error if there is
    63  	// extraneous data after the end of the object.
    64  	DontParseBeyondEnd bool
    65  }
    66  
    67  // Decode deserializes data from the given io.Reader and feeds it into the given datamodel.NodeAssembler.
    68  // Decode fits the codec.Decoder function interface.
    69  //
    70  // The behavior of the decoder can be customized by setting fields in the DecodeOptions struct before calling this method.
    71  func (cfg DecodeOptions) Decode(na datamodel.NodeAssembler, r io.Reader) error {
    72  	// Probe for a builtin fast path.  Shortcut to that if possible.
    73  	type detectFastPath interface {
    74  		DecodeDagCbor(io.Reader) error
    75  	}
    76  	if na2, ok := na.(detectFastPath); ok {
    77  		return na2.DecodeDagCbor(r)
    78  	}
    79  	// Okay, generic builder path.
    80  	err := Unmarshal(na, cbor.NewDecoder(cbor.DecodeOptions{
    81  		CoerceUndefToNull: true,
    82  	}, r), cfg)
    83  
    84  	if err != nil {
    85  		return err
    86  	}
    87  
    88  	if cfg.DontParseBeyondEnd {
    89  		return nil
    90  	}
    91  
    92  	var buf [1]byte
    93  	_, err = io.ReadFull(r, buf[:])
    94  	switch err {
    95  	case io.EOF:
    96  		return nil
    97  	case nil:
    98  		return ErrTrailingBytes
    99  	default:
   100  		return err
   101  	}
   102  }
   103  
   104  // Future work: we would like to remove the Unmarshal function,
   105  // and in particular, stop seeing types from refmt (like shared.TokenSource) be visible.
   106  // Right now, some kinds of configuration (e.g. for whitespace and prettyprint) are only available through interacting with the refmt types;
   107  // we should improve our API so that this can be done with only our own types in this package.
   108  
   109  // Unmarshal is a deprecated function.
   110  // Please consider switching to DecodeOptions.Decode instead.
   111  func Unmarshal(na datamodel.NodeAssembler, tokSrc shared.TokenSource, options DecodeOptions) error {
   112  	// Have a gas budget, which will be decremented as we allocate memory, and an error returned when execeeded (or about to be exceeded).
   113  	//  This is a DoS defense mechanism.
   114  	//  It's *roughly* in units of bytes (but only very, VERY roughly) -- it also treats words as 1 in many cases.
   115  	// FUTURE: this ought be configurable somehow.  (How, and at what granularity though?)
   116  	var gas int64 = 1048576 * 10
   117  	return unmarshal1(na, tokSrc, &gas, options)
   118  }
   119  
   120  func unmarshal1(na datamodel.NodeAssembler, tokSrc shared.TokenSource, gas *int64, options DecodeOptions) error {
   121  	var tk tok.Token
   122  	done, err := tokSrc.Step(&tk)
   123  	if err == io.EOF {
   124  		return io.ErrUnexpectedEOF
   125  	}
   126  	if err != nil {
   127  		return err
   128  	}
   129  	if done && !tk.Type.IsValue() && tk.Type != tok.TNull {
   130  		return fmt.Errorf("unexpected eof")
   131  	}
   132  	return unmarshal2(na, tokSrc, &tk, gas, options)
   133  }
   134  
   135  // starts with the first token already primed.  Necessary to get recursion
   136  //
   137  //	to flow right without a peek+unpeek system.
   138  func unmarshal2(na datamodel.NodeAssembler, tokSrc shared.TokenSource, tk *tok.Token, gas *int64, options DecodeOptions) error {
   139  	// FUTURE: check for schema.TypedNodeBuilder that's going to parse a Link (they can slurp any token kind they want).
   140  	switch tk.Type {
   141  	case tok.TMapOpen:
   142  		expectLen := int64(tk.Length)
   143  		allocLen := int64(tk.Length)
   144  		if tk.Length == -1 {
   145  			expectLen = math.MaxInt64
   146  			allocLen = 0
   147  		} else {
   148  			if *gas-allocLen < 0 { // halt early if this will clearly demand too many resources
   149  				return ErrAllocationBudgetExceeded
   150  			}
   151  		}
   152  		ma, err := na.BeginMap(allocLen)
   153  		if err != nil {
   154  			return err
   155  		}
   156  		var observedLen int64
   157  		lastKey := ""
   158  		for {
   159  			_, err := tokSrc.Step(tk)
   160  			if err != nil {
   161  				return err
   162  			}
   163  			switch tk.Type {
   164  			case tok.TMapClose:
   165  				if expectLen != math.MaxInt64 && observedLen != expectLen {
   166  					return fmt.Errorf("unexpected mapClose before declared length")
   167  				}
   168  				return ma.Finish()
   169  			case tok.TString:
   170  				*gas -= int64(len(tk.Str) + mapEntryGasScore)
   171  				if *gas < 0 {
   172  					return ErrAllocationBudgetExceeded
   173  				}
   174  				// continue
   175  			default:
   176  				return fmt.Errorf("unexpected %s token while expecting map key", tk.Type)
   177  			}
   178  			observedLen++
   179  			if observedLen > expectLen {
   180  				return fmt.Errorf("unexpected continuation of map elements beyond declared length")
   181  			}
   182  			if observedLen > 1 && options.ExperimentalDeterminism {
   183  				if len(lastKey) > len(tk.Str) || lastKey > tk.Str {
   184  					return fmt.Errorf("map key %q is not after %q as per RFC7049", tk.Str, lastKey)
   185  				}
   186  			}
   187  			lastKey = tk.Str
   188  			mva, err := ma.AssembleEntry(tk.Str)
   189  			if err != nil { // return in error if the key was rejected
   190  				return err
   191  			}
   192  			err = unmarshal1(mva, tokSrc, gas, options)
   193  			if err != nil { // return in error if some part of the recursion errored
   194  				return err
   195  			}
   196  		}
   197  	case tok.TMapClose:
   198  		return fmt.Errorf("unexpected mapClose token")
   199  	case tok.TArrOpen:
   200  		expectLen := int64(tk.Length)
   201  		allocLen := int64(tk.Length)
   202  		if tk.Length == -1 {
   203  			expectLen = math.MaxInt64
   204  			allocLen = 0
   205  		} else {
   206  			if *gas-allocLen < 0 { // halt early if this will clearly demand too many resources
   207  				return ErrAllocationBudgetExceeded
   208  			}
   209  		}
   210  		la, err := na.BeginList(allocLen)
   211  		if err != nil {
   212  			return err
   213  		}
   214  		var observedLen int64
   215  		for {
   216  			_, err := tokSrc.Step(tk)
   217  			if err != nil {
   218  				return err
   219  			}
   220  			switch tk.Type {
   221  			case tok.TArrClose:
   222  				if expectLen != math.MaxInt64 && observedLen != expectLen {
   223  					return fmt.Errorf("unexpected arrClose before declared length")
   224  				}
   225  				return la.Finish()
   226  			default:
   227  				*gas -= listEntryGasScore
   228  				if *gas < 0 {
   229  					return ErrAllocationBudgetExceeded
   230  				}
   231  				observedLen++
   232  				if observedLen > expectLen {
   233  					return fmt.Errorf("unexpected continuation of array elements beyond declared length")
   234  				}
   235  				err := unmarshal2(la.AssembleValue(), tokSrc, tk, gas, options)
   236  				if err != nil { // return in error if some part of the recursion errored
   237  					return err
   238  				}
   239  			}
   240  		}
   241  	case tok.TArrClose:
   242  		return fmt.Errorf("unexpected arrClose token")
   243  	case tok.TNull:
   244  		return na.AssignNull()
   245  	case tok.TString:
   246  		*gas -= int64(len(tk.Str))
   247  		if *gas < 0 {
   248  			return ErrAllocationBudgetExceeded
   249  		}
   250  		return na.AssignString(tk.Str)
   251  	case tok.TBytes:
   252  		*gas -= int64(len(tk.Bytes))
   253  		if *gas < 0 {
   254  			return ErrAllocationBudgetExceeded
   255  		}
   256  		if !tk.Tagged {
   257  			return na.AssignBytes(tk.Bytes)
   258  		}
   259  		switch tk.Tag {
   260  		case linkTag:
   261  			if !options.AllowLinks {
   262  				return fmt.Errorf("unhandled cbor tag %d", tk.Tag)
   263  			}
   264  			if len(tk.Bytes) < 1 || tk.Bytes[0] != 0 {
   265  				return ErrInvalidMultibase
   266  			}
   267  			elCid, err := cid.Cast(tk.Bytes[1:])
   268  			if err != nil {
   269  				return err
   270  			}
   271  			return na.AssignLink(cidlink.Link{Cid: elCid})
   272  		default:
   273  			return fmt.Errorf("unhandled cbor tag %d", tk.Tag)
   274  		}
   275  	case tok.TBool:
   276  		*gas -= 1
   277  		if *gas < 0 {
   278  			return ErrAllocationBudgetExceeded
   279  		}
   280  		return na.AssignBool(tk.Bool)
   281  	case tok.TInt:
   282  		*gas -= 1
   283  		if *gas < 0 {
   284  			return ErrAllocationBudgetExceeded
   285  		}
   286  		return na.AssignInt(tk.Int)
   287  	case tok.TUint:
   288  		*gas -= 1
   289  		if *gas < 0 {
   290  			return ErrAllocationBudgetExceeded
   291  		}
   292  		// note that this pushes any overflow errors up the stack when AsInt() may
   293  		// be called on a UintNode that is too large to cast to an int64
   294  		if tk.Uint > math.MaxInt64 {
   295  			return na.AssignNode(basicnode.NewUint(tk.Uint))
   296  		}
   297  		return na.AssignInt(int64(tk.Uint))
   298  	case tok.TFloat64:
   299  		*gas -= 1
   300  		if *gas < 0 {
   301  			return ErrAllocationBudgetExceeded
   302  		}
   303  		return na.AssignFloat(tk.Float64)
   304  	default:
   305  		panic("unreachable")
   306  	}
   307  }