github.com/ipld/go-ipld-prime@v0.21.0/codec/dagcbor/unmarshal.go (about) 1 package dagcbor 2 3 import ( 4 "errors" 5 "fmt" 6 "io" 7 "math" 8 9 cid "github.com/ipfs/go-cid" 10 "github.com/polydawn/refmt/cbor" 11 "github.com/polydawn/refmt/shared" 12 "github.com/polydawn/refmt/tok" 13 14 "github.com/ipld/go-ipld-prime/datamodel" 15 cidlink "github.com/ipld/go-ipld-prime/linking/cid" 16 "github.com/ipld/go-ipld-prime/node/basicnode" 17 ) 18 19 var ( 20 ErrInvalidMultibase = errors.New("invalid multibase on IPLD link") 21 ErrAllocationBudgetExceeded = errors.New("message structure demanded too many resources to process") 22 ErrTrailingBytes = errors.New("unexpected content after end of cbor object") 23 ) 24 25 const ( 26 mapEntryGasScore = 8 27 listEntryGasScore = 4 28 ) 29 30 // This file should be identical to the general feature in the parent package, 31 // except for the `case tok.TBytes` block, 32 // which has dag-cbor's special sauce for detecting schemafree links. 33 34 // DecodeOptions can be used to customize the behavior of a decoding function. 35 // The Decode method on this struct fits the codec.Decoder function interface. 36 type DecodeOptions struct { 37 // If true, parse DAG-CBOR tag(42) as Link nodes, otherwise reject them 38 AllowLinks bool 39 40 // TODO: ExperimentalDeterminism enforces map key order, but not the other parts 41 // of the spec such as integers or floats. See the fuzz failures spotted in 42 // https://github.com/ipld/go-ipld-prime/pull/389. 43 // When we're done implementing strictness, deprecate the option in favor of 44 // StrictDeterminism, but keep accepting both for backwards compatibility. 45 46 // ExperimentalDeterminism requires decoded DAG-CBOR bytes to be canonical as per 47 // the spec. For example, this means that integers and floats be encoded in 48 // a particular way, and map keys be sorted. 49 // 50 // The decoder does not enforce this requirement by default, as the codec 51 // was originally implemented without these rules. Because of that, there's 52 // a significant amount of published data that isn't canonical but should 53 // still decode with the default settings for backwards compatibility. 54 // 55 // Note that this option is experimental as it only implements partial strictness. 56 ExperimentalDeterminism bool 57 58 // If true, the decoder stops reading from the stream at the end of a full, 59 // valid CBOR object. This may be useful for parsing a stream of undelimited 60 // CBOR objects. 61 // As per standard IPLD behavior, in the default mode the parser considers the 62 // entire block to be part of the CBOR object and will error if there is 63 // extraneous data after the end of the object. 64 DontParseBeyondEnd bool 65 } 66 67 // Decode deserializes data from the given io.Reader and feeds it into the given datamodel.NodeAssembler. 68 // Decode fits the codec.Decoder function interface. 69 // 70 // The behavior of the decoder can be customized by setting fields in the DecodeOptions struct before calling this method. 71 func (cfg DecodeOptions) Decode(na datamodel.NodeAssembler, r io.Reader) error { 72 // Probe for a builtin fast path. Shortcut to that if possible. 73 type detectFastPath interface { 74 DecodeDagCbor(io.Reader) error 75 } 76 if na2, ok := na.(detectFastPath); ok { 77 return na2.DecodeDagCbor(r) 78 } 79 // Okay, generic builder path. 80 err := Unmarshal(na, cbor.NewDecoder(cbor.DecodeOptions{ 81 CoerceUndefToNull: true, 82 }, r), cfg) 83 84 if err != nil { 85 return err 86 } 87 88 if cfg.DontParseBeyondEnd { 89 return nil 90 } 91 92 var buf [1]byte 93 _, err = io.ReadFull(r, buf[:]) 94 switch err { 95 case io.EOF: 96 return nil 97 case nil: 98 return ErrTrailingBytes 99 default: 100 return err 101 } 102 } 103 104 // Future work: we would like to remove the Unmarshal function, 105 // and in particular, stop seeing types from refmt (like shared.TokenSource) be visible. 106 // Right now, some kinds of configuration (e.g. for whitespace and prettyprint) are only available through interacting with the refmt types; 107 // we should improve our API so that this can be done with only our own types in this package. 108 109 // Unmarshal is a deprecated function. 110 // Please consider switching to DecodeOptions.Decode instead. 111 func Unmarshal(na datamodel.NodeAssembler, tokSrc shared.TokenSource, options DecodeOptions) error { 112 // Have a gas budget, which will be decremented as we allocate memory, and an error returned when execeeded (or about to be exceeded). 113 // This is a DoS defense mechanism. 114 // It's *roughly* in units of bytes (but only very, VERY roughly) -- it also treats words as 1 in many cases. 115 // FUTURE: this ought be configurable somehow. (How, and at what granularity though?) 116 var gas int64 = 1048576 * 10 117 return unmarshal1(na, tokSrc, &gas, options) 118 } 119 120 func unmarshal1(na datamodel.NodeAssembler, tokSrc shared.TokenSource, gas *int64, options DecodeOptions) error { 121 var tk tok.Token 122 done, err := tokSrc.Step(&tk) 123 if err == io.EOF { 124 return io.ErrUnexpectedEOF 125 } 126 if err != nil { 127 return err 128 } 129 if done && !tk.Type.IsValue() && tk.Type != tok.TNull { 130 return fmt.Errorf("unexpected eof") 131 } 132 return unmarshal2(na, tokSrc, &tk, gas, options) 133 } 134 135 // starts with the first token already primed. Necessary to get recursion 136 // 137 // to flow right without a peek+unpeek system. 138 func unmarshal2(na datamodel.NodeAssembler, tokSrc shared.TokenSource, tk *tok.Token, gas *int64, options DecodeOptions) error { 139 // FUTURE: check for schema.TypedNodeBuilder that's going to parse a Link (they can slurp any token kind they want). 140 switch tk.Type { 141 case tok.TMapOpen: 142 expectLen := int64(tk.Length) 143 allocLen := int64(tk.Length) 144 if tk.Length == -1 { 145 expectLen = math.MaxInt64 146 allocLen = 0 147 } else { 148 if *gas-allocLen < 0 { // halt early if this will clearly demand too many resources 149 return ErrAllocationBudgetExceeded 150 } 151 } 152 ma, err := na.BeginMap(allocLen) 153 if err != nil { 154 return err 155 } 156 var observedLen int64 157 lastKey := "" 158 for { 159 _, err := tokSrc.Step(tk) 160 if err != nil { 161 return err 162 } 163 switch tk.Type { 164 case tok.TMapClose: 165 if expectLen != math.MaxInt64 && observedLen != expectLen { 166 return fmt.Errorf("unexpected mapClose before declared length") 167 } 168 return ma.Finish() 169 case tok.TString: 170 *gas -= int64(len(tk.Str) + mapEntryGasScore) 171 if *gas < 0 { 172 return ErrAllocationBudgetExceeded 173 } 174 // continue 175 default: 176 return fmt.Errorf("unexpected %s token while expecting map key", tk.Type) 177 } 178 observedLen++ 179 if observedLen > expectLen { 180 return fmt.Errorf("unexpected continuation of map elements beyond declared length") 181 } 182 if observedLen > 1 && options.ExperimentalDeterminism { 183 if len(lastKey) > len(tk.Str) || lastKey > tk.Str { 184 return fmt.Errorf("map key %q is not after %q as per RFC7049", tk.Str, lastKey) 185 } 186 } 187 lastKey = tk.Str 188 mva, err := ma.AssembleEntry(tk.Str) 189 if err != nil { // return in error if the key was rejected 190 return err 191 } 192 err = unmarshal1(mva, tokSrc, gas, options) 193 if err != nil { // return in error if some part of the recursion errored 194 return err 195 } 196 } 197 case tok.TMapClose: 198 return fmt.Errorf("unexpected mapClose token") 199 case tok.TArrOpen: 200 expectLen := int64(tk.Length) 201 allocLen := int64(tk.Length) 202 if tk.Length == -1 { 203 expectLen = math.MaxInt64 204 allocLen = 0 205 } else { 206 if *gas-allocLen < 0 { // halt early if this will clearly demand too many resources 207 return ErrAllocationBudgetExceeded 208 } 209 } 210 la, err := na.BeginList(allocLen) 211 if err != nil { 212 return err 213 } 214 var observedLen int64 215 for { 216 _, err := tokSrc.Step(tk) 217 if err != nil { 218 return err 219 } 220 switch tk.Type { 221 case tok.TArrClose: 222 if expectLen != math.MaxInt64 && observedLen != expectLen { 223 return fmt.Errorf("unexpected arrClose before declared length") 224 } 225 return la.Finish() 226 default: 227 *gas -= listEntryGasScore 228 if *gas < 0 { 229 return ErrAllocationBudgetExceeded 230 } 231 observedLen++ 232 if observedLen > expectLen { 233 return fmt.Errorf("unexpected continuation of array elements beyond declared length") 234 } 235 err := unmarshal2(la.AssembleValue(), tokSrc, tk, gas, options) 236 if err != nil { // return in error if some part of the recursion errored 237 return err 238 } 239 } 240 } 241 case tok.TArrClose: 242 return fmt.Errorf("unexpected arrClose token") 243 case tok.TNull: 244 return na.AssignNull() 245 case tok.TString: 246 *gas -= int64(len(tk.Str)) 247 if *gas < 0 { 248 return ErrAllocationBudgetExceeded 249 } 250 return na.AssignString(tk.Str) 251 case tok.TBytes: 252 *gas -= int64(len(tk.Bytes)) 253 if *gas < 0 { 254 return ErrAllocationBudgetExceeded 255 } 256 if !tk.Tagged { 257 return na.AssignBytes(tk.Bytes) 258 } 259 switch tk.Tag { 260 case linkTag: 261 if !options.AllowLinks { 262 return fmt.Errorf("unhandled cbor tag %d", tk.Tag) 263 } 264 if len(tk.Bytes) < 1 || tk.Bytes[0] != 0 { 265 return ErrInvalidMultibase 266 } 267 elCid, err := cid.Cast(tk.Bytes[1:]) 268 if err != nil { 269 return err 270 } 271 return na.AssignLink(cidlink.Link{Cid: elCid}) 272 default: 273 return fmt.Errorf("unhandled cbor tag %d", tk.Tag) 274 } 275 case tok.TBool: 276 *gas -= 1 277 if *gas < 0 { 278 return ErrAllocationBudgetExceeded 279 } 280 return na.AssignBool(tk.Bool) 281 case tok.TInt: 282 *gas -= 1 283 if *gas < 0 { 284 return ErrAllocationBudgetExceeded 285 } 286 return na.AssignInt(tk.Int) 287 case tok.TUint: 288 *gas -= 1 289 if *gas < 0 { 290 return ErrAllocationBudgetExceeded 291 } 292 // note that this pushes any overflow errors up the stack when AsInt() may 293 // be called on a UintNode that is too large to cast to an int64 294 if tk.Uint > math.MaxInt64 { 295 return na.AssignNode(basicnode.NewUint(tk.Uint)) 296 } 297 return na.AssignInt(int64(tk.Uint)) 298 case tok.TFloat64: 299 *gas -= 1 300 if *gas < 0 { 301 return ErrAllocationBudgetExceeded 302 } 303 return na.AssignFloat(tk.Float64) 304 default: 305 panic("unreachable") 306 } 307 }