github.com/ipld/go-ipld-prime@v0.21.0/codec/dagcbor/marshal.go (about) 1 package dagcbor 2 3 import ( 4 "fmt" 5 "io" 6 "sort" 7 8 "github.com/polydawn/refmt/cbor" 9 "github.com/polydawn/refmt/shared" 10 "github.com/polydawn/refmt/tok" 11 12 "github.com/ipld/go-ipld-prime/codec" 13 "github.com/ipld/go-ipld-prime/datamodel" 14 cidlink "github.com/ipld/go-ipld-prime/linking/cid" 15 ) 16 17 // This file should be identical to the general feature in the parent package, 18 // except for the `case datamodel.Kind_Link` block, 19 // which is dag-cbor's special sauce for schemafree links. 20 21 // EncodeOptions can be used to customize the behavior of an encoding function. 22 // The Encode method on this struct fits the codec.Encoder function interface. 23 type EncodeOptions struct { 24 // If true, allow encoding of Link nodes as CBOR tag(42); 25 // otherwise, reject them as unencodable. 26 AllowLinks bool 27 28 // Control the sorting of map keys, using one of the `codec.MapSortMode_*` constants. 29 MapSortMode codec.MapSortMode 30 } 31 32 // Encode walks the given datamodel.Node and serializes it to the given io.Writer. 33 // Encode fits the codec.Encoder function interface. 34 // 35 // The behavior of the encoder can be customized by setting fields in the EncodeOptions struct before calling this method. 36 func (cfg EncodeOptions) Encode(n datamodel.Node, w io.Writer) error { 37 // Probe for a builtin fast path. Shortcut to that if possible. 38 type detectFastPath interface { 39 EncodeDagCbor(io.Writer) error 40 } 41 if n2, ok := n.(detectFastPath); ok { 42 return n2.EncodeDagCbor(w) 43 } 44 // Okay, generic inspection path. 45 return Marshal(n, cbor.NewEncoder(w), cfg) 46 } 47 48 // Future work: we would like to remove the Marshal function, 49 // and in particular, stop seeing types from refmt (like shared.TokenSink) be visible. 50 // Right now, some kinds of configuration (e.g. for whitespace and prettyprint) are only available through interacting with the refmt types; 51 // we should improve our API so that this can be done with only our own types in this package. 52 53 // Marshal is a deprecated function. 54 // Please consider switching to EncodeOptions.Encode instead. 55 func Marshal(n datamodel.Node, sink shared.TokenSink, options EncodeOptions) error { 56 var tk tok.Token 57 return marshal(n, &tk, sink, options) 58 } 59 60 func marshal(n datamodel.Node, tk *tok.Token, sink shared.TokenSink, options EncodeOptions) error { 61 switch n.Kind() { 62 case datamodel.Kind_Invalid: 63 return fmt.Errorf("cannot traverse a node that is absent") 64 case datamodel.Kind_Null: 65 tk.Type = tok.TNull 66 _, err := sink.Step(tk) 67 return err 68 case datamodel.Kind_Map: 69 return marshalMap(n, tk, sink, options) 70 case datamodel.Kind_List: 71 // Emit start of list. 72 tk.Type = tok.TArrOpen 73 l := n.Length() 74 tk.Length = int(l) // TODO: overflow check 75 if _, err := sink.Step(tk); err != nil { 76 return err 77 } 78 // Emit list contents (and recurse). 79 for i := int64(0); i < l; i++ { 80 v, err := n.LookupByIndex(i) 81 if err != nil { 82 return err 83 } 84 if err := marshal(v, tk, sink, options); err != nil { 85 return err 86 } 87 } 88 // Emit list close. 89 tk.Type = tok.TArrClose 90 _, err := sink.Step(tk) 91 return err 92 case datamodel.Kind_Bool: 93 v, err := n.AsBool() 94 if err != nil { 95 return err 96 } 97 tk.Type = tok.TBool 98 tk.Bool = v 99 _, err = sink.Step(tk) 100 return err 101 case datamodel.Kind_Int: 102 if uin, ok := n.(datamodel.UintNode); ok { 103 v, err := uin.AsUint() 104 if err != nil { 105 return err 106 } 107 tk.Type = tok.TUint 108 tk.Uint = v 109 } else { 110 v, err := n.AsInt() 111 if err != nil { 112 return err 113 } 114 tk.Type = tok.TInt 115 tk.Int = v 116 } 117 _, err := sink.Step(tk) 118 return err 119 case datamodel.Kind_Float: 120 v, err := n.AsFloat() 121 if err != nil { 122 return err 123 } 124 tk.Type = tok.TFloat64 125 tk.Float64 = v 126 _, err = sink.Step(tk) 127 return err 128 case datamodel.Kind_String: 129 v, err := n.AsString() 130 if err != nil { 131 return err 132 } 133 tk.Type = tok.TString 134 tk.Str = v 135 _, err = sink.Step(tk) 136 return err 137 case datamodel.Kind_Bytes: 138 v, err := n.AsBytes() 139 if err != nil { 140 return err 141 } 142 tk.Type = tok.TBytes 143 tk.Bytes = v 144 _, err = sink.Step(tk) 145 return err 146 case datamodel.Kind_Link: 147 if !options.AllowLinks { 148 return fmt.Errorf("cannot Marshal ipld links to CBOR") 149 } 150 v, err := n.AsLink() 151 if err != nil { 152 return err 153 } 154 switch lnk := v.(type) { 155 case cidlink.Link: 156 if !lnk.Cid.Defined() { 157 return fmt.Errorf("encoding undefined CIDs are not supported by this codec") 158 } 159 tk.Type = tok.TBytes 160 tk.Bytes = append([]byte{0}, lnk.Bytes()...) 161 tk.Tagged = true 162 tk.Tag = linkTag 163 _, err = sink.Step(tk) 164 tk.Tagged = false 165 return err 166 default: 167 return fmt.Errorf("schemafree link emission only supported by this codec for CID type links") 168 } 169 default: 170 panic("unreachable") 171 } 172 } 173 174 func marshalMap(n datamodel.Node, tk *tok.Token, sink shared.TokenSink, options EncodeOptions) error { 175 // Emit start of map. 176 tk.Type = tok.TMapOpen 177 expectedLength := int(n.Length()) 178 tk.Length = expectedLength // TODO: overflow check 179 if _, err := sink.Step(tk); err != nil { 180 return err 181 } 182 if options.MapSortMode != codec.MapSortMode_None { 183 // Collect map entries, then sort by key 184 type entry struct { 185 key string 186 value datamodel.Node 187 } 188 entries := []entry{} 189 for itr := n.MapIterator(); !itr.Done(); { 190 k, v, err := itr.Next() 191 if err != nil { 192 return err 193 } 194 keyStr, err := k.AsString() 195 if err != nil { 196 return err 197 } 198 entries = append(entries, entry{keyStr, v}) 199 } 200 if len(entries) != expectedLength { 201 return fmt.Errorf("map Length() does not match number of MapIterator() entries") 202 } 203 // Apply the desired sort function. 204 switch options.MapSortMode { 205 case codec.MapSortMode_Lexical: 206 sort.Slice(entries, func(i, j int) bool { 207 return entries[i].key < entries[j].key 208 }) 209 case codec.MapSortMode_RFC7049: 210 sort.Slice(entries, func(i, j int) bool { 211 // RFC7049 style sort as per DAG-CBOR spec 212 li, lj := len(entries[i].key), len(entries[j].key) 213 if li == lj { 214 return entries[i].key < entries[j].key 215 } 216 return li < lj 217 }) 218 } 219 // Emit map contents (and recurse). 220 for _, e := range entries { 221 tk.Type = tok.TString 222 tk.Str = e.key 223 if _, err := sink.Step(tk); err != nil { 224 return err 225 } 226 if err := marshal(e.value, tk, sink, options); err != nil { 227 return err 228 } 229 } 230 } else { // no sorting 231 // Emit map contents (and recurse). 232 var entryCount int 233 for itr := n.MapIterator(); !itr.Done(); { 234 k, v, err := itr.Next() 235 if err != nil { 236 return err 237 } 238 entryCount++ 239 tk.Type = tok.TString 240 tk.Str, err = k.AsString() 241 if err != nil { 242 return err 243 } 244 if _, err := sink.Step(tk); err != nil { 245 return err 246 } 247 if err := marshal(v, tk, sink, options); err != nil { 248 return err 249 } 250 } 251 if entryCount != expectedLength { 252 return fmt.Errorf("map Length() does not match number of MapIterator() entries") 253 } 254 } 255 // Emit map close. 256 tk.Type = tok.TMapClose 257 _, err := sink.Step(tk) 258 return err 259 } 260 261 // EncodedLength will calculate the length in bytes that the encoded form of the 262 // provided Node will occupy. 263 // 264 // Note that this function requires a full walk of the Node's graph, which may 265 // not necessarily be a trivial cost and will incur some allocations. Using this 266 // method to calculate buffers to pre-allocate may not result in performance 267 // gains, but rather incur an overall cost. Use with care. 268 func EncodedLength(n datamodel.Node) (int64, error) { 269 switch n.Kind() { 270 case datamodel.Kind_Invalid: 271 return 0, fmt.Errorf("cannot traverse a node that is absent") 272 case datamodel.Kind_Null: 273 return 1, nil // 0xf6 274 case datamodel.Kind_Map: 275 length := uintLength(uint64(n.Length())) // length prefixed major 5 276 for itr := n.MapIterator(); !itr.Done(); { 277 k, v, err := itr.Next() 278 if err != nil { 279 return 0, err 280 } 281 keyLength, err := EncodedLength(k) 282 if err != nil { 283 return 0, err 284 } 285 length += keyLength 286 valueLength, err := EncodedLength(v) 287 if err != nil { 288 return 0, err 289 } 290 length += valueLength 291 } 292 return length, nil 293 case datamodel.Kind_List: 294 nl := n.Length() 295 length := uintLength(uint64(nl)) // length prefixed major 4 296 for i := int64(0); i < nl; i++ { 297 v, err := n.LookupByIndex(i) 298 if err != nil { 299 return 0, err 300 } 301 innerLength, err := EncodedLength(v) 302 if err != nil { 303 return 0, err 304 } 305 length += innerLength 306 } 307 return length, nil 308 case datamodel.Kind_Bool: 309 return 1, nil // 0xf4 or 0xf5 310 case datamodel.Kind_Int: 311 v, err := n.AsInt() 312 if err != nil { 313 return 0, err 314 } 315 if v < 0 { 316 v = -v - 1 // negint is stored as one less than actual 317 } 318 return uintLength(uint64(v)), nil // major 0 or 1, as small as possible 319 case datamodel.Kind_Float: 320 return 9, nil // always major 7 and 64-bit float 321 case datamodel.Kind_String: 322 v, err := n.AsString() 323 if err != nil { 324 return 0, err 325 } 326 327 return uintLength(uint64(len(v))) + int64(len(v)), nil // length prefixed major 3 328 case datamodel.Kind_Bytes: 329 v, err := n.AsBytes() 330 if err != nil { 331 return 0, err 332 } 333 return uintLength(uint64(len(v))) + int64(len(v)), nil // length prefixed major 2 334 case datamodel.Kind_Link: 335 v, err := n.AsLink() 336 if err != nil { 337 return 0, err 338 } 339 switch lnk := v.(type) { 340 case cidlink.Link: 341 length := int64(2) // tag,42: 0xd82a 342 bl := int64(len(lnk.Bytes())) + 1 // additional 0x00 in front of the CID bytes 343 length += uintLength(uint64(bl)) + bl // length prefixed major 2 344 return length, err 345 default: 346 return 0, fmt.Errorf("schemafree link emission only supported by this codec for CID type links") 347 } 348 default: 349 panic("unreachable") 350 } 351 } 352 353 // Calculate how many bytes an integer, and therefore also the leading bytes of 354 // a length-prefixed token. CBOR will pack it up into the smallest possible 355 // uint representation, even merging it with the major if it's <=23. 356 357 type boundaryLength struct { 358 upperBound uint64 359 length int64 360 } 361 362 var lengthBoundaries = []boundaryLength{ 363 {24, 1}, // packed major|minor 364 {256, 2}, // major, 8-bit length 365 {65536, 3}, // major, 16-bit length 366 {4294967296, 5}, // major, 32-bit length 367 {0, 9}, // major, 64-bit length 368 } 369 370 func uintLength(ii uint64) int64 { 371 for _, lb := range lengthBoundaries { 372 if ii < lb.upperBound { 373 return lb.length 374 } 375 } 376 // maximum number of bytes to pack this int 377 // if this int is used as a length prefix for a map, list, string or bytes 378 // then we likely have a very bad Node that shouldn't be encoded, but the 379 // encoder may raise problems with that if the memory allocator doesn't first. 380 return lengthBoundaries[len(lengthBoundaries)-1].length 381 }