github.com/ipld/go-ipld-prime@v0.21.0/codec/dagcbor/marshal.go (about)

     1  package dagcbor
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"sort"
     7  
     8  	"github.com/polydawn/refmt/cbor"
     9  	"github.com/polydawn/refmt/shared"
    10  	"github.com/polydawn/refmt/tok"
    11  
    12  	"github.com/ipld/go-ipld-prime/codec"
    13  	"github.com/ipld/go-ipld-prime/datamodel"
    14  	cidlink "github.com/ipld/go-ipld-prime/linking/cid"
    15  )
    16  
    17  // This file should be identical to the general feature in the parent package,
    18  // except for the `case datamodel.Kind_Link` block,
    19  // which is dag-cbor's special sauce for schemafree links.
    20  
    21  // EncodeOptions can be used to customize the behavior of an encoding function.
    22  // The Encode method on this struct fits the codec.Encoder function interface.
    23  type EncodeOptions struct {
    24  	// If true, allow encoding of Link nodes as CBOR tag(42);
    25  	// otherwise, reject them as unencodable.
    26  	AllowLinks bool
    27  
    28  	// Control the sorting of map keys, using one of the `codec.MapSortMode_*` constants.
    29  	MapSortMode codec.MapSortMode
    30  }
    31  
    32  // Encode walks the given datamodel.Node and serializes it to the given io.Writer.
    33  // Encode fits the codec.Encoder function interface.
    34  //
    35  // The behavior of the encoder can be customized by setting fields in the EncodeOptions struct before calling this method.
    36  func (cfg EncodeOptions) Encode(n datamodel.Node, w io.Writer) error {
    37  	// Probe for a builtin fast path.  Shortcut to that if possible.
    38  	type detectFastPath interface {
    39  		EncodeDagCbor(io.Writer) error
    40  	}
    41  	if n2, ok := n.(detectFastPath); ok {
    42  		return n2.EncodeDagCbor(w)
    43  	}
    44  	// Okay, generic inspection path.
    45  	return Marshal(n, cbor.NewEncoder(w), cfg)
    46  }
    47  
    48  // Future work: we would like to remove the Marshal function,
    49  // and in particular, stop seeing types from refmt (like shared.TokenSink) be visible.
    50  // Right now, some kinds of configuration (e.g. for whitespace and prettyprint) are only available through interacting with the refmt types;
    51  // we should improve our API so that this can be done with only our own types in this package.
    52  
    53  // Marshal is a deprecated function.
    54  // Please consider switching to EncodeOptions.Encode instead.
    55  func Marshal(n datamodel.Node, sink shared.TokenSink, options EncodeOptions) error {
    56  	var tk tok.Token
    57  	return marshal(n, &tk, sink, options)
    58  }
    59  
    60  func marshal(n datamodel.Node, tk *tok.Token, sink shared.TokenSink, options EncodeOptions) error {
    61  	switch n.Kind() {
    62  	case datamodel.Kind_Invalid:
    63  		return fmt.Errorf("cannot traverse a node that is absent")
    64  	case datamodel.Kind_Null:
    65  		tk.Type = tok.TNull
    66  		_, err := sink.Step(tk)
    67  		return err
    68  	case datamodel.Kind_Map:
    69  		return marshalMap(n, tk, sink, options)
    70  	case datamodel.Kind_List:
    71  		// Emit start of list.
    72  		tk.Type = tok.TArrOpen
    73  		l := n.Length()
    74  		tk.Length = int(l) // TODO: overflow check
    75  		if _, err := sink.Step(tk); err != nil {
    76  			return err
    77  		}
    78  		// Emit list contents (and recurse).
    79  		for i := int64(0); i < l; i++ {
    80  			v, err := n.LookupByIndex(i)
    81  			if err != nil {
    82  				return err
    83  			}
    84  			if err := marshal(v, tk, sink, options); err != nil {
    85  				return err
    86  			}
    87  		}
    88  		// Emit list close.
    89  		tk.Type = tok.TArrClose
    90  		_, err := sink.Step(tk)
    91  		return err
    92  	case datamodel.Kind_Bool:
    93  		v, err := n.AsBool()
    94  		if err != nil {
    95  			return err
    96  		}
    97  		tk.Type = tok.TBool
    98  		tk.Bool = v
    99  		_, err = sink.Step(tk)
   100  		return err
   101  	case datamodel.Kind_Int:
   102  		if uin, ok := n.(datamodel.UintNode); ok {
   103  			v, err := uin.AsUint()
   104  			if err != nil {
   105  				return err
   106  			}
   107  			tk.Type = tok.TUint
   108  			tk.Uint = v
   109  		} else {
   110  			v, err := n.AsInt()
   111  			if err != nil {
   112  				return err
   113  			}
   114  			tk.Type = tok.TInt
   115  			tk.Int = v
   116  		}
   117  		_, err := sink.Step(tk)
   118  		return err
   119  	case datamodel.Kind_Float:
   120  		v, err := n.AsFloat()
   121  		if err != nil {
   122  			return err
   123  		}
   124  		tk.Type = tok.TFloat64
   125  		tk.Float64 = v
   126  		_, err = sink.Step(tk)
   127  		return err
   128  	case datamodel.Kind_String:
   129  		v, err := n.AsString()
   130  		if err != nil {
   131  			return err
   132  		}
   133  		tk.Type = tok.TString
   134  		tk.Str = v
   135  		_, err = sink.Step(tk)
   136  		return err
   137  	case datamodel.Kind_Bytes:
   138  		v, err := n.AsBytes()
   139  		if err != nil {
   140  			return err
   141  		}
   142  		tk.Type = tok.TBytes
   143  		tk.Bytes = v
   144  		_, err = sink.Step(tk)
   145  		return err
   146  	case datamodel.Kind_Link:
   147  		if !options.AllowLinks {
   148  			return fmt.Errorf("cannot Marshal ipld links to CBOR")
   149  		}
   150  		v, err := n.AsLink()
   151  		if err != nil {
   152  			return err
   153  		}
   154  		switch lnk := v.(type) {
   155  		case cidlink.Link:
   156  			if !lnk.Cid.Defined() {
   157  				return fmt.Errorf("encoding undefined CIDs are not supported by this codec")
   158  			}
   159  			tk.Type = tok.TBytes
   160  			tk.Bytes = append([]byte{0}, lnk.Bytes()...)
   161  			tk.Tagged = true
   162  			tk.Tag = linkTag
   163  			_, err = sink.Step(tk)
   164  			tk.Tagged = false
   165  			return err
   166  		default:
   167  			return fmt.Errorf("schemafree link emission only supported by this codec for CID type links")
   168  		}
   169  	default:
   170  		panic("unreachable")
   171  	}
   172  }
   173  
   174  func marshalMap(n datamodel.Node, tk *tok.Token, sink shared.TokenSink, options EncodeOptions) error {
   175  	// Emit start of map.
   176  	tk.Type = tok.TMapOpen
   177  	expectedLength := int(n.Length())
   178  	tk.Length = expectedLength // TODO: overflow check
   179  	if _, err := sink.Step(tk); err != nil {
   180  		return err
   181  	}
   182  	if options.MapSortMode != codec.MapSortMode_None {
   183  		// Collect map entries, then sort by key
   184  		type entry struct {
   185  			key   string
   186  			value datamodel.Node
   187  		}
   188  		entries := []entry{}
   189  		for itr := n.MapIterator(); !itr.Done(); {
   190  			k, v, err := itr.Next()
   191  			if err != nil {
   192  				return err
   193  			}
   194  			keyStr, err := k.AsString()
   195  			if err != nil {
   196  				return err
   197  			}
   198  			entries = append(entries, entry{keyStr, v})
   199  		}
   200  		if len(entries) != expectedLength {
   201  			return fmt.Errorf("map Length() does not match number of MapIterator() entries")
   202  		}
   203  		// Apply the desired sort function.
   204  		switch options.MapSortMode {
   205  		case codec.MapSortMode_Lexical:
   206  			sort.Slice(entries, func(i, j int) bool {
   207  				return entries[i].key < entries[j].key
   208  			})
   209  		case codec.MapSortMode_RFC7049:
   210  			sort.Slice(entries, func(i, j int) bool {
   211  				// RFC7049 style sort as per DAG-CBOR spec
   212  				li, lj := len(entries[i].key), len(entries[j].key)
   213  				if li == lj {
   214  					return entries[i].key < entries[j].key
   215  				}
   216  				return li < lj
   217  			})
   218  		}
   219  		// Emit map contents (and recurse).
   220  		for _, e := range entries {
   221  			tk.Type = tok.TString
   222  			tk.Str = e.key
   223  			if _, err := sink.Step(tk); err != nil {
   224  				return err
   225  			}
   226  			if err := marshal(e.value, tk, sink, options); err != nil {
   227  				return err
   228  			}
   229  		}
   230  	} else { // no sorting
   231  		// Emit map contents (and recurse).
   232  		var entryCount int
   233  		for itr := n.MapIterator(); !itr.Done(); {
   234  			k, v, err := itr.Next()
   235  			if err != nil {
   236  				return err
   237  			}
   238  			entryCount++
   239  			tk.Type = tok.TString
   240  			tk.Str, err = k.AsString()
   241  			if err != nil {
   242  				return err
   243  			}
   244  			if _, err := sink.Step(tk); err != nil {
   245  				return err
   246  			}
   247  			if err := marshal(v, tk, sink, options); err != nil {
   248  				return err
   249  			}
   250  		}
   251  		if entryCount != expectedLength {
   252  			return fmt.Errorf("map Length() does not match number of MapIterator() entries")
   253  		}
   254  	}
   255  	// Emit map close.
   256  	tk.Type = tok.TMapClose
   257  	_, err := sink.Step(tk)
   258  	return err
   259  }
   260  
   261  // EncodedLength will calculate the length in bytes that the encoded form of the
   262  // provided Node will occupy.
   263  //
   264  // Note that this function requires a full walk of the Node's graph, which may
   265  // not necessarily be a trivial cost and will incur some allocations. Using this
   266  // method to calculate buffers to pre-allocate may not result in performance
   267  // gains, but rather incur an overall cost. Use with care.
   268  func EncodedLength(n datamodel.Node) (int64, error) {
   269  	switch n.Kind() {
   270  	case datamodel.Kind_Invalid:
   271  		return 0, fmt.Errorf("cannot traverse a node that is absent")
   272  	case datamodel.Kind_Null:
   273  		return 1, nil // 0xf6
   274  	case datamodel.Kind_Map:
   275  		length := uintLength(uint64(n.Length())) // length prefixed major 5
   276  		for itr := n.MapIterator(); !itr.Done(); {
   277  			k, v, err := itr.Next()
   278  			if err != nil {
   279  				return 0, err
   280  			}
   281  			keyLength, err := EncodedLength(k)
   282  			if err != nil {
   283  				return 0, err
   284  			}
   285  			length += keyLength
   286  			valueLength, err := EncodedLength(v)
   287  			if err != nil {
   288  				return 0, err
   289  			}
   290  			length += valueLength
   291  		}
   292  		return length, nil
   293  	case datamodel.Kind_List:
   294  		nl := n.Length()
   295  		length := uintLength(uint64(nl)) // length prefixed major 4
   296  		for i := int64(0); i < nl; i++ {
   297  			v, err := n.LookupByIndex(i)
   298  			if err != nil {
   299  				return 0, err
   300  			}
   301  			innerLength, err := EncodedLength(v)
   302  			if err != nil {
   303  				return 0, err
   304  			}
   305  			length += innerLength
   306  		}
   307  		return length, nil
   308  	case datamodel.Kind_Bool:
   309  		return 1, nil // 0xf4 or 0xf5
   310  	case datamodel.Kind_Int:
   311  		v, err := n.AsInt()
   312  		if err != nil {
   313  			return 0, err
   314  		}
   315  		if v < 0 {
   316  			v = -v - 1 // negint is stored as one less than actual
   317  		}
   318  		return uintLength(uint64(v)), nil // major 0 or 1, as small as possible
   319  	case datamodel.Kind_Float:
   320  		return 9, nil // always major 7 and 64-bit float
   321  	case datamodel.Kind_String:
   322  		v, err := n.AsString()
   323  		if err != nil {
   324  			return 0, err
   325  		}
   326  
   327  		return uintLength(uint64(len(v))) + int64(len(v)), nil // length prefixed major 3
   328  	case datamodel.Kind_Bytes:
   329  		v, err := n.AsBytes()
   330  		if err != nil {
   331  			return 0, err
   332  		}
   333  		return uintLength(uint64(len(v))) + int64(len(v)), nil // length prefixed major 2
   334  	case datamodel.Kind_Link:
   335  		v, err := n.AsLink()
   336  		if err != nil {
   337  			return 0, err
   338  		}
   339  		switch lnk := v.(type) {
   340  		case cidlink.Link:
   341  			length := int64(2)                    // tag,42: 0xd82a
   342  			bl := int64(len(lnk.Bytes())) + 1     // additional 0x00 in front of the CID bytes
   343  			length += uintLength(uint64(bl)) + bl // length prefixed major 2
   344  			return length, err
   345  		default:
   346  			return 0, fmt.Errorf("schemafree link emission only supported by this codec for CID type links")
   347  		}
   348  	default:
   349  		panic("unreachable")
   350  	}
   351  }
   352  
   353  // Calculate how many bytes an integer, and therefore also the leading bytes of
   354  // a length-prefixed token. CBOR will pack it up into the smallest possible
   355  // uint representation, even merging it with the major if it's <=23.
   356  
   357  type boundaryLength struct {
   358  	upperBound uint64
   359  	length     int64
   360  }
   361  
   362  var lengthBoundaries = []boundaryLength{
   363  	{24, 1},         // packed major|minor
   364  	{256, 2},        // major, 8-bit length
   365  	{65536, 3},      // major, 16-bit length
   366  	{4294967296, 5}, // major, 32-bit length
   367  	{0, 9},          // major, 64-bit length
   368  }
   369  
   370  func uintLength(ii uint64) int64 {
   371  	for _, lb := range lengthBoundaries {
   372  		if ii < lb.upperBound {
   373  			return lb.length
   374  		}
   375  	}
   376  	// maximum number of bytes to pack this int
   377  	// if this int is used as a length prefix for a map, list, string or bytes
   378  	// then we likely have a very bad Node that shouldn't be encoded, but the
   379  	// encoder may raise problems with that if the memory allocator doesn't first.
   380  	return lengthBoundaries[len(lengthBoundaries)-1].length
   381  }