github.com/ipld/go-ipld-prime@v0.21.0/node/bindnode/fuzz_test.go (about)

     1  //go:build go1.18
     2  
     3  package bindnode_test
     4  
     5  import (
     6  	"bytes"
     7  	"fmt"
     8  	"reflect"
     9  	"strings"
    10  	"testing"
    11  
    12  	"github.com/ipld/go-ipld-prime/codec/dagcbor"
    13  	"github.com/ipld/go-ipld-prime/codec/dagjson"
    14  	"github.com/ipld/go-ipld-prime/datamodel"
    15  	"github.com/ipld/go-ipld-prime/node/basicnode"
    16  	"github.com/ipld/go-ipld-prime/node/bindnode"
    17  	"github.com/ipld/go-ipld-prime/schema"
    18  	schemadmt "github.com/ipld/go-ipld-prime/schema/dmt"
    19  	schemadsl "github.com/ipld/go-ipld-prime/schema/dsl"
    20  )
    21  
    22  var fuzzInputs = []struct {
    23  	schemaDSL, nodeDagJSON string
    24  }{
    25  	{
    26  		schemaDSL:   `type Root bool`,
    27  		nodeDagJSON: `true`,
    28  	},
    29  	{
    30  		schemaDSL:   `type Root int`,
    31  		nodeDagJSON: `123`,
    32  	},
    33  	{
    34  		schemaDSL:   `type Root float`,
    35  		nodeDagJSON: `45.67`,
    36  	},
    37  	{
    38  		schemaDSL:   `type Root string`,
    39  		nodeDagJSON: `"foo"`,
    40  	},
    41  	{
    42  		schemaDSL:   `type Root bytes`,
    43  		nodeDagJSON: `{"/":{"bytes":"ZGVhZGJlZWY"}}`,
    44  	},
    45  	{
    46  		schemaDSL:   `type Root [Int]`,
    47  		nodeDagJSON: `[3,2,1]`,
    48  	},
    49  	{
    50  		schemaDSL:   `type Root [String]`,
    51  		nodeDagJSON: `["x","y","z"]`,
    52  	},
    53  	{
    54  		schemaDSL:   `type Root {String:Int}`,
    55  		nodeDagJSON: `{"a":20,"b":10}`,
    56  	},
    57  	{
    58  		schemaDSL:   `type Root {String:Float}`,
    59  		nodeDagJSON: `{"a":20.5,"b":10.2}`,
    60  	},
    61  	{
    62  		schemaDSL: `type Root struct {
    63  			F1 Bool
    64  			F2 Bytes
    65  		}`,
    66  		nodeDagJSON: `{"F1":true,"F2":{"/":{"bytes":"ZGVhZGJlZWY"}}}`,
    67  	},
    68  	{
    69  		schemaDSL: `type Root struct {
    70  			F1 Int
    71  			F2 Float
    72  		} representation tuple`,
    73  		nodeDagJSON: `[23,45.67]`,
    74  	},
    75  	{
    76  		schemaDSL: `type Root enum {
    77  			| aa ("a")
    78  			| bb ("b")
    79  		} representation string`,
    80  		nodeDagJSON: `"b"`,
    81  	},
    82  	{
    83  		schemaDSL: `type Root enum {
    84  			| One ("1")
    85  			| Two ("2")
    86  		} representation int`,
    87  		nodeDagJSON: `2`,
    88  	},
    89  	{
    90  		schemaDSL: `type Root union {
    91  			| Int    "x"
    92  			| String "y"
    93  		} representation keyed`,
    94  		nodeDagJSON: `{"y":"foo"}`,
    95  	},
    96  	{
    97  		schemaDSL: `type Root union {
    98  			| Float  float
    99  			| Bytes  bytes
   100  			| Bool   bool
   101  			| Nested map
   102  		} representation kinded
   103  		type Nested struct {
   104  			F1 Int
   105  		}
   106  		`,
   107  		nodeDagJSON: `true`,
   108  	},
   109  }
   110  
   111  func marshalDagCBOR(tb testing.TB, node datamodel.Node) []byte {
   112  	tb.Helper()
   113  	var buf bytes.Buffer
   114  	if err := dagcbor.Encode(node, &buf); err != nil {
   115  		tb.Fatal(err)
   116  	}
   117  	return buf.Bytes()
   118  }
   119  
   120  func marshalDagJSON(tb testing.TB, node datamodel.Node) []byte {
   121  	tb.Helper()
   122  	var buf bytes.Buffer
   123  	if err := dagjson.Encode(node, &buf); err != nil {
   124  		switch s := err.Error(); {
   125  		case strings.Contains(s, "unsupported value: NaN"),
   126  			strings.Contains(s, "unsupported value: -Inf"),
   127  			strings.Contains(s, "unsupported value: +Inf"):
   128  			tb.Skipf("dagcbor does not support NaN/Inf")
   129  		}
   130  		tb.Fatal(err)
   131  	}
   132  	return buf.Bytes()
   133  }
   134  
// TODO: consider allowing any codec, chosen by multicodec code, instead of hard-coding dagcbor
   136  
   137  // TODO: we always infer the Go type; it would be interesting to also support
   138  // inferring the IPLD schema, or to supply both.
   139  
   140  // TODO: encoding roundtrips via codecs are a good way to exercise bindnode's
   141  // Node implementation, but they do not call all the methods on the Node
   142  // interface. Consider other ways to call the rest of the methods, akin to how
   143  // infer_test.go has useNodeAsKind.
   144  
   145  func FuzzBindnodeViaDagCBOR(f *testing.F) {
   146  	for _, input := range fuzzInputs {
   147  		// f.Logf("debug: %#v\n", input)
   148  		schemaDMT, err := schemadsl.ParseBytes([]byte(input.schemaDSL))
   149  		if err != nil {
   150  			f.Fatal(err)
   151  		}
   152  		schemaNode := bindnode.Wrap(schemaDMT, schemadmt.Prototypes.Schema.Type())
   153  		schemaDagCBOR := marshalDagCBOR(f, schemaNode.Representation())
   154  
   155  		nodeBuilder := basicnode.Prototype.Any.NewBuilder()
   156  		if err := dagjson.Decode(nodeBuilder, strings.NewReader(input.nodeDagJSON)); err != nil {
   157  			f.Fatal(err)
   158  		}
   159  		node := nodeBuilder.Build()
   160  		nodeDagCBOR := marshalDagCBOR(f, node)
   161  		f.Add(schemaDagCBOR, nodeDagCBOR)
   162  
   163  		// Verify that nodeDagCBOR actually fits the schema.
   164  		// Otherwise, if any of our fuzz inputs are wrong, we might not notice.
   165  		{
   166  			schemaDMT := bindnode.Unwrap(schemaNode).(*schemadmt.Schema)
   167  			ts := new(schema.TypeSystem)
   168  			ts.Init()
   169  			if err := schemadmt.Compile(ts, schemaDMT); err != nil {
   170  				f.Fatal(err)
   171  			}
   172  			schemaType := ts.TypeByName("Root")
   173  			proto := bindnode.Prototype(nil, schemaType)
   174  			nodeBuilder := proto.Representation().NewBuilder()
   175  			if err := dagcbor.Decode(nodeBuilder, bytes.NewReader(nodeDagCBOR)); err != nil {
   176  				f.Fatal(err)
   177  			}
   178  		}
   179  	}
   180  	f.Fuzz(func(t *testing.T, schemaDagCBOR, nodeDagCBOR []byte) {
   181  		schemaBuilder := schemadmt.Prototypes.Schema.Representation().NewBuilder()
   182  
   183  		if err := dagcbor.Decode(schemaBuilder, bytes.NewReader(schemaDagCBOR)); err != nil {
   184  			t.Skipf("invalid schema-schema dag-cbor: %v", err)
   185  		}
   186  
   187  		schemaNode := schemaBuilder.Build().(schema.TypedNode)
   188  		schemaDMT := bindnode.Unwrap(schemaNode).(*schemadmt.Schema)
   189  
   190  		// Log the input schema and node we're fuzzing with, to help debugging.
   191  		// We also use dag-json, as it's more human readable.
   192  		t.Logf("schema in dag-cbor: %X", schemaDagCBOR)
   193  		t.Logf("node in dag-cbor: %X", nodeDagCBOR)
   194  		t.Logf("schema in dag-json: %s", marshalDagJSON(t, schemaNode.Representation()))
   195  		{
   196  			nodeBuilder := basicnode.Prototype.Any.NewBuilder()
   197  			if err := dagcbor.Decode(nodeBuilder, bytes.NewReader(nodeDagCBOR)); err != nil {
   198  				// If some dag-cbor bytes don't decode into the Any prototype,
   199  				// then they're just not valid dag-cbor at all.
   200  				t.Skipf("invalid node dag-cbor: %v", err)
   201  			}
   202  			node := nodeBuilder.Build()
   203  			t.Logf("node in dag-json: %s", marshalDagJSON(t, node))
   204  		}
   205  
   206  		// Is nodeDagCBOR canonically encoded, i.e. strictly deterministic as
   207  		// per the DAG-CBOR spec? This matters for the re-encode checks below.
   208  		// Note that we want to use the non-strict decoder for fuzzing,
   209  		// as that default is what the vast majority users will use.
   210  		canonicalNodeDagCBOR := true
   211  		canonicalDecoder := dagcbor.DecodeOptions{AllowLinks: true, ExperimentalDeterminism: true}
   212  		if err := canonicalDecoder.Decode(basicnode.Prototype.Any.NewBuilder(), bytes.NewReader(nodeDagCBOR)); err != nil {
   213  			canonicalNodeDagCBOR = false
   214  			t.Logf("note that this node dag-cbor isn't canonical!")
   215  		}
   216  
   217  		ts := new(schema.TypeSystem)
   218  		ts.Init()
   219  		// For the time being, we're not interested in panics from
   220  		// schemadmt.Compile or schema.TypeSystem. They are relatively prone to
   221  		// panics at the moment, and right now we're mainly interested in bugs
   222  		// in bindnode and dagcbor.
   223  		func() {
   224  			defer func() {
   225  				if r := recover(); r != nil {
   226  					t.Skipf("invalid schema: %v", r)
   227  				}
   228  			}()
   229  			if err := schemadmt.Compile(ts, schemaDMT); err != nil {
   230  				t.Skipf("invalid schema: %v", err)
   231  			}
   232  		}()
   233  
   234  		schemaType := ts.TypeByName("Root")
   235  		if schemaType == nil {
   236  			t.Skipf("schema has no Root type")
   237  		}
   238  		var proto schema.TypedPrototype
   239  		func() {
   240  			defer func() {
   241  				if r := recover(); r != nil {
   242  					str := fmt.Sprint(r)
   243  					switch {
   244  					case strings.Contains(str, "bindnode: unexpected nil schema.Type"):
   245  					case strings.Contains(str, "is not a valid Go identifier"):
   246  					case strings.Contains(str, "bindnode: inferring Go types from cyclic schemas is not supported"):
   247  					default:
   248  						panic(r)
   249  					}
   250  					t.Skipf("invalid schema: %v", r)
   251  				}
   252  			}()
   253  			proto = bindnode.Prototype(nil, schemaType)
   254  		}()
   255  
   256  		for _, repr := range []bool{false, true} {
   257  			t.Logf("decode and encode roundtrip with dag-cbor repr=%v", repr)
   258  			var nodeBuilder datamodel.NodeBuilder
   259  			if !repr {
   260  				nodeBuilder = proto.NewBuilder()
   261  			} else {
   262  				nodeBuilder = proto.Representation().NewBuilder()
   263  			}
   264  			if err := dagcbor.Decode(nodeBuilder, bytes.NewReader(nodeDagCBOR)); err != nil {
   265  				// The dag-cbor isn't valid for this node. Nothing else to do.
   266  				// We don't use t.Skip, because a dag-cbor might only be valid
   267  				// at the repr level, but not at the type level.
   268  				continue
   269  			}
   270  			node := nodeBuilder.Build()
   271  			if repr {
   272  				node = node.(schema.TypedNode).Representation()
   273  			}
   274  			// Unwrap returns a pointer, and %#v prints pointers as hex,
   275  			// so to get useful output, use reflect to dereference them.
   276  			t.Logf("decode successful: %#v", reflect.ValueOf(bindnode.Unwrap(node)).Elem().Interface())
   277  			reenc := marshalDagCBOR(t, node)
   278  			switch {
   279  			case canonicalNodeDagCBOR && !bytes.Equal(reenc, nodeDagCBOR):
   280  				t.Errorf("node reencoded as %X rather than %X", reenc, nodeDagCBOR)
   281  			case !canonicalNodeDagCBOR && bytes.Equal(reenc, nodeDagCBOR):
   282  				t.Errorf("node reencoded as %X even though it's not canonical", reenc)
   283  			default:
   284  				t.Logf("re-encode successful: %X", reenc)
   285  			}
   286  		}
   287  	})
   288  }