go.starlark.net@v0.0.0-20231101134539-556fd59b42f6/lib/json/json.go (about)

     1  // Copyright 2020 The Bazel Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package json defines utilities for converting Starlark values
     6  // to/from JSON strings. The most recent IETF standard for JSON is
     7  // https://www.ietf.org/rfc/rfc7159.txt.
     8  package json // import "go.starlark.net/lib/json"
     9  
    10  import (
    11  	"bytes"
    12  	"encoding/json"
    13  	"fmt"
    14  	"math"
    15  	"math/big"
    16  	"reflect"
    17  	"sort"
    18  	"strconv"
    19  	"strings"
    20  	"unicode/utf8"
    21  	"unsafe"
    22  
    23  	"go.starlark.net/starlark"
    24  	"go.starlark.net/starlarkstruct"
    25  )
    26  
// Module json is a Starlark module of JSON-related functions.
//
//	json = module(
//	   encode,
//	   decode,
//	   indent,
//	)
//
// def encode(x):
//
// The encode function accepts one required positional argument,
// which it converts to JSON by cases:
//   - A Starlark value that implements Go's standard json.Marshaler
//     interface defines its own JSON encoding.
//   - None, True, and False are converted to null, true, and false, respectively.
//   - Starlark int values, no matter how large, are encoded as decimal integers.
//     Some decoders may not be able to decode very large integers.
//   - Starlark float values are encoded using decimal point notation,
//     even if the value is an integer.
//     It is an error to encode a non-finite floating-point value.
//   - Starlark strings are encoded as JSON strings, using UTF-16 escapes.
//   - a Starlark IterableMapping (e.g. dict) is encoded as a JSON object.
//     It is an error if any key is not a string.
//   - any other Starlark Iterable (e.g. list, tuple) is encoded as a JSON array.
//   - a Starlark HasAttrs (e.g. struct) is encoded as a JSON object.
//
// If an application-defined type matches more than one of the cases described
// above (e.g. it implements both Iterable and HasAttrs), the first case takes
// precedence. Encoding any other value yields an error.
//
// def decode(x[, default]):
//
// The decode function has one required positional parameter, a JSON string.
// It returns the Starlark value that the string denotes.
//   - Numbers are parsed as int or float, depending on whether they
//     contain a decimal point or an exponent.
//   - JSON objects are parsed as new unfrozen Starlark dicts.
//   - JSON arrays are parsed as new unfrozen Starlark lists.
//
// If x is not a valid JSON string, the behavior depends on the "default"
// parameter: if present, decode returns its value; otherwise, decode fails.
//
// def indent(str, *, prefix="", indent="\t"):
//
// The indent function pretty-prints a valid JSON encoding,
// and returns a string containing the indented form.
// It accepts one required positional parameter, the JSON string,
// and two optional keyword-only string parameters, prefix and indent,
// that specify a prefix of each new line, and the unit of indentation.
var Module = &starlarkstruct.Module{
	Name: "json",
	Members: starlark.StringDict{
		"encode": starlark.NewBuiltin("json.encode", encode),
		"decode": starlark.NewBuiltin("json.decode", decode),
		"indent": starlark.NewBuiltin("json.indent", indent),
	},
}
    84  
    85  func encode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
    86  	var x starlark.Value
    87  	if err := starlark.UnpackPositionalArgs(b.Name(), args, kwargs, 1, &x); err != nil {
    88  		return nil, err
    89  	}
    90  
    91  	buf := new(bytes.Buffer)
    92  
    93  	var quoteSpace [128]byte
    94  	quote := func(s string) {
    95  		// Non-trivial escaping is handled by Go's encoding/json.
    96  		if isPrintableASCII(s) {
    97  			buf.Write(strconv.AppendQuote(quoteSpace[:0], s))
    98  		} else {
    99  			// TODO(adonovan): opt: RFC 8259 mandates UTF-8 for JSON.
   100  			// Can we avoid this call?
   101  			data, _ := json.Marshal(s)
   102  			buf.Write(data)
   103  		}
   104  	}
   105  
   106  	path := make([]unsafe.Pointer, 0, 8)
   107  
   108  	var emit func(x starlark.Value) error
   109  	emit = func(x starlark.Value) error {
   110  
   111  		// It is only necessary to push/pop the item when it might contain
   112  		// itself (i.e. the last three switch cases), but omitting it in the other
   113  		// cases did not show significant improvement on the benchmarks.
   114  		if ptr := pointer(x); ptr != nil {
   115  			if pathContains(path, ptr) {
   116  				return fmt.Errorf("cycle in JSON structure")
   117  			}
   118  
   119  			path = append(path, ptr)
   120  			defer func() { path = path[0 : len(path)-1] }()
   121  		}
   122  
   123  		switch x := x.(type) {
   124  		case json.Marshaler:
   125  			// Application-defined starlark.Value types
   126  			// may define their own JSON encoding.
   127  			data, err := x.MarshalJSON()
   128  			if err != nil {
   129  				return err
   130  			}
   131  			buf.Write(data)
   132  
   133  		case starlark.NoneType:
   134  			buf.WriteString("null")
   135  
   136  		case starlark.Bool:
   137  			if x {
   138  				buf.WriteString("true")
   139  			} else {
   140  				buf.WriteString("false")
   141  			}
   142  
   143  		case starlark.Int:
   144  			fmt.Fprint(buf, x)
   145  
   146  		case starlark.Float:
   147  			if !isFinite(float64(x)) {
   148  				return fmt.Errorf("cannot encode non-finite float %v", x)
   149  			}
   150  			fmt.Fprintf(buf, "%g", x) // always contains a decimal point
   151  
   152  		case starlark.String:
   153  			quote(string(x))
   154  
   155  		case starlark.IterableMapping:
   156  			// e.g. dict (must have string keys)
   157  			buf.WriteByte('{')
   158  			items := x.Items()
   159  			for _, item := range items {
   160  				if _, ok := item[0].(starlark.String); !ok {
   161  					return fmt.Errorf("%s has %s key, want string", x.Type(), item[0].Type())
   162  				}
   163  			}
   164  			sort.Slice(items, func(i, j int) bool {
   165  				return items[i][0].(starlark.String) < items[j][0].(starlark.String)
   166  			})
   167  			for i, item := range items {
   168  				if i > 0 {
   169  					buf.WriteByte(',')
   170  				}
   171  				k, _ := starlark.AsString(item[0])
   172  				quote(k)
   173  				buf.WriteByte(':')
   174  				if err := emit(item[1]); err != nil {
   175  					return fmt.Errorf("in %s key %s: %v", x.Type(), item[0], err)
   176  				}
   177  			}
   178  			buf.WriteByte('}')
   179  
   180  		case starlark.Iterable:
   181  			// e.g. tuple, list
   182  			buf.WriteByte('[')
   183  			iter := x.Iterate()
   184  			defer iter.Done()
   185  			var elem starlark.Value
   186  			for i := 0; iter.Next(&elem); i++ {
   187  				if i > 0 {
   188  					buf.WriteByte(',')
   189  				}
   190  				if err := emit(elem); err != nil {
   191  					return fmt.Errorf("at %s index %d: %v", x.Type(), i, err)
   192  				}
   193  			}
   194  			buf.WriteByte(']')
   195  
   196  		case starlark.HasAttrs:
   197  			// e.g. struct
   198  			buf.WriteByte('{')
   199  			var names []string
   200  			names = append(names, x.AttrNames()...)
   201  			sort.Strings(names)
   202  			for i, name := range names {
   203  				v, err := x.Attr(name)
   204  				if err != nil {
   205  					return fmt.Errorf("cannot access attribute %s.%s: %w", x.Type(), name, err)
   206  				}
   207  				if v == nil {
   208  					// x.AttrNames() returned name, but x.Attr(name) returned nil, stating
   209  					// that the field doesn't exist.
   210  					return fmt.Errorf("missing attribute %s.%s (despite %q appearing in dir()", x.Type(), name, name)
   211  				}
   212  				if i > 0 {
   213  					buf.WriteByte(',')
   214  				}
   215  				quote(name)
   216  				buf.WriteByte(':')
   217  				if err := emit(v); err != nil {
   218  					return fmt.Errorf("in field .%s: %v", name, err)
   219  				}
   220  			}
   221  			buf.WriteByte('}')
   222  
   223  		default:
   224  			return fmt.Errorf("cannot encode %s as JSON", x.Type())
   225  		}
   226  		return nil
   227  	}
   228  
   229  	if err := emit(x); err != nil {
   230  		return nil, fmt.Errorf("%s: %v", b.Name(), err)
   231  	}
   232  	return starlark.String(buf.String()), nil
   233  }
   234  
   235  func pointer(i interface{}) unsafe.Pointer {
   236  	v := reflect.ValueOf(i)
   237  	switch v.Kind() {
   238  	case reflect.Ptr, reflect.Chan, reflect.Map, reflect.UnsafePointer, reflect.Slice:
   239  		// TODO(adonovan): use v.Pointer() when we drop go1.17.
   240  		return unsafe.Pointer(v.Pointer())
   241  	default:
   242  		return nil
   243  	}
   244  }
   245  
   246  func pathContains(path []unsafe.Pointer, item unsafe.Pointer) bool {
   247  	for _, p := range path {
   248  		if p == item {
   249  			return true
   250  		}
   251  	}
   252  
   253  	return false
   254  }
   255  
   256  // isPrintableASCII reports whether s contains only printable ASCII.
   257  func isPrintableASCII(s string) bool {
   258  	for i := 0; i < len(s); i++ {
   259  		b := s[i]
   260  		if b < 0x20 || b >= 0x80 {
   261  			return false
   262  		}
   263  	}
   264  	return true
   265  }
   266  
   267  // isFinite reports whether f represents a finite rational value.
   268  // It is equivalent to !math.IsNan(f) && !math.IsInf(f, 0).
   269  func isFinite(f float64) bool {
   270  	return math.Abs(f) <= math.MaxFloat64
   271  }
   272  
   273  func indent(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
   274  	prefix, indent := "", "\t" // keyword-only
   275  	if err := starlark.UnpackArgs(b.Name(), nil, kwargs,
   276  		"prefix?", &prefix,
   277  		"indent?", &indent,
   278  	); err != nil {
   279  		return nil, err
   280  	}
   281  	var str string // positional-only
   282  	if err := starlark.UnpackPositionalArgs(b.Name(), args, nil, 1, &str); err != nil {
   283  		return nil, err
   284  	}
   285  
   286  	buf := new(bytes.Buffer)
   287  	if err := json.Indent(buf, []byte(str), prefix, indent); err != nil {
   288  		return nil, fmt.Errorf("%s: %v", b.Name(), err)
   289  	}
   290  	return starlark.String(buf.String()), nil
   291  }
   292  
// decode implements the Starlark built-in json.decode(x[, default]).
//
// x is the JSON text and must be passed positionally. The text is parsed by
// a hand-written recursive-descent parser over the bytes of x:
//   - string literals become starlark.String (literals containing escapes or
//     non-ASCII bytes are unquoted by encoding/json);
//   - null, true and false become None, True and False;
//   - arrays become new lists and objects become new dicts;
//   - numbers become Int unless the literal contains '.', 'e', 'E', '+' or
//     an interior '-', in which case they become Float.
//
// Parse errors are signalled internally by panicking with a private
// "failure" value, which the deferred function below converts into either
// the caller-supplied default (if given) or an error that mentions the byte
// offset. Panics of any other type are re-raised unchanged.
func decode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (v starlark.Value, err error) {
	var s string
	var d starlark.Value // the optional "default"; nil if not provided
	if err := starlark.UnpackArgs(b.Name(), args, kwargs, "x", &s, "default?", &d); err != nil {
		return nil, err
	}
	if len(args) < 1 {
		// "x" parameter is positional only; UnpackArgs does not allow us to
		// directly express "def decode(x, *, default)"
		return nil, fmt.Errorf("%s: unexpected keyword argument x", b.Name())
	}

	// The decoder necessarily makes certain representation choices
	// such as list vs tuple, struct vs dict, int vs float.
	// In principle, we could parameterize it to allow the caller to
	// control the returned types, but there's no compelling need yet.

	// Use panic/recover with a distinguished type (failure) for error handling.
	// If "default" is set, we only want to return it when encountering invalid
	// json - not for any other possible causes of panic.
	// In particular, if we ever extend the json.decode API to take a callback,
	// a distinguished, private failure type prevents the possibility of
	// json.decode with "default" becoming abused as a try-catch mechanism.
	type failure string
	fail := func(format string, args ...interface{}) {
		panic(failure(fmt.Sprintf(format, args...)))
	}

	// i is the offset in s of the next unread byte.
	// It is shared by all the closures below.
	i := 0

	// skipSpace consumes leading spaces, and reports whether there is more input.
	skipSpace := func() bool {
		for ; i < len(s); i++ {
			b := s[i]
			if b != ' ' && b != '\t' && b != '\n' && b != '\r' {
				return true
			}
		}
		return false
	}

	// next consumes leading spaces and returns the first non-space.
	// It panics if at EOF.
	// The returned byte is not consumed: i still refers to it.
	next := func() byte {
		if skipSpace() {
			return s[i]
		}
		fail("unexpected end of file")
		panic("unreachable")
	}

	// parse returns the next JSON value from the input.
	// It consumes leading but not trailing whitespace.
	// It panics on error.
	var parse func() starlark.Value
	parse = func() starlark.Value {
		b := next()
		switch b {
		case '"':
			// string

			// Find end of quotation.
			// Also, record whether trivial unquoting is safe.
			// Non-trivial unquoting is handled by Go's encoding/json.
			//
			// NOTE(review): this scan does not treat raw control characters
			// (bytes < 0x20) as unsafe, so a literal containing, say, an
			// unescaped newline is accepted on the fast path even though
			// strict JSON requires such characters to be escaped -- confirm
			// whether this leniency is intended.
			safe := true
			closed := false
			j := i + 1
			for ; j < len(s); j++ {
				b := s[j]
				if b == '\\' {
					safe = false
					j++ // skip x in \x
				} else if b == '"' {
					closed = true
					j++ // skip '"'
					break
				} else if b >= utf8.RuneSelf {
					safe = false
				}
			}
			if !closed {
				fail("unclosed string literal")
			}

			// r is the literal including both quotation marks.
			r := s[i:j]
			i = j

			// unquote
			if safe {
				r = r[1 : len(r)-1]
			} else if err := json.Unmarshal([]byte(r), &r); err != nil {
				fail("%s", err)
			}
			return starlark.String(r)

		// For the three keyword cases below, a mismatch falls out of the
		// switch to the "unexpected character" failure at the end of parse.
		case 'n':
			if strings.HasPrefix(s[i:], "null") {
				i += len("null")
				return starlark.None
			}

		case 't':
			if strings.HasPrefix(s[i:], "true") {
				i += len("true")
				return starlark.True
			}

		case 'f':
			if strings.HasPrefix(s[i:], "false") {
				i += len("false")
				return starlark.False
			}

		case '[':
			// array
			var elems []starlark.Value

			i++ // '['
			b = next()
			if b != ']' {
				for {
					elem := parse()
					elems = append(elems, elem)
					b = next()
					if b != ',' {
						if b != ']' {
							fail("got %q, want ',' or ']'", b)
						}
						break
					}
					i++ // ','
				}
			}
			i++ // ']'
			return starlark.NewList(elems)

		case '{':
			// object
			dict := new(starlark.Dict)

			i++ // '{'
			b = next()
			if b != '}' {
				for {
					key := parse()
					if _, ok := key.(starlark.String); !ok {
						fail("got %s for object key, want string", key.Type())
					}
					b = next()
					if b != ':' {
						fail("after object key, got %q, want ':' ", b)
					}
					i++ // ':'
					value := parse()
					dict.SetKey(key, value) // can't fail
					b = next()
					if b != ',' {
						if b != '}' {
							fail("in object, got %q, want ',' or '}'", b)
						}
						break
					}
					i++ // ','
				}
			}
			i++ // '}'
			return dict

		default:
			// number?
			if isdigit(b) || b == '-' {
				// scan literal. Allow [0-9+-eE.] for now.
				// The scan is deliberately permissive; the precise syntax
				// is checked by the leading-zero test and the strconv /
				// math/big parsers below.
				float := false
				var j int
				for j = i + 1; j < len(s); j++ {
					b = s[j]
					if isdigit(b) {
						// ok
					} else if b == '.' ||
						b == 'e' ||
						b == 'E' ||
						b == '+' ||
						b == '-' {
						float = true
					} else {
						break
					}
				}
				num := s[i:j]
				i = j

				// Unlike most C-like languages,
				// JSON disallows a leading zero before a digit.
				digits := num
				if num[0] == '-' {
					digits = num[1:]
				}
				if digits == "" || digits[0] == '0' && len(digits) > 1 && isdigit(digits[1]) {
					fail("invalid number: %s", num)
				}

				// parse literal
				// NOTE(review): the float branch accepts whatever
				// strconv.ParseFloat accepts for the scanned characters,
				// which may be more permissive than the JSON number
				// grammar -- confirm.
				if float {
					x, err := strconv.ParseFloat(num, 64)
					if err != nil {
						fail("invalid number: %s", num)
					}
					return starlark.Float(x)
				} else {
					x, ok := new(big.Int).SetString(num, 10)
					if !ok {
						fail("invalid number: %s", num)
					}
					return starlark.MakeBigInt(x)
				}
			}
		}
		fail("unexpected character %q", b)
		panic("unreachable")
	}
	// Convert a failure panic into the function's named results:
	// the default value if one was supplied, otherwise an error.
	defer func() {
		x := recover()
		switch x := x.(type) {
		case failure:
			if d != nil {
				v = d
			} else {
				err = fmt.Errorf("json.decode: at offset %d, %s", i, x)
			}
		case nil:
			// nop
		default:
			panic(x) // unexpected panic
		}
	}()
	v = parse()
	// Anything other than whitespace after the value is an error.
	if skipSpace() {
		fail("unexpected character %q after value", s[i])
	}
	return v, nil
}
   534  
   535  func isdigit(b byte) bool {
   536  	return b >= '0' && b <= '9'
   537  }