github.com/goccy/go-json@v0.10.3-0.20240509105655-5e2ae3f23c1d/internal/decoder/unmarshal_text.go (about)

     1  package decoder
     2  
     3  import (
     4  	"bytes"
     5  	"encoding"
     6  	"fmt"
     7  	"unicode"
     8  	"unicode/utf16"
     9  	"unicode/utf8"
    10  	"unsafe"
    11  
    12  	"github.com/goccy/go-json/internal/errors"
    13  	"github.com/goccy/go-json/internal/runtime"
    14  )
    15  
// unmarshalTextDecoder decodes a JSON value by handing its text to the
// destination's encoding.TextUnmarshaler implementation.
type unmarshalTextDecoder struct {
	typ        *runtime.Type // destination type; paired with the target pointer to build the interface value
	structName string        // copied into UnmarshalTypeError.Struct by annotateError
	fieldName  string        // copied into UnmarshalTypeError.Field by annotateError
}
    21  
    22  func newUnmarshalTextDecoder(typ *runtime.Type, structName, fieldName string) *unmarshalTextDecoder {
    23  	return &unmarshalTextDecoder{
    24  		typ:        typ,
    25  		structName: structName,
    26  		fieldName:  fieldName,
    27  	}
    28  }
    29  
    30  func (d *unmarshalTextDecoder) annotateError(cursor int64, err error) {
    31  	switch e := err.(type) {
    32  	case *errors.UnmarshalTypeError:
    33  		e.Struct = d.structName
    34  		e.Field = d.fieldName
    35  	case *errors.SyntaxError:
    36  		e.Offset = cursor
    37  	}
    38  }
    39  
    40  var (
    41  	nullbytes = []byte(`null`)
    42  )
    43  
    44  func (d *unmarshalTextDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error {
    45  	s.skipWhiteSpace()
    46  	start := s.cursor
    47  	if err := s.skipValue(depth); err != nil {
    48  		return err
    49  	}
    50  	src := s.buf[start:s.cursor]
    51  	if len(src) > 0 {
    52  		switch src[0] {
    53  		case '[':
    54  			return &errors.UnmarshalTypeError{
    55  				Value:  "array",
    56  				Type:   runtime.RType2Type(d.typ),
    57  				Offset: s.totalOffset(),
    58  			}
    59  		case '{':
    60  			return &errors.UnmarshalTypeError{
    61  				Value:  "object",
    62  				Type:   runtime.RType2Type(d.typ),
    63  				Offset: s.totalOffset(),
    64  			}
    65  		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
    66  			return &errors.UnmarshalTypeError{
    67  				Value:  "number",
    68  				Type:   runtime.RType2Type(d.typ),
    69  				Offset: s.totalOffset(),
    70  			}
    71  		case 'n':
    72  			if bytes.Equal(src, nullbytes) {
    73  				*(*unsafe.Pointer)(p) = nil
    74  				return nil
    75  			}
    76  		}
    77  	}
    78  	dst := make([]byte, len(src))
    79  	copy(dst, src)
    80  
    81  	if b, ok := unquoteBytes(dst); ok {
    82  		dst = b
    83  	}
    84  	v := *(*interface{})(unsafe.Pointer(&emptyInterface{
    85  		typ: d.typ,
    86  		ptr: p,
    87  	}))
    88  	if err := v.(encoding.TextUnmarshaler).UnmarshalText(dst); err != nil {
    89  		d.annotateError(s.cursor, err)
    90  		return err
    91  	}
    92  	return nil
    93  }
    94  
    95  func (d *unmarshalTextDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) {
    96  	buf := ctx.Buf
    97  	cursor = skipWhiteSpace(buf, cursor)
    98  	start := cursor
    99  	end, err := skipValue(buf, cursor, depth)
   100  	if err != nil {
   101  		return 0, err
   102  	}
   103  	src := buf[start:end]
   104  	if len(src) > 0 {
   105  		switch src[0] {
   106  		case '[':
   107  			return 0, &errors.UnmarshalTypeError{
   108  				Value:  "array",
   109  				Type:   runtime.RType2Type(d.typ),
   110  				Offset: start,
   111  			}
   112  		case '{':
   113  			return 0, &errors.UnmarshalTypeError{
   114  				Value:  "object",
   115  				Type:   runtime.RType2Type(d.typ),
   116  				Offset: start,
   117  			}
   118  		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   119  			return 0, &errors.UnmarshalTypeError{
   120  				Value:  "number",
   121  				Type:   runtime.RType2Type(d.typ),
   122  				Offset: start,
   123  			}
   124  		case 'n':
   125  			if bytes.Equal(src, nullbytes) {
   126  				*(*unsafe.Pointer)(p) = nil
   127  				return end, nil
   128  			}
   129  		}
   130  	}
   131  
   132  	if s, ok := unquoteBytes(src); ok {
   133  		src = s
   134  	}
   135  	v := *(*interface{})(unsafe.Pointer(&emptyInterface{
   136  		typ: d.typ,
   137  		ptr: *(*unsafe.Pointer)(unsafe.Pointer(&p)),
   138  	}))
   139  	if err := v.(encoding.TextUnmarshaler).UnmarshalText(src); err != nil {
   140  		d.annotateError(cursor, err)
   141  		return 0, err
   142  	}
   143  	return end, nil
   144  }
   145  
   146  func (d *unmarshalTextDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) {
   147  	return nil, 0, fmt.Errorf("json: unmarshal text decoder does not support decode path")
   148  }
   149  
   150  func unquoteBytes(s []byte) (t []byte, ok bool) { //nolint: nonamedreturns
   151  	length := len(s)
   152  	if length < 2 || s[0] != '"' || s[length-1] != '"' {
   153  		return
   154  	}
   155  	s = s[1 : length-1]
   156  	length -= 2
   157  
   158  	// Check for unusual characters. If there are none,
   159  	// then no unquoting is needed, so return a slice of the
   160  	// original bytes.
   161  	r := 0
   162  	for r < length {
   163  		c := s[r]
   164  		if c == '\\' || c == '"' || c < ' ' {
   165  			break
   166  		}
   167  		if c < utf8.RuneSelf {
   168  			r++
   169  			continue
   170  		}
   171  		rr, size := utf8.DecodeRune(s[r:])
   172  		if rr == utf8.RuneError && size == 1 {
   173  			break
   174  		}
   175  		r += size
   176  	}
   177  	if r == length {
   178  		return s, true
   179  	}
   180  
   181  	b := make([]byte, length+2*utf8.UTFMax)
   182  	w := copy(b, s[0:r])
   183  	for r < length {
   184  		// Out of room? Can only happen if s is full of
   185  		// malformed UTF-8 and we're replacing each
   186  		// byte with RuneError.
   187  		if w >= len(b)-2*utf8.UTFMax {
   188  			nb := make([]byte, (len(b)+utf8.UTFMax)*2)
   189  			copy(nb, b[0:w])
   190  			b = nb
   191  		}
   192  		switch c := s[r]; {
   193  		case c == '\\':
   194  			r++
   195  			if r >= length {
   196  				return
   197  			}
   198  			switch s[r] {
   199  			default:
   200  				return
   201  			case '"', '\\', '/', '\'':
   202  				b[w] = s[r]
   203  				r++
   204  				w++
   205  			case 'b':
   206  				b[w] = '\b'
   207  				r++
   208  				w++
   209  			case 'f':
   210  				b[w] = '\f'
   211  				r++
   212  				w++
   213  			case 'n':
   214  				b[w] = '\n'
   215  				r++
   216  				w++
   217  			case 'r':
   218  				b[w] = '\r'
   219  				r++
   220  				w++
   221  			case 't':
   222  				b[w] = '\t'
   223  				r++
   224  				w++
   225  			case 'u':
   226  				r--
   227  				rr := getu4(s[r:])
   228  				if rr < 0 {
   229  					return
   230  				}
   231  				r += 6
   232  				if utf16.IsSurrogate(rr) {
   233  					rr1 := getu4(s[r:])
   234  					if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
   235  						// A valid pair; consume.
   236  						r += 6
   237  						w += utf8.EncodeRune(b[w:], dec)
   238  						break
   239  					}
   240  					// Invalid surrogate; fall back to replacement rune.
   241  					rr = unicode.ReplacementChar
   242  				}
   243  				w += utf8.EncodeRune(b[w:], rr)
   244  			}
   245  
   246  		// Quote, control characters are invalid.
   247  		case c == '"', c < ' ':
   248  			return
   249  
   250  		// ASCII
   251  		case c < utf8.RuneSelf:
   252  			b[w] = c
   253  			r++
   254  			w++
   255  
   256  		// Coerce to well-formed UTF-8.
   257  		default:
   258  			rr, size := utf8.DecodeRune(s[r:])
   259  			r += size
   260  			w += utf8.EncodeRune(b[w:], rr)
   261  		}
   262  	}
   263  	return b[0:w], true
   264  }
   265  
   266  func getu4(s []byte) rune {
   267  	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
   268  		return -1
   269  	}
   270  	var r rune
   271  	for _, c := range s[2:6] {
   272  		switch {
   273  		case '0' <= c && c <= '9':
   274  			c = c - '0'
   275  		case 'a' <= c && c <= 'f':
   276  			c = c - 'a' + 10
   277  		case 'A' <= c && c <= 'F':
   278  			c = c - 'A' + 10
   279  		default:
   280  			return -1
   281  		}
   282  		r = r*16 + rune(c)
   283  	}
   284  	return r
   285  }