github.com/3JoB/go-json@v0.10.4/internal/decoder/unmarshal_text.go (about)

     1  package decoder
     2  
     3  import (
     4  	"bytes"
     5  	"encoding"
     6  	"unicode"
     7  	"unicode/utf16"
     8  	"unicode/utf8"
     9  	"unsafe"
    10  
    11  	"github.com/3JoB/go-json/internal/errors"
    12  	"github.com/3JoB/go-json/internal/runtime"
    13  	"github.com/3JoB/go-reflect"
    14  	"github.com/3JoB/unsafeConvert"
    15  )
    16  
    17  type unmarshalTextDecoder struct {
    18  	typ        *runtime.Type
    19  	structName string
    20  	fieldName  string
    21  }
    22  
    23  func newUnmarshalTextDecoder(typ *runtime.Type, structName, fieldName string) *unmarshalTextDecoder {
    24  	return &unmarshalTextDecoder{
    25  		typ:        typ,
    26  		structName: structName,
    27  		fieldName:  fieldName,
    28  	}
    29  }
    30  
    31  func (d *unmarshalTextDecoder) annotateError(cursor int64, err error) {
    32  	switch e := err.(type) {
    33  	case *errors.UnmarshalTypeError:
    34  		e.Struct = d.structName
    35  		e.Field = d.fieldName
    36  	case *errors.SyntaxError:
    37  		e.Offset = cursor
    38  	}
    39  }
    40  
    41  var (
    42  	nullbytes = unsafeConvert.BytesReflect(`null`)
    43  )
    44  
    45  func (d *unmarshalTextDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error {
    46  	s.skipWhiteSpace()
    47  	start := s.cursor
    48  	if err := s.skipValue(depth); err != nil {
    49  		return err
    50  	}
    51  	src := s.buf[start:s.cursor]
    52  	if len(src) > 0 {
    53  		switch src[0] {
    54  		case '[':
    55  			return &errors.UnmarshalTypeError{
    56  				Value:  "array",
    57  				Type:   reflect.ToT(runtime.RType2Type(d.typ)),
    58  				Offset: s.totalOffset(),
    59  			}
    60  		case '{':
    61  			return &errors.UnmarshalTypeError{
    62  				Value:  "object",
    63  				Type:   reflect.ToT(runtime.RType2Type(d.typ)),
    64  				Offset: s.totalOffset(),
    65  			}
    66  		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
    67  			return &errors.UnmarshalTypeError{
    68  				Value:  "number",
    69  				Type:   reflect.ToT(runtime.RType2Type(d.typ)),
    70  				Offset: s.totalOffset(),
    71  			}
    72  		case 'n':
    73  			if bytes.Equal(src, nullbytes) {
    74  				*(*unsafe.Pointer)(p) = nil
    75  				return nil
    76  			}
    77  		}
    78  	}
    79  	dst := make([]byte, len(src))
    80  	copy(dst, src)
    81  
    82  	if b, ok := unquoteBytes(dst); ok {
    83  		dst = b
    84  	}
    85  	v := *(*any)(unsafe.Pointer(&emptyInterface{
    86  		typ: d.typ,
    87  		ptr: p,
    88  	}))
    89  	if err := v.(encoding.TextUnmarshaler).UnmarshalText(dst); err != nil {
    90  		d.annotateError(s.cursor, err)
    91  		return err
    92  	}
    93  	return nil
    94  }
    95  
    96  func (d *unmarshalTextDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) {
    97  	buf := ctx.Buf
    98  	cursor = skipWhiteSpace(buf, cursor)
    99  	start := cursor
   100  	end, err := skipValue(buf, cursor, depth)
   101  	if err != nil {
   102  		return 0, err
   103  	}
   104  	src := buf[start:end]
   105  	if len(src) > 0 {
   106  		switch src[0] {
   107  		case '[':
   108  			return 0, &errors.UnmarshalTypeError{
   109  				Value:  "array",
   110  				Type:   reflect.ToT(runtime.RType2Type(d.typ)),
   111  				Offset: start,
   112  			}
   113  		case '{':
   114  			return 0, &errors.UnmarshalTypeError{
   115  				Value:  "object",
   116  				Type:   reflect.ToT(runtime.RType2Type(d.typ)),
   117  				Offset: start,
   118  			}
   119  		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   120  			return 0, &errors.UnmarshalTypeError{
   121  				Value:  "number",
   122  				Type:   reflect.ToT(runtime.RType2Type(d.typ)),
   123  				Offset: start,
   124  			}
   125  		case 'n':
   126  			if bytes.Equal(src, nullbytes) {
   127  				*(*unsafe.Pointer)(p) = nil
   128  				return end, nil
   129  			}
   130  		}
   131  	}
   132  
   133  	if s, ok := unquoteBytes(src); ok {
   134  		src = s
   135  	}
   136  	v := *(*any)(unsafe.Pointer(&emptyInterface{
   137  		typ: d.typ,
   138  		ptr: *(*unsafe.Pointer)(unsafe.Pointer(&p)),
   139  	}))
   140  	if err := v.(encoding.TextUnmarshaler).UnmarshalText(src); err != nil {
   141  		d.annotateError(cursor, err)
   142  		return 0, err
   143  	}
   144  	return end, nil
   145  }
   146  
   147  func (d *unmarshalTextDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) {
   148  	return nil, 0, errors.New("json: unmarshal text decoder does not support decode path")
   149  }
   150  
   151  func unquoteBytes(s []byte) (t []byte, ok bool) {
   152  	length := len(s)
   153  	if length < 2 || s[0] != '"' || s[length-1] != '"' {
   154  		return
   155  	}
   156  	s = s[1 : length-1]
   157  	length -= 2
   158  
   159  	// Check for unusual characters. If there are none,
   160  	// then no unquoting is needed, so return a slice of the
   161  	// original bytes.
   162  	r := 0
   163  	for r < length {
   164  		c := s[r]
   165  		if c == '\\' || c == '"' || c < ' ' {
   166  			break
   167  		}
   168  		if c < utf8.RuneSelf {
   169  			r++
   170  			continue
   171  		}
   172  		rr, size := utf8.DecodeRune(s[r:])
   173  		if rr == utf8.RuneError && size == 1 {
   174  			break
   175  		}
   176  		r += size
   177  	}
   178  	if r == length {
   179  		return s, true
   180  	}
   181  
   182  	b := make([]byte, length+2*utf8.UTFMax)
   183  	w := copy(b, s[0:r])
   184  	for r < length {
   185  		// Out of room? Can only happen if s is full of
   186  		// malformed UTF-8 and we're replacing each
   187  		// byte with RuneError.
   188  		if w >= len(b)-2*utf8.UTFMax {
   189  			nb := make([]byte, (len(b)+utf8.UTFMax)*2)
   190  			copy(nb, b[0:w])
   191  			b = nb
   192  		}
   193  		switch c := s[r]; {
   194  		case c == '\\':
   195  			r++
   196  			if r >= length {
   197  				return
   198  			}
   199  			switch s[r] {
   200  			default:
   201  				return
   202  			case '"', '\\', '/', '\'':
   203  				b[w] = s[r]
   204  				r++
   205  				w++
   206  			case 'b':
   207  				b[w] = '\b'
   208  				r++
   209  				w++
   210  			case 'f':
   211  				b[w] = '\f'
   212  				r++
   213  				w++
   214  			case 'n':
   215  				b[w] = '\n'
   216  				r++
   217  				w++
   218  			case 'r':
   219  				b[w] = '\r'
   220  				r++
   221  				w++
   222  			case 't':
   223  				b[w] = '\t'
   224  				r++
   225  				w++
   226  			case 'u':
   227  				r--
   228  				rr := getu4(s[r:])
   229  				if rr < 0 {
   230  					return
   231  				}
   232  				r += 6
   233  				if utf16.IsSurrogate(rr) {
   234  					rr1 := getu4(s[r:])
   235  					if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
   236  						// A valid pair; consume.
   237  						r += 6
   238  						w += utf8.EncodeRune(b[w:], dec)
   239  						break
   240  					}
   241  					// Invalid surrogate; fall back to replacement rune.
   242  					rr = unicode.ReplacementChar
   243  				}
   244  				w += utf8.EncodeRune(b[w:], rr)
   245  			}
   246  
   247  		// Quote, control characters are invalid.
   248  		case c == '"', c < ' ':
   249  			return
   250  
   251  		// ASCII
   252  		case c < utf8.RuneSelf:
   253  			b[w] = c
   254  			r++
   255  			w++
   256  
   257  		// Coerce to well-formed UTF-8.
   258  		default:
   259  			rr, size := utf8.DecodeRune(s[r:])
   260  			r += size
   261  			w += utf8.EncodeRune(b[w:], rr)
   262  		}
   263  	}
   264  	return b[0:w], true
   265  }
   266  
   267  func getu4(s []byte) rune {
   268  	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
   269  		return -1
   270  	}
   271  	var r rune
   272  	for _, c := range s[2:6] {
   273  		switch {
   274  		case c >= '0' && c <= '9':
   275  			c = c - '0'
   276  		case c >= 'a' && c <= 'f':
   277  			c = c - 'a' + 10
   278  		case c >= 'A' && c <= 'F':
   279  			c = c - 'A' + 10
   280  		default:
   281  			return -1
   282  		}
   283  		r = r*16 + rune(c)
   284  	}
   285  	return r
   286  }