github.com/night-codes/go-json@v0.9.15/internal/decoder/unmarshal_text.go (about)

     1  package decoder
     2  
     3  import (
     4  	"bytes"
     5  	"encoding"
     6  	"unicode"
     7  	"unicode/utf16"
     8  	"unicode/utf8"
     9  	"unsafe"
    10  
    11  	"github.com/night-codes/go-json/internal/errors"
    12  	"github.com/night-codes/go-json/internal/runtime"
    13  )
    14  
    15  type unmarshalTextDecoder struct {
    16  	typ        *runtime.Type
    17  	structName string
    18  	fieldName  string
    19  }
    20  
    21  func newUnmarshalTextDecoder(typ *runtime.Type, structName, fieldName string) *unmarshalTextDecoder {
    22  	return &unmarshalTextDecoder{
    23  		typ:        typ,
    24  		structName: structName,
    25  		fieldName:  fieldName,
    26  	}
    27  }
    28  
    29  func (d *unmarshalTextDecoder) annotateError(cursor int64, err error) {
    30  	switch e := err.(type) {
    31  	case *errors.UnmarshalTypeError:
    32  		e.Struct = d.structName
    33  		e.Field = d.fieldName
    34  	case *errors.SyntaxError:
    35  		e.Offset = cursor
    36  	}
    37  }
    38  
    39  var (
    40  	nullbytes = []byte(`null`)
    41  )
    42  
    43  func (d *unmarshalTextDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error {
    44  	s.skipWhiteSpace()
    45  	start := s.cursor
    46  	if err := s.skipValue(depth); err != nil {
    47  		return err
    48  	}
    49  	src := s.buf[start:s.cursor]
    50  	if len(src) > 0 {
    51  		switch src[0] {
    52  		case '[':
    53  			return &errors.UnmarshalTypeError{
    54  				Value:  "array",
    55  				Type:   runtime.RType2Type(d.typ),
    56  				Offset: s.totalOffset(),
    57  			}
    58  		case '{':
    59  			return &errors.UnmarshalTypeError{
    60  				Value:  "object",
    61  				Type:   runtime.RType2Type(d.typ),
    62  				Offset: s.totalOffset(),
    63  			}
    64  		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
    65  			return &errors.UnmarshalTypeError{
    66  				Value:  "number",
    67  				Type:   runtime.RType2Type(d.typ),
    68  				Offset: s.totalOffset(),
    69  			}
    70  		case 'n':
    71  			if bytes.Equal(src, nullbytes) {
    72  				*(*unsafe.Pointer)(p) = nil
    73  				return nil
    74  			}
    75  		}
    76  	}
    77  	dst := make([]byte, len(src))
    78  	copy(dst, src)
    79  
    80  	if b, ok := unquoteBytes(dst); ok {
    81  		dst = b
    82  	}
    83  	v := *(*interface{})(unsafe.Pointer(&emptyInterface{
    84  		typ: d.typ,
    85  		ptr: p,
    86  	}))
    87  	if err := v.(encoding.TextUnmarshaler).UnmarshalText(dst); err != nil {
    88  		d.annotateError(s.cursor, err)
    89  		return err
    90  	}
    91  	return nil
    92  }
    93  
    94  func (d *unmarshalTextDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) {
    95  	buf := ctx.Buf
    96  	cursor = skipWhiteSpace(buf, cursor)
    97  	start := cursor
    98  	end, err := skipValue(buf, cursor, depth)
    99  	if err != nil {
   100  		return 0, err
   101  	}
   102  	src := buf[start:end]
   103  	if len(src) > 0 {
   104  		switch src[0] {
   105  		case '[':
   106  			return 0, &errors.UnmarshalTypeError{
   107  				Value:  "array",
   108  				Type:   runtime.RType2Type(d.typ),
   109  				Offset: start,
   110  			}
   111  		case '{':
   112  			return 0, &errors.UnmarshalTypeError{
   113  				Value:  "object",
   114  				Type:   runtime.RType2Type(d.typ),
   115  				Offset: start,
   116  			}
   117  		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   118  			return 0, &errors.UnmarshalTypeError{
   119  				Value:  "number",
   120  				Type:   runtime.RType2Type(d.typ),
   121  				Offset: start,
   122  			}
   123  		case 'n':
   124  			if bytes.Equal(src, nullbytes) {
   125  				*(*unsafe.Pointer)(p) = nil
   126  				return end, nil
   127  			}
   128  		}
   129  	}
   130  
   131  	if s, ok := unquoteBytes(src); ok {
   132  		src = s
   133  	}
   134  	v := *(*interface{})(unsafe.Pointer(&emptyInterface{
   135  		typ: d.typ,
   136  		ptr: *(*unsafe.Pointer)(unsafe.Pointer(&p)),
   137  	}))
   138  	if err := v.(encoding.TextUnmarshaler).UnmarshalText(src); err != nil {
   139  		d.annotateError(cursor, err)
   140  		return 0, err
   141  	}
   142  	return end, nil
   143  }
   144  
   145  func unquoteBytes(s []byte) (t []byte, ok bool) {
   146  	length := len(s)
   147  	if length < 2 || s[0] != '"' || s[length-1] != '"' {
   148  		return
   149  	}
   150  	s = s[1 : length-1]
   151  	length -= 2
   152  
   153  	// Check for unusual characters. If there are none,
   154  	// then no unquoting is needed, so return a slice of the
   155  	// original bytes.
   156  	r := 0
   157  	for r < length {
   158  		c := s[r]
   159  		if c == '\\' || c == '"' || c < ' ' {
   160  			break
   161  		}
   162  		if c < utf8.RuneSelf {
   163  			r++
   164  			continue
   165  		}
   166  		rr, size := utf8.DecodeRune(s[r:])
   167  		if rr == utf8.RuneError && size == 1 {
   168  			break
   169  		}
   170  		r += size
   171  	}
   172  	if r == length {
   173  		return s, true
   174  	}
   175  
   176  	b := make([]byte, length+2*utf8.UTFMax)
   177  	w := copy(b, s[0:r])
   178  	for r < length {
   179  		// Out of room? Can only happen if s is full of
   180  		// malformed UTF-8 and we're replacing each
   181  		// byte with RuneError.
   182  		if w >= len(b)-2*utf8.UTFMax {
   183  			nb := make([]byte, (len(b)+utf8.UTFMax)*2)
   184  			copy(nb, b[0:w])
   185  			b = nb
   186  		}
   187  		switch c := s[r]; {
   188  		case c == '\\':
   189  			r++
   190  			if r >= length {
   191  				return
   192  			}
   193  			switch s[r] {
   194  			default:
   195  				return
   196  			case '"', '\\', '/', '\'':
   197  				b[w] = s[r]
   198  				r++
   199  				w++
   200  			case 'b':
   201  				b[w] = '\b'
   202  				r++
   203  				w++
   204  			case 'f':
   205  				b[w] = '\f'
   206  				r++
   207  				w++
   208  			case 'n':
   209  				b[w] = '\n'
   210  				r++
   211  				w++
   212  			case 'r':
   213  				b[w] = '\r'
   214  				r++
   215  				w++
   216  			case 't':
   217  				b[w] = '\t'
   218  				r++
   219  				w++
   220  			case 'u':
   221  				r--
   222  				rr := getu4(s[r:])
   223  				if rr < 0 {
   224  					return
   225  				}
   226  				r += 6
   227  				if utf16.IsSurrogate(rr) {
   228  					rr1 := getu4(s[r:])
   229  					if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
   230  						// A valid pair; consume.
   231  						r += 6
   232  						w += utf8.EncodeRune(b[w:], dec)
   233  						break
   234  					}
   235  					// Invalid surrogate; fall back to replacement rune.
   236  					rr = unicode.ReplacementChar
   237  				}
   238  				w += utf8.EncodeRune(b[w:], rr)
   239  			}
   240  
   241  		// Quote, control characters are invalid.
   242  		case c == '"', c < ' ':
   243  			return
   244  
   245  		// ASCII
   246  		case c < utf8.RuneSelf:
   247  			b[w] = c
   248  			r++
   249  			w++
   250  
   251  		// Coerce to well-formed UTF-8.
   252  		default:
   253  			rr, size := utf8.DecodeRune(s[r:])
   254  			r += size
   255  			w += utf8.EncodeRune(b[w:], rr)
   256  		}
   257  	}
   258  	return b[0:w], true
   259  }
   260  
   261  func getu4(s []byte) rune {
   262  	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
   263  		return -1
   264  	}
   265  	var r rune
   266  	for _, c := range s[2:6] {
   267  		switch {
   268  		case '0' <= c && c <= '9':
   269  			c = c - '0'
   270  		case 'a' <= c && c <= 'f':
   271  			c = c - 'a' + 10
   272  		case 'A' <= c && c <= 'F':
   273  			c = c - 'A' + 10
   274  		default:
   275  			return -1
   276  		}
   277  		r = r*16 + rune(c)
   278  	}
   279  	return r
   280  }