github.com/goccy/go-json@v0.10.3-0.20240509105655-5e2ae3f23c1d/internal/decoder/unmarshal_text.go (about) 1 package decoder 2 3 import ( 4 "bytes" 5 "encoding" 6 "fmt" 7 "unicode" 8 "unicode/utf16" 9 "unicode/utf8" 10 "unsafe" 11 12 "github.com/goccy/go-json/internal/errors" 13 "github.com/goccy/go-json/internal/runtime" 14 ) 15 16 type unmarshalTextDecoder struct { 17 typ *runtime.Type 18 structName string 19 fieldName string 20 } 21 22 func newUnmarshalTextDecoder(typ *runtime.Type, structName, fieldName string) *unmarshalTextDecoder { 23 return &unmarshalTextDecoder{ 24 typ: typ, 25 structName: structName, 26 fieldName: fieldName, 27 } 28 } 29 30 func (d *unmarshalTextDecoder) annotateError(cursor int64, err error) { 31 switch e := err.(type) { 32 case *errors.UnmarshalTypeError: 33 e.Struct = d.structName 34 e.Field = d.fieldName 35 case *errors.SyntaxError: 36 e.Offset = cursor 37 } 38 } 39 40 var ( 41 nullbytes = []byte(`null`) 42 ) 43 44 func (d *unmarshalTextDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error { 45 s.skipWhiteSpace() 46 start := s.cursor 47 if err := s.skipValue(depth); err != nil { 48 return err 49 } 50 src := s.buf[start:s.cursor] 51 if len(src) > 0 { 52 switch src[0] { 53 case '[': 54 return &errors.UnmarshalTypeError{ 55 Value: "array", 56 Type: runtime.RType2Type(d.typ), 57 Offset: s.totalOffset(), 58 } 59 case '{': 60 return &errors.UnmarshalTypeError{ 61 Value: "object", 62 Type: runtime.RType2Type(d.typ), 63 Offset: s.totalOffset(), 64 } 65 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 66 return &errors.UnmarshalTypeError{ 67 Value: "number", 68 Type: runtime.RType2Type(d.typ), 69 Offset: s.totalOffset(), 70 } 71 case 'n': 72 if bytes.Equal(src, nullbytes) { 73 *(*unsafe.Pointer)(p) = nil 74 return nil 75 } 76 } 77 } 78 dst := make([]byte, len(src)) 79 copy(dst, src) 80 81 if b, ok := unquoteBytes(dst); ok { 82 dst = b 83 } 84 v := *(*interface{})(unsafe.Pointer(&emptyInterface{ 85 typ: d.typ, 86 ptr: p, 87 })) 88 if err := v.(encoding.TextUnmarshaler).UnmarshalText(dst); err != nil { 89 d.annotateError(s.cursor, err) 90 return err 91 } 92 return nil 93 } 94 95 func (d *unmarshalTextDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { 96 buf := ctx.Buf 97 cursor = skipWhiteSpace(buf, cursor) 98 start := cursor 99 end, err := skipValue(buf, cursor, depth) 100 if err != nil { 101 return 0, err 102 } 103 src := buf[start:end] 104 if len(src) > 0 { 105 switch src[0] { 106 case '[': 107 return 0, &errors.UnmarshalTypeError{ 108 Value: "array", 109 Type: runtime.RType2Type(d.typ), 110 Offset: start, 111 } 112 case '{': 113 return 0, &errors.UnmarshalTypeError{ 114 Value: "object", 115 Type: runtime.RType2Type(d.typ), 116 Offset: start, 117 } 118 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 119 return 0, &errors.UnmarshalTypeError{ 120 Value: "number", 121 Type: runtime.RType2Type(d.typ), 122 Offset: start, 123 } 124 case 'n': 125 if bytes.Equal(src, nullbytes) { 126 *(*unsafe.Pointer)(p) = nil 127 return end, nil 128 } 129 } 130 } 131 132 if s, ok := unquoteBytes(src); ok { 133 src = s 134 } 135 v := *(*interface{})(unsafe.Pointer(&emptyInterface{ 136 typ: d.typ, 137 ptr: *(*unsafe.Pointer)(unsafe.Pointer(&p)), 138 })) 139 if err := v.(encoding.TextUnmarshaler).UnmarshalText(src); err != nil { 140 d.annotateError(cursor, err) 141 return 0, err 142 } 143 return end, nil 144 } 145 146 func (d *unmarshalTextDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) { 147 return nil, 0, fmt.Errorf("json: unmarshal text decoder does not support decode path") 148 } 149 150 func unquoteBytes(s []byte) (t []byte, ok bool) { //nolint: nonamedreturns 151 length := len(s) 152 if length < 2 || s[0] != '"' || s[length-1] != '"' { 153 return 154 } 155 s = s[1 : length-1] 156 length -= 2 157 158 // Check for unusual characters. If there are none, 159 // then no unquoting is needed, so return a slice of the 160 // original bytes. 161 r := 0 162 for r < length { 163 c := s[r] 164 if c == '\\' || c == '"' || c < ' ' { 165 break 166 } 167 if c < utf8.RuneSelf { 168 r++ 169 continue 170 } 171 rr, size := utf8.DecodeRune(s[r:]) 172 if rr == utf8.RuneError && size == 1 { 173 break 174 } 175 r += size 176 } 177 if r == length { 178 return s, true 179 } 180 181 b := make([]byte, length+2*utf8.UTFMax) 182 w := copy(b, s[0:r]) 183 for r < length { 184 // Out of room? Can only happen if s is full of 185 // malformed UTF-8 and we're replacing each 186 // byte with RuneError. 187 if w >= len(b)-2*utf8.UTFMax { 188 nb := make([]byte, (len(b)+utf8.UTFMax)*2) 189 copy(nb, b[0:w]) 190 b = nb 191 } 192 switch c := s[r]; { 193 case c == '\\': 194 r++ 195 if r >= length { 196 return 197 } 198 switch s[r] { 199 default: 200 return 201 case '"', '\\', '/', '\'': 202 b[w] = s[r] 203 r++ 204 w++ 205 case 'b': 206 b[w] = '\b' 207 r++ 208 w++ 209 case 'f': 210 b[w] = '\f' 211 r++ 212 w++ 213 case 'n': 214 b[w] = '\n' 215 r++ 216 w++ 217 case 'r': 218 b[w] = '\r' 219 r++ 220 w++ 221 case 't': 222 b[w] = '\t' 223 r++ 224 w++ 225 case 'u': 226 r-- 227 rr := getu4(s[r:]) 228 if rr < 0 { 229 return 230 } 231 r += 6 232 if utf16.IsSurrogate(rr) { 233 rr1 := getu4(s[r:]) 234 if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar { 235 // A valid pair; consume. 236 r += 6 237 w += utf8.EncodeRune(b[w:], dec) 238 break 239 } 240 // Invalid surrogate; fall back to replacement rune. 241 rr = unicode.ReplacementChar 242 } 243 w += utf8.EncodeRune(b[w:], rr) 244 } 245 246 // Quote, control characters are invalid. 247 case c == '"', c < ' ': 248 return 249 250 // ASCII 251 case c < utf8.RuneSelf: 252 b[w] = c 253 r++ 254 w++ 255 256 // Coerce to well-formed UTF-8. 257 default: 258 rr, size := utf8.DecodeRune(s[r:]) 259 r += size 260 w += utf8.EncodeRune(b[w:], rr) 261 } 262 } 263 return b[0:w], true 264 } 265 266 func getu4(s []byte) rune { 267 if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { 268 return -1 269 } 270 var r rune 271 for _, c := range s[2:6] { 272 switch { 273 case '0' <= c && c <= '9': 274 c = c - '0' 275 case 'a' <= c && c <= 'f': 276 c = c - 'a' + 10 277 case 'A' <= c && c <= 'F': 278 c = c - 'A' + 10 279 default: 280 return -1 281 } 282 r = r*16 + rune(c) 283 } 284 return r 285 }