github.com/3JoB/go-json@v0.10.4/internal/decoder/unmarshal_text.go (about) 1 package decoder 2 3 import ( 4 "bytes" 5 "encoding" 6 "unicode" 7 "unicode/utf16" 8 "unicode/utf8" 9 "unsafe" 10 11 "github.com/3JoB/go-json/internal/errors" 12 "github.com/3JoB/go-json/internal/runtime" 13 "github.com/3JoB/go-reflect" 14 "github.com/3JoB/unsafeConvert" 15 ) 16 17 type unmarshalTextDecoder struct { 18 typ *runtime.Type 19 structName string 20 fieldName string 21 } 22 23 func newUnmarshalTextDecoder(typ *runtime.Type, structName, fieldName string) *unmarshalTextDecoder { 24 return &unmarshalTextDecoder{ 25 typ: typ, 26 structName: structName, 27 fieldName: fieldName, 28 } 29 } 30 31 func (d *unmarshalTextDecoder) annotateError(cursor int64, err error) { 32 switch e := err.(type) { 33 case *errors.UnmarshalTypeError: 34 e.Struct = d.structName 35 e.Field = d.fieldName 36 case *errors.SyntaxError: 37 e.Offset = cursor 38 } 39 } 40 41 var ( 42 nullbytes = unsafeConvert.BytesReflect(`null`) 43 ) 44 45 func (d *unmarshalTextDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error { 46 s.skipWhiteSpace() 47 start := s.cursor 48 if err := s.skipValue(depth); err != nil { 49 return err 50 } 51 src := s.buf[start:s.cursor] 52 if len(src) > 0 { 53 switch src[0] { 54 case '[': 55 return &errors.UnmarshalTypeError{ 56 Value: "array", 57 Type: reflect.ToT(runtime.RType2Type(d.typ)), 58 Offset: s.totalOffset(), 59 } 60 case '{': 61 return &errors.UnmarshalTypeError{ 62 Value: "object", 63 Type: reflect.ToT(runtime.RType2Type(d.typ)), 64 Offset: s.totalOffset(), 65 } 66 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 67 return &errors.UnmarshalTypeError{ 68 Value: "number", 69 Type: reflect.ToT(runtime.RType2Type(d.typ)), 70 Offset: s.totalOffset(), 71 } 72 case 'n': 73 if bytes.Equal(src, nullbytes) { 74 *(*unsafe.Pointer)(p) = nil 75 return nil 76 } 77 } 78 } 79 dst := make([]byte, len(src)) 80 copy(dst, src) 81 82 if b, ok := unquoteBytes(dst); ok { 83 dst = b 84 } 85 v := *(*any)(unsafe.Pointer(&emptyInterface{ 86 typ: d.typ, 87 ptr: p, 88 })) 89 if err := v.(encoding.TextUnmarshaler).UnmarshalText(dst); err != nil { 90 d.annotateError(s.cursor, err) 91 return err 92 } 93 return nil 94 } 95 96 func (d *unmarshalTextDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { 97 buf := ctx.Buf 98 cursor = skipWhiteSpace(buf, cursor) 99 start := cursor 100 end, err := skipValue(buf, cursor, depth) 101 if err != nil { 102 return 0, err 103 } 104 src := buf[start:end] 105 if len(src) > 0 { 106 switch src[0] { 107 case '[': 108 return 0, &errors.UnmarshalTypeError{ 109 Value: "array", 110 Type: reflect.ToT(runtime.RType2Type(d.typ)), 111 Offset: start, 112 } 113 case '{': 114 return 0, &errors.UnmarshalTypeError{ 115 Value: "object", 116 Type: reflect.ToT(runtime.RType2Type(d.typ)), 117 Offset: start, 118 } 119 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 120 return 0, &errors.UnmarshalTypeError{ 121 Value: "number", 122 Type: reflect.ToT(runtime.RType2Type(d.typ)), 123 Offset: start, 124 } 125 case 'n': 126 if bytes.Equal(src, nullbytes) { 127 *(*unsafe.Pointer)(p) = nil 128 return end, nil 129 } 130 } 131 } 132 133 if s, ok := unquoteBytes(src); ok { 134 src = s 135 } 136 v := *(*any)(unsafe.Pointer(&emptyInterface{ 137 typ: d.typ, 138 ptr: *(*unsafe.Pointer)(unsafe.Pointer(&p)), 139 })) 140 if err := v.(encoding.TextUnmarshaler).UnmarshalText(src); err != nil { 141 d.annotateError(cursor, err) 142 return 0, err 143 } 144 return end, nil 145 } 146 147 func (d *unmarshalTextDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) { 148 return nil, 0, errors.New("json: unmarshal text decoder does not support decode path") 149 } 150 151 func unquoteBytes(s []byte) (t []byte, ok bool) { 152 length := len(s) 153 if length < 2 || s[0] != '"' || s[length-1] != '"' { 154 return 155 } 156 s = s[1 : length-1] 157 length -= 2 158 159 // Check for unusual characters. If there are none, 160 // then no unquoting is needed, so return a slice of the 161 // original bytes. 162 r := 0 163 for r < length { 164 c := s[r] 165 if c == '\\' || c == '"' || c < ' ' { 166 break 167 } 168 if c < utf8.RuneSelf { 169 r++ 170 continue 171 } 172 rr, size := utf8.DecodeRune(s[r:]) 173 if rr == utf8.RuneError && size == 1 { 174 break 175 } 176 r += size 177 } 178 if r == length { 179 return s, true 180 } 181 182 b := make([]byte, length+2*utf8.UTFMax) 183 w := copy(b, s[0:r]) 184 for r < length { 185 // Out of room? Can only happen if s is full of 186 // malformed UTF-8 and we're replacing each 187 // byte with RuneError. 188 if w >= len(b)-2*utf8.UTFMax { 189 nb := make([]byte, (len(b)+utf8.UTFMax)*2) 190 copy(nb, b[0:w]) 191 b = nb 192 } 193 switch c := s[r]; { 194 case c == '\\': 195 r++ 196 if r >= length { 197 return 198 } 199 switch s[r] { 200 default: 201 return 202 case '"', '\\', '/', '\'': 203 b[w] = s[r] 204 r++ 205 w++ 206 case 'b': 207 b[w] = '\b' 208 r++ 209 w++ 210 case 'f': 211 b[w] = '\f' 212 r++ 213 w++ 214 case 'n': 215 b[w] = '\n' 216 r++ 217 w++ 218 case 'r': 219 b[w] = '\r' 220 r++ 221 w++ 222 case 't': 223 b[w] = '\t' 224 r++ 225 w++ 226 case 'u': 227 r-- 228 rr := getu4(s[r:]) 229 if rr < 0 { 230 return 231 } 232 r += 6 233 if utf16.IsSurrogate(rr) { 234 rr1 := getu4(s[r:]) 235 if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar { 236 // A valid pair; consume. 237 r += 6 238 w += utf8.EncodeRune(b[w:], dec) 239 break 240 } 241 // Invalid surrogate; fall back to replacement rune. 242 rr = unicode.ReplacementChar 243 } 244 w += utf8.EncodeRune(b[w:], rr) 245 } 246 247 // Quote, control characters are invalid. 248 case c == '"', c < ' ': 249 return 250 251 // ASCII 252 case c < utf8.RuneSelf: 253 b[w] = c 254 r++ 255 w++ 256 257 // Coerce to well-formed UTF-8. 258 default: 259 rr, size := utf8.DecodeRune(s[r:]) 260 r += size 261 w += utf8.EncodeRune(b[w:], rr) 262 } 263 } 264 return b[0:w], true 265 } 266 267 func getu4(s []byte) rune { 268 if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { 269 return -1 270 } 271 var r rune 272 for _, c := range s[2:6] { 273 switch { 274 case c >= '0' && c <= '9': 275 c = c - '0' 276 case c >= 'a' && c <= 'f': 277 c = c - 'a' + 10 278 case c >= 'A' && c <= 'F': 279 c = c - 'A' + 10 280 default: 281 return -1 282 } 283 r = r*16 + rune(c) 284 } 285 return r 286 }