github.com/trim21/go-phpserialize@v0.0.22-0.20240301204449-2fca0319b3f0/internal/decoder/unmarshal_php.go (about) 1 package decoder 2 3 import ( 4 "bytes" 5 "unicode" 6 "unicode/utf16" 7 "unicode/utf8" 8 "unsafe" 9 10 "github.com/trim21/go-phpserialize/internal/errors" 11 "github.com/trim21/go-phpserialize/internal/runtime" 12 ) 13 14 type Unmarshaler interface { 15 UnmarshalPHP([]byte) error 16 } 17 18 type unmarshalPHPDecoder struct { 19 typ *runtime.Type 20 structName string 21 fieldName string 22 } 23 24 func newUnmarshalTextDecoder(typ *runtime.Type, structName, fieldName string) *unmarshalPHPDecoder { 25 return &unmarshalPHPDecoder{ 26 typ: typ, 27 structName: structName, 28 fieldName: fieldName, 29 } 30 } 31 32 func (d *unmarshalPHPDecoder) annotateError(cursor int64, err error) { 33 switch e := err.(type) { 34 case *errors.UnmarshalTypeError: 35 e.Struct = d.structName 36 e.Field = d.fieldName 37 case *errors.SyntaxError: 38 e.Offset = cursor 39 } 40 } 41 42 var ( 43 nullbytes = []byte(`N;`) 44 ) 45 46 func (d *unmarshalPHPDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { 47 buf := ctx.Buf 48 start := cursor 49 end, err := skipValue(buf, cursor, depth) 50 if err != nil { 51 return 0, err 52 } 53 src := buf[start:end] 54 if len(src) > 0 { 55 switch src[0] { 56 case '[': 57 return 0, &errors.UnmarshalTypeError{ 58 Value: "array", 59 Type: runtime.RType2Type(d.typ), 60 Offset: start, 61 } 62 case '{': 63 return 0, &errors.UnmarshalTypeError{ 64 Value: "object", 65 Type: runtime.RType2Type(d.typ), 66 Offset: start, 67 } 68 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 69 return 0, &errors.UnmarshalTypeError{ 70 Value: "number", 71 Type: runtime.RType2Type(d.typ), 72 Offset: start, 73 } 74 case 'N': 75 if bytes.Equal(src, nullbytes) { 76 *(*unsafe.Pointer)(p) = nil 77 return end, nil 78 } 79 } 80 } 81 82 if s, ok := unquoteBytes(src); ok { 83 src = s 84 } 85 v := *(*any)(unsafe.Pointer(&emptyInterface{ 86 typ: d.typ, 87 ptr: *(*unsafe.Pointer)(unsafe.Pointer(&p)), 88 })) 89 if err := v.(Unmarshaler).UnmarshalPHP(src); err != nil { 90 d.annotateError(cursor, err) 91 return 0, err 92 } 93 return end, nil 94 } 95 96 func unquoteBytes(s []byte) (t []byte, ok bool) { 97 length := len(s) 98 if length < 2 || s[0] != '"' || s[length-1] != '"' { 99 return 100 } 101 s = s[1 : length-1] 102 length -= 2 103 104 // Check for unusual characters. If there are none, 105 // then no unquoting is needed, so return a slice of the 106 // original bytes. 107 r := 0 108 for r < length { 109 c := s[r] 110 if c == '\\' || c == '"' || c < ' ' { 111 break 112 } 113 if c < utf8.RuneSelf { 114 r++ 115 continue 116 } 117 rr, size := utf8.DecodeRune(s[r:]) 118 if rr == utf8.RuneError && size == 1 { 119 break 120 } 121 r += size 122 } 123 if r == length { 124 return s, true 125 } 126 127 b := make([]byte, length+2*utf8.UTFMax) 128 w := copy(b, s[0:r]) 129 for r < length { 130 // Out of room? Can only happen if s is full of 131 // malformed UTF-8 and we're replacing each 132 // byte with RuneError. 133 if w >= len(b)-2*utf8.UTFMax { 134 nb := make([]byte, (len(b)+utf8.UTFMax)*2) 135 copy(nb, b[0:w]) 136 b = nb 137 } 138 switch c := s[r]; { 139 case c == '\\': 140 r++ 141 if r >= length { 142 return 143 } 144 switch s[r] { 145 default: 146 return 147 case '"', '\\', '/', '\'': 148 b[w] = s[r] 149 r++ 150 w++ 151 case 'b': 152 b[w] = '\b' 153 r++ 154 w++ 155 case 'f': 156 b[w] = '\f' 157 r++ 158 w++ 159 case 'n': 160 b[w] = '\n' 161 r++ 162 w++ 163 case 'r': 164 b[w] = '\r' 165 r++ 166 w++ 167 case 't': 168 b[w] = '\t' 169 r++ 170 w++ 171 case 'u': 172 r-- 173 rr := getu4(s[r:]) 174 if rr < 0 { 175 return 176 } 177 r += 6 178 if utf16.IsSurrogate(rr) { 179 rr1 := getu4(s[r:]) 180 if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar { 181 // A valid pair; consume. 182 r += 6 183 w += utf8.EncodeRune(b[w:], dec) 184 break 185 } 186 // Invalid surrogate; fall back to replacement rune. 187 rr = unicode.ReplacementChar 188 } 189 w += utf8.EncodeRune(b[w:], rr) 190 } 191 192 // Quote, control characters are invalid. 193 case c == '"', c < ' ': 194 return 195 196 // ASCII 197 case c < utf8.RuneSelf: 198 b[w] = c 199 r++ 200 w++ 201 202 // Coerce to well-formed UTF-8. 203 default: 204 rr, size := utf8.DecodeRune(s[r:]) 205 r += size 206 w += utf8.EncodeRune(b[w:], rr) 207 } 208 } 209 return b[0:w], true 210 } 211 212 func getu4(s []byte) rune { 213 if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { 214 return -1 215 } 216 var r rune 217 for _, c := range s[2:6] { 218 switch { 219 case '0' <= c && c <= '9': 220 c = c - '0' 221 case 'a' <= c && c <= 'f': 222 c = c - 'a' + 10 223 case 'A' <= c && c <= 'F': 224 c = c - 'A' + 10 225 default: 226 return -1 227 } 228 r = r*16 + rune(c) 229 } 230 return r 231 }