github.com/trim21/go-phpserialize@v0.0.22-0.20240301204449-2fca0319b3f0/internal/decoder/unmarshal_php.go (about)

     1  package decoder
     2  
     3  import (
     4  	"bytes"
     5  	"unicode"
     6  	"unicode/utf16"
     7  	"unicode/utf8"
     8  	"unsafe"
     9  
    10  	"github.com/trim21/go-phpserialize/internal/errors"
    11  	"github.com/trim21/go-phpserialize/internal/runtime"
    12  )
    13  
    14  type Unmarshaler interface {
    15  	UnmarshalPHP([]byte) error
    16  }
    17  
    18  type unmarshalPHPDecoder struct {
    19  	typ        *runtime.Type
    20  	structName string
    21  	fieldName  string
    22  }
    23  
    24  func newUnmarshalTextDecoder(typ *runtime.Type, structName, fieldName string) *unmarshalPHPDecoder {
    25  	return &unmarshalPHPDecoder{
    26  		typ:        typ,
    27  		structName: structName,
    28  		fieldName:  fieldName,
    29  	}
    30  }
    31  
    32  func (d *unmarshalPHPDecoder) annotateError(cursor int64, err error) {
    33  	switch e := err.(type) {
    34  	case *errors.UnmarshalTypeError:
    35  		e.Struct = d.structName
    36  		e.Field = d.fieldName
    37  	case *errors.SyntaxError:
    38  		e.Offset = cursor
    39  	}
    40  }
    41  
    42  var (
    43  	nullbytes = []byte(`N;`)
    44  )
    45  
    46  func (d *unmarshalPHPDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) {
    47  	buf := ctx.Buf
    48  	start := cursor
    49  	end, err := skipValue(buf, cursor, depth)
    50  	if err != nil {
    51  		return 0, err
    52  	}
    53  	src := buf[start:end]
    54  	if len(src) > 0 {
    55  		switch src[0] {
    56  		case '[':
    57  			return 0, &errors.UnmarshalTypeError{
    58  				Value:  "array",
    59  				Type:   runtime.RType2Type(d.typ),
    60  				Offset: start,
    61  			}
    62  		case '{':
    63  			return 0, &errors.UnmarshalTypeError{
    64  				Value:  "object",
    65  				Type:   runtime.RType2Type(d.typ),
    66  				Offset: start,
    67  			}
    68  		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
    69  			return 0, &errors.UnmarshalTypeError{
    70  				Value:  "number",
    71  				Type:   runtime.RType2Type(d.typ),
    72  				Offset: start,
    73  			}
    74  		case 'N':
    75  			if bytes.Equal(src, nullbytes) {
    76  				*(*unsafe.Pointer)(p) = nil
    77  				return end, nil
    78  			}
    79  		}
    80  	}
    81  
    82  	if s, ok := unquoteBytes(src); ok {
    83  		src = s
    84  	}
    85  	v := *(*any)(unsafe.Pointer(&emptyInterface{
    86  		typ: d.typ,
    87  		ptr: *(*unsafe.Pointer)(unsafe.Pointer(&p)),
    88  	}))
    89  	if err := v.(Unmarshaler).UnmarshalPHP(src); err != nil {
    90  		d.annotateError(cursor, err)
    91  		return 0, err
    92  	}
    93  	return end, nil
    94  }
    95  
    96  func unquoteBytes(s []byte) (t []byte, ok bool) {
    97  	length := len(s)
    98  	if length < 2 || s[0] != '"' || s[length-1] != '"' {
    99  		return
   100  	}
   101  	s = s[1 : length-1]
   102  	length -= 2
   103  
   104  	// Check for unusual characters. If there are none,
   105  	// then no unquoting is needed, so return a slice of the
   106  	// original bytes.
   107  	r := 0
   108  	for r < length {
   109  		c := s[r]
   110  		if c == '\\' || c == '"' || c < ' ' {
   111  			break
   112  		}
   113  		if c < utf8.RuneSelf {
   114  			r++
   115  			continue
   116  		}
   117  		rr, size := utf8.DecodeRune(s[r:])
   118  		if rr == utf8.RuneError && size == 1 {
   119  			break
   120  		}
   121  		r += size
   122  	}
   123  	if r == length {
   124  		return s, true
   125  	}
   126  
   127  	b := make([]byte, length+2*utf8.UTFMax)
   128  	w := copy(b, s[0:r])
   129  	for r < length {
   130  		// Out of room? Can only happen if s is full of
   131  		// malformed UTF-8 and we're replacing each
   132  		// byte with RuneError.
   133  		if w >= len(b)-2*utf8.UTFMax {
   134  			nb := make([]byte, (len(b)+utf8.UTFMax)*2)
   135  			copy(nb, b[0:w])
   136  			b = nb
   137  		}
   138  		switch c := s[r]; {
   139  		case c == '\\':
   140  			r++
   141  			if r >= length {
   142  				return
   143  			}
   144  			switch s[r] {
   145  			default:
   146  				return
   147  			case '"', '\\', '/', '\'':
   148  				b[w] = s[r]
   149  				r++
   150  				w++
   151  			case 'b':
   152  				b[w] = '\b'
   153  				r++
   154  				w++
   155  			case 'f':
   156  				b[w] = '\f'
   157  				r++
   158  				w++
   159  			case 'n':
   160  				b[w] = '\n'
   161  				r++
   162  				w++
   163  			case 'r':
   164  				b[w] = '\r'
   165  				r++
   166  				w++
   167  			case 't':
   168  				b[w] = '\t'
   169  				r++
   170  				w++
   171  			case 'u':
   172  				r--
   173  				rr := getu4(s[r:])
   174  				if rr < 0 {
   175  					return
   176  				}
   177  				r += 6
   178  				if utf16.IsSurrogate(rr) {
   179  					rr1 := getu4(s[r:])
   180  					if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
   181  						// A valid pair; consume.
   182  						r += 6
   183  						w += utf8.EncodeRune(b[w:], dec)
   184  						break
   185  					}
   186  					// Invalid surrogate; fall back to replacement rune.
   187  					rr = unicode.ReplacementChar
   188  				}
   189  				w += utf8.EncodeRune(b[w:], rr)
   190  			}
   191  
   192  		// Quote, control characters are invalid.
   193  		case c == '"', c < ' ':
   194  			return
   195  
   196  		// ASCII
   197  		case c < utf8.RuneSelf:
   198  			b[w] = c
   199  			r++
   200  			w++
   201  
   202  		// Coerce to well-formed UTF-8.
   203  		default:
   204  			rr, size := utf8.DecodeRune(s[r:])
   205  			r += size
   206  			w += utf8.EncodeRune(b[w:], rr)
   207  		}
   208  	}
   209  	return b[0:w], true
   210  }
   211  
   212  func getu4(s []byte) rune {
   213  	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
   214  		return -1
   215  	}
   216  	var r rune
   217  	for _, c := range s[2:6] {
   218  		switch {
   219  		case '0' <= c && c <= '9':
   220  			c = c - '0'
   221  		case 'a' <= c && c <= 'f':
   222  			c = c - 'a' + 10
   223  		case 'A' <= c && c <= 'F':
   224  			c = c - 'A' + 10
   225  		default:
   226  			return -1
   227  		}
   228  		r = r*16 + rune(c)
   229  	}
   230  	return r
   231  }