github.com/philpearl/plenc@v0.0.15/plenccodec/descriptor.go (about)

     1  package plenccodec
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"time"
     7  	"unsafe"
     8  
     9  	"github.com/philpearl/plenc/plenccore"
    10  )
    11  
// FieldType identifies the kind of value a Descriptor describes. Descriptor
// decoding switches on it to choose how the encoded bytes are read.
//
//go:generate stringer -type FieldType
type FieldType int
    14  
// The FieldType values are assigned sequentially from zero by iota.
// NOTE(review): Descriptor.Type carries a plenc tag, so these numbers
// presumably appear in serialized descriptors; confirm before reordering or
// inserting values anywhere but at the end.
const (
	// FieldTypeInt is decoded as an int64 using IntCodec.
	FieldTypeInt FieldType = iota
	// FieldTypeUint is decoded as a uint64 using UintCodec.
	FieldTypeUint
	// FieldTypeFloat32 is decoded as a float32 using Float32Codec.
	FieldTypeFloat32
	// FieldTypeFloat64 is decoded as a float64 using Float64Codec.
	FieldTypeFloat64
	// FieldTypeString is decoded as a string using StringCodec.
	FieldTypeString
	// FieldTypeSlice is a sequence whose elements are described by the first
	// entry of Descriptor.Elements.
	FieldTypeSlice
	// FieldTypeStruct is a set of tagged fields, each described by an entry of
	// Descriptor.Elements.
	FieldTypeStruct
	// FieldTypeBool is decoded as a bool using BoolCodec.
	FieldTypeBool
	// FieldTypeTime is decoded as a time.Time using TimeCodec.
	FieldTypeTime
	// FieldTypeJSONObject is output as an object whose entries are decoded as
	// JSON key/value records.
	FieldTypeJSONObject
	// FieldTypeJSONArray is output as an array whose entries are decoded as
	// JSON value records.
	FieldTypeJSONArray
	// Not zig-zag encoded, but expected to be signed. Don't use if negative
	// numbers are likely.
	FieldTypeFlatInt
	// Do we want int32 types?
	// Do we want fixed size int types?
	// Do we want a separate bytes type?
	// Do we want an ENUM type? How would we encode it?
)
    35  
// LogicalType refines a FieldType with an extra meaning, for example marking
// an integer as a timestamp or a slice as a map.
//
//go:generate stringer -type LogicalType
type LogicalType int
    38  
// The LogicalType values are assigned sequentially from zero by iota.
// NOTE(review): Descriptor.LogicalType carries a plenc tag, so these numbers
// presumably appear in serialized descriptors; confirm before reordering.
const (
	// LogicalTypeNone means the field has no special meaning.
	LogicalTypeNone LogicalType = iota
	// LogicalTypeTimestamp on a FieldTypeFlatInt field makes the decoder read
	// the value with BQTimestampCodec and output it as a time.
	LogicalTypeTimestamp
	// LogicalTypeDate is not interpreted by the decoding code in this file.
	// NOTE(review): presumably marks a calendar date; confirm against users.
	LogicalTypeDate
	// LogicalTypeTime is not interpreted by the decoding code in this file.
	// NOTE(review): presumably marks a time of day; confirm against users.
	LogicalTypeTime
	// LogicalTypeMap marks a FieldTypeSlice that represents a map. Its single
	// element must be a LogicalTypeMapEntry struct.
	LogicalTypeMap
	// LogicalTypeMapEntry marks a FieldTypeStruct with exactly two elements
	// that represents one map entry; the first element describes the key.
	LogicalTypeMapEntry
)
    47  
// Descriptor describes how a type is plenc-encoded. It contains enough
// information to decode plenc data marshalled from the described type.
//
// Descriptors nest: a struct descriptor holds one Descriptor per field and a
// slice descriptor holds one Descriptor for its element type.
type Descriptor struct {
	// Index is the plenc index of this field
	Index int `plenc:"1"`
	// Name is the name of the field
	Name string `plenc:"2"`
	// Type is the type of the field
	Type FieldType `plenc:"3"`
	// TypeName is used for struct types and is the name of the struct.
	TypeName string `plenc:"5"`
	// Elements is valid for FieldTypeSlice and FieldTypeStruct. For
	// FieldTypeSlice we expect one entry that describes the elements of the
	// slice. For FieldTypeStruct we expect an entry for each field in the
	// struct. A map is described as a FieldTypeSlice with LogicalTypeMap whose
	// single element is a FieldTypeStruct with LogicalTypeMapEntry; that
	// struct has two entries. The first is for the key type and the second is
	// for the value type.
	Elements []Descriptor `plenc:"4"`

	// ExplicitPresence is set if the field has a mechanism to distinguish when
	// it is not present. So either a pointer type or something from the null
	// package. If this is not set then a missing value indicates the zero
	// value, not a null or nil entry.
	ExplicitPresence bool `plenc:"6"`

	// The logical type of the field. This is used to indicate if the field has
	// any special meaning - e.g. if a long or string indicates a timestamp.
	LogicalType LogicalType `plenc:"7"`
}
    76  
    77  func (d *Descriptor) Read(out Outputter, data []byte) (err error) {
    78  	_, err = d.read(out, data)
    79  	return err
    80  }
    81  
    82  func (d *Descriptor) read(out Outputter, data []byte) (n int, err error) {
    83  	switch d.Type {
    84  	case FieldTypeInt:
    85  		var v int64
    86  		n, err = IntCodec[int64]{}.Read(data, unsafe.Pointer(&v), plenccore.WTVarInt)
    87  		out.Int64(v)
    88  		return n, err
    89  
    90  	case FieldTypeFlatInt:
    91  		switch d.LogicalType {
    92  		case LogicalTypeTimestamp:
    93  			var v time.Time
    94  			n, err = BQTimestampCodec{}.Read(data, unsafe.Pointer(&v), plenccore.WTVarInt)
    95  			out.Time(v)
    96  		default:
    97  			var v int64
    98  			n, err = FlatIntCodec[uint64]{}.Read(data, unsafe.Pointer(&v), plenccore.WTVarInt)
    99  			out.Int64(v)
   100  		}
   101  		return n, err
   102  
   103  	case FieldTypeUint:
   104  		var v uint64
   105  		n, err = UintCodec[uint64]{}.Read(data, unsafe.Pointer(&v), plenccore.WTVarInt)
   106  		out.Uint64(v)
   107  		return n, err
   108  
   109  	case FieldTypeFloat32:
   110  		var v float32
   111  		n, err = Float32Codec{}.Read(data, unsafe.Pointer(&v), plenccore.WT32)
   112  		out.Float32(v)
   113  		return n, err
   114  
   115  	case FieldTypeFloat64:
   116  		var v float64
   117  		n, err = Float64Codec{}.Read(data, unsafe.Pointer(&v), plenccore.WT64)
   118  		out.Float64(v)
   119  		return n, err
   120  
   121  	case FieldTypeString:
   122  		var v string
   123  		n, err = StringCodec{}.Read(data, unsafe.Pointer(&v), plenccore.WTLength)
   124  		out.String(v)
   125  		return n, err
   126  
   127  	case FieldTypeBool:
   128  		var v bool
   129  		n, err = BoolCodec{}.Read(data, unsafe.Pointer(&v), plenccore.WTLength)
   130  		out.Bool(v)
   131  		return n, err
   132  
   133  	case FieldTypeTime:
   134  		var v time.Time
   135  		n, err = TimeCodec{}.Read(data, unsafe.Pointer(&v), plenccore.WTLength)
   136  		out.Time(v)
   137  		return n, err
   138  
   139  	case FieldTypeSlice:
   140  		if d.isValidJSONMap() {
   141  			out.StartObject()
   142  			defer out.EndObject()
   143  		} else {
   144  			out.StartArray()
   145  			defer out.EndArray()
   146  		}
   147  		return d.readAsSlice(out, data)
   148  
   149  	case FieldTypeStruct:
   150  		if d.isValidJSONMapEntry() {
   151  			return d.readAsMapEntry(out, data)
   152  		}
   153  		out.StartObject()
   154  		defer out.EndObject()
   155  		return d.readAsStruct(out, data)
   156  
   157  	case FieldTypeJSONObject:
   158  		out.StartObject()
   159  		defer out.EndObject()
   160  		return d.readAsJSON(out, data)
   161  
   162  	case FieldTypeJSONArray:
   163  		out.StartArray()
   164  		defer out.EndArray()
   165  		return d.readAsJSON(out, data)
   166  	}
   167  
   168  	return 0, fmt.Errorf("unrecognised field type %s", d.Type)
   169  }
   170  
   171  func (d *Descriptor) isValidJSONMap() bool {
   172  	if d.Type != FieldTypeSlice || d.LogicalType != LogicalTypeMap {
   173  		return false
   174  	}
   175  	if len(d.Elements) != 1 {
   176  		return false
   177  	}
   178  	return d.Elements[0].isValidJSONMapEntry()
   179  }
   180  
   181  func (d *Descriptor) isValidJSONMapEntry() bool {
   182  	if d.Type != FieldTypeStruct || d.LogicalType != LogicalTypeMapEntry {
   183  		return false
   184  	}
   185  	if len(d.Elements) != 2 {
   186  		return false
   187  	}
   188  	key := &d.Elements[0]
   189  	return key.Type == FieldTypeString
   190  }
   191  
   192  func (d *Descriptor) readAsSlice(out Outputter, data []byte) (n int, err error) {
   193  	elt := &d.Elements[0]
   194  	switch elt.Type {
   195  	case FieldTypeFloat32, FieldTypeFloat64, FieldTypeInt, FieldTypeUint:
   196  		// If data is generated by protobuf this could be an element of a slice.
   197  		// We won't support that for now. So this is either a float64 or float32
   198  		offset := 0
   199  		for offset < len(data) {
   200  			n, err := elt.read(out, data[offset:])
   201  			if err != nil {
   202  				return 0, err
   203  			}
   204  			offset += n
   205  		}
   206  		return offset, nil
   207  
   208  	case FieldTypeStruct, FieldTypeSlice, FieldTypeString:
   209  		count, n := plenccore.ReadVarUint(data)
   210  		if n < 0 {
   211  			return 0, fmt.Errorf("corrupt data looking for WTSlice count")
   212  		}
   213  		offset := n
   214  		for i := 0; i < int(count); i++ {
   215  			if offset >= len(data) {
   216  				return 0, fmt.Errorf("corrupt data looking for length of slice entry %d", i)
   217  			}
   218  			s, n := plenccore.ReadVarUint(data[offset:])
   219  			if n <= 0 {
   220  				return 0, fmt.Errorf("invalid varint for slice entry %d", i)
   221  			}
   222  			offset += n
   223  			if s == 0 {
   224  				continue
   225  			}
   226  			end := offset + int(s)
   227  			if end > len(data) {
   228  				return 0, fmt.Errorf("corrupt data reading slice entry %d", i)
   229  			}
   230  
   231  			n, err := elt.read(out, data[offset:offset+int(s)])
   232  			if err != nil {
   233  				return 0, err
   234  			}
   235  			offset += n
   236  		}
   237  
   238  		return offset, nil
   239  
   240  	default:
   241  		return 0, fmt.Errorf("slice of unexpected element types %s", elt.Type)
   242  	}
   243  }
   244  
   245  func (d *Descriptor) readAsMapEntry(out Outputter, data []byte) (n int, err error) {
   246  	if d.Elements[0].Type != FieldTypeString {
   247  		// map keys have to be strings to be valid JSON. So we'll output as a
   248  		// struct instead
   249  		return
   250  	}
   251  
   252  	l := len(data)
   253  
   254  	var offset int
   255  	for offset < l {
   256  		wt, index, n := plenccore.ReadTag(data[offset:])
   257  		offset += n
   258  
   259  		var elt *Descriptor
   260  		for i := range d.Elements {
   261  			candidate := &d.Elements[i]
   262  			if candidate.Index == index {
   263  				elt = candidate
   264  				break
   265  			}
   266  		}
   267  
   268  		if elt == nil {
   269  			// Field corresponding to index does not exist
   270  			n, err := plenccore.Skip(data[offset:], wt)
   271  			if err != nil {
   272  				return 0, fmt.Errorf("failed to skip field %d in %s: %w", index, d.Name, err)
   273  			}
   274  			offset += n
   275  			continue
   276  		}
   277  
   278  		fl := l
   279  		if wt == plenccore.WTLength {
   280  			// For WTLength types we read out the length and ensure the data we
   281  			// read the field from is the right length
   282  			v, n := plenccore.ReadVarUint(data[offset:])
   283  			if n <= 0 {
   284  				return 0, fmt.Errorf("varuint overflow reading field %d of %s", index, d.Name)
   285  			}
   286  			offset += n
   287  			fl = int(v) + offset
   288  			if fl > l {
   289  				return 0, fmt.Errorf("length %d of field %d of %s exceeds data length", fl, index, d.Name)
   290  			}
   291  		}
   292  
   293  		n, err := elt.read(out, data[offset:fl])
   294  		if err != nil {
   295  			return 0, fmt.Errorf("failed reading field %d(%s) of %s. %w", index, elt.Name, d.Name, err)
   296  		}
   297  		offset += n
   298  	}
   299  
   300  	return offset, nil
   301  }
   302  
   303  func (d *Descriptor) readAsStruct(out Outputter, data []byte) (n int, err error) {
   304  	l := len(data)
   305  
   306  	var offset int
   307  	for offset < l {
   308  		wt, index, n := plenccore.ReadTag(data[offset:])
   309  		offset += n
   310  
   311  		var elt *Descriptor
   312  		for i := range d.Elements {
   313  			candidate := &d.Elements[i]
   314  			if candidate.Index == index {
   315  				elt = candidate
   316  				break
   317  			}
   318  		}
   319  
   320  		if elt == nil {
   321  			// Field corresponding to index does not exist
   322  			n, err := plenccore.Skip(data[offset:], wt)
   323  			if err != nil {
   324  				return 0, fmt.Errorf("failed to skip field %d in %s: %w", index, d.Name, err)
   325  			}
   326  			offset += n
   327  			continue
   328  		}
   329  
   330  		fl := l
   331  		if wt == plenccore.WTLength {
   332  			// For WTLength types we read out the length and ensure the data we
   333  			// read the field from is the right length
   334  			v, n := plenccore.ReadVarUint(data[offset:])
   335  			if n <= 0 {
   336  				return 0, fmt.Errorf("varuint overflow reading field %d of %s", index, d.Name)
   337  			}
   338  			offset += n
   339  			fl = int(v) + offset
   340  			if fl > l {
   341  				return 0, fmt.Errorf("length %d of field %d of %s exceeds data length", fl, index, d.Name)
   342  			}
   343  		}
   344  
   345  		out.NameField(elt.Name)
   346  		n, err := elt.read(out, data[offset:fl])
   347  		if err != nil {
   348  			return 0, fmt.Errorf("failed reading field %d(%s) of %s. %w", index, elt.Name, d.Name, err)
   349  		}
   350  		offset += n
   351  	}
   352  
   353  	return offset, nil
   354  }
   355  
   356  // readAsJSON reads data from JSON objects and arrays. Both are implemented as
   357  // slices of structs. The structs are name, value type and value. In the array
   358  // case the name is omitted from each entry
   359  func (d *Descriptor) readAsJSON(out Outputter, data []byte) (n int, err error) {
   360  	count, n := plenccore.ReadVarUint(data)
   361  	if n < 0 {
   362  		return 0, fmt.Errorf("corrupt data looking for WTSlice count")
   363  	}
   364  	offset := n
   365  	for i := 0; i < int(count); i++ {
   366  		// For each entry we have a string key, a value type and a value
   367  		s, n := plenccore.ReadVarUint(data[offset:])
   368  		if n <= 0 {
   369  			return 0, fmt.Errorf("invalid varint for slice entry %d", i)
   370  		}
   371  		offset += n
   372  		if s == 0 {
   373  			continue
   374  		}
   375  
   376  		n, err := d.readJSONObjectKV(out, data[offset:offset+int(s)])
   377  		if err != nil {
   378  			return 0, err
   379  		}
   380  		offset += n
   381  	}
   382  
   383  	return offset, nil
   384  }
   385  
   386  func (d *Descriptor) readJSONObjectKV(out Outputter, data []byte) (n int, err error) {
   387  	var (
   388  		jType  jsonType
   389  		offset int
   390  	)
   391  
   392  	for offset < len(data) {
   393  		wt, index, n := plenccore.ReadTag(data[offset:])
   394  		offset += n
   395  		switch index {
   396  		case 1:
   397  			// When using this for reading arrays we simply don't see this index
   398  			l, n := plenccore.ReadVarUint(data[offset:])
   399  			if n < 0 {
   400  				return 0, fmt.Errorf("bad length on string field")
   401  			}
   402  			offset += n
   403  			var key string
   404  
   405  			n, err := StringCodec{}.Read(data[offset:offset+int(l)], unsafe.Pointer(&key), wt)
   406  			if err != nil {
   407  				return 0, err
   408  			}
   409  			out.NameField(key)
   410  			offset += n
   411  		case 2:
   412  			v, n := plenccore.ReadVarUint(data[offset:])
   413  			if n < 0 {
   414  				return 0, fmt.Errorf("invalid map type field")
   415  			}
   416  			jType = jsonType(v)
   417  			offset += n
   418  		case 3:
   419  			switch jType {
   420  			case jsonTypeString:
   421  				l, n := plenccore.ReadVarUint(data[offset:])
   422  				if n < 0 {
   423  					return 0, fmt.Errorf("bad length on string field")
   424  				}
   425  				offset += n
   426  				var v string
   427  				n, err := StringCodec{}.Read(data[offset:offset+int(l)], unsafe.Pointer(&v), wt)
   428  				if err != nil {
   429  					return 0, err
   430  				}
   431  				out.String(v)
   432  				offset += n
   433  
   434  			case jsonTypeInt:
   435  				var v int64
   436  				n, err := IntCodec[int64]{}.Read(data[offset:], unsafe.Pointer(&v), wt)
   437  				if err != nil {
   438  					return 0, err
   439  				}
   440  				offset += n
   441  				out.Int64(v)
   442  
   443  			case jsonTypeFloat:
   444  				var v float64
   445  				n, err := Float64Codec{}.Read(data[offset:], unsafe.Pointer(&v), wt)
   446  				if err != nil {
   447  					return 0, err
   448  				}
   449  				offset += n
   450  				out.Float64(v)
   451  
   452  			case jsonTypeBool:
   453  				var v bool
   454  				n, err := BoolCodec{}.Read(data[offset:], unsafe.Pointer(&v), wt)
   455  				if err != nil {
   456  					return 0, err
   457  				}
   458  				offset += n
   459  				out.Bool(v)
   460  
   461  			case jsonTypeArray:
   462  				d := Descriptor{Type: FieldTypeJSONArray}
   463  				n, err := d.read(out, data[offset:])
   464  				if err != nil {
   465  					return 0, err
   466  				}
   467  				offset += n
   468  
   469  			case jsonTypeObject:
   470  				d := Descriptor{Type: FieldTypeJSONObject}
   471  				n, err := d.read(out, data[offset:])
   472  				if err != nil {
   473  					return 0, err
   474  				}
   475  				offset += n
   476  
   477  			case jsonTypeNumber:
   478  				l, n := plenccore.ReadVarUint(data[offset:])
   479  				if n < 0 {
   480  					return 0, fmt.Errorf("bad length on JSON number field")
   481  				}
   482  				offset += n
   483  				var v json.Number
   484  				n, err := StringCodec{}.Read(data[offset:offset+int(l)], unsafe.Pointer(&v), wt)
   485  				if err != nil {
   486  					return 0, err
   487  				}
   488  				out.Raw(v.String())
   489  				offset += n
   490  
   491  			default:
   492  				return 0, fmt.Errorf("unexpected json type %d", jType)
   493  			}
   494  		default:
   495  			return 0, fmt.Errorf("unexpected json field index %d", index)
   496  		}
   497  	}
   498  
   499  	return offset, nil
   500  }