github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/schema.go (about)

     1  package parquet
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"reflect"
     7  	"strconv"
     8  	"strings"
     9  	"sync"
    10  
    11  	"github.com/google/uuid"
    12  	"github.com/vc42/parquet-go/compress"
    13  	"github.com/vc42/parquet-go/deprecated"
    14  	"github.com/vc42/parquet-go/encoding"
    15  )
    16  
// Schema represents a parquet schema created from a Go value.
//
// Schema implements the Node interface to represent the root node of a parquet
// schema. Most Node methods simply forward to the wrapped root node.
type Schema struct {
	name        string          // schema name, returned by Name and used by String
	root        Node            // node that the Node-interface methods forward to
	deconstruct deconstructFunc // built by makeDeconstructFunc; may be nil (Deconstruct then leaves the row untouched)
	reconstruct reconstructFunc // built by makeReconstructFunc; may be nil (Reconstruct then does nothing)
	readRows    readRowsFunc    // built by makeReadRowsFunc from the root node
	mapping     columnMapping   // leaf column lookup table produced by columnMappingOf, used by Lookup
	columns     [][]string      // column paths produced by columnMappingOf, returned by Columns
}
    30  
    31  // SchemaOf constructs a parquet schema from a Go value.
    32  //
    33  // The function can construct parquet schemas from struct or pointer-to-struct
    34  // values only. A panic is raised if a Go value of a different type is passed
    35  // to this function.
    36  //
    37  // When creating a parquet Schema from a Go value, the struct fields may contain
    38  // a "parquet" tag to describe properties of the parquet node. The "parquet" tag
    39  // follows the conventional format of Go struct tags: a comma-separated list of
    40  // values describe the options, with the first one defining the name of the
    41  // parquet column.
    42  //
    43  // The following options are also supported in the "parquet" struct tag:
    44  //
    45  //	optional  | make the parquet column optional
    46  //	snappy    | sets the parquet column compression codec to snappy
    47  //	gzip      | sets the parquet column compression codec to gzip
    48  //	brotli    | sets the parquet column compression codec to brotli
    49  //	lz4       | sets the parquet column compression codec to lz4
    50  //	zstd      | sets the parquet column compression codec to zstd
    51  //	plain     | enables the plain encoding (no-op default)
    52  //	dict      | enables dictionary encoding on the parquet column
    53  //	delta     | enables delta encoding on the parquet column
    54  //	list      | for slice types, use the parquet LIST logical type
    55  //	enum      | for string types, use the parquet ENUM logical type
    56  //	uuid      | for string and [16]byte types, use the parquet UUID logical type
    57  //	decimal   | for int32, int64 and [n]byte types, use the parquet DECIMAL logical type
    58  //	date      | for int32 types use the DATE logical type
    59  //	timestamp | for int64 types use the TIMESTAMP logical type with, by default, millisecond precision
    60  //	split     | for float32/float64, use the BYTE_STREAM_SPLIT encoding
    61  //
    62  // The date logical type is an int32 value of the number of days since the unix epoch
    63  //
    64  // The timestamp precision can be changed by defining which precision to use as an argument.
    65  // Supported precisions are: nanosecond, millisecond and microsecond. Example:
    66  //
    67  //  type Message struct {
    68  //    TimestrampMicros int64 `parquet:"timestamp_micros,timestamp(microsecond)"
    69  //  }
    70  //
    71  // The decimal tag must be followed by two integer parameters, the first integer
    72  // representing the scale and the second the precision; for example:
    73  //
    74  //	type Item struct {
    75  //		Cost int64 `parquet:"cost,decimal(0:3)"`
    76  //	}
    77  //
    78  // Invalid combination of struct tags and Go types, or repeating options will
    79  // cause the function to panic.
    80  //
    81  // As a special case, if the field tag is "-", the field is omitted from the schema
    82  // and the data will not be written into the parquet file(s).
    83  // Note that a field with name "-" can still be generated using the tag "-,".
    84  //
    85  // The schema name is the Go type name of the value.
    86  func SchemaOf(model interface{}) *Schema {
    87  	return schemaOf(dereference(reflect.TypeOf(model)))
    88  }
    89  
    90  var cachedSchemas sync.Map // map[reflect.Type]*Schema
    91  
    92  func schemaOf(model reflect.Type) *Schema {
    93  	cached, _ := cachedSchemas.Load(model)
    94  	schema, _ := cached.(*Schema)
    95  	if schema != nil {
    96  		return schema
    97  	}
    98  	if model.Kind() != reflect.Struct {
    99  		panic("cannot construct parquet schema from value of type " + model.String())
   100  	}
   101  	schema = NewSchema(model.Name(), nodeOf(model))
   102  	if actual, loaded := cachedSchemas.LoadOrStore(model, schema); loaded {
   103  		schema = actual.(*Schema)
   104  	}
   105  	return schema
   106  }
   107  
   108  // NewSchema constructs a new Schema object with the given name and root node.
   109  //
   110  // The function panics if Node contains more leaf columns than supported by the
   111  // package (see parquet.MaxColumnIndex).
   112  func NewSchema(name string, root Node) *Schema {
   113  	mapping, columns := columnMappingOf(root)
   114  	return &Schema{
   115  		name:        name,
   116  		root:        root,
   117  		deconstruct: makeDeconstructFunc(root),
   118  		reconstruct: makeReconstructFunc(root),
   119  		readRows:    makeReadRowsFunc(root),
   120  		mapping:     mapping,
   121  		columns:     columns,
   122  	}
   123  }
   124  
   125  func dereference(t reflect.Type) reflect.Type {
   126  	for t.Kind() == reflect.Ptr {
   127  		t = t.Elem()
   128  	}
   129  	return t
   130  }
   131  
   132  func makeDeconstructFunc(node Node) (deconstruct deconstructFunc) {
   133  	if schema, _ := node.(*Schema); schema != nil {
   134  		return schema.deconstruct
   135  	}
   136  	if !node.Leaf() {
   137  		_, deconstruct = deconstructFuncOf(0, node)
   138  	}
   139  	return deconstruct
   140  }
   141  
   142  func makeReconstructFunc(node Node) (reconstruct reconstructFunc) {
   143  	if schema, _ := node.(*Schema); schema != nil {
   144  		return schema.reconstruct
   145  	}
   146  	if !node.Leaf() {
   147  		_, reconstruct = reconstructFuncOf(0, node)
   148  	}
   149  	return reconstruct
   150  }
   151  
   152  func makeReadRowsFunc(node Node) readRowsFunc {
   153  	_, readRows := readRowsFuncOf(node, 0, 0)
   154  	return readRows
   155  }
   156  
   157  // ConfigureRowGroup satisfies the RowGroupOption interface, allowing Schema
   158  // instances to be passed to row group constructors to pre-declare the schema of
   159  // the output parquet file.
   160  func (s *Schema) ConfigureRowGroup(config *RowGroupConfig) { config.Schema = s }
   161  
   162  // ConfigureReader satisfies the ReaderOption interface, allowing Schema
   163  // instances to be passed to NewReader to pre-declare the schema of rows
   164  // read from the reader.
   165  func (s *Schema) ConfigureReader(config *ReaderConfig) { config.Schema = s }
   166  
   167  // ConfigureWriter satisfies the WriterOption interface, allowing Schema
   168  // instances to be passed to NewWriter to pre-declare the schema of the
   169  // output parquet file.
   170  func (s *Schema) ConfigureWriter(config *WriterConfig) { config.Schema = s }
   171  
   172  // String returns a parquet schema representation of s.
   173  func (s *Schema) String() string { return sprint(s.name, s.root) }
   174  
   175  // Name returns the name of s.
   176  func (s *Schema) Name() string { return s.name }
   177  
   178  // Type returns the parquet type of s.
   179  func (s *Schema) Type() Type { return s.root.Type() }
   180  
   181  // Optional returns false since the root node of a parquet schema is always required.
   182  func (s *Schema) Optional() bool { return s.root.Optional() }
   183  
   184  // Repeated returns false since the root node of a parquet schema is always required.
   185  func (s *Schema) Repeated() bool { return s.root.Repeated() }
   186  
   187  // Required returns true since the root node of a parquet schema is always required.
   188  func (s *Schema) Required() bool { return s.root.Required() }
   189  
   190  // Leaf returns true if the root node of the parquet schema is a leaf column.
   191  func (s *Schema) Leaf() bool { return s.root.Leaf() }
   192  
   193  // Fields returns the list of fields on the root node of the parquet schema.
   194  func (s *Schema) Fields() []Field { return s.root.Fields() }
   195  
   196  // Encoding returns the encoding set on the root node of the parquet schema.
   197  func (s *Schema) Encoding() encoding.Encoding { return s.root.Encoding() }
   198  
   199  // Compression returns the compression codec set on the root node of the parquet
   200  // schema.
   201  func (s *Schema) Compression() compress.Codec { return s.root.Compression() }
   202  
   203  // GoType returns the Go type that best represents the schema.
   204  func (s *Schema) GoType() reflect.Type { return s.root.GoType() }
   205  
   206  // Deconstruct deconstructs a Go value and appends it to a row.
   207  //
   208  // The method panics is the structure of the go value does not match the
   209  // parquet schema.
   210  func (s *Schema) Deconstruct(row Row, value interface{}) Row {
   211  	v := reflect.ValueOf(value)
   212  	for v.Kind() == reflect.Ptr {
   213  		if v.IsNil() {
   214  			v = reflect.Value{}
   215  			break
   216  		}
   217  		v = v.Elem()
   218  	}
   219  	if s.deconstruct != nil {
   220  		row = s.deconstruct(row, levels{}, v)
   221  	}
   222  	return row
   223  }
   224  
   225  // Reconstruct reconstructs a Go value from a row.
   226  //
   227  // The go value passed as first argument must be a non-nil pointer for the
   228  // row to be decoded into.
   229  //
   230  // The method panics if the structure of the go value and parquet row do not
   231  // match.
   232  func (s *Schema) Reconstruct(value interface{}, row Row) error {
   233  	v := reflect.ValueOf(value)
   234  	if !v.IsValid() {
   235  		panic("cannot reconstruct row into go value of type <nil>")
   236  	}
   237  	if v.Kind() != reflect.Ptr {
   238  		panic("cannot reconstruct row into go value of non-pointer type " + v.Type().String())
   239  	}
   240  	if v.IsNil() {
   241  		panic("cannot reconstruct row into nil pointer of type " + v.Type().String())
   242  	}
   243  	for v.Kind() == reflect.Ptr {
   244  		if v.IsNil() {
   245  			v.Set(reflect.New(v.Type().Elem()))
   246  		}
   247  		v = v.Elem()
   248  	}
   249  	var err error
   250  	if s.reconstruct != nil {
   251  		row, err = s.reconstruct(v, levels{}, row)
   252  		if len(row) > 0 && err == nil {
   253  			err = fmt.Errorf("%d values remain unused after reconstructing go value of type %s from parquet row", len(row), v.Type())
   254  		}
   255  	}
   256  	return err
   257  }
   258  
   259  // Lookup returns the leaf column at the given path.
   260  //
   261  // The path is the sequence of column names identifying a leaf column (not
   262  // including the root).
   263  //
   264  // If the path was not found in the mapping, or if it did not represent a
   265  // leaf column of the parquet schema, the boolean will be false.
   266  func (s *Schema) Lookup(path ...string) (LeafColumn, bool) {
   267  	leaf := s.mapping.lookup(path)
   268  	return LeafColumn{
   269  		Node:               leaf.node,
   270  		Path:               leaf.path,
   271  		ColumnIndex:        int(leaf.columnIndex),
   272  		MaxRepetitionLevel: int(leaf.maxRepetitionLevel),
   273  		MaxDefinitionLevel: int(leaf.maxDefinitionLevel),
   274  	}, leaf.node != nil
   275  }
   276  
   277  // Columns returns the list of column paths available in the schema.
   278  //
   279  // The method always returns the same slice value across calls to ColumnPaths,
   280  // applications should treat it as immutable.
   281  func (s *Schema) Columns() [][]string {
   282  	return s.columns
   283  }
   284  
   285  func (s *Schema) forEachNode(do func(name string, node Node)) {
   286  	forEachNodeOf(s.Name(), s, do)
   287  }
   288  
   289  type structNode struct {
   290  	gotype reflect.Type
   291  	fields []structField
   292  }
   293  
   294  func structNodeOf(t reflect.Type) *structNode {
   295  	// Collect struct fields first so we can order them before generating the
   296  	// column indexes.
   297  	fields := structFieldsOf(t)
   298  
   299  	s := &structNode{
   300  		gotype: t,
   301  		fields: make([]structField, len(fields)),
   302  	}
   303  
   304  	for i := range fields {
   305  		s.fields[i] = makeStructField(fields[i])
   306  	}
   307  
   308  	return s
   309  }
   310  
   311  func structFieldsOf(t reflect.Type) []reflect.StructField {
   312  	fields := appendStructFields(t, nil, nil, 0)
   313  
   314  	for i := range fields {
   315  		f := &fields[i]
   316  
   317  		if tag := f.Tag.Get("parquet"); tag != "" {
   318  			name, _ := split(tag)
   319  			if name != "" {
   320  				f.Name = name
   321  			}
   322  		}
   323  	}
   324  
   325  	return fields
   326  }
   327  
   328  func appendStructFields(t reflect.Type, fields []reflect.StructField, index []int, offset uintptr) []reflect.StructField {
   329  	for i, n := 0, t.NumField(); i < n; i++ {
   330  		f := t.Field(i)
   331  		if tag := f.Tag.Get("parquet"); tag != "" {
   332  			name, _ := split(tag)
   333  			if tag != "-," && name == "-" {
   334  				continue
   335  			}
   336  		}
   337  
   338  		fieldIndex := index[:len(index):len(index)]
   339  		fieldIndex = append(fieldIndex, i)
   340  
   341  		f.Offset += offset
   342  
   343  		if f.Anonymous {
   344  			fields = appendStructFields(f.Type, fields, fieldIndex, f.Offset)
   345  		} else if f.IsExported() {
   346  			f.Index = fieldIndex
   347  			fields = append(fields, f)
   348  		}
   349  	}
   350  	return fields
   351  }
   352  
   353  func (s *structNode) Optional() bool { return false }
   354  
   355  func (s *structNode) Repeated() bool { return false }
   356  
   357  func (s *structNode) Required() bool { return true }
   358  
   359  func (s *structNode) Leaf() bool { return false }
   360  
   361  func (s *structNode) Encoding() encoding.Encoding { return nil }
   362  
   363  func (s *structNode) Compression() compress.Codec { return nil }
   364  
   365  func (s *structNode) GoType() reflect.Type { return s.gotype }
   366  
   367  func (s *structNode) String() string { return sprint("", s) }
   368  
   369  func (s *structNode) Type() Type { return groupType{} }
   370  
   371  func (s *structNode) Fields() []Field {
   372  	fields := make([]Field, len(s.fields))
   373  	for i := range s.fields {
   374  		fields[i] = &s.fields[i]
   375  	}
   376  	return fields
   377  }
   378  
   379  // fieldByIndex is like reflect.Value.FieldByIndex but returns the zero-value of
   380  // reflect.Value if one of the fields was a nil pointer instead of panicking.
   381  func fieldByIndex(v reflect.Value, index []int) reflect.Value {
   382  	for _, i := range index {
   383  		if v = v.Field(i); v.Kind() == reflect.Ptr {
   384  			if v.IsNil() {
   385  				v = reflect.Value{}
   386  				break
   387  			} else {
   388  				v = v.Elem()
   389  			}
   390  		}
   391  	}
   392  	return v
   393  }
   394  
   395  type structField struct {
   396  	Node
   397  	name  string
   398  	index []int
   399  }
   400  
   401  func (f *structField) Name() string { return f.name }
   402  
   403  func (f *structField) Value(base reflect.Value) reflect.Value {
   404  	switch base.Kind() {
   405  	case reflect.Map:
   406  		return base.MapIndex(reflect.ValueOf(&f.name).Elem())
   407  	default:
   408  		if base.Kind() == reflect.Ptr {
   409  			base = base.Elem()
   410  		}
   411  		if len(f.index) == 1 {
   412  			return base.Field(f.index[0])
   413  		} else {
   414  			return fieldByIndex(base, f.index)
   415  		}
   416  	}
   417  }
   418  
   419  func structFieldString(f reflect.StructField) string {
   420  	return f.Name + " " + f.Type.String() + " " + string(f.Tag)
   421  }
   422  
   423  func throwInvalidFieldTag(f reflect.StructField, tag string) {
   424  	panic("struct has invalid '" + tag + "' parquet tag: " + structFieldString(f))
   425  }
   426  
   427  func throwUnknownFieldTag(f reflect.StructField, tag string) {
   428  	panic("struct has unrecognized '" + tag + "' parquet tag: " + structFieldString(f))
   429  }
   430  
   431  func throwInvalidStructField(msg string, field reflect.StructField) {
   432  	panic(msg + ": " + structFieldString(field))
   433  }
   434  
   435  func makeStructField(f reflect.StructField) structField {
   436  	var (
   437  		field      = structField{name: f.Name, index: f.Index}
   438  		optional   bool
   439  		list       bool
   440  		encoded    encoding.Encoding
   441  		compressed compress.Codec
   442  	)
   443  
   444  	setNode := func(node Node) {
   445  		if field.Node != nil {
   446  			throwInvalidStructField("struct field has multiple logical parquet types declared", f)
   447  		}
   448  		field.Node = node
   449  	}
   450  
   451  	setOptional := func() {
   452  		if optional {
   453  			throwInvalidStructField("struct field has multiple declaration of the optional tag", f)
   454  		}
   455  		optional = true
   456  	}
   457  
   458  	setList := func() {
   459  		if list {
   460  			throwInvalidStructField("struct field has multiple declaration of the list tag", f)
   461  		}
   462  		list = true
   463  	}
   464  
   465  	setEncoding := func(e encoding.Encoding) {
   466  		if encoded != nil {
   467  			throwInvalidStructField("struct field has encoding declared multiple times", f)
   468  		}
   469  		encoded = e
   470  	}
   471  
   472  	setCompression := func(c compress.Codec) {
   473  		if compressed != nil {
   474  			throwInvalidStructField("struct field has compression codecs declared multiple times", f)
   475  		}
   476  		compressed = c
   477  	}
   478  
   479  	forEachStructTagOption(f, func(t reflect.Type, option, args string) {
   480  		switch option {
   481  		case "optional":
   482  			setOptional()
   483  
   484  		case "snappy":
   485  			setCompression(&Snappy)
   486  
   487  		case "gzip":
   488  			setCompression(&Gzip)
   489  
   490  		case "brotli":
   491  			setCompression(&Brotli)
   492  
   493  		case "lz4":
   494  			setCompression(&Lz4Raw)
   495  
   496  		case "zstd":
   497  			setCompression(&Zstd)
   498  
   499  		case "uncompressed":
   500  			setCompression(&Uncompressed)
   501  
   502  		case "plain":
   503  			setEncoding(&Plain)
   504  
   505  		case "dict":
   506  			setEncoding(&RLEDictionary)
   507  
   508  		case "delta":
   509  			switch t.Kind() {
   510  			case reflect.Int, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint32, reflect.Uint64:
   511  				setEncoding(&DeltaBinaryPacked)
   512  			case reflect.String:
   513  				setEncoding(&DeltaByteArray)
   514  			case reflect.Slice:
   515  				if t.Elem().Kind() == reflect.Uint8 { // []byte?
   516  					setEncoding(&DeltaByteArray)
   517  				} else {
   518  					throwInvalidFieldTag(f, option)
   519  				}
   520  			case reflect.Array:
   521  				if t.Elem().Kind() == reflect.Uint8 { // [N]byte?
   522  					setEncoding(&DeltaByteArray)
   523  				} else {
   524  					throwInvalidFieldTag(f, option)
   525  				}
   526  			default:
   527  				throwInvalidFieldTag(f, option)
   528  			}
   529  
   530  		case "split":
   531  			switch t.Kind() {
   532  			case reflect.Float32, reflect.Float64:
   533  				setEncoding(&ByteStreamSplit)
   534  			default:
   535  				throwInvalidFieldTag(f, option)
   536  			}
   537  
   538  		case "list":
   539  			switch t.Kind() {
   540  			case reflect.Slice:
   541  				element := nodeOf(t.Elem())
   542  				setNode(element)
   543  				setList()
   544  			default:
   545  				throwInvalidFieldTag(f, option)
   546  			}
   547  
   548  		case "enum":
   549  			switch t.Kind() {
   550  			case reflect.String:
   551  				setNode(Enum())
   552  			default:
   553  				throwInvalidFieldTag(f, option)
   554  			}
   555  
   556  		case "uuid":
   557  			switch t.Kind() {
   558  			case reflect.Array:
   559  				if t.Elem().Kind() != reflect.Uint8 || t.Len() != 16 {
   560  					throwInvalidFieldTag(f, option)
   561  				}
   562  			default:
   563  				throwInvalidFieldTag(f, option)
   564  			}
   565  
   566  		case "decimal":
   567  			scale, precision, err := parseDecimalArgs(args)
   568  			if err != nil {
   569  				throwInvalidFieldTag(f, option+args)
   570  			}
   571  			var baseType Type
   572  			switch t.Kind() {
   573  			case reflect.Int32:
   574  				baseType = Int32Type
   575  			case reflect.Int64:
   576  				baseType = Int64Type
   577  			case reflect.Array, reflect.Slice:
   578  				baseType = FixedLenByteArrayType(decimalFixedLenByteArraySize(precision))
   579  			default:
   580  				throwInvalidFieldTag(f, option)
   581  			}
   582  
   583  			setNode(Decimal(scale, precision, baseType))
   584  		case "date":
   585  			switch t.Kind() {
   586  			case reflect.Int32, reflect.String:
   587  				setNode(Date())
   588  			default:
   589  				throwInvalidFieldTag(f, option)
   590  			}
   591  		case "timestamp":
   592  			switch t.Kind() {
   593  			case reflect.Int64, reflect.String:
   594  				timeUnit, err := parseTimestampArgs(args)
   595  				if err != nil {
   596  					throwInvalidFieldTag(f, args)
   597  				}
   598  				setNode(Timestamp(timeUnit))
   599  			default:
   600  				throwInvalidFieldTag(f, option)
   601  			}
   602  		default:
   603  			throwUnknownFieldTag(f, option)
   604  		}
   605  	})
   606  
   607  	if field.Node == nil {
   608  		field.Node = nodeOf(f.Type)
   609  	}
   610  
   611  	if compressed != nil {
   612  		field.Node = Compressed(field.Node, compressed)
   613  	}
   614  
   615  	if encoded != nil {
   616  		field.Node = Encoded(field.Node, encoded)
   617  	}
   618  
   619  	if list {
   620  		field.Node = List(field.Node)
   621  	}
   622  
   623  	if optional {
   624  		field.Node = Optional(field.Node)
   625  	}
   626  
   627  	return field
   628  }
   629  
   630  // FixedLenByteArray decimals are sized based on precision
   631  // this function calculates the necessary byte array size.
   632  func decimalFixedLenByteArraySize(precision int) int {
   633  	return int(math.Ceil((math.Log10(2) + float64(precision)) / math.Log10(256)))
   634  }
   635  
   636  func forEachStructTagOption(sf reflect.StructField, do func(t reflect.Type, option, args string)) {
   637  	if tag := sf.Tag.Get("parquet"); tag != "" {
   638  		_, tag = split(tag) // skip the field name
   639  		for tag != "" {
   640  			option := ""
   641  			option, tag = split(tag)
   642  			option, args := splitOptionArgs(option)
   643  			ft := sf.Type
   644  			if ft.Kind() == reflect.Ptr {
   645  				ft = ft.Elem()
   646  			}
   647  			do(ft, option, args)
   648  		}
   649  	}
   650  }
   651  
   652  func nodeOf(t reflect.Type) Node {
   653  	switch t {
   654  	case reflect.TypeOf(deprecated.Int96{}):
   655  		return Leaf(Int96Type)
   656  	case reflect.TypeOf(uuid.UUID{}):
   657  		return UUID()
   658  	}
   659  
   660  	var n Node
   661  	switch t.Kind() {
   662  	case reflect.Bool:
   663  		n = Leaf(BooleanType)
   664  
   665  	case reflect.Int, reflect.Int64:
   666  		n = Int(64)
   667  
   668  	case reflect.Int8, reflect.Int16, reflect.Int32:
   669  		n = Int(t.Bits())
   670  
   671  	case reflect.Uint, reflect.Uintptr, reflect.Uint64:
   672  		n = Uint(64)
   673  
   674  	case reflect.Uint8, reflect.Uint16, reflect.Uint32:
   675  		n = Uint(t.Bits())
   676  
   677  	case reflect.Float32:
   678  		n = Leaf(FloatType)
   679  
   680  	case reflect.Float64:
   681  		n = Leaf(DoubleType)
   682  
   683  	case reflect.String:
   684  		n = String()
   685  
   686  	case reflect.Ptr:
   687  		n = Optional(nodeOf(t.Elem()))
   688  
   689  	case reflect.Slice:
   690  		if elem := t.Elem(); elem.Kind() == reflect.Uint8 { // []byte?
   691  			n = Leaf(ByteArrayType)
   692  		} else {
   693  			n = Repeated(nodeOf(elem))
   694  		}
   695  
   696  	case reflect.Array:
   697  		if t.Elem().Kind() == reflect.Uint8 {
   698  			n = Leaf(FixedLenByteArrayType(t.Len()))
   699  		}
   700  
   701  	case reflect.Map:
   702  		n = Map(nodeOf(t.Key()), nodeOf(t.Elem()))
   703  
   704  	case reflect.Struct:
   705  		return structNodeOf(t)
   706  	}
   707  
   708  	if n == nil {
   709  		panic("cannot create parquet node from go value of type " + t.String())
   710  	}
   711  
   712  	return &goNode{Node: n, gotype: t}
   713  }
   714  
   715  func split(s string) (head, tail string) {
   716  	if i := strings.IndexByte(s, ','); i < 0 {
   717  		head = s
   718  	} else {
   719  		head, tail = s[:i], s[i+1:]
   720  	}
   721  	return
   722  }
   723  
   724  func splitOptionArgs(s string) (option, args string) {
   725  	if i := strings.IndexByte(s, '('); i >= 0 {
   726  		return s[:i], s[i:]
   727  	} else {
   728  		return s, "()"
   729  	}
   730  }
   731  
   732  func parseDecimalArgs(args string) (scale, precision int, err error) {
   733  	if !strings.HasPrefix(args, "(") || !strings.HasSuffix(args, ")") {
   734  		return 0, 0, fmt.Errorf("malformed decimal args: %s", args)
   735  	}
   736  	args = strings.TrimPrefix(args, "(")
   737  	args = strings.TrimSuffix(args, ")")
   738  	parts := strings.Split(args, ":")
   739  	if len(parts) != 2 {
   740  		return 0, 0, fmt.Errorf("malformed decimal args: (%s)", args)
   741  	}
   742  	s, err := strconv.ParseInt(parts[0], 10, 32)
   743  	if err != nil {
   744  		return 0, 0, err
   745  	}
   746  	p, err := strconv.ParseInt(parts[1], 10, 32)
   747  	if err != nil {
   748  		return 0, 0, err
   749  	}
   750  	return int(s), int(p), nil
   751  }
   752  
   753  func parseTimestampArgs(args string) (TimeUnit, error) {
   754  	if !strings.HasPrefix(args, "(") || !strings.HasSuffix(args, ")") {
   755  		return nil, fmt.Errorf("malformed timestamp args: %s", args)
   756  	}
   757  
   758  	args = strings.TrimPrefix(args, "(")
   759  	args = strings.TrimSuffix(args, ")")
   760  
   761  	if len(args) == 0 {
   762  		return Millisecond, nil
   763  	}
   764  
   765  	switch args {
   766  	case "millisecond":
   767  		return Millisecond, nil
   768  	case "microsecond":
   769  		return Microsecond, nil
   770  	case "nanosecond":
   771  		return Nanosecond, nil
   772  	default:
   773  	}
   774  
   775  	return nil, fmt.Errorf("unknown time unit: %s", args)
   776  }
   777  
   778  type goNode struct {
   779  	Node
   780  	gotype reflect.Type
   781  }
   782  
   783  func (n *goNode) GoType() reflect.Type { return n.gotype }
   784  
   785  var (
   786  	_ RowGroupOption = (*Schema)(nil)
   787  	_ ReaderOption   = (*Schema)(nil)
   788  	_ WriterOption   = (*Schema)(nil)
   789  )