github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/row.go (about)

     1  package parquet
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"io"
     7  	"reflect"
     8  )
     9  
const (
	// defaultRowBufferSize is the number of rows in the scratch buffer that
	// copyRows allocates when the caller does not supply one.
	defaultRowBufferSize = 42
)
    13  
// Row represents a parquet row as a slice of values.
//
// Each value should embed a column index, repetition level, and definition
// level allowing the program to determine how to reconstruct the original
// object from the row.
//
// Values are expected to be grouped by column, in increasing column order:
// this is the layout AppendRow produces and the one Range relies on to split
// a row back into its columns.
type Row []Value
    20  
    21  // MakeRow constructs a Row from a list of column values.
    22  //
    23  // The function panics if the column indexes of values in each column do not
    24  // match their position in the argument list.
    25  func MakeRow(columns ...[]Value) Row { return AppendRow(nil, columns...) }
    26  
    27  // AppendRow appends to row the given list of column values.
    28  //
    29  // AppendRow can be used to construct a Row value from columns, while retaining
    30  // the underlying memory buffer to avoid reallocation; for example:
    31  //
    32  // The function panics if the column indexes of values in each column do not
    33  // match their position in the argument list.
    34  func AppendRow(row Row, columns ...[]Value) Row {
    35  	numValues := 0
    36  
    37  	for expectedColumnIndex, column := range columns {
    38  		numValues += len(column)
    39  
    40  		for _, value := range column {
    41  			if value.columnIndex != ^int16(expectedColumnIndex) {
    42  				panic(fmt.Sprintf("value of column %d has column index %d", expectedColumnIndex, value.Column()))
    43  			}
    44  		}
    45  	}
    46  
    47  	if capacity := cap(row) - len(row); capacity < numValues {
    48  		row = append(make(Row, 0, len(row)+numValues), row...)
    49  	}
    50  
    51  	return appendRow(row, columns)
    52  }
    53  
    54  func appendRow(row Row, columns [][]Value) Row {
    55  	for _, column := range columns {
    56  		row = append(row, column...)
    57  	}
    58  	return row
    59  }
    60  
    61  // Clone creates a copy of the row which shares no pointers.
    62  //
    63  // This method is useful to capture rows after a call to RowReader.ReadRows when
    64  // values need to be retained before the next call to ReadRows or after the lifespan
    65  // of the reader.
    66  func (row Row) Clone() Row {
    67  	clone := make(Row, len(row))
    68  	for i := range row {
    69  		clone[i] = row[i].Clone()
    70  	}
    71  	return clone
    72  }
    73  
    74  // Equal returns true if row and other contain the same sequence of values.
    75  func (row Row) Equal(other Row) bool {
    76  	if len(row) != len(other) {
    77  		return false
    78  	}
    79  	for i := range row {
    80  		if !Equal(row[i], other[i]) {
    81  			return false
    82  		}
    83  		if row[i].repetitionLevel != other[i].repetitionLevel {
    84  			return false
    85  		}
    86  		if row[i].definitionLevel != other[i].definitionLevel {
    87  			return false
    88  		}
    89  		if row[i].columnIndex != other[i].columnIndex {
    90  			return false
    91  		}
    92  	}
    93  	return true
    94  }
    95  
    96  // Range calls f for each column of row.
    97  func (row Row) Range(f func(columnIndex int, columnValues []Value) bool) {
    98  	columnIndex := 0
    99  
   100  	for i := 0; i < len(row); {
   101  		j := i + 1
   102  
   103  		for j < len(row) && row[j].columnIndex == ^int16(columnIndex) {
   104  			j++
   105  		}
   106  
   107  		if !f(columnIndex, row[i:j:j]) {
   108  			break
   109  		}
   110  
   111  		columnIndex++
   112  		i = j
   113  	}
   114  }
   115  
// RowSeeker is an interface implemented by readers of parquet rows which can be
// positioned at a specific row index.
type RowSeeker interface {
	// SeekToRow positions the stream on the given row index.
	//
	// Some implementations of the interface may only allow seeking forward.
	//
	// The method returns io.ErrClosedPipe if the stream had already been closed.
	SeekToRow(int64) error
}
   126  
// RowReader reads a sequence of parquet rows.
type RowReader interface {
	// ReadRows reads rows from the reader, returning the number of rows read
	// into the buffer, and any error that occurred. Note that the rows read
	// into the buffer are not safe for reuse after a subsequent call to
	// ReadRows. Callers that want to reuse rows must copy the rows using Clone.
	//
	// When all rows have been read, the reader returns io.EOF to indicate the
	// end of the sequence. It is valid for the reader to return both a non-zero
	// number of rows and a non-nil error (including io.EOF).
	//
	// The buffer of rows passed as argument will be used to store values of
	// each row read from the reader. If the rows are not nil, the backing array
	// of the slices will be used as an optimization to avoid re-allocating new
	// arrays.
	//
	// The application is expected to handle the case where ReadRows returns
	// fewer rows than requested and no error, by looking at the first returned
	// value from ReadRows, which is the number of rows that were read.
	ReadRows([]Row) (int, error)
}
   148  
// RowReaderFrom reads parquet rows from reader.
//
// copyRows uses this interface as a fast path: when the destination writer
// implements it, the whole copy is delegated to ReadRowsFrom.
type RowReaderFrom interface {
	// ReadRowsFrom consumes rows from the given reader. copyRows returns the
	// int64 result as its count of rows written.
	ReadRowsFrom(RowReader) (int64, error)
}
   153  
// RowReaderWithSchema is an extension of the RowReader interface which
// advertises the schema of rows returned by ReadRows calls.
type RowReaderWithSchema interface {
	RowReader
	// Schema returns the schema of the rows produced by the reader.
	Schema() *Schema
}
   160  
// RowReadSeeker is an interface implemented by row readers which support
// seeking to arbitrary row positions. It combines RowReader and RowSeeker.
type RowReadSeeker interface {
	RowReader
	RowSeeker
}
   167  
// RowWriter writes parquet rows to an underlying medium.
type RowWriter interface {
	// WriteRows writes rows to the writer, returning the number of rows
	// written and any error that occurred.
	//
	// Because columnar operations operate on independent columns of values,
	// writes of rows may not be atomic operations, and could result in some
	// rows being partially written. The method returns the number of rows that
	// were successfully written, but if an error occurs, values of the row(s)
	// that failed to be written may have been partially committed to their
	// columns. For that reason, applications should consider a write error as
	// fatal and assume that they need to discard the state: they cannot retry
	// the write nor recover the underlying file.
	WriteRows([]Row) (int, error)
}
   183  
// RowWriterTo writes parquet rows to a writer.
//
// copyRows uses this interface as a fast path: when the source reader
// implements it, the whole copy is delegated to WriteRowsTo.
type RowWriterTo interface {
	// WriteRowsTo writes rows to the given writer. copyRows returns the int64
	// result as its count of rows written.
	WriteRowsTo(RowWriter) (int64, error)
}
   188  
// RowWriterWithSchema is an extension of the RowWriter interface which
// advertises the schema of rows expected to be passed to WriteRows calls.
type RowWriterWithSchema interface {
	RowWriter
	// Schema returns the schema of the rows the writer expects.
	Schema() *Schema
}
   195  
   196  // RowReaderFunc is a function type implementing the RowReader interface.
   197  type RowReaderFunc func([]Row) (int, error)
   198  
   199  func (f RowReaderFunc) ReadRows(rows []Row) (int, error) { return f(rows) }
   200  
   201  // RowWriterFunc is a function type implementing the RowWriter interface.
   202  type RowWriterFunc func([]Row) (int, error)
   203  
   204  func (f RowWriterFunc) WriteRows(rows []Row) (int, error) { return f(rows) }
   205  
   206  // MultiRowWriter constructs a RowWriter which dispatches writes to all the
   207  // writers passed as arguments.
   208  //
   209  // When writing rows, if any of the writers returns an error, the operation is
   210  // aborted and the error returned. If one of the writers did not error, but did
   211  // not write all the rows, the operation is aborted and io.ErrShortWrite is
   212  // returned.
   213  //
   214  // Rows are written sequentially to each writer in the order they are given to
   215  // this function.
   216  func MultiRowWriter(writers ...RowWriter) RowWriter {
   217  	m := &multiRowWriter{writers: make([]RowWriter, len(writers))}
   218  	copy(m.writers, writers)
   219  	return m
   220  }
   221  
   222  type multiRowWriter struct{ writers []RowWriter }
   223  
   224  func (m *multiRowWriter) WriteRows(rows []Row) (int, error) {
   225  	for _, w := range m.writers {
   226  		n, err := w.WriteRows(rows)
   227  		if err != nil {
   228  			return n, err
   229  		}
   230  		if n != len(rows) {
   231  			return n, io.ErrShortWrite
   232  		}
   233  	}
   234  	return len(rows), nil
   235  }
   236  
   237  type forwardRowSeeker struct {
   238  	rows  RowReader
   239  	seek  int64
   240  	index int64
   241  }
   242  
   243  func (r *forwardRowSeeker) ReadRows(rows []Row) (int, error) {
   244  	for {
   245  		n, err := r.rows.ReadRows(rows)
   246  
   247  		if n > 0 && r.index < r.seek {
   248  			skip := r.seek - r.index
   249  			r.index += int64(n)
   250  			if skip >= int64(n) {
   251  				continue
   252  			}
   253  
   254  			for i, j := 0, int(skip); j < n; i++ {
   255  				rows[i] = append(rows[i][:0], rows[j]...)
   256  			}
   257  
   258  			n -= int(skip)
   259  		}
   260  
   261  		return n, err
   262  	}
   263  }
   264  
   265  func (r *forwardRowSeeker) SeekToRow(rowIndex int64) error {
   266  	if rowIndex >= r.index {
   267  		r.seek = rowIndex
   268  		return nil
   269  	}
   270  	return fmt.Errorf(
   271  		"SeekToRow: %T does not implement parquet.RowSeeker: cannot seek backward from row %d to %d",
   272  		r.rows,
   273  		r.index,
   274  		rowIndex,
   275  	)
   276  }
   277  
   278  // CopyRows copies rows from src to dst.
   279  //
   280  // The underlying types of src and dst are tested to determine if they expose
   281  // information about the schema of rows that are read and expected to be
   282  // written. If the schema information are available but do not match, the
   283  // function will attempt to automatically convert the rows from the source
   284  // schema to the destination.
   285  //
   286  // As an optimization, the src argument may implement RowWriterTo to bypass
   287  // the default row copy logic and provide its own. The dst argument may also
   288  // implement RowReaderFrom for the same purpose.
   289  //
   290  // The function returns the number of rows written, or any error encountered
   291  // other than io.EOF.
   292  func CopyRows(dst RowWriter, src RowReader) (int64, error) {
   293  	return copyRows(dst, src, nil)
   294  }
   295  
   296  func copyRows(dst RowWriter, src RowReader, buf []Row) (written int64, err error) {
   297  	targetSchema := targetSchemaOf(dst)
   298  	sourceSchema := sourceSchemaOf(src)
   299  
   300  	if targetSchema != nil && sourceSchema != nil {
   301  		if !nodesAreEqual(targetSchema, sourceSchema) {
   302  			conv, err := Convert(targetSchema, sourceSchema)
   303  			if err != nil {
   304  				return 0, err
   305  			}
   306  			// The conversion effectively disables a potential optimization
   307  			// if the source reader implemented RowWriterTo. It is a trade off
   308  			// we are making to optimize for safety rather than performance.
   309  			//
   310  			// Entering this code path should not be the common case tho, it is
   311  			// most often used when parquet schemas are evolving, but we expect
   312  			// that the majority of files of an application to be sharing a
   313  			// common schema.
   314  			src = ConvertRowReader(src, conv)
   315  		}
   316  	}
   317  
   318  	if wt, ok := src.(RowWriterTo); ok {
   319  		return wt.WriteRowsTo(dst)
   320  	}
   321  
   322  	if rf, ok := dst.(RowReaderFrom); ok {
   323  		return rf.ReadRowsFrom(src)
   324  	}
   325  
   326  	if len(buf) == 0 {
   327  		buf = make([]Row, defaultRowBufferSize)
   328  	}
   329  
   330  	defer clearRows(buf)
   331  
   332  	for {
   333  		rn, err := src.ReadRows(buf)
   334  
   335  		if rn > 0 {
   336  			wn, err := dst.WriteRows(buf[:rn])
   337  			if err != nil {
   338  				return written, err
   339  			}
   340  
   341  			written += int64(wn)
   342  		}
   343  
   344  		if err != nil {
   345  			if errors.Is(err, io.EOF) {
   346  				err = nil
   347  			}
   348  			return written, err
   349  		}
   350  
   351  		if rn == 0 {
   352  			return written, io.ErrNoProgress
   353  		}
   354  	}
   355  }
   356  
   357  func makeRows(n int) []Row {
   358  	buf := make([]Value, n)
   359  	row := make([]Row, n)
   360  	for i := range row {
   361  		row[i] = buf[i : i : i+1]
   362  	}
   363  	return row
   364  }
   365  
   366  func clearRows(rows []Row) {
   367  	for i, values := range rows {
   368  		clearValues(values)
   369  		rows[i] = values[:0]
   370  	}
   371  }
   372  
   373  func sourceSchemaOf(r RowReader) *Schema {
   374  	if rrs, ok := r.(RowReaderWithSchema); ok {
   375  		return rrs.Schema()
   376  	}
   377  	return nil
   378  }
   379  
   380  func targetSchemaOf(w RowWriter) *Schema {
   381  	if rws, ok := w.(RowWriterWithSchema); ok {
   382  		return rws.Schema()
   383  	}
   384  	return nil
   385  }
   386  
// =============================================================================
// Functions returning closures are marked with "go:noinline" below to prevent
// losing naming information of the closure in stack traces.
//
// Because some of the functions are very short (they simply return a closure),
// the compiler inlines them at their call site, which results in the closure
// being named something like parquet.deconstructFuncOf.func2 instead of the
// original parquet.deconstructFuncOfLeaf.func1; the latter being much more
// meaningful when reading CPU or memory profiles.
// =============================================================================
   397  
// levels carries the nesting state that the deconstruct and reconstruct
// closures thread through their recursion. It is passed by value, so changes
// made while handling a node are only seen by that node's children.
type levels struct {
	// repetitionDepth is incremented each time a repeated node or a map is
	// entered.
	repetitionDepth byte
	// repetitionLevel is the repetition level stamped on deconstructed
	// values; it is set to repetitionDepth once the first element of a
	// repeated node or map has been processed.
	repetitionLevel byte
	// definitionLevel is incremented when entering a repeated node or map
	// that has elements, and for an optional node whose value is present;
	// deconstructed values are stamped with it.
	definitionLevel byte
}
   403  
// deconstructFunc accepts the per-column buffers of the row being built, the
// current levels, and the Go value to deconstruct. It recurses until it hits a
// leaf node, where a single parquet value is appended to the buffer of the
// leaf's column. An invalid (zero) reflect.Value stands for an absent value.
type deconstructFunc func([][]Value, levels, reflect.Value)
   409  
   410  func deconstructFuncOf(columnIndex int16, node Node) (int16, deconstructFunc) {
   411  	switch {
   412  	case node.Optional():
   413  		return deconstructFuncOfOptional(columnIndex, node)
   414  	case node.Repeated():
   415  		return deconstructFuncOfRepeated(columnIndex, node)
   416  	case isList(node):
   417  		return deconstructFuncOfList(columnIndex, node)
   418  	case isMap(node):
   419  		return deconstructFuncOfMap(columnIndex, node)
   420  	default:
   421  		return deconstructFuncOfRequired(columnIndex, node)
   422  	}
   423  }
   424  
   425  //go:noinline
   426  func deconstructFuncOfOptional(columnIndex int16, node Node) (int16, deconstructFunc) {
   427  	columnIndex, deconstruct := deconstructFuncOf(columnIndex, Required(node))
   428  	return columnIndex, func(columns [][]Value, levels levels, value reflect.Value) {
   429  		if value.IsValid() {
   430  			if value.IsZero() {
   431  				value = reflect.Value{}
   432  			} else {
   433  				if value.Kind() == reflect.Ptr {
   434  					value = value.Elem()
   435  				}
   436  				levels.definitionLevel++
   437  			}
   438  		}
   439  		deconstruct(columns, levels, value)
   440  	}
   441  }
   442  
   443  //go:noinline
   444  func deconstructFuncOfRepeated(columnIndex int16, node Node) (int16, deconstructFunc) {
   445  	columnIndex, deconstruct := deconstructFuncOf(columnIndex, Required(node))
   446  	return columnIndex, func(columns [][]Value, levels levels, value reflect.Value) {
   447  		if value.Kind() == reflect.Interface {
   448  			value = value.Elem()
   449  		}
   450  
   451  		if !value.IsValid() || value.Len() == 0 {
   452  			deconstruct(columns, levels, reflect.Value{})
   453  			return
   454  		}
   455  
   456  		levels.repetitionDepth++
   457  		levels.definitionLevel++
   458  
   459  		for i, n := 0, value.Len(); i < n; i++ {
   460  			deconstruct(columns, levels, value.Index(i))
   461  			levels.repetitionLevel = levels.repetitionDepth
   462  		}
   463  	}
   464  }
   465  
   466  func deconstructFuncOfRequired(columnIndex int16, node Node) (int16, deconstructFunc) {
   467  	switch {
   468  	case node.Leaf():
   469  		return deconstructFuncOfLeaf(columnIndex, node)
   470  	default:
   471  		return deconstructFuncOfGroup(columnIndex, node)
   472  	}
   473  }
   474  
   475  func deconstructFuncOfList(columnIndex int16, node Node) (int16, deconstructFunc) {
   476  	return deconstructFuncOf(columnIndex, Repeated(listElementOf(node)))
   477  }
   478  
   479  //go:noinline
   480  func deconstructFuncOfMap(columnIndex int16, node Node) (int16, deconstructFunc) {
   481  	keyValue := mapKeyValueOf(node)
   482  	keyValueType := keyValue.GoType()
   483  	keyValueElem := keyValueType.Elem()
   484  	keyType := keyValueElem.Field(0).Type
   485  	valueType := keyValueElem.Field(1).Type
   486  	nextColumnIndex, deconstruct := deconstructFuncOf(columnIndex, schemaOf(keyValueElem))
   487  	return nextColumnIndex, func(columns [][]Value, levels levels, mapValue reflect.Value) {
   488  		if !mapValue.IsValid() || mapValue.Len() == 0 {
   489  			deconstruct(columns, levels, reflect.Value{})
   490  			return
   491  		}
   492  
   493  		levels.repetitionDepth++
   494  		levels.definitionLevel++
   495  
   496  		elem := reflect.New(keyValueElem).Elem()
   497  		k := elem.Field(0)
   498  		v := elem.Field(1)
   499  
   500  		for _, key := range mapValue.MapKeys() {
   501  			k.Set(key.Convert(keyType))
   502  			v.Set(mapValue.MapIndex(key).Convert(valueType))
   503  			deconstruct(columns, levels, elem)
   504  			levels.repetitionLevel = levels.repetitionDepth
   505  		}
   506  	}
   507  }
   508  
   509  //go:noinline
   510  func deconstructFuncOfGroup(columnIndex int16, node Node) (int16, deconstructFunc) {
   511  	fields := node.Fields()
   512  	funcs := make([]deconstructFunc, len(fields))
   513  	for i, field := range fields {
   514  		columnIndex, funcs[i] = deconstructFuncOf(columnIndex, field)
   515  	}
   516  	return columnIndex, func(columns [][]Value, levels levels, value reflect.Value) {
   517  		if value.IsValid() {
   518  			for i, f := range funcs {
   519  				f(columns, levels, fields[i].Value(value))
   520  			}
   521  		} else {
   522  			for _, f := range funcs {
   523  				f(columns, levels, value)
   524  			}
   525  		}
   526  	}
   527  }
   528  
   529  //go:noinline
   530  func deconstructFuncOfLeaf(columnIndex int16, node Node) (int16, deconstructFunc) {
   531  	if columnIndex > MaxColumnIndex {
   532  		panic("row cannot be deconstructed because it has more than 127 columns")
   533  	}
   534  	typ := node.Type()
   535  	kind := typ.Kind()
   536  	lt := typ.LogicalType()
   537  	valueColumnIndex := ^columnIndex
   538  	return columnIndex + 1, func(columns [][]Value, levels levels, value reflect.Value) {
   539  		v := Value{}
   540  
   541  		if value.IsValid() {
   542  			v = makeValue(kind, lt, value)
   543  		}
   544  
   545  		v.repetitionLevel = levels.repetitionLevel
   546  		v.definitionLevel = levels.definitionLevel
   547  		v.columnIndex = valueColumnIndex
   548  
   549  		columns[columnIndex] = append(columns[columnIndex], v)
   550  	}
   551  }
   552  
// reconstructFunc is the counterpart of deconstructFunc: it rebuilds a Go
// value from the parquet values of a row. It receives the Go value to assign
// to, the current levels, and the column buffers holding the values of the
// node being reconstructed, and returns an error when the value cannot be
// rebuilt.
type reconstructFunc func(reflect.Value, levels, [][]Value) error
   557  
   558  func reconstructFuncOf(columnIndex int16, node Node) (int16, reconstructFunc) {
   559  	switch {
   560  	case node.Optional():
   561  		return reconstructFuncOfOptional(columnIndex, node)
   562  	case node.Repeated():
   563  		return reconstructFuncOfRepeated(columnIndex, node)
   564  	case isList(node):
   565  		return reconstructFuncOfList(columnIndex, node)
   566  	case isMap(node):
   567  		return reconstructFuncOfMap(columnIndex, node)
   568  	default:
   569  		return reconstructFuncOfRequired(columnIndex, node)
   570  	}
   571  }
   572  
   573  //go:noinline
   574  func reconstructFuncOfOptional(columnIndex int16, node Node) (int16, reconstructFunc) {
   575  	// We convert the optional func to required so that we eventually reach the
   576  	// leaf base-case.  We're still using the heuristics of optional in the
   577  	// returned closure (see levels.definitionLevel++), but we don't actually do
   578  	// deserialization here, that happens in the leaf function, hence this line.
   579  	nextColumnIndex, reconstruct := reconstructFuncOf(columnIndex, Required(node))
   580  
   581  	return nextColumnIndex, func(value reflect.Value, levels levels, columns [][]Value) error {
   582  		levels.definitionLevel++
   583  
   584  		if columns[0][0].definitionLevel < levels.definitionLevel {
   585  			value.Set(reflect.Zero(value.Type()))
   586  			return nil
   587  		}
   588  
   589  		if value.Kind() == reflect.Ptr {
   590  			if value.IsNil() {
   591  				value.Set(reflect.New(value.Type().Elem()))
   592  			}
   593  			value = value.Elem()
   594  		}
   595  
   596  		return reconstruct(value, levels, columns)
   597  	}
   598  }
   599  
   600  func setMakeSlice(v reflect.Value, n int) reflect.Value {
   601  	t := v.Type()
   602  	if t.Kind() == reflect.Interface {
   603  		t = reflect.TypeOf(([]interface{})(nil))
   604  	}
   605  	s := reflect.MakeSlice(t, n, n)
   606  	v.Set(s)
   607  	return s
   608  }
   609  
   610  //go:noinline
   611  func reconstructFuncOfRepeated(columnIndex int16, node Node) (int16, reconstructFunc) {
   612  	nextColumnIndex, reconstruct := reconstructFuncOf(columnIndex, Required(node))
   613  	return nextColumnIndex, func(value reflect.Value, levels levels, columns [][]Value) error {
   614  		levels.repetitionDepth++
   615  		levels.definitionLevel++
   616  
   617  		if columns[0][0].definitionLevel < levels.definitionLevel {
   618  			setMakeSlice(value, 0)
   619  			return nil
   620  		}
   621  
   622  		values := make([][]Value, len(columns))
   623  		column := columns[0]
   624  		n := 0
   625  
   626  		for i, column := range columns {
   627  			values[i] = column[0:0:len(column)]
   628  		}
   629  
   630  		for i := 0; i < len(column); {
   631  			i++
   632  			n++
   633  
   634  			for i < len(column) && column[i].repetitionLevel > levels.repetitionDepth {
   635  				i++
   636  			}
   637  		}
   638  
   639  		value = setMakeSlice(value, n)
   640  
   641  		for i := 0; i < n; i++ {
   642  			for j, column := range values {
   643  				column = column[:cap(column)]
   644  				if len(column) == 0 {
   645  					continue
   646  				}
   647  
   648  				k := 1
   649  				for k < len(column) && column[k].repetitionLevel > levels.repetitionDepth {
   650  					k++
   651  				}
   652  
   653  				values[j] = column[:k]
   654  			}
   655  
   656  			if err := reconstruct(value.Index(i), levels, values); err != nil {
   657  				return err
   658  			}
   659  
   660  			for j, column := range values {
   661  				values[j] = column[len(column):len(column):cap(column)]
   662  			}
   663  
   664  			levels.repetitionLevel = levels.repetitionDepth
   665  		}
   666  
   667  		return nil
   668  	}
   669  }
   670  
   671  func reconstructFuncOfRequired(columnIndex int16, node Node) (int16, reconstructFunc) {
   672  	switch {
   673  	case node.Leaf():
   674  		return reconstructFuncOfLeaf(columnIndex, node)
   675  	default:
   676  		return reconstructFuncOfGroup(columnIndex, node)
   677  	}
   678  }
   679  
   680  func reconstructFuncOfList(columnIndex int16, node Node) (int16, reconstructFunc) {
   681  	return reconstructFuncOf(columnIndex, Repeated(listElementOf(node)))
   682  }
   683  
   684  //go:noinline
   685  func reconstructFuncOfMap(columnIndex int16, node Node) (int16, reconstructFunc) {
   686  	keyValue := mapKeyValueOf(node)
   687  	keyValueType := keyValue.GoType()
   688  	keyValueElem := keyValueType.Elem()
   689  	keyValueZero := reflect.Zero(keyValueElem)
   690  	nextColumnIndex, reconstruct := reconstructFuncOf(columnIndex, schemaOf(keyValueElem))
   691  	return nextColumnIndex, func(value reflect.Value, levels levels, columns [][]Value) error {
   692  		levels.repetitionDepth++
   693  		levels.definitionLevel++
   694  
   695  		if columns[0][0].definitionLevel < levels.definitionLevel {
   696  			value.Set(reflect.MakeMap(value.Type()))
   697  			return nil
   698  		}
   699  
   700  		values := make([][]Value, len(columns))
   701  		column := columns[0]
   702  		t := value.Type()
   703  		if t.Kind() == reflect.Interface {
   704  			t = reflect.TypeOf((map[string]any)(nil))
   705  		}
   706  		k := t.Key()
   707  		v := t.Elem()
   708  		n := 0
   709  
   710  		for i, column := range columns {
   711  			values[i] = column[0:0:len(column)]
   712  		}
   713  
   714  		for i := 0; i < len(column); {
   715  			i++
   716  			n++
   717  
   718  			for i < len(column) && column[i].repetitionLevel > levels.repetitionDepth {
   719  				i++
   720  			}
   721  		}
   722  
   723  		if value.IsNil() {
   724  			m := reflect.MakeMapWithSize(t, n)
   725  			value.Set(m)
   726  			value = m // track map instead of interface{} for read[any]()
   727  		}
   728  
   729  		elem := reflect.New(keyValueElem).Elem()
   730  		for i := 0; i < n; i++ {
   731  			for j, column := range values {
   732  				column = column[:cap(column)]
   733  				k := 1
   734  
   735  				for k < len(column) && column[k].repetitionLevel > levels.repetitionDepth {
   736  					k++
   737  				}
   738  
   739  				values[j] = column[:k]
   740  			}
   741  
   742  			if err := reconstruct(elem, levels, values); err != nil {
   743  				return err
   744  			}
   745  
   746  			for j, column := range values {
   747  				values[j] = column[len(column):len(column):cap(column)]
   748  			}
   749  
   750  			value.SetMapIndex(elem.Field(0).Convert(k), elem.Field(1).Convert(v))
   751  			elem.Set(keyValueZero)
   752  			levels.repetitionLevel = levels.repetitionDepth
   753  		}
   754  
   755  		return nil
   756  	}
   757  }
   758  
   759  //go:noinline
   760  func reconstructFuncOfGroup(columnIndex int16, node Node) (int16, reconstructFunc) {
   761  	fields := node.Fields()
   762  	funcs := make([]reconstructFunc, len(fields))
   763  	columnOffsets := make([]int16, len(fields))
   764  	firstColumnIndex := columnIndex
   765  
   766  	for i, field := range fields {
   767  		columnIndex, funcs[i] = reconstructFuncOf(columnIndex, field)
   768  		columnOffsets[i] = columnIndex - firstColumnIndex
   769  	}
   770  
   771  	return columnIndex, func(value reflect.Value, levels levels, columns [][]Value) error {
   772  		if value.Kind() == reflect.Interface {
   773  			value.Set(reflect.MakeMap(reflect.TypeOf((map[string]interface{})(nil))))
   774  			value = value.Elem()
   775  		}
   776  
   777  		if value.Kind() == reflect.Map {
   778  			elemType := value.Type().Elem()
   779  			name := reflect.New(reflect.TypeOf("")).Elem()
   780  			elem := reflect.New(elemType).Elem()
   781  			zero := reflect.Zero(elemType)
   782  
   783  			if value.Len() > 0 {
   784  				value.Set(reflect.MakeMap(value.Type()))
   785  			}
   786  
   787  			off := int16(0)
   788  
   789  			for i, f := range funcs {
   790  				name.SetString(fields[i].Name())
   791  				end := columnOffsets[i]
   792  				err := f(elem, levels, columns[off:end:end])
   793  				if err != nil {
   794  					return fmt.Errorf("%s → %w", name, err)
   795  				}
   796  				off = end
   797  				value.SetMapIndex(name, elem)
   798  				elem.Set(zero)
   799  			}
   800  		} else {
   801  			off := int16(0)
   802  
   803  			for i, f := range funcs {
   804  				end := columnOffsets[i]
   805  				err := f(fields[i].Value(value), levels, columns[off:end:end])
   806  				if err != nil {
   807  					return fmt.Errorf("%s → %w", fields[i].Name(), err)
   808  				}
   809  				off = end
   810  			}
   811  		}
   812  
   813  		return nil
   814  	}
   815  }
   816  
   817  //go:noinline
   818  func reconstructFuncOfLeaf(columnIndex int16, node Node) (int16, reconstructFunc) {
   819  	typ := node.Type()
   820  	return columnIndex + 1, func(value reflect.Value, _ levels, columns [][]Value) error {
   821  		column := columns[0]
   822  		if len(column) == 0 {
   823  			return fmt.Errorf("no values found in parquet row for column %d", columnIndex)
   824  		}
   825  		return typ.AssignValue(value, column[0])
   826  	}
   827  }