github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/row.go (about)

     1  package parquet
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"io"
     7  	"reflect"
     8  )
     9  
const (
	// defaultRowBufferSize is the number of rows read per iteration by the
	// generic copy loop in copyRows when the caller does not supply a buffer.
	defaultRowBufferSize = 42
)
    13  
// Row represents a parquet row as a slice of values.
//
// Each value should embed a column index, repetition level, and definition
// level allowing the program to determine how to reconstruct the original
// object from the row. Values belonging to the same column are stored
// contiguously, ordered by increasing column index (see Range and AppendRow).
type Row []Value
    20  
    21  // MakeRow constructs a Row from a list of column values.
    22  //
    23  // The function panics if the column indexes of values in each column do not
    24  // match their position in the argument list.
    25  func MakeRow(columns ...[]Value) Row { return AppendRow(nil, columns...) }
    26  
    27  // AppendRow appends to row the given list of column values.
    28  //
    29  // AppendRow can be used to construct a Row value from columns, while retaining
    30  // the underlying memory buffer to avoid reallocation; for example:
    31  //
    32  // The function panics if the column indexes of values in each column do not
    33  // match their position in the argument list.
    34  func AppendRow(row Row, columns ...[]Value) Row {
    35  	numValues := 0
    36  
    37  	for expectedColumnIndex, column := range columns {
    38  		numValues += len(column)
    39  
    40  		for _, value := range column {
    41  			if value.columnIndex != ^int16(expectedColumnIndex) {
    42  				panic(fmt.Sprintf("value of column %d has column index %d", expectedColumnIndex, value.Column()))
    43  			}
    44  		}
    45  	}
    46  
    47  	if capacity := cap(row) - len(row); capacity < numValues {
    48  		row = append(make(Row, 0, len(row)+numValues), row...)
    49  	}
    50  
    51  	return appendRow(row, columns)
    52  }
    53  
    54  func appendRow(row Row, columns [][]Value) Row {
    55  	for _, column := range columns {
    56  		row = append(row, column...)
    57  	}
    58  	return row
    59  }
    60  
    61  // Clone creates a copy of the row which shares no pointers.
    62  //
    63  // This method is useful to capture rows after a call to RowReader.ReadRows when
    64  // values need to be retained before the next call to ReadRows or after the lifespan
    65  // of the reader.
    66  func (row Row) Clone() Row {
    67  	clone := make(Row, len(row))
    68  	for i := range row {
    69  		clone[i] = row[i].Clone()
    70  	}
    71  	return clone
    72  }
    73  
    74  // Equal returns true if row and other contain the same sequence of values.
    75  func (row Row) Equal(other Row) bool {
    76  	if len(row) != len(other) {
    77  		return false
    78  	}
    79  	for i := range row {
    80  		if !Equal(row[i], other[i]) {
    81  			return false
    82  		}
    83  		if row[i].repetitionLevel != other[i].repetitionLevel {
    84  			return false
    85  		}
    86  		if row[i].definitionLevel != other[i].definitionLevel {
    87  			return false
    88  		}
    89  		if row[i].columnIndex != other[i].columnIndex {
    90  			return false
    91  		}
    92  	}
    93  	return true
    94  }
    95  
    96  // Range calls f for each column of row.
    97  func (row Row) Range(f func(columnIndex int, columnValues []Value) bool) {
    98  	columnIndex := 0
    99  
   100  	for i := 0; i < len(row); {
   101  		j := i + 1
   102  
   103  		for j < len(row) && row[j].columnIndex == ^int16(columnIndex) {
   104  			j++
   105  		}
   106  
   107  		if !f(columnIndex, row[i:j:j]) {
   108  			break
   109  		}
   110  
   111  		columnIndex++
   112  		i = j
   113  	}
   114  }
   115  
// RowSeeker is an interface implemented by readers of parquet rows which can be
// positioned at a specific row index.
type RowSeeker interface {
	// Positions the stream on the given row index.
	//
	// Some implementations of the interface may only allow seeking forward.
	//
	// The method returns io.ErrClosedPipe if the stream had already been closed.
	SeekToRow(int64) error
}
   126  
// RowReader reads a sequence of parquet rows.
type RowReader interface {
	// ReadRows reads rows from the reader, returning the number of rows read
	// into the buffer, and any error that occurred. Note that the rows read
	// into the buffer are not safe for reuse after a subsequent call to
	// ReadRows. Callers that want to reuse rows must copy the rows using Clone.
	//
	// When all rows have been read, the reader returns io.EOF to indicate the
	// end of the sequence. It is valid for the reader to return both a non-zero
	// number of rows and a non-nil error (including io.EOF).
	//
	// The buffer of rows passed as argument will be used to store values of
	// each row read from the reader. If the rows are not nil, the backing array
	// of the slices will be used as an optimization to avoid re-allocating new
	// arrays.
	//
	// The application is expected to handle the case where ReadRows returns
	// fewer rows than requested and no error, by looking at the first returned
	// value from ReadRows, which is the number of rows that were read.
	ReadRows([]Row) (int, error)
}
   148  
// RowReaderFrom reads parquet rows from reader.
//
// CopyRows uses this interface as an optimization: destinations implementing
// it receive the source reader directly, bypassing the generic copy loop.
type RowReaderFrom interface {
	ReadRowsFrom(RowReader) (int64, error)
}
   153  
// RowReaderWithSchema is an extension of the RowReader interface which
// advertises the schema of rows returned by ReadRow calls.
//
// CopyRows consults this interface to detect schema mismatches between the
// source and destination, and convert rows when needed.
type RowReaderWithSchema interface {
	RowReader
	Schema() *Schema
}
   160  
// RowReadSeeker is an interface implemented by row readers which support
// seeking to arbitrary row positions.
type RowReadSeeker interface {
	RowReader
	RowSeeker
}
   167  
// RowWriter writes parquet rows to an underlying medium.
type RowWriter interface {
	// Writes rows to the writer, returning the number of rows written and any
	// error that occurred.
	//
	// Because columnar operations operate on independent columns of values,
	// writes of rows may not be atomic operations, and could result in some
	// rows being partially written. The method returns the number of rows that
	// were successfully written, but if an error occurs, values of the row(s)
	// that failed to be written may have been partially committed to their
	// columns. For that reason, applications should consider a write error as
	// fatal and assume that they need to discard the state; they cannot retry
	// the write nor recover the underlying file.
	WriteRows([]Row) (int, error)
}
   183  
// RowWriterTo writes parquet rows to a writer.
//
// CopyRows uses this interface as an optimization: sources implementing it
// write rows directly to the destination, bypassing the generic copy loop.
type RowWriterTo interface {
	WriteRowsTo(RowWriter) (int64, error)
}
   188  
// RowWriterWithSchema is an extension of the RowWriter interface which
// advertises the schema of rows expected to be passed to WriteRow calls.
//
// CopyRows consults this interface to detect schema mismatches between the
// source and destination, and convert rows when needed.
type RowWriterWithSchema interface {
	RowWriter
	Schema() *Schema
}
   195  
// RowReaderFunc is a function type implementing the RowReader interface.
type RowReaderFunc func([]Row) (int, error)

// ReadRows satisfies RowReader by calling f itself.
func (f RowReaderFunc) ReadRows(rows []Row) (int, error) { return f(rows) }
   200  
// RowWriterFunc is a function type implementing the RowWriter interface.
type RowWriterFunc func([]Row) (int, error)

// WriteRows satisfies RowWriter by calling f itself.
func (f RowWriterFunc) WriteRows(rows []Row) (int, error) { return f(rows) }
   205  
   206  // MultiRowWriter constructs a RowWriter which dispatches writes to all the
   207  // writers passed as arguments.
   208  //
   209  // When writing rows, if any of the writers returns an error, the operation is
   210  // aborted and the error returned. If one of the writers did not error, but did
   211  // not write all the rows, the operation is aborted and io.ErrShortWrite is
   212  // returned.
   213  //
   214  // Rows are written sequentially to each writer in the order they are given to
   215  // this function.
   216  func MultiRowWriter(writers ...RowWriter) RowWriter {
   217  	m := &multiRowWriter{writers: make([]RowWriter, len(writers))}
   218  	copy(m.writers, writers)
   219  	return m
   220  }
   221  
   222  type multiRowWriter struct{ writers []RowWriter }
   223  
   224  func (m *multiRowWriter) WriteRows(rows []Row) (int, error) {
   225  	for _, w := range m.writers {
   226  		n, err := w.WriteRows(rows)
   227  		if err != nil {
   228  			return n, err
   229  		}
   230  		if n != len(rows) {
   231  			return n, io.ErrShortWrite
   232  		}
   233  	}
   234  	return len(rows), nil
   235  }
   236  
// forwardRowSeeker adapts a plain RowReader into a RowReadSeeker that only
// supports seeking forward, by reading and discarding rows until the target
// position is reached.
type forwardRowSeeker struct {
	rows  RowReader // underlying reader
	seek  int64     // target row index requested by SeekToRow
	index int64     // index of the next row to be read from rows
}
   242  
   243  func (r *forwardRowSeeker) ReadRows(rows []Row) (int, error) {
   244  	for {
   245  		n, err := r.rows.ReadRows(rows)
   246  
   247  		if n > 0 && r.index < r.seek {
   248  			skip := r.seek - r.index
   249  			r.index += int64(n)
   250  			if skip >= int64(n) {
   251  				continue
   252  			}
   253  
   254  			for i, j := 0, int(skip); j < n; i++ {
   255  				rows[i] = append(rows[i][:0], rows[j]...)
   256  			}
   257  
   258  			n -= int(skip)
   259  		}
   260  
   261  		return n, err
   262  	}
   263  }
   264  
   265  func (r *forwardRowSeeker) SeekToRow(rowIndex int64) error {
   266  	if rowIndex >= r.index {
   267  		r.seek = rowIndex
   268  		return nil
   269  	}
   270  	return fmt.Errorf(
   271  		"SeekToRow: %T does not implement parquet.RowSeeker: cannot seek backward from row %d to %d",
   272  		r.rows,
   273  		r.index,
   274  		rowIndex,
   275  	)
   276  }
   277  
// CopyRows copies rows from src to dst.
//
// The underlying types of src and dst are tested to determine if they expose
// information about the schema of rows that are read and expected to be
// written. If the schema information is available but does not match, the
// function will attempt to automatically convert the rows from the source
// schema to the destination.
//
// As an optimization, the src argument may implement RowWriterTo to bypass
// the default row copy logic and provide its own. The dst argument may also
// implement RowReaderFrom for the same purpose.
//
// The function returns the number of rows written, or any error encountered
// other than io.EOF.
func CopyRows(dst RowWriter, src RowReader) (int64, error) {
	// A nil buffer lets copyRows allocate its own scratch rows on demand.
	return copyRows(dst, src, nil)
}
   295  
   296  func copyRows(dst RowWriter, src RowReader, buf []Row) (written int64, err error) {
   297  	targetSchema := targetSchemaOf(dst)
   298  	sourceSchema := sourceSchemaOf(src)
   299  
   300  	if targetSchema != nil && sourceSchema != nil {
   301  		if !nodesAreEqual(targetSchema, sourceSchema) {
   302  			conv, err := Convert(targetSchema, sourceSchema)
   303  			if err != nil {
   304  				return 0, err
   305  			}
   306  			// The conversion effectively disables a potential optimization
   307  			// if the source reader implemented RowWriterTo. It is a trade off
   308  			// we are making to optimize for safety rather than performance.
   309  			//
   310  			// Entering this code path should not be the common case tho, it is
   311  			// most often used when parquet schemas are evolving, but we expect
   312  			// that the majority of files of an application to be sharing a
   313  			// common schema.
   314  			src = ConvertRowReader(src, conv)
   315  		}
   316  	}
   317  
   318  	if wt, ok := src.(RowWriterTo); ok {
   319  		return wt.WriteRowsTo(dst)
   320  	}
   321  
   322  	if rf, ok := dst.(RowReaderFrom); ok {
   323  		return rf.ReadRowsFrom(src)
   324  	}
   325  
   326  	if len(buf) == 0 {
   327  		buf = make([]Row, defaultRowBufferSize)
   328  	}
   329  
   330  	defer clearRows(buf)
   331  
   332  	for {
   333  		rn, err := src.ReadRows(buf)
   334  
   335  		if rn > 0 {
   336  			wn, err := dst.WriteRows(buf[:rn])
   337  			if err != nil {
   338  				return written, err
   339  			}
   340  
   341  			written += int64(wn)
   342  		}
   343  
   344  		if err != nil {
   345  			if errors.Is(err, io.EOF) {
   346  				err = nil
   347  			}
   348  			return written, err
   349  		}
   350  
   351  		if rn == 0 {
   352  			return written, io.ErrNoProgress
   353  		}
   354  	}
   355  }
   356  
   357  func makeRows(n int) []Row {
   358  	buf := make([]Value, n)
   359  	row := make([]Row, n)
   360  	for i := range row {
   361  		row[i] = buf[i : i : i+1]
   362  	}
   363  	return row
   364  }
   365  
   366  func clearRows(rows []Row) {
   367  	for i, values := range rows {
   368  		clearValues(values)
   369  		rows[i] = values[:0]
   370  	}
   371  }
   372  
   373  func sourceSchemaOf(r RowReader) *Schema {
   374  	if rrs, ok := r.(RowReaderWithSchema); ok {
   375  		return rrs.Schema()
   376  	}
   377  	return nil
   378  }
   379  
   380  func targetSchemaOf(w RowWriter) *Schema {
   381  	if rws, ok := w.(RowWriterWithSchema); ok {
   382  		return rws.Schema()
   383  	}
   384  	return nil
   385  }
   386  
   387  // =============================================================================
   388  // Functions returning closures are marked with "go:noinline" below to prevent
   389  // losing naming information of the closure in stack traces.
   390  //
// Because some of the functions are very short (simply return a closure), the
// compiler inlines them at their call sites, which results in the closure being
   393  // named something like parquet.deconstructFuncOf.func2 instead of the original
   394  // parquet.deconstructFuncOfLeaf.func1; the latter being much more meaningful
   395  // when reading CPU or memory profiles.
   396  // =============================================================================
   397  
// levels carries the repetition and definition state threaded through the
// deconstruct/reconstruct functions while walking a row's structure.
type levels struct {
	repetitionDepth byte // nesting depth of repeated fields at the current node
	repetitionLevel byte // repetition level stamped on the next leaf value
	definitionLevel byte // definition level accumulated from optional/repeated wrappers
}
   403  
// deconstructFunc extracts the leaf values of a Go value and appends them to
// the per-column slices of its first argument, stamping each value with the
// repetition and definition levels it was found at. Implementations recurse
// through optional, repeated, list, map, and group nodes until they reach a
// leaf, which appends a single value to its column as the base case.
type deconstructFunc func([][]Value, levels, reflect.Value)

// deconstructFuncOf builds the deconstruct function for node, returning the
// column index that follows the node's last leaf column.
func deconstructFuncOf(columnIndex int16, node Node) (int16, deconstructFunc) {
	switch {
	case node.Optional():
		return deconstructFuncOfOptional(columnIndex, node)
	case node.Repeated():
		return deconstructFuncOfRepeated(columnIndex, node)
	case isList(node):
		return deconstructFuncOfList(columnIndex, node)
	case isMap(node):
		return deconstructFuncOfMap(columnIndex, node)
	default:
		return deconstructFuncOfRequired(columnIndex, node)
	}
}
   424  
   425  //go:noinline
   426  func deconstructFuncOfOptional(columnIndex int16, node Node) (int16, deconstructFunc) {
   427  	columnIndex, deconstruct := deconstructFuncOf(columnIndex, Required(node))
   428  	return columnIndex, func(columns [][]Value, levels levels, value reflect.Value) {
   429  		if value.IsValid() {
   430  			if value.IsZero() {
   431  				value = reflect.Value{}
   432  			} else {
   433  				if value.Kind() == reflect.Ptr {
   434  					value = value.Elem()
   435  				}
   436  				levels.definitionLevel++
   437  			}
   438  		}
   439  		deconstruct(columns, levels, value)
   440  	}
   441  }
   442  
   443  //go:noinline
   444  func deconstructFuncOfRepeated(columnIndex int16, node Node) (int16, deconstructFunc) {
   445  	columnIndex, deconstruct := deconstructFuncOf(columnIndex, Required(node))
   446  	return columnIndex, func(columns [][]Value, levels levels, value reflect.Value) {
   447  		if !value.IsValid() || value.Len() == 0 {
   448  			deconstruct(columns, levels, reflect.Value{})
   449  			return
   450  		}
   451  
   452  		levels.repetitionDepth++
   453  		levels.definitionLevel++
   454  
   455  		for i, n := 0, value.Len(); i < n; i++ {
   456  			deconstruct(columns, levels, value.Index(i))
   457  			levels.repetitionLevel = levels.repetitionDepth
   458  		}
   459  	}
   460  }
   461  
   462  func deconstructFuncOfRequired(columnIndex int16, node Node) (int16, deconstructFunc) {
   463  	switch {
   464  	case node.Leaf():
   465  		return deconstructFuncOfLeaf(columnIndex, node)
   466  	default:
   467  		return deconstructFuncOfGroup(columnIndex, node)
   468  	}
   469  }
   470  
// deconstructFuncOfList deconstructs a LIST logical node by treating its
// element node as a repeated field.
func deconstructFuncOfList(columnIndex int16, node Node) (int16, deconstructFunc) {
	return deconstructFuncOf(columnIndex, Repeated(listElementOf(node)))
}
   474  
// deconstructFuncOfMap deconstructs a Go map as a repeated group of key/value
// pairs, copying each entry into a reusable key_value scratch struct before
// deconstructing it like a repeated element.
//
//go:noinline
func deconstructFuncOfMap(columnIndex int16, node Node) (int16, deconstructFunc) {
	keyValue := mapKeyValueOf(node)
	keyValueType := keyValue.GoType()
	keyValueElem := keyValueType.Elem()
	keyType := keyValueElem.Field(0).Type
	valueType := keyValueElem.Field(1).Type
	nextColumnIndex, deconstruct := deconstructFuncOf(columnIndex, schemaOf(keyValueElem))
	return nextColumnIndex, func(columns [][]Value, levels levels, mapValue reflect.Value) {
		if !mapValue.IsValid() || mapValue.Len() == 0 {
			// Null or empty map: emit one null entry at the current levels.
			deconstruct(columns, levels, reflect.Value{})
			return
		}

		levels.repetitionDepth++
		levels.definitionLevel++

		// Scratch key/value struct reused for every map entry.
		elem := reflect.New(keyValueElem).Elem()
		k := elem.Field(0)
		v := elem.Field(1)

		for _, key := range mapValue.MapKeys() {
			k.Set(key.Convert(keyType))
			v.Set(mapValue.MapIndex(key).Convert(valueType))
			deconstruct(columns, levels, elem)
			// Entries after the first repeat at this depth.
			levels.repetitionLevel = levels.repetitionDepth
		}
	}
}
   504  
   505  //go:noinline
   506  func deconstructFuncOfGroup(columnIndex int16, node Node) (int16, deconstructFunc) {
   507  	fields := node.Fields()
   508  	funcs := make([]deconstructFunc, len(fields))
   509  	for i, field := range fields {
   510  		columnIndex, funcs[i] = deconstructFuncOf(columnIndex, field)
   511  	}
   512  	return columnIndex, func(columns [][]Value, levels levels, value reflect.Value) {
   513  		if value.IsValid() {
   514  			for i, f := range funcs {
   515  				f(columns, levels, fields[i].Value(value))
   516  			}
   517  		} else {
   518  			for _, f := range funcs {
   519  				f(columns, levels, value)
   520  			}
   521  		}
   522  	}
   523  }
   524  
// deconstructFuncOfLeaf is the base case of deconstruction: it converts the
// Go value (or a null, when the value is invalid) into a single parquet Value
// and appends it to the leaf's column, stamped with the current levels.
//
//go:noinline
func deconstructFuncOfLeaf(columnIndex int16, node Node) (int16, deconstructFunc) {
	if columnIndex > MaxColumnIndex {
		panic("row cannot be deconstructed because it has more than 127 columns")
	}
	typ := node.Type()
	kind := typ.Kind()
	lt := typ.LogicalType()
	// Values carry their column index in ones-complement form.
	valueColumnIndex := ^columnIndex
	return columnIndex + 1, func(columns [][]Value, levels levels, value reflect.Value) {
		// An invalid reflect.Value represents null: keep the zero Value.
		v := Value{}

		if value.IsValid() {
			v = makeValue(kind, lt, value)
		}

		v.repetitionLevel = levels.repetitionLevel
		v.definitionLevel = levels.definitionLevel
		v.columnIndex = valueColumnIndex

		columns[columnIndex] = append(columns[columnIndex], v)
	}
}
   548  
// The "reconstructX" functions perform the inverse of deconstruction: they
// rebuild a Go value from the parquet column values of a row.

// reconstructFunc populates a Go value from the per-column slices of parquet
// values, using levels to detect nulls and record boundaries.
type reconstructFunc func(reflect.Value, levels, [][]Value) error

// reconstructFuncOf builds the reconstruct function for node, returning the
// column index that follows the node's last leaf column.
func reconstructFuncOf(columnIndex int16, node Node) (int16, reconstructFunc) {
	switch {
	case node.Optional():
		return reconstructFuncOfOptional(columnIndex, node)
	case node.Repeated():
		return reconstructFuncOfRepeated(columnIndex, node)
	case isList(node):
		return reconstructFuncOfList(columnIndex, node)
	case isMap(node):
		return reconstructFuncOfMap(columnIndex, node)
	default:
		return reconstructFuncOfRequired(columnIndex, node)
	}
}
   568  
//go:noinline
func reconstructFuncOfOptional(columnIndex int16, node Node) (int16, reconstructFunc) {
	// We convert the optional func to required so that we eventually reach the
	// leaf base-case.  We're still using the heuristics of optional in the
	// returned closure (see levels.definitionLevel++), but we don't actually do
	// deserialization here, that happens in the leaf function, hence this line.
	nextColumnIndex, reconstruct := reconstructFuncOf(columnIndex, Required(node))

	return nextColumnIndex, func(value reflect.Value, levels levels, columns [][]Value) error {
		levels.definitionLevel++

		// A definition level below the expected one means the field is null:
		// reset the destination to its zero value and stop recursing.
		if columns[0][0].definitionLevel < levels.definitionLevel {
			value.Set(reflect.Zero(value.Type()))
			return nil
		}

		// Optional fields may be modeled as pointers; allocate on demand and
		// reconstruct into the pointee.
		if value.Kind() == reflect.Ptr {
			if value.IsNil() {
				value.Set(reflect.New(value.Type().Elem()))
			}
			value = value.Elem()
		}

		return reconstruct(value, levels, columns)
	}
}
   595  
   596  func setMakeSlice(v reflect.Value, n int) reflect.Value {
   597  	t := v.Type()
   598  	if t.Kind() == reflect.Interface {
   599  		t = reflect.TypeOf(([]interface{})(nil))
   600  	}
   601  	s := reflect.MakeSlice(t, n, n)
   602  	v.Set(s)
   603  	return s
   604  }
   605  
// reconstructFuncOfRepeated rebuilds a Go slice from repeated column values.
// It counts the number of records in the first column using repetition levels,
// sizes the destination slice, then reconstructs each element from the slice
// of values belonging to it in every column.
//
//go:noinline
func reconstructFuncOfRepeated(columnIndex int16, node Node) (int16, reconstructFunc) {
	nextColumnIndex, reconstruct := reconstructFuncOf(columnIndex, Required(node))
	return nextColumnIndex, func(value reflect.Value, levels levels, columns [][]Value) error {
		levels.repetitionDepth++
		levels.definitionLevel++

		// A definition level below the expected one means a null/empty list.
		if columns[0][0].definitionLevel < levels.definitionLevel {
			setMakeSlice(value, 0)
			return nil
		}

		values := make([][]Value, len(columns))
		column := columns[0]
		n := 0

		// Start each per-column cursor at length 0 with the full capacity,
		// so it can be advanced element by element below.
		for i, column := range columns {
			values[i] = column[0:0:len(column)]
		}

		// Count elements: a value whose repetition level does not exceed the
		// current depth starts a new element.
		for i := 0; i < len(column); {
			i++
			n++

			for i < len(column) && column[i].repetitionLevel > levels.repetitionDepth {
				i++
			}
		}

		value = setMakeSlice(value, n)

		for i := 0; i < n; i++ {
			// Trim each column to the run of values belonging to element i.
			for j, column := range values {
				column = column[:cap(column)]
				if len(column) == 0 {
					continue
				}

				k := 1
				for k < len(column) && column[k].repetitionLevel > levels.repetitionDepth {
					k++
				}

				values[j] = column[:k]
			}

			if err := reconstruct(value.Index(i), levels, values); err != nil {
				return err
			}

			// Advance each cursor past the consumed run.
			for j, column := range values {
				values[j] = column[len(column):len(column):cap(column)]
			}

			levels.repetitionLevel = levels.repetitionDepth
		}

		return nil
	}
}
   666  
   667  func reconstructFuncOfRequired(columnIndex int16, node Node) (int16, reconstructFunc) {
   668  	switch {
   669  	case node.Leaf():
   670  		return reconstructFuncOfLeaf(columnIndex, node)
   671  	default:
   672  		return reconstructFuncOfGroup(columnIndex, node)
   673  	}
   674  }
   675  
// reconstructFuncOfList reconstructs a LIST logical node by treating its
// element node as a repeated field.
func reconstructFuncOfList(columnIndex int16, node Node) (int16, reconstructFunc) {
	return reconstructFuncOf(columnIndex, Repeated(listElementOf(node)))
}
   679  
// reconstructFuncOfMap rebuilds a Go map from repeated key/value columns.
// It counts entries in the first column using repetition levels, then
// reconstructs each entry into a reusable key_value scratch struct and stores
// it in the destination map.
//
//go:noinline
func reconstructFuncOfMap(columnIndex int16, node Node) (int16, reconstructFunc) {
	keyValue := mapKeyValueOf(node)
	keyValueType := keyValue.GoType()
	keyValueElem := keyValueType.Elem()
	keyValueZero := reflect.Zero(keyValueElem)
	nextColumnIndex, reconstruct := reconstructFuncOf(columnIndex, schemaOf(keyValueElem))
	return nextColumnIndex, func(value reflect.Value, levels levels, columns [][]Value) error {
		levels.repetitionDepth++
		levels.definitionLevel++

		// A definition level below the expected one means a null/empty map.
		if columns[0][0].definitionLevel < levels.definitionLevel {
			value.Set(reflect.MakeMap(value.Type()))
			return nil
		}

		values := make([][]Value, len(columns))
		column := columns[0]
		t := value.Type()
		k := t.Key()  // destination map key type
		v := t.Elem() // destination map value type
		n := 0

		// Start each per-column cursor at length 0 with the full capacity.
		for i, column := range columns {
			values[i] = column[0:0:len(column)]
		}

		// Count entries: a value whose repetition level does not exceed the
		// current depth starts a new entry.
		for i := 0; i < len(column); {
			i++
			n++

			for i < len(column) && column[i].repetitionLevel > levels.repetitionDepth {
				i++
			}
		}

		if value.IsNil() {
			value.Set(reflect.MakeMapWithSize(t, n))
		}

		// Scratch key/value struct reused for every entry.
		elem := reflect.New(keyValueElem).Elem()
		for i := 0; i < n; i++ {
			// Trim each column to the run of values belonging to entry i.
			// Note: this k shadows the map key type declared above.
			for j, column := range values {
				column = column[:cap(column)]
				k := 1

				for k < len(column) && column[k].repetitionLevel > levels.repetitionDepth {
					k++
				}

				values[j] = column[:k]
			}

			if err := reconstruct(elem, levels, values); err != nil {
				return err
			}

			// Advance each cursor past the consumed run.
			for j, column := range values {
				values[j] = column[len(column):len(column):cap(column)]
			}

			value.SetMapIndex(elem.Field(0).Convert(k), elem.Field(1).Convert(v))
			elem.Set(keyValueZero)
			levels.repetitionLevel = levels.repetitionDepth
		}

		return nil
	}
}
   749  
// reconstructFuncOfGroup rebuilds a group node by slicing the columns into
// per-field windows (using the column offsets computed at construction time)
// and reconstructing each field in order. Destinations may be structs, maps
// keyed by field name, or empty interfaces (materialized as
// map[string]interface{}).
//
//go:noinline
func reconstructFuncOfGroup(columnIndex int16, node Node) (int16, reconstructFunc) {
	fields := node.Fields()
	funcs := make([]reconstructFunc, len(fields))
	columnOffsets := make([]int16, len(fields))
	firstColumnIndex := columnIndex

	for i, field := range fields {
		columnIndex, funcs[i] = reconstructFuncOf(columnIndex, field)
		// Offset of the first column after field i, relative to the group.
		columnOffsets[i] = columnIndex - firstColumnIndex
	}

	return columnIndex, func(value reflect.Value, levels levels, columns [][]Value) error {
		if value.Kind() == reflect.Interface {
			value.Set(reflect.MakeMap(reflect.TypeOf((map[string]interface{})(nil))))
			value = value.Elem()
		}

		if value.Kind() == reflect.Map {
			elemType := value.Type().Elem()
			name := reflect.New(reflect.TypeOf("")).Elem()
			elem := reflect.New(elemType).Elem()
			zero := reflect.Zero(elemType)

			// Replace a non-empty destination map so stale entries do not
			// survive reconstruction.
			if value.Len() > 0 {
				value.Set(reflect.MakeMap(value.Type()))
			}

			off := int16(0)

			for i, f := range funcs {
				name.SetString(fields[i].Name())
				end := columnOffsets[i]
				err := f(elem, levels, columns[off:end:end])
				if err != nil {
					return fmt.Errorf("%s → %w", name, err)
				}
				off = end
				value.SetMapIndex(name, elem)
				elem.Set(zero)
			}
		} else {
			off := int16(0)

			for i, f := range funcs {
				end := columnOffsets[i]
				err := f(fields[i].Value(value), levels, columns[off:end:end])
				if err != nil {
					return fmt.Errorf("%s → %w", fields[i].Name(), err)
				}
				off = end
			}
		}

		return nil
	}
}
   807  
   808  //go:noinline
   809  func reconstructFuncOfLeaf(columnIndex int16, node Node) (int16, reconstructFunc) {
   810  	typ := node.Type()
   811  	return columnIndex + 1, func(value reflect.Value, _ levels, columns [][]Value) error {
   812  		column := columns[0]
   813  		if len(column) == 0 {
   814  			return fmt.Errorf("no values found in parquet row for column %d", columnIndex)
   815  		}
   816  		return typ.AssignValue(value, column[0])
   817  	}
   818  }