github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/column_mapping.go (about)

     1  package parquet
     2  
// LeafColumn is a struct type representing leaf columns of a parquet schema.
//
// It is a plain value type: all fields are exported and carry no behavior of
// their own.
type LeafColumn struct {
	// Node is the schema node associated with the leaf column.
	Node Node
	// Path holds the column names identifying the leaf (presumably ordered
	// from the schema root down to the leaf; confirm against the code that
	// populates it).
	Path []string
	// ColumnIndex is the index of the leaf column.
	ColumnIndex int
	// MaxRepetitionLevel is the maximum repetition level of the column.
	MaxRepetitionLevel int
	// MaxDefinitionLevel is the maximum definition level of the column.
	MaxDefinitionLevel int
}
    11  
    12  func columnMappingOf(schema Node) (mapping columnMappingGroup, columns [][]string) {
    13  	mapping = make(columnMappingGroup)
    14  	columns = make([][]string, 0, 16)
    15  
    16  	forEachLeafColumnOf(schema, func(leaf leafColumn) {
    17  		path := make(columnPath, len(leaf.path))
    18  		copy(path, leaf.path)
    19  		columns = append(columns, path)
    20  
    21  		group := mapping
    22  		for len(path) > 1 {
    23  			columnName := path[0]
    24  			g, ok := group[columnName].(columnMappingGroup)
    25  			if !ok {
    26  				g = make(columnMappingGroup)
    27  				group[columnName] = g
    28  			}
    29  			group, path = g, path[1:]
    30  		}
    31  
    32  		leaf.path = path // use the copy
    33  		group[path[0]] = &columnMappingLeaf{column: leaf}
    34  	})
    35  
    36  	return mapping, columns
    37  }
    38  
// columnMapping is a node of the tree used to resolve a column path to a
// leaf column.
type columnMapping interface {
	// lookup resolves path relative to this node. The implementations in
	// this file return a leafColumn with columnIndex set to -1 when path
	// does not designate a leaf.
	lookup(path columnPath) leafColumn
}
    42  
// columnMappingGroup is an inner node of the column mapping tree: it maps a
// column name to the columnMapping (nested group or leaf) registered under
// that name.
type columnMappingGroup map[string]columnMapping
    44  
    45  func (group columnMappingGroup) lookup(path columnPath) leafColumn {
    46  	if len(path) > 0 {
    47  		c, ok := group[path[0]]
    48  		if ok {
    49  			return c.lookup(path[1:])
    50  		}
    51  	}
    52  	return leafColumn{columnIndex: -1}
    53  }
    54  
// columnMappingLeaf is a terminal node of the column mapping tree; it holds
// the leafColumn returned when a lookup ends exactly on this node.
type columnMappingLeaf struct {
	column leafColumn
}
    58  
    59  func (leaf *columnMappingLeaf) lookup(path columnPath) leafColumn {
    60  	if len(path) == 0 {
    61  		return leaf.column
    62  	}
    63  	return leafColumn{columnIndex: -1}
    64  }