github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/column_mapping.go (about)

     1  package parquet
     2  
     3  // LeafColumn is a struct type representing leaf columns of a parquet schema.
     4  type LeafColumn struct {
     5  	Node               Node
     6  	Path               []string
     7  	ColumnIndex        int
     8  	MaxRepetitionLevel int
     9  	MaxDefinitionLevel int
    10  }
    11  
    12  func columnMappingOf(schema Node) (mapping columnMappingGroup, columns [][]string) {
    13  	mapping = make(columnMappingGroup)
    14  	columns = make([][]string, 0, 16)
    15  
    16  	forEachLeafColumnOf(schema, func(leaf leafColumn) {
    17  		path := make(columnPath, len(leaf.path))
    18  		copy(path, leaf.path)
    19  		columns = append(columns, path)
    20  
    21  		group := mapping
    22  		for len(path) > 1 {
    23  			columnName := path[0]
    24  			g, ok := group[columnName].(columnMappingGroup)
    25  			if !ok {
    26  				g = make(columnMappingGroup)
    27  				group[columnName] = g
    28  			}
    29  			group, path = g, path[1:]
    30  		}
    31  
    32  		leaf.path = path // use the copy
    33  		group[path[0]] = &columnMappingLeaf{column: leaf}
    34  	})
    35  
    36  	return mapping, columns
    37  }
    38  
    39  type columnMapping interface {
    40  	lookup(path columnPath) leafColumn
    41  }
    42  
    43  type columnMappingGroup map[string]columnMapping
    44  
    45  func (group columnMappingGroup) lookup(path columnPath) leafColumn {
    46  	if len(path) > 0 {
    47  		c, ok := group[path[0]]
    48  		if ok {
    49  			return c.lookup(path[1:])
    50  		}
    51  	}
    52  	return leafColumn{columnIndex: -1}
    53  }
    54  
    55  func (group columnMappingGroup) lookupClosest(path columnPath) leafColumn {
    56  	for len(path) > 0 {
    57  		g, ok := group[path[0]].(columnMappingGroup)
    58  		if ok {
    59  			group, path = g, path[1:]
    60  		} else {
    61  			firstName := ""
    62  			firstLeaf := (*columnMappingLeaf)(nil)
    63  			for name, child := range group {
    64  				if leaf, ok := child.(*columnMappingLeaf); ok {
    65  					if firstLeaf == nil || name < firstName {
    66  						firstName, firstLeaf = name, leaf
    67  					}
    68  				}
    69  			}
    70  			if firstLeaf != nil {
    71  				return firstLeaf.column
    72  			}
    73  			break
    74  		}
    75  	}
    76  	return leafColumn{columnIndex: -1}
    77  }
    78  
    79  type columnMappingLeaf struct {
    80  	column leafColumn
    81  }
    82  
    83  func (leaf *columnMappingLeaf) lookup(path columnPath) leafColumn {
    84  	if len(path) == 0 {
    85  		return leaf.column
    86  	}
    87  	return leafColumn{columnIndex: -1}
    88  }