github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/column_mapping.go (about) 1 package parquet 2 3 // LeafColumn is a struct type representing leaf columns of a parquet schema. 4 type LeafColumn struct { 5 Node Node 6 Path []string 7 ColumnIndex int 8 MaxRepetitionLevel int 9 MaxDefinitionLevel int 10 } 11 12 func columnMappingOf(schema Node) (mapping columnMappingGroup, columns [][]string) { 13 mapping = make(columnMappingGroup) 14 columns = make([][]string, 0, 16) 15 16 forEachLeafColumnOf(schema, func(leaf leafColumn) { 17 path := make(columnPath, len(leaf.path)) 18 copy(path, leaf.path) 19 columns = append(columns, path) 20 21 group := mapping 22 for len(path) > 1 { 23 columnName := path[0] 24 g, ok := group[columnName].(columnMappingGroup) 25 if !ok { 26 g = make(columnMappingGroup) 27 group[columnName] = g 28 } 29 group, path = g, path[1:] 30 } 31 32 leaf.path = path // use the copy 33 group[path[0]] = &columnMappingLeaf{column: leaf} 34 }) 35 36 return mapping, columns 37 } 38 39 type columnMapping interface { 40 lookup(path columnPath) leafColumn 41 } 42 43 type columnMappingGroup map[string]columnMapping 44 45 func (group columnMappingGroup) lookup(path columnPath) leafColumn { 46 if len(path) > 0 { 47 c, ok := group[path[0]] 48 if ok { 49 return c.lookup(path[1:]) 50 } 51 } 52 return leafColumn{columnIndex: -1} 53 } 54 55 func (group columnMappingGroup) lookupClosest(path columnPath) leafColumn { 56 for len(path) > 0 { 57 g, ok := group[path[0]].(columnMappingGroup) 58 if ok { 59 group, path = g, path[1:] 60 } else { 61 firstName := "" 62 firstLeaf := (*columnMappingLeaf)(nil) 63 for name, child := range group { 64 if leaf, ok := child.(*columnMappingLeaf); ok { 65 if firstLeaf == nil || name < firstName { 66 firstName, firstLeaf = name, leaf 67 } 68 } 69 } 70 if firstLeaf != nil { 71 return firstLeaf.column 72 } 73 break 74 } 75 } 76 return leafColumn{columnIndex: -1} 77 } 78 79 type columnMappingLeaf struct { 80 column leafColumn 81 } 82 83 func (leaf *columnMappingLeaf) lookup(path columnPath) leafColumn { 84 if len(path) == 0 { 85 return leaf.column 86 } 87 return leafColumn{columnIndex: -1} 88 }