github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/column_path.go (about) 1 package parquet 2 3 import ( 4 "strings" 5 ) 6 7 type columnPath []string 8 9 func (path columnPath) append(names ...string) columnPath { 10 return append(path[:len(path):len(path)], names...) 11 } 12 13 func (path columnPath) equal(other columnPath) bool { 14 return stringsAreEqual(path, other) 15 } 16 17 func (path columnPath) less(other columnPath) bool { 18 return stringsAreOrdered(path, other) 19 } 20 21 func (path columnPath) String() string { 22 return strings.Join(path, ".") 23 } 24 25 func stringsAreEqual(strings1, strings2 []string) bool { 26 if len(strings1) != len(strings2) { 27 return false 28 } 29 30 for i := range strings1 { 31 if strings1[i] != strings2[i] { 32 return false 33 } 34 } 35 36 return true 37 } 38 39 func stringsAreOrdered(strings1, strings2 []string) bool { 40 n := len(strings1) 41 42 if n > len(strings2) { 43 n = len(strings2) 44 } 45 46 for i := 0; i < n; i++ { 47 if strings1[i] >= strings2[i] { 48 return false 49 } 50 } 51 52 return len(strings1) <= len(strings2) 53 } 54 55 type leafColumn struct { 56 node Node 57 path columnPath 58 maxRepetitionLevel byte 59 maxDefinitionLevel byte 60 columnIndex int16 61 } 62 63 func forEachLeafColumnOf(node Node, do func(leafColumn)) { 64 forEachLeafColumn(node, nil, 0, 0, 0, do) 65 } 66 67 func forEachLeafColumn(node Node, path columnPath, columnIndex, maxRepetitionLevel, maxDefinitionLevel int, do func(leafColumn)) int { 68 switch { 69 case node.Optional(): 70 maxDefinitionLevel++ 71 case node.Repeated(): 72 maxRepetitionLevel++ 73 maxDefinitionLevel++ 74 } 75 76 if node.Leaf() { 77 do(leafColumn{ 78 node: node, 79 path: path, 80 maxRepetitionLevel: makeRepetitionLevel(maxRepetitionLevel), 81 maxDefinitionLevel: makeDefinitionLevel(maxDefinitionLevel), 82 columnIndex: makeColumnIndex(columnIndex), 83 }) 84 return columnIndex + 1 85 } 86 87 for _, field := range node.Fields() { 88 columnIndex = forEachLeafColumn( 89 field, 90 path.append(field.Name()), 91 columnIndex, 92 maxRepetitionLevel, 93 maxDefinitionLevel, 94 do, 95 ) 96 } 97 98 return columnIndex 99 } 100 101 func lookupColumnPath(node Node, path columnPath) Node { 102 for node != nil && len(path) > 0 { 103 node = fieldByName(node, path[0]) 104 path = path[1:] 105 } 106 return node 107 } 108 109 func hasColumnPath(node Node, path columnPath) bool { 110 return lookupColumnPath(node, path) != nil 111 }