github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/column_path.go (about)

     1  package parquet
     2  
     3  import "strings"
     4  
// columnPath is a sequence of field names. lookupColumnPath follows the names
// one by one from a node down to one of its descendants, and String renders
// the sequence with "." between names.
type columnPath []string
     6  
     7  func (path columnPath) append(names ...string) columnPath {
     8  	return append(path[:len(path):len(path)], names...)
     9  }
    10  
    11  func (path columnPath) equal(other columnPath) bool {
    12  	return stringsAreEqual(path, other)
    13  }
    14  
    15  func (path columnPath) less(other columnPath) bool {
    16  	return stringsAreOrdered(path, other)
    17  }
    18  
    19  func (path columnPath) String() string {
    20  	return strings.Join(path, ".")
    21  }
    22  
    23  func stringsAreEqual(strings1, strings2 []string) bool {
    24  	if len(strings1) != len(strings2) {
    25  		return false
    26  	}
    27  
    28  	for i := range strings1 {
    29  		if strings1[i] != strings2[i] {
    30  			return false
    31  		}
    32  	}
    33  
    34  	return true
    35  }
    36  
    37  func stringsAreOrdered(strings1, strings2 []string) bool {
    38  	n := len(strings1)
    39  
    40  	if n > len(strings2) {
    41  		n = len(strings2)
    42  	}
    43  
    44  	for i := 0; i < n; i++ {
    45  		if strings1[i] >= strings2[i] {
    46  			return false
    47  		}
    48  	}
    49  
    50  	return len(strings1) <= len(strings2)
    51  }
    52  
// leafColumn is the value forEachLeafColumn passes to its callback for every
// leaf node it visits.
type leafColumn struct {
	// node is the leaf node itself.
	node               Node
	// path holds the names of the fields traversed from the starting node
	// down to this leaf; the starting node's own name is not included.
	path               columnPath
	// maxRepetitionLevel is derived (via makeRepetitionLevel) from the number
	// of nodes between the starting node and the leaf, both included, that
	// were counted as repeated during the traversal.
	maxRepetitionLevel byte
	// maxDefinitionLevel is derived (via makeDefinitionLevel) from the number
	// of nodes between the starting node and the leaf, both included, that
	// were counted as optional or repeated during the traversal.
	maxDefinitionLevel byte
	// columnIndex is the zero-based position of the leaf in traversal order,
	// converted by makeColumnIndex.
	columnIndex        int16
}
    60  
    61  func forEachLeafColumnOf(node Node, do func(leafColumn)) {
    62  	forEachLeafColumn(node, nil, 0, 0, 0, do)
    63  }
    64  
    65  func forEachLeafColumn(node Node, path columnPath, columnIndex, maxRepetitionLevel, maxDefinitionLevel int, do func(leafColumn)) int {
    66  	switch {
    67  	case node.Optional():
    68  		maxDefinitionLevel++
    69  	case node.Repeated():
    70  		maxRepetitionLevel++
    71  		maxDefinitionLevel++
    72  	}
    73  
    74  	if node.Leaf() {
    75  		do(leafColumn{
    76  			node:               node,
    77  			path:               path,
    78  			maxRepetitionLevel: makeRepetitionLevel(maxRepetitionLevel),
    79  			maxDefinitionLevel: makeDefinitionLevel(maxDefinitionLevel),
    80  			columnIndex:        makeColumnIndex(columnIndex),
    81  		})
    82  		return columnIndex + 1
    83  	}
    84  
    85  	for _, field := range node.Fields() {
    86  		columnIndex = forEachLeafColumn(
    87  			field,
    88  			path.append(field.Name()),
    89  			columnIndex,
    90  			maxRepetitionLevel,
    91  			maxDefinitionLevel,
    92  			do,
    93  		)
    94  	}
    95  
    96  	return columnIndex
    97  }
    98  
    99  func lookupColumnPath(node Node, path columnPath) Node {
   100  	for node != nil && len(path) > 0 {
   101  		node = childByName(node, path[0])
   102  		path = path[1:]
   103  	}
   104  	return node
   105  }
   106  
   107  func hasColumnPath(node Node, path columnPath) bool {
   108  	return lookupColumnPath(node, path) != nil
   109  }