github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/column_path.go (about)

     1  package parquet
     2  
     3  import (
     4  	"strings"
     5  )
     6  
// columnPath is the sequence of field names leading to a column, ordered from
// the outermost field to the innermost one. Its String method renders the
// names joined with dots.
type columnPath []string
     8  
     9  func (path columnPath) append(names ...string) columnPath {
    10  	return append(path[:len(path):len(path)], names...)
    11  }
    12  
    13  func (path columnPath) equal(other columnPath) bool {
    14  	return stringsAreEqual(path, other)
    15  }
    16  
    17  func (path columnPath) less(other columnPath) bool {
    18  	return stringsAreOrdered(path, other)
    19  }
    20  
    21  func (path columnPath) String() string {
    22  	return strings.Join(path, ".")
    23  }
    24  
    25  func stringsAreEqual(strings1, strings2 []string) bool {
    26  	if len(strings1) != len(strings2) {
    27  		return false
    28  	}
    29  
    30  	for i := range strings1 {
    31  		if strings1[i] != strings2[i] {
    32  			return false
    33  		}
    34  	}
    35  
    36  	return true
    37  }
    38  
    39  func stringsAreOrdered(strings1, strings2 []string) bool {
    40  	n := len(strings1)
    41  
    42  	if n > len(strings2) {
    43  		n = len(strings2)
    44  	}
    45  
    46  	for i := 0; i < n; i++ {
    47  		if strings1[i] >= strings2[i] {
    48  			return false
    49  		}
    50  	}
    51  
    52  	return len(strings1) <= len(strings2)
    53  }
    54  
// leafColumn describes one leaf node visited by forEachLeafColumn, together
// with the position and levels accumulated on the way down to it.
type leafColumn struct {
	// node is the leaf node itself.
	node Node
	// path holds the field names followed from the starting node to this leaf.
	path columnPath
	// maxRepetitionLevel is the repetition level accumulated during the walk;
	// it grows by one for each visited node that is repeated (and not optional).
	maxRepetitionLevel byte
	// maxDefinitionLevel is the definition level accumulated during the walk;
	// it grows by one for each visited node that is optional or repeated.
	maxDefinitionLevel byte
	// columnIndex is the zero-based position of the leaf in visiting order.
	columnIndex int16
}
    62  
    63  func forEachLeafColumnOf(node Node, do func(leafColumn)) {
    64  	forEachLeafColumn(node, nil, 0, 0, 0, do)
    65  }
    66  
    67  func forEachLeafColumn(node Node, path columnPath, columnIndex, maxRepetitionLevel, maxDefinitionLevel int, do func(leafColumn)) int {
    68  	switch {
    69  	case node.Optional():
    70  		maxDefinitionLevel++
    71  	case node.Repeated():
    72  		maxRepetitionLevel++
    73  		maxDefinitionLevel++
    74  	}
    75  
    76  	if node.Leaf() {
    77  		do(leafColumn{
    78  			node:               node,
    79  			path:               path,
    80  			maxRepetitionLevel: makeRepetitionLevel(maxRepetitionLevel),
    81  			maxDefinitionLevel: makeDefinitionLevel(maxDefinitionLevel),
    82  			columnIndex:        makeColumnIndex(columnIndex),
    83  		})
    84  		return columnIndex + 1
    85  	}
    86  
    87  	for _, field := range node.Fields() {
    88  		columnIndex = forEachLeafColumn(
    89  			field,
    90  			path.append(field.Name()),
    91  			columnIndex,
    92  			maxRepetitionLevel,
    93  			maxDefinitionLevel,
    94  			do,
    95  		)
    96  	}
    97  
    98  	return columnIndex
    99  }
   100  
   101  func lookupColumnPath(node Node, path columnPath) Node {
   102  	for node != nil && len(path) > 0 {
   103  		node = fieldByName(node, path[0])
   104  		path = path[1:]
   105  	}
   106  	return node
   107  }
   108  
   109  func hasColumnPath(node Node, path columnPath) bool {
   110  	return lookupColumnPath(node, path) != nil
   111  }