github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/page_header.go (about) 1 package parquet 2 3 import ( 4 "fmt" 5 6 "github.com/vc42/parquet-go/format" 7 ) 8 9 // PageHeader is an interface implemented by parquet page headers. 10 type PageHeader interface { 11 // Returns the number of values in the page (including nulls). 12 NumValues() int64 13 14 // Returns the page encoding. 15 Encoding() format.Encoding 16 17 // Returns the parquet format page type. 18 PageType() format.PageType 19 } 20 21 // DataPageHeader is a specialization of the PageHeader interface implemented by 22 // data pages. 23 type DataPageHeader interface { 24 PageHeader 25 26 // Returns the encoding of the repetition level section. 27 RepetitionLevelEncoding() format.Encoding 28 29 // Returns the encoding of the definition level section. 30 DefinitionLevelEncoding() format.Encoding 31 32 // Returns the number of null values in the page. 33 NullCount() int64 34 35 // Returns the minimum value in the page based on the ordering rules of the 36 // column's logical type. 37 // 38 // As an optimization, the method may return the same slice across multiple 39 // calls. Programs must treat the returned value as immutable to prevent 40 // unpredictable behaviors. 41 // 42 // If the page only contains only null values, an empty slice is returned. 43 MinValue() []byte 44 45 // Returns the maximum value in the page based on the ordering rules of the 46 // column's logical type. 47 // 48 // As an optimization, the method may return the same slice across multiple 49 // calls. Programs must treat the returned value as immutable to prevent 50 // unpredictable behaviors. 51 // 52 // If the page only contains only null values, an empty slice is returned. 53 MaxValue() []byte 54 } 55 56 // DictionaryPageHeader is an implementation of the PageHeader interface 57 // representing dictionary pages. 58 type DictionaryPageHeader struct { 59 header *format.DictionaryPageHeader 60 } 61 62 func (dict DictionaryPageHeader) NumValues() int64 { 63 return int64(dict.header.NumValues) 64 } 65 66 func (dict DictionaryPageHeader) Encoding() format.Encoding { 67 return dict.header.Encoding 68 } 69 70 func (dict DictionaryPageHeader) PageType() format.PageType { 71 return format.DictionaryPage 72 } 73 74 func (dict DictionaryPageHeader) IsSorted() bool { 75 return dict.header.IsSorted 76 } 77 78 func (dict DictionaryPageHeader) String() string { 79 return fmt.Sprintf("DICTIONARY_PAGE_HEADER{NumValues=%d,Encoding=%s,IsSorted=%t}", 80 dict.header.NumValues, 81 dict.header.Encoding, 82 dict.header.IsSorted) 83 } 84 85 // DataPageHeaderV1 is an implementation of the DataPageHeader interface 86 // representing data pages version 1. 87 type DataPageHeaderV1 struct { 88 header *format.DataPageHeader 89 } 90 91 func (v1 DataPageHeaderV1) NumValues() int64 { 92 return int64(v1.header.NumValues) 93 } 94 95 func (v1 DataPageHeaderV1) RepetitionLevelEncoding() format.Encoding { 96 return v1.header.RepetitionLevelEncoding 97 } 98 99 func (v1 DataPageHeaderV1) DefinitionLevelEncoding() format.Encoding { 100 return v1.header.DefinitionLevelEncoding 101 } 102 103 func (v1 DataPageHeaderV1) Encoding() format.Encoding { 104 return v1.header.Encoding 105 } 106 107 func (v1 DataPageHeaderV1) PageType() format.PageType { 108 return format.DataPage 109 } 110 111 func (v1 DataPageHeaderV1) NullCount() int64 { 112 return v1.header.Statistics.NullCount 113 } 114 115 func (v1 DataPageHeaderV1) MinValue() []byte { 116 return v1.header.Statistics.MinValue 117 } 118 119 func (v1 DataPageHeaderV1) MaxValue() []byte { 120 return v1.header.Statistics.MaxValue 121 } 122 123 func (v1 DataPageHeaderV1) String() string { 124 return fmt.Sprintf("DATA_PAGE_HEADER{NumValues=%d,Encoding=%s}", 125 v1.header.NumValues, 126 v1.header.Encoding) 127 } 128 129 // DataPageHeaderV2 is an implementation of the DataPageHeader interface 130 // representing data pages version 2. 131 type DataPageHeaderV2 struct { 132 header *format.DataPageHeaderV2 133 } 134 135 func (v2 DataPageHeaderV2) NumValues() int64 { 136 return int64(v2.header.NumValues) 137 } 138 139 func (v2 DataPageHeaderV2) NumNulls() int64 { 140 return int64(v2.header.NumNulls) 141 } 142 143 func (v2 DataPageHeaderV2) NumRows() int64 { 144 return int64(v2.header.NumRows) 145 } 146 147 func (v2 DataPageHeaderV2) RepetitionLevelsByteLength() int64 { 148 return int64(v2.header.RepetitionLevelsByteLength) 149 } 150 151 func (v2 DataPageHeaderV2) DefinitionLevelsByteLength() int64 { 152 return int64(v2.header.DefinitionLevelsByteLength) 153 } 154 155 func (v2 DataPageHeaderV2) RepetitionLevelEncoding() format.Encoding { 156 return format.RLE 157 } 158 159 func (v2 DataPageHeaderV2) DefinitionLevelEncoding() format.Encoding { 160 return format.RLE 161 } 162 163 func (v2 DataPageHeaderV2) Encoding() format.Encoding { 164 return v2.header.Encoding 165 } 166 167 func (v2 DataPageHeaderV2) PageType() format.PageType { 168 return format.DataPageV2 169 } 170 171 func (v2 DataPageHeaderV2) NullCount() int64 { 172 return v2.header.Statistics.NullCount 173 } 174 175 func (v2 DataPageHeaderV2) MinValue() []byte { 176 return v2.header.Statistics.MinValue 177 } 178 179 func (v2 DataPageHeaderV2) MaxValue() []byte { 180 return v2.header.Statistics.MaxValue 181 } 182 183 func (v2 DataPageHeaderV2) IsCompressed() bool { 184 return v2.header.IsCompressed == nil || *v2.header.IsCompressed 185 } 186 187 func (v2 DataPageHeaderV2) String() string { 188 return fmt.Sprintf("DATA_PAGE_HEADER_V2{NumValues=%d,NumNulls=%d,NumRows=%d,Encoding=%s,IsCompressed=%t}", 189 v2.header.NumValues, 190 v2.header.NumNulls, 191 v2.header.NumRows, 192 v2.header.Encoding, 193 v2.IsCompressed()) 194 } 195 196 type unknownPageHeader struct { 197 header *format.PageHeader 198 } 199 200 func (u unknownPageHeader) NumValues() int64 { 201 return 0 202 } 203 204 func (u unknownPageHeader) Encoding() format.Encoding { 205 return -1 206 } 207 208 func (u unknownPageHeader) PageType() format.PageType { 209 return u.header.Type 210 } 211 212 func (u unknownPageHeader) String() string { 213 return fmt.Sprintf("UNKNOWN_PAGE_HEADER{Type=%d}", u.header.Type) 214 } 215 216 var ( 217 _ PageHeader = DictionaryPageHeader{} 218 _ DataPageHeader = DataPageHeaderV1{} 219 _ DataPageHeader = DataPageHeaderV2{} 220 _ PageHeader = unknownPageHeader{} 221 )