github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/page_header.go (about)

     1  package parquet
     2  
     3  import (
     4  	"fmt"
     5  
     6  	"github.com/segmentio/parquet-go/format"
     7  )
     8  
     9  // PageHeader is an interface implemented by parquet page headers.
    10  type PageHeader interface {
    11  	// Returns the number of values in the page (including nulls).
    12  	NumValues() int64
    13  
    14  	// Returns the page encoding.
    15  	Encoding() format.Encoding
    16  
    17  	// Returns the parquet format page type.
    18  	PageType() format.PageType
    19  }
    20  
    21  // DataPageHeader is a specialization of the PageHeader interface implemented by
    22  // data pages.
    23  type DataPageHeader interface {
    24  	PageHeader
    25  
    26  	// Returns the encoding of the repetition level section.
    27  	RepetitionLevelEncoding() format.Encoding
    28  
    29  	// Returns the encoding of the definition level section.
    30  	DefinitionLevelEncoding() format.Encoding
    31  
    32  	// Returns the number of null values in the page.
    33  	NullCount() int64
    34  
    35  	// Returns the minimum value in the page based on the ordering rules of the
    36  	// column's logical type.
    37  	//
    38  	// As an optimization, the method may return the same slice across multiple
    39  	// calls. Programs must treat the returned value as immutable to prevent
    40  	// unpredictable behaviors.
    41  	//
    42  	// If the page only contains only null values, an empty slice is returned.
    43  	MinValue() []byte
    44  
    45  	// Returns the maximum value in the page based on the ordering rules of the
    46  	// column's logical type.
    47  	//
    48  	// As an optimization, the method may return the same slice across multiple
    49  	// calls. Programs must treat the returned value as immutable to prevent
    50  	// unpredictable behaviors.
    51  	//
    52  	// If the page only contains only null values, an empty slice is returned.
    53  	MaxValue() []byte
    54  }
    55  
    56  // DictionaryPageHeader is an implementation of the PageHeader interface
    57  // representing dictionary pages.
    58  type DictionaryPageHeader struct {
    59  	header *format.DictionaryPageHeader
    60  }
    61  
    62  func (dict DictionaryPageHeader) NumValues() int64 {
    63  	return int64(dict.header.NumValues)
    64  }
    65  
    66  func (dict DictionaryPageHeader) Encoding() format.Encoding {
    67  	return dict.header.Encoding
    68  }
    69  
    70  func (dict DictionaryPageHeader) PageType() format.PageType {
    71  	return format.DictionaryPage
    72  }
    73  
    74  func (dict DictionaryPageHeader) IsSorted() bool {
    75  	return dict.header.IsSorted
    76  }
    77  
    78  func (dict DictionaryPageHeader) String() string {
    79  	return fmt.Sprintf("DICTIONARY_PAGE_HEADER{NumValues=%d,Encoding=%s,IsSorted=%t}",
    80  		dict.header.NumValues,
    81  		dict.header.Encoding,
    82  		dict.header.IsSorted)
    83  }
    84  
    85  // DataPageHeaderV1 is an implementation of the DataPageHeader interface
    86  // representing data pages version 1.
    87  type DataPageHeaderV1 struct {
    88  	header *format.DataPageHeader
    89  }
    90  
    91  func (v1 DataPageHeaderV1) NumValues() int64 {
    92  	return int64(v1.header.NumValues)
    93  }
    94  
    95  func (v1 DataPageHeaderV1) RepetitionLevelEncoding() format.Encoding {
    96  	return v1.header.RepetitionLevelEncoding
    97  }
    98  
    99  func (v1 DataPageHeaderV1) DefinitionLevelEncoding() format.Encoding {
   100  	return v1.header.DefinitionLevelEncoding
   101  }
   102  
   103  func (v1 DataPageHeaderV1) Encoding() format.Encoding {
   104  	return v1.header.Encoding
   105  }
   106  
   107  func (v1 DataPageHeaderV1) PageType() format.PageType {
   108  	return format.DataPage
   109  }
   110  
   111  func (v1 DataPageHeaderV1) NullCount() int64 {
   112  	return v1.header.Statistics.NullCount
   113  }
   114  
   115  func (v1 DataPageHeaderV1) MinValue() []byte {
   116  	return v1.header.Statistics.MinValue
   117  }
   118  
   119  func (v1 DataPageHeaderV1) MaxValue() []byte {
   120  	return v1.header.Statistics.MaxValue
   121  }
   122  
   123  func (v1 DataPageHeaderV1) String() string {
   124  	return fmt.Sprintf("DATA_PAGE_HEADER{NumValues=%d,Encoding=%s}",
   125  		v1.header.NumValues,
   126  		v1.header.Encoding)
   127  }
   128  
   129  // DataPageHeaderV2 is an implementation of the DataPageHeader interface
   130  // representing data pages version 2.
   131  type DataPageHeaderV2 struct {
   132  	header *format.DataPageHeaderV2
   133  }
   134  
   135  func (v2 DataPageHeaderV2) NumValues() int64 {
   136  	return int64(v2.header.NumValues)
   137  }
   138  
   139  func (v2 DataPageHeaderV2) NumNulls() int64 {
   140  	return int64(v2.header.NumNulls)
   141  }
   142  
   143  func (v2 DataPageHeaderV2) NumRows() int64 {
   144  	return int64(v2.header.NumRows)
   145  }
   146  
   147  func (v2 DataPageHeaderV2) RepetitionLevelsByteLength() int64 {
   148  	return int64(v2.header.RepetitionLevelsByteLength)
   149  }
   150  
   151  func (v2 DataPageHeaderV2) DefinitionLevelsByteLength() int64 {
   152  	return int64(v2.header.DefinitionLevelsByteLength)
   153  }
   154  
   155  func (v2 DataPageHeaderV2) RepetitionLevelEncoding() format.Encoding {
   156  	return format.RLE
   157  }
   158  
   159  func (v2 DataPageHeaderV2) DefinitionLevelEncoding() format.Encoding {
   160  	return format.RLE
   161  }
   162  
   163  func (v2 DataPageHeaderV2) Encoding() format.Encoding {
   164  	return v2.header.Encoding
   165  }
   166  
   167  func (v2 DataPageHeaderV2) PageType() format.PageType {
   168  	return format.DataPageV2
   169  }
   170  
   171  func (v2 DataPageHeaderV2) NullCount() int64 {
   172  	return v2.header.Statistics.NullCount
   173  }
   174  
   175  func (v2 DataPageHeaderV2) MinValue() []byte {
   176  	return v2.header.Statistics.MinValue
   177  }
   178  
   179  func (v2 DataPageHeaderV2) MaxValue() []byte {
   180  	return v2.header.Statistics.MaxValue
   181  }
   182  
   183  func (v2 DataPageHeaderV2) IsCompressed() bool {
   184  	return v2.header.IsCompressed == nil || *v2.header.IsCompressed
   185  }
   186  
   187  func (v2 DataPageHeaderV2) String() string {
   188  	return fmt.Sprintf("DATA_PAGE_HEADER_V2{NumValues=%d,NumNulls=%d,NumRows=%d,Encoding=%s,IsCompressed=%t}",
   189  		v2.header.NumValues,
   190  		v2.header.NumNulls,
   191  		v2.header.NumRows,
   192  		v2.header.Encoding,
   193  		v2.IsCompressed())
   194  }
   195  
   196  type unknownPageHeader struct {
   197  	header *format.PageHeader
   198  }
   199  
   200  func (u unknownPageHeader) NumValues() int64 {
   201  	return 0
   202  }
   203  
   204  func (u unknownPageHeader) Encoding() format.Encoding {
   205  	return -1
   206  }
   207  
   208  func (u unknownPageHeader) PageType() format.PageType {
   209  	return u.header.Type
   210  }
   211  
   212  func (u unknownPageHeader) String() string {
   213  	return fmt.Sprintf("UNKNOWN_PAGE_HEADER{Type=%d}", u.header.Type)
   214  }
   215  
   216  var (
   217  	_ PageHeader     = DictionaryPageHeader{}
   218  	_ DataPageHeader = DataPageHeaderV1{}
   219  	_ DataPageHeader = DataPageHeaderV2{}
   220  	_ PageHeader     = unknownPageHeader{}
   221  )