github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/page_values.go (about)

     1  package parquet
     2  
     3  import (
     4  	"io"
     5  
     6  	"github.com/parquet-go/parquet-go/deprecated"
     7  	"github.com/parquet-go/parquet-go/encoding/plain"
     8  	"github.com/parquet-go/parquet-go/internal/unsafecast"
     9  )
    10  
    11  type optionalPageValues struct {
    12  	page   *optionalPage
    13  	values ValueReader
    14  	offset int
    15  }
    16  
    17  func (r *optionalPageValues) ReadValues(values []Value) (n int, err error) {
    18  	maxDefinitionLevel := r.page.maxDefinitionLevel
    19  	definitionLevels := r.page.definitionLevels
    20  	columnIndex := ^int16(r.page.Column())
    21  
    22  	for n < len(values) && r.offset < len(definitionLevels) {
    23  		for n < len(values) && r.offset < len(definitionLevels) && definitionLevels[r.offset] != maxDefinitionLevel {
    24  			values[n] = Value{
    25  				definitionLevel: definitionLevels[r.offset],
    26  				columnIndex:     columnIndex,
    27  			}
    28  			r.offset++
    29  			n++
    30  		}
    31  
    32  		i := n
    33  		j := r.offset
    34  		for i < len(values) && j < len(definitionLevels) && definitionLevels[j] == maxDefinitionLevel {
    35  			i++
    36  			j++
    37  		}
    38  
    39  		if n < i {
    40  			for j, err = r.values.ReadValues(values[n:i]); j > 0; j-- {
    41  				values[n].definitionLevel = maxDefinitionLevel
    42  				r.offset++
    43  				n++
    44  			}
    45  			// Do not return on an io.EOF here as we may still have null values to read.
    46  			if err != nil && err != io.EOF {
    47  				return n, err
    48  			}
    49  			err = nil
    50  		}
    51  	}
    52  
    53  	if r.offset == len(definitionLevels) {
    54  		err = io.EOF
    55  	}
    56  	return n, err
    57  }
    58  
    59  type repeatedPageValues struct {
    60  	page   *repeatedPage
    61  	values ValueReader
    62  	offset int
    63  }
    64  
    65  func (r *repeatedPageValues) ReadValues(values []Value) (n int, err error) {
    66  	maxDefinitionLevel := r.page.maxDefinitionLevel
    67  	definitionLevels := r.page.definitionLevels
    68  	repetitionLevels := r.page.repetitionLevels
    69  	columnIndex := ^int16(r.page.Column())
    70  
    71  	// While we haven't exceeded the output buffer and we haven't exceeded the page size.
    72  	for n < len(values) && r.offset < len(definitionLevels) {
    73  
    74  		// While we haven't exceeded the output buffer and we haven't exceeded the
    75  		// page size AND the current element's definitionLevel is not the
    76  		// maxDefinitionLevel (this is a null value), Create the zero values to be
    77  		// returned in this run.
    78  		for n < len(values) && r.offset < len(definitionLevels) && definitionLevels[r.offset] != maxDefinitionLevel {
    79  			values[n] = Value{
    80  				repetitionLevel: repetitionLevels[r.offset],
    81  				definitionLevel: definitionLevels[r.offset],
    82  				columnIndex:     columnIndex,
    83  			}
    84  			r.offset++
    85  			n++
    86  		}
    87  
    88  		i := n
    89  		j := r.offset
    90  		// Get the length of the run of non-zero values to be copied.
    91  		for i < len(values) && j < len(definitionLevels) && definitionLevels[j] == maxDefinitionLevel {
    92  			i++
    93  			j++
    94  		}
    95  
    96  		// Copy all the non-zero values in this run.
    97  		if n < i {
    98  			for j, err = r.values.ReadValues(values[n:i]); j > 0; j-- {
    99  				values[n].repetitionLevel = repetitionLevels[r.offset]
   100  				values[n].definitionLevel = maxDefinitionLevel
   101  				r.offset++
   102  				n++
   103  			}
   104  			if err != nil && err != io.EOF {
   105  				return n, err
   106  			}
   107  			err = nil
   108  		}
   109  	}
   110  
   111  	if r.offset == len(definitionLevels) {
   112  		err = io.EOF
   113  	}
   114  	return n, err
   115  }
   116  
   117  type booleanPageValues struct {
   118  	page   *booleanPage
   119  	offset int
   120  }
   121  
   122  func (r *booleanPageValues) ReadBooleans(values []bool) (n int, err error) {
   123  	for n < len(values) && r.offset < int(r.page.numValues) {
   124  		values[n] = r.page.valueAt(r.offset)
   125  		r.offset++
   126  		n++
   127  	}
   128  	if r.offset == int(r.page.numValues) {
   129  		err = io.EOF
   130  	}
   131  	return n, err
   132  }
   133  
   134  func (r *booleanPageValues) ReadValues(values []Value) (n int, err error) {
   135  	for n < len(values) && r.offset < int(r.page.numValues) {
   136  		values[n] = r.page.makeValue(r.page.valueAt(r.offset))
   137  		r.offset++
   138  		n++
   139  	}
   140  	if r.offset == int(r.page.numValues) {
   141  		err = io.EOF
   142  	}
   143  	return n, err
   144  }
   145  
   146  type int32PageValues struct {
   147  	page   *int32Page
   148  	offset int
   149  }
   150  
   151  func (r *int32PageValues) Read(b []byte) (n int, err error) {
   152  	n, err = r.ReadInt32s(unsafecast.BytesToInt32(b))
   153  	return 4 * n, err
   154  }
   155  
   156  func (r *int32PageValues) ReadInt32s(values []int32) (n int, err error) {
   157  	n = copy(values, r.page.values[r.offset:])
   158  	r.offset += n
   159  	if r.offset == len(r.page.values) {
   160  		err = io.EOF
   161  	}
   162  	return n, err
   163  }
   164  
   165  func (r *int32PageValues) ReadValues(values []Value) (n int, err error) {
   166  	for n < len(values) && r.offset < len(r.page.values) {
   167  		values[n] = r.page.makeValue(r.page.values[r.offset])
   168  		r.offset++
   169  		n++
   170  	}
   171  	if r.offset == len(r.page.values) {
   172  		err = io.EOF
   173  	}
   174  	return n, err
   175  }
   176  
   177  type int64PageValues struct {
   178  	page   *int64Page
   179  	offset int
   180  }
   181  
   182  func (r *int64PageValues) Read(b []byte) (n int, err error) {
   183  	n, err = r.ReadInt64s(unsafecast.BytesToInt64(b))
   184  	return 8 * n, err
   185  }
   186  
   187  func (r *int64PageValues) ReadInt64s(values []int64) (n int, err error) {
   188  	n = copy(values, r.page.values[r.offset:])
   189  	r.offset += n
   190  	if r.offset == len(r.page.values) {
   191  		err = io.EOF
   192  	}
   193  	return n, err
   194  }
   195  
   196  func (r *int64PageValues) ReadValues(values []Value) (n int, err error) {
   197  	for n < len(values) && r.offset < len(r.page.values) {
   198  		values[n] = r.page.makeValue(r.page.values[r.offset])
   199  		r.offset++
   200  		n++
   201  	}
   202  	if r.offset == len(r.page.values) {
   203  		err = io.EOF
   204  	}
   205  	return n, err
   206  }
   207  
   208  type int96PageValues struct {
   209  	page   *int96Page
   210  	offset int
   211  }
   212  
   213  func (r *int96PageValues) Read(b []byte) (n int, err error) {
   214  	n, err = r.ReadInt96s(deprecated.BytesToInt96(b))
   215  	return 12 * n, err
   216  }
   217  
   218  func (r *int96PageValues) ReadInt96s(values []deprecated.Int96) (n int, err error) {
   219  	n = copy(values, r.page.values[r.offset:])
   220  	r.offset += n
   221  	if r.offset == len(r.page.values) {
   222  		err = io.EOF
   223  	}
   224  	return n, err
   225  }
   226  
   227  func (r *int96PageValues) ReadValues(values []Value) (n int, err error) {
   228  	for n < len(values) && r.offset < len(r.page.values) {
   229  		values[n] = r.page.makeValue(r.page.values[r.offset])
   230  		r.offset++
   231  		n++
   232  	}
   233  	if r.offset == len(r.page.values) {
   234  		err = io.EOF
   235  	}
   236  	return n, err
   237  }
   238  
   239  type floatPageValues struct {
   240  	page   *floatPage
   241  	offset int
   242  }
   243  
   244  func (r *floatPageValues) Read(b []byte) (n int, err error) {
   245  	n, err = r.ReadFloats(unsafecast.BytesToFloat32(b))
   246  	return 4 * n, err
   247  }
   248  
   249  func (r *floatPageValues) ReadFloats(values []float32) (n int, err error) {
   250  	n = copy(values, r.page.values[r.offset:])
   251  	r.offset += n
   252  	if r.offset == len(r.page.values) {
   253  		err = io.EOF
   254  	}
   255  	return n, err
   256  }
   257  
   258  func (r *floatPageValues) ReadValues(values []Value) (n int, err error) {
   259  	for n < len(values) && r.offset < len(r.page.values) {
   260  		values[n] = r.page.makeValue(r.page.values[r.offset])
   261  		r.offset++
   262  		n++
   263  	}
   264  	if r.offset == len(r.page.values) {
   265  		err = io.EOF
   266  	}
   267  	return n, err
   268  }
   269  
   270  type doublePageValues struct {
   271  	page   *doublePage
   272  	offset int
   273  }
   274  
   275  func (r *doublePageValues) Read(b []byte) (n int, err error) {
   276  	n, err = r.ReadDoubles(unsafecast.BytesToFloat64(b))
   277  	return 8 * n, err
   278  }
   279  
   280  func (r *doublePageValues) ReadDoubles(values []float64) (n int, err error) {
   281  	n = copy(values, r.page.values[r.offset:])
   282  	r.offset += n
   283  	if r.offset == len(r.page.values) {
   284  		err = io.EOF
   285  	}
   286  	return n, err
   287  }
   288  
   289  func (r *doublePageValues) ReadValues(values []Value) (n int, err error) {
   290  	for n < len(values) && r.offset < len(r.page.values) {
   291  		values[n] = r.page.makeValue(r.page.values[r.offset])
   292  		r.offset++
   293  		n++
   294  	}
   295  	if r.offset == len(r.page.values) {
   296  		err = io.EOF
   297  	}
   298  	return n, err
   299  }
   300  
   301  type byteArrayPageValues struct {
   302  	page   *byteArrayPage
   303  	offset int
   304  }
   305  
   306  func (r *byteArrayPageValues) Read(b []byte) (int, error) {
   307  	_, n, err := r.readByteArrays(b)
   308  	return n, err
   309  }
   310  
   311  func (r *byteArrayPageValues) ReadRequired(values []byte) (int, error) {
   312  	return r.ReadByteArrays(values)
   313  }
   314  
   315  func (r *byteArrayPageValues) ReadByteArrays(values []byte) (int, error) {
   316  	n, _, err := r.readByteArrays(values)
   317  	return n, err
   318  }
   319  
   320  func (r *byteArrayPageValues) readByteArrays(values []byte) (c, n int, err error) {
   321  	numValues := r.page.len()
   322  	for r.offset < numValues {
   323  		b := r.page.index(r.offset)
   324  		k := plain.ByteArrayLengthSize + len(b)
   325  		if k > (len(values) - n) {
   326  			break
   327  		}
   328  		plain.PutByteArrayLength(values[n:], len(b))
   329  		n += plain.ByteArrayLengthSize
   330  		n += copy(values[n:], b)
   331  		r.offset++
   332  		c++
   333  	}
   334  	if r.offset == numValues {
   335  		err = io.EOF
   336  	} else if n == 0 && len(values) > 0 {
   337  		err = io.ErrShortBuffer
   338  	}
   339  	return c, n, err
   340  }
   341  
   342  func (r *byteArrayPageValues) ReadValues(values []Value) (n int, err error) {
   343  	numValues := r.page.len()
   344  	for n < len(values) && r.offset < numValues {
   345  		values[n] = r.page.makeValueBytes(r.page.index(r.offset))
   346  		r.offset++
   347  		n++
   348  	}
   349  	if r.offset == numValues {
   350  		err = io.EOF
   351  	}
   352  	return n, err
   353  }
   354  
   355  type fixedLenByteArrayPageValues struct {
   356  	page   *fixedLenByteArrayPage
   357  	offset int
   358  }
   359  
   360  func (r *fixedLenByteArrayPageValues) Read(b []byte) (n int, err error) {
   361  	n, err = r.ReadFixedLenByteArrays(b)
   362  	return n * r.page.size, err
   363  }
   364  
   365  func (r *fixedLenByteArrayPageValues) ReadRequired(values []byte) (int, error) {
   366  	return r.ReadFixedLenByteArrays(values)
   367  }
   368  
   369  func (r *fixedLenByteArrayPageValues) ReadFixedLenByteArrays(values []byte) (n int, err error) {
   370  	n = copy(values, r.page.data[r.offset:]) / r.page.size
   371  	r.offset += n * r.page.size
   372  	if r.offset == len(r.page.data) {
   373  		err = io.EOF
   374  	} else if n == 0 && len(values) > 0 {
   375  		err = io.ErrShortBuffer
   376  	}
   377  	return n, err
   378  }
   379  
   380  func (r *fixedLenByteArrayPageValues) ReadValues(values []Value) (n int, err error) {
   381  	for n < len(values) && r.offset < len(r.page.data) {
   382  		values[n] = r.page.makeValueBytes(r.page.data[r.offset : r.offset+r.page.size])
   383  		r.offset += r.page.size
   384  		n++
   385  	}
   386  	if r.offset == len(r.page.data) {
   387  		err = io.EOF
   388  	}
   389  	return n, err
   390  }
   391  
   392  type uint32PageValues struct {
   393  	page   *uint32Page
   394  	offset int
   395  }
   396  
   397  func (r *uint32PageValues) Read(b []byte) (n int, err error) {
   398  	n, err = r.ReadUint32s(unsafecast.BytesToUint32(b))
   399  	return 4 * n, err
   400  }
   401  
   402  func (r *uint32PageValues) ReadUint32s(values []uint32) (n int, err error) {
   403  	n = copy(values, r.page.values[r.offset:])
   404  	r.offset += n
   405  	if r.offset == len(r.page.values) {
   406  		err = io.EOF
   407  	}
   408  	return n, err
   409  }
   410  
   411  func (r *uint32PageValues) ReadValues(values []Value) (n int, err error) {
   412  	for n < len(values) && r.offset < len(r.page.values) {
   413  		values[n] = r.page.makeValue(r.page.values[r.offset])
   414  		r.offset++
   415  		n++
   416  	}
   417  	if r.offset == len(r.page.values) {
   418  		err = io.EOF
   419  	}
   420  	return n, err
   421  }
   422  
   423  type uint64PageValues struct {
   424  	page   *uint64Page
   425  	offset int
   426  }
   427  
   428  func (r *uint64PageValues) Read(b []byte) (n int, err error) {
   429  	n, err = r.ReadUint64s(unsafecast.BytesToUint64(b))
   430  	return 8 * n, err
   431  }
   432  
   433  func (r *uint64PageValues) ReadUint64s(values []uint64) (n int, err error) {
   434  	n = copy(values, r.page.values[r.offset:])
   435  	r.offset += n
   436  	if r.offset == len(r.page.values) {
   437  		err = io.EOF
   438  	}
   439  	return n, err
   440  }
   441  
   442  func (r *uint64PageValues) ReadValues(values []Value) (n int, err error) {
   443  	for n < len(values) && r.offset < len(r.page.values) {
   444  		values[n] = r.page.makeValue(r.page.values[r.offset])
   445  		r.offset++
   446  		n++
   447  	}
   448  	if r.offset == len(r.page.values) {
   449  		err = io.EOF
   450  	}
   451  	return n, err
   452  }
   453  
   454  type be128PageValues struct {
   455  	page   *be128Page
   456  	offset int
   457  }
   458  
   459  func (r *be128PageValues) ReadValues(values []Value) (n int, err error) {
   460  	for n < len(values) && r.offset < len(r.page.values) {
   461  		values[n] = r.page.makeValue(&r.page.values[r.offset])
   462  		r.offset++
   463  		n++
   464  	}
   465  	if r.offset == len(r.page.values) {
   466  		err = io.EOF
   467  	}
   468  	return n, err
   469  }
   470  
   471  type nullPageValues struct {
   472  	column int
   473  	remain int
   474  }
   475  
   476  func (r *nullPageValues) ReadValues(values []Value) (n int, err error) {
   477  	columnIndex := ^int16(r.column)
   478  	values = values[:min(r.remain, len(values))]
   479  	for i := range values {
   480  		values[i] = Value{columnIndex: columnIndex}
   481  	}
   482  	r.remain -= len(values)
   483  	if r.remain == 0 {
   484  		err = io.EOF
   485  	}
   486  	return len(values), err
   487  }