github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/page_values.go (about) 1 package parquet 2 3 import ( 4 "io" 5 6 "github.com/parquet-go/parquet-go/deprecated" 7 "github.com/parquet-go/parquet-go/encoding/plain" 8 "github.com/parquet-go/parquet-go/internal/unsafecast" 9 ) 10 11 type optionalPageValues struct { 12 page *optionalPage 13 values ValueReader 14 offset int 15 } 16 17 func (r *optionalPageValues) ReadValues(values []Value) (n int, err error) { 18 maxDefinitionLevel := r.page.maxDefinitionLevel 19 definitionLevels := r.page.definitionLevels 20 columnIndex := ^int16(r.page.Column()) 21 22 for n < len(values) && r.offset < len(definitionLevels) { 23 for n < len(values) && r.offset < len(definitionLevels) && definitionLevels[r.offset] != maxDefinitionLevel { 24 values[n] = Value{ 25 definitionLevel: definitionLevels[r.offset], 26 columnIndex: columnIndex, 27 } 28 r.offset++ 29 n++ 30 } 31 32 i := n 33 j := r.offset 34 for i < len(values) && j < len(definitionLevels) && definitionLevels[j] == maxDefinitionLevel { 35 i++ 36 j++ 37 } 38 39 if n < i { 40 for j, err = r.values.ReadValues(values[n:i]); j > 0; j-- { 41 values[n].definitionLevel = maxDefinitionLevel 42 r.offset++ 43 n++ 44 } 45 // Do not return on an io.EOF here as we may still have null values to read. 46 if err != nil && err != io.EOF { 47 return n, err 48 } 49 err = nil 50 } 51 } 52 53 if r.offset == len(definitionLevels) { 54 err = io.EOF 55 } 56 return n, err 57 } 58 59 type repeatedPageValues struct { 60 page *repeatedPage 61 values ValueReader 62 offset int 63 } 64 65 func (r *repeatedPageValues) ReadValues(values []Value) (n int, err error) { 66 maxDefinitionLevel := r.page.maxDefinitionLevel 67 definitionLevels := r.page.definitionLevels 68 repetitionLevels := r.page.repetitionLevels 69 columnIndex := ^int16(r.page.Column()) 70 71 // While we haven't exceeded the output buffer and we haven't exceeded the page size. 72 for n < len(values) && r.offset < len(definitionLevels) { 73 74 // While we haven't exceeded the output buffer and we haven't exceeded the 75 // page size AND the current element's definitionLevel is not the 76 // maxDefinitionLevel (this is a null value), Create the zero values to be 77 // returned in this run. 78 for n < len(values) && r.offset < len(definitionLevels) && definitionLevels[r.offset] != maxDefinitionLevel { 79 values[n] = Value{ 80 repetitionLevel: repetitionLevels[r.offset], 81 definitionLevel: definitionLevels[r.offset], 82 columnIndex: columnIndex, 83 } 84 r.offset++ 85 n++ 86 } 87 88 i := n 89 j := r.offset 90 // Get the length of the run of non-zero values to be copied. 91 for i < len(values) && j < len(definitionLevels) && definitionLevels[j] == maxDefinitionLevel { 92 i++ 93 j++ 94 } 95 96 // Copy all the non-zero values in this run. 97 if n < i { 98 for j, err = r.values.ReadValues(values[n:i]); j > 0; j-- { 99 values[n].repetitionLevel = repetitionLevels[r.offset] 100 values[n].definitionLevel = maxDefinitionLevel 101 r.offset++ 102 n++ 103 } 104 if err != nil && err != io.EOF { 105 return n, err 106 } 107 err = nil 108 } 109 } 110 111 if r.offset == len(definitionLevels) { 112 err = io.EOF 113 } 114 return n, err 115 } 116 117 type booleanPageValues struct { 118 page *booleanPage 119 offset int 120 } 121 122 func (r *booleanPageValues) ReadBooleans(values []bool) (n int, err error) { 123 for n < len(values) && r.offset < int(r.page.numValues) { 124 values[n] = r.page.valueAt(r.offset) 125 r.offset++ 126 n++ 127 } 128 if r.offset == int(r.page.numValues) { 129 err = io.EOF 130 } 131 return n, err 132 } 133 134 func (r *booleanPageValues) ReadValues(values []Value) (n int, err error) { 135 for n < len(values) && r.offset < int(r.page.numValues) { 136 values[n] = r.page.makeValue(r.page.valueAt(r.offset)) 137 r.offset++ 138 n++ 139 } 140 if r.offset == int(r.page.numValues) { 141 err = io.EOF 142 } 143 return n, err 144 } 145 146 type int32PageValues struct { 147 page *int32Page 148 offset int 149 } 150 151 func (r *int32PageValues) Read(b []byte) (n int, err error) { 152 n, err = r.ReadInt32s(unsafecast.BytesToInt32(b)) 153 return 4 * n, err 154 } 155 156 func (r *int32PageValues) ReadInt32s(values []int32) (n int, err error) { 157 n = copy(values, r.page.values[r.offset:]) 158 r.offset += n 159 if r.offset == len(r.page.values) { 160 err = io.EOF 161 } 162 return n, err 163 } 164 165 func (r *int32PageValues) ReadValues(values []Value) (n int, err error) { 166 for n < len(values) && r.offset < len(r.page.values) { 167 values[n] = r.page.makeValue(r.page.values[r.offset]) 168 r.offset++ 169 n++ 170 } 171 if r.offset == len(r.page.values) { 172 err = io.EOF 173 } 174 return n, err 175 } 176 177 type int64PageValues struct { 178 page *int64Page 179 offset int 180 } 181 182 func (r *int64PageValues) Read(b []byte) (n int, err error) { 183 n, err = r.ReadInt64s(unsafecast.BytesToInt64(b)) 184 return 8 * n, err 185 } 186 187 func (r *int64PageValues) ReadInt64s(values []int64) (n int, err error) { 188 n = copy(values, r.page.values[r.offset:]) 189 r.offset += n 190 if r.offset == len(r.page.values) { 191 err = io.EOF 192 } 193 return n, err 194 } 195 196 func (r *int64PageValues) ReadValues(values []Value) (n int, err error) { 197 for n < len(values) && r.offset < len(r.page.values) { 198 values[n] = r.page.makeValue(r.page.values[r.offset]) 199 r.offset++ 200 n++ 201 } 202 if r.offset == len(r.page.values) { 203 err = io.EOF 204 } 205 return n, err 206 } 207 208 type int96PageValues struct { 209 page *int96Page 210 offset int 211 } 212 213 func (r *int96PageValues) Read(b []byte) (n int, err error) { 214 n, err = r.ReadInt96s(deprecated.BytesToInt96(b)) 215 return 12 * n, err 216 } 217 218 func (r *int96PageValues) ReadInt96s(values []deprecated.Int96) (n int, err error) { 219 n = copy(values, r.page.values[r.offset:]) 220 r.offset += n 221 if r.offset == len(r.page.values) { 222 err = io.EOF 223 } 224 return n, err 225 } 226 227 func (r *int96PageValues) ReadValues(values []Value) (n int, err error) { 228 for n < len(values) && r.offset < len(r.page.values) { 229 values[n] = r.page.makeValue(r.page.values[r.offset]) 230 r.offset++ 231 n++ 232 } 233 if r.offset == len(r.page.values) { 234 err = io.EOF 235 } 236 return n, err 237 } 238 239 type floatPageValues struct { 240 page *floatPage 241 offset int 242 } 243 244 func (r *floatPageValues) Read(b []byte) (n int, err error) { 245 n, err = r.ReadFloats(unsafecast.BytesToFloat32(b)) 246 return 4 * n, err 247 } 248 249 func (r *floatPageValues) ReadFloats(values []float32) (n int, err error) { 250 n = copy(values, r.page.values[r.offset:]) 251 r.offset += n 252 if r.offset == len(r.page.values) { 253 err = io.EOF 254 } 255 return n, err 256 } 257 258 func (r *floatPageValues) ReadValues(values []Value) (n int, err error) { 259 for n < len(values) && r.offset < len(r.page.values) { 260 values[n] = r.page.makeValue(r.page.values[r.offset]) 261 r.offset++ 262 n++ 263 } 264 if r.offset == len(r.page.values) { 265 err = io.EOF 266 } 267 return n, err 268 } 269 270 type doublePageValues struct { 271 page *doublePage 272 offset int 273 } 274 275 func (r *doublePageValues) Read(b []byte) (n int, err error) { 276 n, err = r.ReadDoubles(unsafecast.BytesToFloat64(b)) 277 return 8 * n, err 278 } 279 280 func (r *doublePageValues) ReadDoubles(values []float64) (n int, err error) { 281 n = copy(values, r.page.values[r.offset:]) 282 r.offset += n 283 if r.offset == len(r.page.values) { 284 err = io.EOF 285 } 286 return n, err 287 } 288 289 func (r *doublePageValues) ReadValues(values []Value) (n int, err error) { 290 for n < len(values) && r.offset < len(r.page.values) { 291 values[n] = r.page.makeValue(r.page.values[r.offset]) 292 r.offset++ 293 n++ 294 } 295 if r.offset == len(r.page.values) { 296 err = io.EOF 297 } 298 return n, err 299 } 300 301 type byteArrayPageValues struct { 302 page *byteArrayPage 303 offset int 304 } 305 306 func (r *byteArrayPageValues) Read(b []byte) (int, error) { 307 _, n, err := r.readByteArrays(b) 308 return n, err 309 } 310 311 func (r *byteArrayPageValues) ReadRequired(values []byte) (int, error) { 312 return r.ReadByteArrays(values) 313 } 314 315 func (r *byteArrayPageValues) ReadByteArrays(values []byte) (int, error) { 316 n, _, err := r.readByteArrays(values) 317 return n, err 318 } 319 320 func (r *byteArrayPageValues) readByteArrays(values []byte) (c, n int, err error) { 321 numValues := r.page.len() 322 for r.offset < numValues { 323 b := r.page.index(r.offset) 324 k := plain.ByteArrayLengthSize + len(b) 325 if k > (len(values) - n) { 326 break 327 } 328 plain.PutByteArrayLength(values[n:], len(b)) 329 n += plain.ByteArrayLengthSize 330 n += copy(values[n:], b) 331 r.offset++ 332 c++ 333 } 334 if r.offset == numValues { 335 err = io.EOF 336 } else if n == 0 && len(values) > 0 { 337 err = io.ErrShortBuffer 338 } 339 return c, n, err 340 } 341 342 func (r *byteArrayPageValues) ReadValues(values []Value) (n int, err error) { 343 numValues := r.page.len() 344 for n < len(values) && r.offset < numValues { 345 values[n] = r.page.makeValueBytes(r.page.index(r.offset)) 346 r.offset++ 347 n++ 348 } 349 if r.offset == numValues { 350 err = io.EOF 351 } 352 return n, err 353 } 354 355 type fixedLenByteArrayPageValues struct { 356 page *fixedLenByteArrayPage 357 offset int 358 } 359 360 func (r *fixedLenByteArrayPageValues) Read(b []byte) (n int, err error) { 361 n, err = r.ReadFixedLenByteArrays(b) 362 return n * r.page.size, err 363 } 364 365 func (r *fixedLenByteArrayPageValues) ReadRequired(values []byte) (int, error) { 366 return r.ReadFixedLenByteArrays(values) 367 } 368 369 func (r *fixedLenByteArrayPageValues) ReadFixedLenByteArrays(values []byte) (n int, err error) { 370 n = copy(values, r.page.data[r.offset:]) / r.page.size 371 r.offset += n * r.page.size 372 if r.offset == len(r.page.data) { 373 err = io.EOF 374 } else if n == 0 && len(values) > 0 { 375 err = io.ErrShortBuffer 376 } 377 return n, err 378 } 379 380 func (r *fixedLenByteArrayPageValues) ReadValues(values []Value) (n int, err error) { 381 for n < len(values) && r.offset < len(r.page.data) { 382 values[n] = r.page.makeValueBytes(r.page.data[r.offset : r.offset+r.page.size]) 383 r.offset += r.page.size 384 n++ 385 } 386 if r.offset == len(r.page.data) { 387 err = io.EOF 388 } 389 return n, err 390 } 391 392 type uint32PageValues struct { 393 page *uint32Page 394 offset int 395 } 396 397 func (r *uint32PageValues) Read(b []byte) (n int, err error) { 398 n, err = r.ReadUint32s(unsafecast.BytesToUint32(b)) 399 return 4 * n, err 400 } 401 402 func (r *uint32PageValues) ReadUint32s(values []uint32) (n int, err error) { 403 n = copy(values, r.page.values[r.offset:]) 404 r.offset += n 405 if r.offset == len(r.page.values) { 406 err = io.EOF 407 } 408 return n, err 409 } 410 411 func (r *uint32PageValues) ReadValues(values []Value) (n int, err error) { 412 for n < len(values) && r.offset < len(r.page.values) { 413 values[n] = r.page.makeValue(r.page.values[r.offset]) 414 r.offset++ 415 n++ 416 } 417 if r.offset == len(r.page.values) { 418 err = io.EOF 419 } 420 return n, err 421 } 422 423 type uint64PageValues struct { 424 page *uint64Page 425 offset int 426 } 427 428 func (r *uint64PageValues) Read(b []byte) (n int, err error) { 429 n, err = r.ReadUint64s(unsafecast.BytesToUint64(b)) 430 return 8 * n, err 431 } 432 433 func (r *uint64PageValues) ReadUint64s(values []uint64) (n int, err error) { 434 n = copy(values, r.page.values[r.offset:]) 435 r.offset += n 436 if r.offset == len(r.page.values) { 437 err = io.EOF 438 } 439 return n, err 440 } 441 442 func (r *uint64PageValues) ReadValues(values []Value) (n int, err error) { 443 for n < len(values) && r.offset < len(r.page.values) { 444 values[n] = r.page.makeValue(r.page.values[r.offset]) 445 r.offset++ 446 n++ 447 } 448 if r.offset == len(r.page.values) { 449 err = io.EOF 450 } 451 return n, err 452 } 453 454 type be128PageValues struct { 455 page *be128Page 456 offset int 457 } 458 459 func (r *be128PageValues) ReadValues(values []Value) (n int, err error) { 460 for n < len(values) && r.offset < len(r.page.values) { 461 values[n] = r.page.makeValue(&r.page.values[r.offset]) 462 r.offset++ 463 n++ 464 } 465 if r.offset == len(r.page.values) { 466 err = io.EOF 467 } 468 return n, err 469 } 470 471 type nullPageValues struct { 472 column int 473 remain int 474 } 475 476 func (r *nullPageValues) ReadValues(values []Value) (n int, err error) { 477 columnIndex := ^int16(r.column) 478 values = values[:min(r.remain, len(values))] 479 for i := range values { 480 values[i] = Value{columnIndex: columnIndex} 481 } 482 r.remain -= len(values) 483 if r.remain == 0 { 484 err = io.EOF 485 } 486 return len(values), err 487 }