github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/page_values.go (about) 1 package parquet 2 3 import ( 4 "io" 5 6 "github.com/vc42/parquet-go/deprecated" 7 "github.com/vc42/parquet-go/encoding/plain" 8 "github.com/vc42/parquet-go/internal/unsafecast" 9 ) 10 11 type optionalPageValues struct { 12 page *optionalPage 13 values ValueReader 14 offset int 15 } 16 17 func (r *optionalPageValues) ReadValues(values []Value) (n int, err error) { 18 maxDefinitionLevel := r.page.maxDefinitionLevel 19 columnIndex := ^int16(r.page.Column()) 20 21 for n < len(values) && r.offset < len(r.page.definitionLevels) { 22 for n < len(values) && r.offset < len(r.page.definitionLevels) && r.page.definitionLevels[r.offset] != maxDefinitionLevel { 23 values[n] = Value{ 24 definitionLevel: r.page.definitionLevels[r.offset], 25 columnIndex: columnIndex, 26 } 27 r.offset++ 28 n++ 29 } 30 31 i := n 32 j := r.offset 33 for i < len(values) && j < len(r.page.definitionLevels) && r.page.definitionLevels[j] == maxDefinitionLevel { 34 i++ 35 j++ 36 } 37 38 if n < i { 39 for j, err = r.values.ReadValues(values[n:i]); j > 0; j-- { 40 values[n].definitionLevel = maxDefinitionLevel 41 r.offset++ 42 n++ 43 } 44 // Do not return on an io.EOF here as we may still have null values to read. 45 if err != nil && err != io.EOF { 46 return n, err 47 } 48 err = nil 49 } 50 } 51 52 if r.offset == len(r.page.definitionLevels) { 53 err = io.EOF 54 } 55 return n, err 56 } 57 58 type repeatedPageValues struct { 59 page *repeatedPage 60 values ValueReader 61 offset int 62 } 63 64 func (r *repeatedPageValues) ReadValues(values []Value) (n int, err error) { 65 maxDefinitionLevel := r.page.maxDefinitionLevel 66 columnIndex := ^int16(r.page.Column()) 67 68 for n < len(values) && r.offset < len(r.page.definitionLevels) { 69 for n < len(values) && r.offset < len(r.page.definitionLevels) && r.page.definitionLevels[r.offset] != maxDefinitionLevel { 70 values[n] = Value{ 71 repetitionLevel: r.page.repetitionLevels[r.offset], 72 definitionLevel: r.page.definitionLevels[r.offset], 73 columnIndex: columnIndex, 74 } 75 r.offset++ 76 n++ 77 } 78 79 i := n 80 j := r.offset 81 for i < len(values) && j < len(r.page.definitionLevels) && r.page.definitionLevels[j] == maxDefinitionLevel { 82 i++ 83 j++ 84 } 85 86 if n < i { 87 for j, err = r.values.ReadValues(values[n:i]); j > 0; j-- { 88 values[n].repetitionLevel = r.page.repetitionLevels[r.offset] 89 values[n].definitionLevel = maxDefinitionLevel 90 r.offset++ 91 n++ 92 } 93 if err != nil && err != io.EOF { 94 return n, err 95 } 96 err = nil 97 } 98 } 99 100 if r.offset == len(r.page.definitionLevels) { 101 err = io.EOF 102 } 103 return n, err 104 } 105 106 type booleanPageValues struct { 107 page *booleanPage 108 offset int 109 } 110 111 func (r *booleanPageValues) ReadBooleans(values []bool) (n int, err error) { 112 for n < len(values) && r.offset < int(r.page.numValues) { 113 values[n] = r.page.valueAt(r.offset) 114 r.offset++ 115 n++ 116 } 117 if r.offset == int(r.page.numValues) { 118 err = io.EOF 119 } 120 return n, err 121 } 122 123 func (r *booleanPageValues) ReadValues(values []Value) (n int, err error) { 124 for n < len(values) && r.offset < int(r.page.numValues) { 125 values[n] = r.page.makeValue(r.page.valueAt(r.offset)) 126 r.offset++ 127 n++ 128 } 129 if r.offset == int(r.page.numValues) { 130 err = io.EOF 131 } 132 return n, err 133 } 134 135 type int32PageValues struct { 136 page *int32Page 137 offset int 138 } 139 140 func (r *int32PageValues) Read(b []byte) (n int, err error) { 141 n, err = r.ReadInt32s(unsafecast.BytesToInt32(b)) 142 return 4 * n, err 143 } 144 145 func (r *int32PageValues) ReadInt32s(values []int32) (n int, err error) { 146 n = copy(values, r.page.values[r.offset:]) 147 r.offset += n 148 if r.offset == len(r.page.values) { 149 err = io.EOF 150 } 151 return n, err 152 } 153 154 func (r *int32PageValues) ReadValues(values []Value) (n int, err error) { 155 for n < len(values) && r.offset < len(r.page.values) { 156 values[n] = r.page.makeValue(r.page.values[r.offset]) 157 r.offset++ 158 n++ 159 } 160 if r.offset == len(r.page.values) { 161 err = io.EOF 162 } 163 return n, err 164 } 165 166 type int64PageValues struct { 167 page *int64Page 168 offset int 169 } 170 171 func (r *int64PageValues) Read(b []byte) (n int, err error) { 172 n, err = r.ReadInt64s(unsafecast.BytesToInt64(b)) 173 return 8 * n, err 174 } 175 176 func (r *int64PageValues) ReadInt64s(values []int64) (n int, err error) { 177 n = copy(values, r.page.values[r.offset:]) 178 r.offset += n 179 if r.offset == len(r.page.values) { 180 err = io.EOF 181 } 182 return n, err 183 } 184 185 func (r *int64PageValues) ReadValues(values []Value) (n int, err error) { 186 for n < len(values) && r.offset < len(r.page.values) { 187 values[n] = r.page.makeValue(r.page.values[r.offset]) 188 r.offset++ 189 n++ 190 } 191 if r.offset == len(r.page.values) { 192 err = io.EOF 193 } 194 return n, err 195 } 196 197 type int96PageValues struct { 198 page *int96Page 199 offset int 200 } 201 202 func (r *int96PageValues) Read(b []byte) (n int, err error) { 203 n, err = r.ReadInt96s(deprecated.BytesToInt96(b)) 204 return 12 * n, err 205 } 206 207 func (r *int96PageValues) ReadInt96s(values []deprecated.Int96) (n int, err error) { 208 n = copy(values, r.page.values[r.offset:]) 209 r.offset += n 210 if r.offset == len(r.page.values) { 211 err = io.EOF 212 } 213 return n, err 214 } 215 216 func (r *int96PageValues) ReadValues(values []Value) (n int, err error) { 217 for n < len(values) && r.offset < len(r.page.values) { 218 values[n] = r.page.makeValue(r.page.values[r.offset]) 219 r.offset++ 220 n++ 221 } 222 if r.offset == len(r.page.values) { 223 err = io.EOF 224 } 225 return n, err 226 } 227 228 type floatPageValues struct { 229 page *floatPage 230 offset int 231 } 232 233 func (r *floatPageValues) Read(b []byte) (n int, err error) { 234 n, err = r.ReadFloats(unsafecast.BytesToFloat32(b)) 235 return 4 * n, err 236 } 237 238 func (r *floatPageValues) ReadFloats(values []float32) (n int, err error) { 239 n = copy(values, r.page.values[r.offset:]) 240 r.offset += n 241 if r.offset == len(r.page.values) { 242 err = io.EOF 243 } 244 return n, err 245 } 246 247 func (r *floatPageValues) ReadValues(values []Value) (n int, err error) { 248 for n < len(values) && r.offset < len(r.page.values) { 249 values[n] = r.page.makeValue(r.page.values[r.offset]) 250 r.offset++ 251 n++ 252 } 253 if r.offset == len(r.page.values) { 254 err = io.EOF 255 } 256 return n, err 257 } 258 259 type doublePageValues struct { 260 page *doublePage 261 offset int 262 } 263 264 func (r *doublePageValues) Read(b []byte) (n int, err error) { 265 n, err = r.ReadDoubles(unsafecast.BytesToFloat64(b)) 266 return 8 * n, err 267 } 268 269 func (r *doublePageValues) ReadDoubles(values []float64) (n int, err error) { 270 n = copy(values, r.page.values[r.offset:]) 271 r.offset += n 272 if r.offset == len(r.page.values) { 273 err = io.EOF 274 } 275 return n, err 276 } 277 278 func (r *doublePageValues) ReadValues(values []Value) (n int, err error) { 279 for n < len(values) && r.offset < len(r.page.values) { 280 values[n] = r.page.makeValue(r.page.values[r.offset]) 281 r.offset++ 282 n++ 283 } 284 if r.offset == len(r.page.values) { 285 err = io.EOF 286 } 287 return n, err 288 } 289 290 type byteArrayPageValues struct { 291 page *byteArrayPage 292 offset int 293 } 294 295 func (r *byteArrayPageValues) Read(b []byte) (int, error) { 296 _, n, err := r.readByteArrays(b) 297 return n, err 298 } 299 300 func (r *byteArrayPageValues) ReadRequired(values []byte) (int, error) { 301 return r.ReadByteArrays(values) 302 } 303 304 func (r *byteArrayPageValues) ReadByteArrays(values []byte) (int, error) { 305 n, _, err := r.readByteArrays(values) 306 return n, err 307 } 308 309 func (r *byteArrayPageValues) readByteArrays(values []byte) (c, n int, err error) { 310 for r.offset < len(r.page.values) { 311 b := r.page.valueAt(uint32(r.offset)) 312 k := plain.ByteArrayLengthSize + len(b) 313 if k > (len(values) - n) { 314 break 315 } 316 plain.PutByteArrayLength(values[n:], len(b)) 317 n += plain.ByteArrayLengthSize 318 n += copy(values[n:], b) 319 r.offset += plain.ByteArrayLengthSize 320 r.offset += len(b) 321 c++ 322 } 323 if r.offset == len(r.page.values) { 324 err = io.EOF 325 } else if n == 0 && len(values) > 0 { 326 err = io.ErrShortBuffer 327 } 328 return c, n, err 329 } 330 331 func (r *byteArrayPageValues) ReadValues(values []Value) (n int, err error) { 332 for n < len(values) && r.offset < len(r.page.values) { 333 value := r.page.valueAt(uint32(r.offset)) 334 values[n] = r.page.makeValueBytes(copyBytes(value)) 335 r.offset += plain.ByteArrayLengthSize 336 r.offset += len(value) 337 n++ 338 } 339 if r.offset == len(r.page.values) { 340 err = io.EOF 341 } 342 return n, err 343 } 344 345 type fixedLenByteArrayPageValues struct { 346 page *fixedLenByteArrayPage 347 offset int 348 } 349 350 func (r *fixedLenByteArrayPageValues) Read(b []byte) (n int, err error) { 351 n, err = r.ReadFixedLenByteArrays(b) 352 return n * r.page.size, err 353 } 354 355 func (r *fixedLenByteArrayPageValues) ReadRequired(values []byte) (int, error) { 356 return r.ReadFixedLenByteArrays(values) 357 } 358 359 func (r *fixedLenByteArrayPageValues) ReadFixedLenByteArrays(values []byte) (n int, err error) { 360 n = copy(values, r.page.data[r.offset:]) / r.page.size 361 r.offset += n * r.page.size 362 if r.offset == len(r.page.data) { 363 err = io.EOF 364 } else if n == 0 && len(values) > 0 { 365 err = io.ErrShortBuffer 366 } 367 return n, err 368 } 369 370 func (r *fixedLenByteArrayPageValues) ReadValues(values []Value) (n int, err error) { 371 for n < len(values) && r.offset < len(r.page.data) { 372 values[n] = r.page.makeValueBytes(copyBytes(r.page.data[r.offset : r.offset+r.page.size])) 373 r.offset += r.page.size 374 n++ 375 } 376 if r.offset == len(r.page.data) { 377 err = io.EOF 378 } 379 return n, err 380 } 381 382 type uint32PageValues struct { 383 page *uint32Page 384 offset int 385 } 386 387 func (r *uint32PageValues) Read(b []byte) (n int, err error) { 388 n, err = r.ReadUint32s(unsafecast.BytesToUint32(b)) 389 return 4 * n, err 390 } 391 392 func (r *uint32PageValues) ReadUint32s(values []uint32) (n int, err error) { 393 n = copy(values, r.page.values[r.offset:]) 394 r.offset += n 395 if r.offset == len(r.page.values) { 396 err = io.EOF 397 } 398 return n, err 399 } 400 401 func (r *uint32PageValues) ReadValues(values []Value) (n int, err error) { 402 for n < len(values) && r.offset < len(r.page.values) { 403 values[n] = r.page.makeValue(r.page.values[r.offset]) 404 r.offset++ 405 n++ 406 } 407 if r.offset == len(r.page.values) { 408 err = io.EOF 409 } 410 return n, err 411 } 412 413 type uint64PageValues struct { 414 page *uint64Page 415 offset int 416 } 417 418 func (r *uint64PageValues) Read(b []byte) (n int, err error) { 419 n, err = r.ReadUint64s(unsafecast.BytesToUint64(b)) 420 return 8 * n, err 421 } 422 423 func (r *uint64PageValues) ReadUint64s(values []uint64) (n int, err error) { 424 n = copy(values, r.page.values[r.offset:]) 425 r.offset += n 426 if r.offset == len(r.page.values) { 427 err = io.EOF 428 } 429 return n, err 430 } 431 432 func (r *uint64PageValues) ReadValues(values []Value) (n int, err error) { 433 for n < len(values) && r.offset < len(r.page.values) { 434 values[n] = r.page.makeValue(r.page.values[r.offset]) 435 r.offset++ 436 n++ 437 } 438 if r.offset == len(r.page.values) { 439 err = io.EOF 440 } 441 return n, err 442 } 443 444 type be128PageValues struct { 445 page *be128Page 446 offset int 447 } 448 449 func (r *be128PageValues) ReadValues(values []Value) (n int, err error) { 450 for n < len(values) && r.offset < len(r.page.values) { 451 values[n] = r.page.makeValue(&r.page.values[r.offset]) 452 r.offset++ 453 n++ 454 } 455 if r.offset == len(r.page.values) { 456 err = io.EOF 457 } 458 return n, err 459 } 460 461 type nullPageValues struct { 462 column int 463 remain int 464 } 465 466 func (r *nullPageValues) ReadValues(values []Value) (n int, err error) { 467 columnIndex := ^int16(r.column) 468 values = values[:min(r.remain, len(values))] 469 for i := range values { 470 values[i] = Value{columnIndex: columnIndex} 471 } 472 r.remain -= len(values) 473 if r.remain == 0 { 474 err = io.EOF 475 } 476 return len(values), err 477 }