github.com/petermattis/pebble@v0.0.0-20190905164901-ab51a2166067/ptable/block.go (about) 1 // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package ptable 6 7 import ( 8 "encoding/binary" 9 "math" 10 "unsafe" 11 ) 12 13 type columnWriter struct { 14 ctype ColumnType 15 data []byte 16 offsets []int32 17 nulls nullBitmapBuilder 18 count int32 19 nullCount int32 20 } 21 22 func (w *columnWriter) reset() { 23 w.data = w.data[:0] 24 w.offsets = w.offsets[:0] 25 w.nulls = w.nulls[:0] 26 w.count = 0 27 w.nullCount = 0 28 } 29 30 func (w *columnWriter) grow(n int) []byte { 31 i := len(w.data) 32 if cap(w.data)-i < n { 33 newSize := 2 * cap(w.data) 34 if newSize == 0 { 35 newSize = 256 36 } 37 newData := make([]byte, i, newSize) 38 copy(newData, w.data) 39 w.data = newData 40 } 41 w.data = w.data[:i+n] 42 return w.data[i:] 43 } 44 45 func (w *columnWriter) putBool(v bool) { 46 if w.ctype != ColumnTypeBool { 47 panic("bool column value expected") 48 } 49 w.data = (Bitmap)(w.data).set(int(w.count), v) 50 w.nulls = w.nulls.set(int(w.count), false) 51 w.count++ 52 } 53 54 func (w *columnWriter) putInt8(v int8) { 55 if w.ctype != ColumnTypeInt8 { 56 panic("int8 column value expected") 57 } 58 w.data = append(w.data, byte(v)) 59 w.nulls = w.nulls.set(int(w.count), false) 60 w.count++ 61 } 62 63 func (w *columnWriter) putInt16(v int16) { 64 if w.ctype != ColumnTypeInt16 { 65 panic("int16 column value expected") 66 } 67 binary.LittleEndian.PutUint16(w.grow(2), uint16(v)) 68 w.nulls = w.nulls.set(int(w.count), false) 69 w.count++ 70 } 71 72 func (w *columnWriter) putInt32(v int32) { 73 if w.ctype != ColumnTypeInt32 { 74 panic("int32 column value expected") 75 } 76 binary.LittleEndian.PutUint32(w.grow(4), uint32(v)) 77 w.nulls = w.nulls.set(int(w.count), false) 78 w.count++ 79 } 80 81 func (w *columnWriter) putInt64(v int64) { 82 if w.ctype != ColumnTypeInt64 { 83 panic("int64 column value expected") 84 } 85 binary.LittleEndian.PutUint64(w.grow(8), uint64(v)) 86 w.nulls = w.nulls.set(int(w.count), false) 87 w.count++ 88 } 89 90 func (w *columnWriter) putFloat32(v float32) { 91 if w.ctype != ColumnTypeFloat32 { 92 panic("float32 column value expected") 93 } 94 binary.LittleEndian.PutUint32(w.grow(4), math.Float32bits(v)) 95 w.nulls = w.nulls.set(int(w.count), false) 96 w.count++ 97 } 98 99 func (w *columnWriter) putFloat64(v float64) { 100 if w.ctype != ColumnTypeFloat64 { 101 panic("float64 column value expected") 102 } 103 binary.LittleEndian.PutUint64(w.grow(8), math.Float64bits(v)) 104 w.nulls = w.nulls.set(int(w.count), false) 105 w.count++ 106 } 107 108 func (w *columnWriter) putBytes(v []byte) { 109 if w.ctype != ColumnTypeBytes { 110 panic("bytes column value expected") 111 } 112 w.data = append(w.data, v...) 113 w.offsets = append(w.offsets, int32(len(w.data))) 114 w.nulls = w.nulls.set(int(w.count), false) 115 w.count++ 116 } 117 118 func (w *columnWriter) putNull() { 119 w.nulls = w.nulls.set(int(w.count), true) 120 if w.ctype.Width() <= 0 { 121 w.offsets = append(w.offsets, int32(len(w.data))) 122 } 123 w.count++ 124 w.nullCount++ 125 } 126 127 func align(offset, val int32) int32 { 128 return (offset + val - 1) & ^(val - 1) 129 } 130 131 func (w *columnWriter) encode(offset int32, buf []byte) int32 { 132 // The column type. 133 buf[offset] = byte(w.ctype) 134 offset++ 135 // The NULL-bitmap. 136 if w.nullCount == 0 { 137 buf[offset] = 0 // no NULL-bitmap 138 offset++ 139 } else { 140 buf[offset] = 1 // NULL-bitmap exists 141 offset++ 142 offset = align(offset, 4) 143 w.nulls.verify() 144 for i := 0; i < len(w.nulls); i++ { 145 binary.LittleEndian.PutUint32(buf[offset:], w.nulls[i]) 146 offset += 4 147 } 148 } 149 // The column values. 150 offset = align(offset, w.ctype.Alignment()) 151 offset += int32(copy(buf[offset:], w.data)) 152 // The offsets for variable width data. 153 if w.ctype.Width() <= 0 { 154 offset = align(offset, 4) 155 dest := (*[1 << 31]int32)(unsafe.Pointer(&buf[offset]))[:w.count:w.count] 156 copy(dest, w.offsets) 157 offset += int32(len(w.offsets) * 4) 158 } 159 return offset 160 } 161 162 func (w *columnWriter) size(offset int32) int32 { 163 startOffset := offset 164 // The column type. 165 offset++ 166 // The NULL-bitmap. 167 offset++ 168 if w.nullCount > 0 { 169 offset = align(offset, 4) 170 offset += 4 * int32(len(w.nulls)) 171 } 172 // The column values. 173 offset = align(offset, w.ctype.Alignment()) 174 offset += int32(len(w.data)) 175 // The offsets for variable width data. 176 if w.ctype.Width() <= 0 { 177 offset = align(offset, 4) 178 offset += int32(len(w.offsets) * 4) 179 } 180 return offset - startOffset 181 } 182 183 func blockHeaderSize(n int) int32 { 184 return int32(8 + n*4) 185 } 186 187 func pageOffsetPos(i int) int32 { 188 return int32(8 + i*4) 189 } 190 191 type blockWriter struct { 192 cols []columnWriter 193 buf []byte 194 } 195 196 func (w *blockWriter) init(s []ColumnType) { 197 w.cols = make([]columnWriter, len(s)) 198 for i := range w.cols { 199 w.cols[i].ctype = s[i] 200 } 201 } 202 203 func (w *blockWriter) reset() { 204 for i := range w.cols { 205 w.cols[i].reset() 206 } 207 } 208 209 func (w *blockWriter) Finish() []byte { 210 size := w.Size() 211 if int32(cap(w.buf)) < size { 212 w.buf = make([]byte, size) 213 } 214 w.buf = w.buf[:size] 215 n := len(w.cols) 216 binary.LittleEndian.PutUint32(w.buf[0:], uint32(n)) 217 binary.LittleEndian.PutUint32(w.buf[4:], uint32(w.cols[0].count)) 218 pageOffset := blockHeaderSize(n) 219 for i := range w.cols { 220 col := &w.cols[i] 221 binary.LittleEndian.PutUint32(w.buf[pageOffsetPos(i):], uint32(pageOffset)) 222 pageOffset = col.encode(pageOffset, w.buf) 223 } 224 return w.buf 225 } 226 227 func (w *blockWriter) Size() int32 { 228 size := blockHeaderSize(len(w.cols)) 229 for i := range w.cols { 230 size += w.cols[i].size(size) 231 } 232 return size 233 } 234 235 func (w *blockWriter) PutRow(row RowReader) { 236 for i := range w.cols { 237 col := &w.cols[i] 238 if row.Null(i) { 239 col.putNull() 240 continue 241 } 242 switch w.cols[i].ctype { 243 case ColumnTypeBool: 244 col.putBool(row.Bool(i)) 245 case ColumnTypeInt8: 246 col.putInt8(row.Int8(i)) 247 case ColumnTypeInt16: 248 col.putInt16(row.Int16(i)) 249 case ColumnTypeInt32: 250 col.putInt32(row.Int32(i)) 251 case ColumnTypeInt64: 252 col.putInt64(row.Int64(i)) 253 case ColumnTypeFloat32: 254 col.putFloat32(row.Float32(i)) 255 case ColumnTypeFloat64: 256 col.putFloat64(row.Float64(i)) 257 case ColumnTypeBytes: 258 col.putBytes(row.Bytes(i)) 259 } 260 } 261 } 262 263 func (w *blockWriter) PutBool(col int, v bool) { 264 w.cols[col].putBool(v) 265 } 266 267 func (w *blockWriter) PutInt8(col int, v int8) { 268 w.cols[col].putInt8(v) 269 } 270 271 func (w *blockWriter) PutInt16(col int, v int16) { 272 w.cols[col].putInt16(v) 273 } 274 275 func (w *blockWriter) PutInt32(col int, v int32) { 276 w.cols[col].putInt32(v) 277 } 278 279 func (w *blockWriter) PutInt64(col int, v int64) { 280 w.cols[col].putInt64(v) 281 } 282 283 func (w *blockWriter) PutFloat32(col int, v float32) { 284 w.cols[col].putFloat32(v) 285 } 286 287 func (w *blockWriter) PutFloat64(col int, v float64) { 288 w.cols[col].putFloat64(v) 289 } 290 291 func (w *blockWriter) PutBytes(col int, v []byte) { 292 w.cols[col].putBytes(v) 293 } 294 295 func (w *blockWriter) PutNull(col int) { 296 w.cols[col].putNull() 297 } 298 299 // Block is a contiguous chunk of memory that contains column data. The layout 300 // of a block is: 301 // 302 // +---------------------------------------------------------------+ 303 // | ncols(4) | nrows(4) | page1(4) | page2(4) | ... | pageN(4) | 304 // +---------------------------------------------------------------+ 305 // | <bool> | NULL-bitmap | value-bitmap | 306 // +---------------------------------------------------------------+ 307 // | <int32> | NULL-bitmap | values (4-byte aligned) | 308 // +---------------------------------------------------------------+ 309 // | <bytes> | NULL-bitmap | val1 | val2 | ... | pos (4) | pos (4) | 310 // +---------------------------------------------------------------+ 311 // | ... | 312 // +---------------------------------------------------------------+ 313 // 314 // Blocks contain rows following a fixed schema. The data is stored in a 315 // columnar layout: all of the values for a column are stored 316 // contiguously. Column types have either fixed-width values, or 317 // variable-width. All variable-width values are stored in the "bytes" column 318 // type and it is up to higher levels to interpret. 319 // 320 // The data for a column is stored within a "page". The first byte in a page 321 // specifies the column type. Fixed width pages are then followed by a 322 // NULL-bitmap with 1-bit per row indicating whether the column at that row is 323 // null or not. Following the NULL-bitmap is the column data itself. The data 324 // is aligned to the required alignment of the column type (4 for int32, 8 for 325 // int64, etc) so that it can be accessed directly without decoding. 326 // 327 // The NULL-bitmap indicates the presence of a column value. If the i'th bit of 328 // the NULL-bitmap for a column is 1, no value is stored for the column at that 329 // index. The NULL-bitmap is interleaved with a rank lookup table which 330 // accelerates calculation of the rank for bitmap i. The rank is the number of 331 // non-NULL values present in bitmap[0,i). The bitmap is organized as a series 332 // of 32-bit words where the low 16-bits of each word are part of the bitmap 333 // and the high 16-bits are the sum of the set bits in the earlier words. The 334 // NULL-bitmap is omitted if there are no NULL values for a column in a block. 335 // 336 // Variable width data (i.e. the "bytes" column type) is stored in a different 337 // format. Immediately following the column type are the concatenated variable 338 // length values. After the concatenated data is an array of offsets indicating 339 // the end of each column value within the concatenated data. For example, 340 // offset[0] is the end of the first row's column data. A negative offset 341 // indicates a null value. 342 type Block struct { 343 start unsafe.Pointer 344 len int32 345 cols int32 346 rows int32 347 } 348 349 // NewBlock return a new Block configured to read from the specified 350 // memory. The caller must ensure that the data is formatted as to the block 351 // layout specification. 352 func NewBlock(data []byte) *Block { 353 r := &Block{} 354 r.init(data) 355 return r 356 } 357 358 func (r *Block) init(data []byte) { 359 r.start = unsafe.Pointer(&data[0]) 360 r.len = int32(len(data)) 361 r.cols = int32(binary.LittleEndian.Uint32(data[0:])) 362 r.rows = int32(binary.LittleEndian.Uint32(data[4:])) 363 } 364 365 func (r *Block) pageStart(col int) int32 { 366 if int32(col) >= r.cols { 367 return r.len 368 } 369 return *(*int32)(unsafe.Pointer(uintptr(r.start) + 8 + uintptr(col*4))) 370 } 371 372 func (r *Block) pointer(offset int32) unsafe.Pointer { 373 return unsafe.Pointer(uintptr(r.start) + uintptr(offset)) 374 } 375 376 func (r *Block) data() []byte { 377 return (*[1 << 31]byte)(r.start)[:r.len:r.len] 378 } 379 380 // Column returns a Vector for the specified column. The caller must check (or 381 // otherwise know) the type of the column before accessing the column data. The 382 // caller should check to see if the column contains any NULL values 383 // (Vec.Null.Empty()) and specialize processing accordingly. 384 func (r *Block) Column(col int) Vec { 385 if col < 0 || int32(col) >= r.cols { 386 panic("invalid column") 387 } 388 389 start := r.pageStart(col) 390 data := r.pointer(start) 391 392 var v Vec 393 v.N = r.rows 394 // The column type. 395 v.Type = *(*ColumnType)(data) 396 start++ 397 // The NULL-bitmap. 398 if *(*byte)(r.pointer(start)) == 0 { 399 start++ 400 } else { 401 start = align(start, 4) 402 v.ptr = r.pointer(start) 403 start += 4 * (int32(r.rows+15) / 16) 404 } 405 // The column values. 406 start = align(start, v.Type.Alignment()) 407 v.start = r.pointer(start) 408 // The end of the offsets for variable width data. 409 v.end = r.pointer(r.pageStart(col + 1)) 410 return v 411 }