github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/table_set.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // This file incorporates work covered by the following copyright and 16 // permission notice: 17 // 18 // Copyright 2016 Attic Labs, Inc. All rights reserved. 19 // Licensed under the Apache License, version 2.0: 20 // http://www.apache.org/licenses/LICENSE-2.0 21 22 package nbs 23 24 import ( 25 "context" 26 "errors" 27 "sync" 28 "sync/atomic" 29 30 "golang.org/x/sync/errgroup" 31 32 "github.com/dolthub/dolt/go/store/atomicerr" 33 "github.com/dolthub/dolt/go/store/chunks" 34 ) 35 36 const concurrentCompactions = 5 37 38 func newTableSet(persister tablePersister) tableSet { 39 return tableSet{p: persister, rl: make(chan struct{}, concurrentCompactions)} 40 } 41 42 // tableSet is an immutable set of persistable chunkSources. 43 type tableSet struct { 44 novel, upstream chunkSources 45 p tablePersister 46 rl chan struct{} 47 } 48 49 func (ts tableSet) has(h addr) (bool, error) { 50 f := func(css chunkSources) (bool, error) { 51 for _, haver := range css { 52 has, err := haver.has(h) 53 54 if err != nil { 55 return false, err 56 } 57 58 if has { 59 return true, nil 60 } 61 } 62 return false, nil 63 } 64 65 novelHas, err := f(ts.novel) 66 67 if err != nil { 68 return false, err 69 } 70 71 if novelHas { 72 return true, nil 73 } 74 75 return f(ts.upstream) 76 } 77 78 func (ts tableSet) hasMany(addrs []hasRecord) (bool, error) { 79 f := func(css chunkSources) (bool, error) { 80 for _, haver := range css { 81 has, err := haver.hasMany(addrs) 82 83 if err != nil { 84 return false, err 85 } 86 87 if !has { 88 return false, nil 89 } 90 } 91 return true, nil 92 } 93 remaining, err := f(ts.novel) 94 95 if err != nil { 96 return false, err 97 } 98 99 if !remaining { 100 return false, nil 101 } 102 103 return f(ts.upstream) 104 } 105 106 func (ts tableSet) get(ctx context.Context, h addr, stats *Stats) ([]byte, error) { 107 f := func(css chunkSources) ([]byte, error) { 108 for _, haver := range css { 109 data, err := haver.get(ctx, h, stats) 110 111 if err != nil { 112 return nil, err 113 } 114 115 if data != nil { 116 return data, nil 117 } 118 } 119 120 return nil, nil 121 } 122 123 data, err := f(ts.novel) 124 125 if err != nil { 126 return nil, err 127 } 128 129 if data != nil { 130 return data, nil 131 } 132 133 return f(ts.upstream) 134 } 135 136 func (ts tableSet) getMany(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(*chunks.Chunk), stats *Stats) (remaining bool, err error) { 137 f := func(css chunkSources) bool { 138 for _, haver := range css { 139 if rp, ok := haver.(chunkReadPlanner); ok { 140 offsets, remaining := rp.findOffsets(reqs) 141 err = rp.getManyAtOffsets(ctx, eg, offsets, found, stats) 142 if err != nil { 143 return true 144 } 145 if !remaining { 146 return false 147 } 148 continue 149 } 150 remaining, err = haver.getMany(ctx, eg, reqs, found, stats) 151 if err != nil { 152 return true 153 } 154 if !remaining { 155 return false 156 } 157 } 158 return true 159 } 160 161 return f(ts.novel) && err == nil && f(ts.upstream), err 162 } 163 164 func (ts tableSet) getManyCompressed(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(CompressedChunk), stats *Stats) (remaining bool, err error) { 165 f := func(css chunkSources) bool { 166 for _, haver := range css { 167 if rp, ok := haver.(chunkReadPlanner); ok { 168 offsets, remaining := rp.findOffsets(reqs) 169 if len(offsets) > 0 { 170 err = rp.getManyCompressedAtOffsets(ctx, eg, offsets, found, stats) 171 if err != nil { 172 return true 173 } 174 } 175 176 if !remaining { 177 return false 178 } 179 180 continue 181 } 182 183 remaining, err = haver.getManyCompressed(ctx, eg, reqs, found, stats) 184 if err != nil { 185 return true 186 } 187 if !remaining { 188 return false 189 } 190 } 191 192 return true 193 } 194 195 return f(ts.novel) && err == nil && f(ts.upstream), err 196 } 197 198 func (ts tableSet) calcReads(reqs []getRecord, blockSize uint64) (reads int, split, remaining bool, err error) { 199 f := func(css chunkSources) (int, bool, bool, error) { 200 reads, split := 0, false 201 for _, haver := range css { 202 rds, rmn, err := haver.calcReads(reqs, blockSize) 203 204 if err != nil { 205 return 0, false, false, err 206 } 207 208 reads += rds 209 if !rmn { 210 return reads, split, false, nil 211 } 212 split = true 213 } 214 return reads, split, true, nil 215 } 216 reads, split, remaining, err = f(ts.novel) 217 218 if err != nil { 219 return 0, false, false, err 220 } 221 222 if remaining { 223 var rds int 224 rds, split, remaining, err = f(ts.upstream) 225 226 if err != nil { 227 return 0, false, false, err 228 } 229 230 reads += rds 231 } 232 233 return reads, split, remaining, nil 234 } 235 236 func (ts tableSet) count() (uint32, error) { 237 f := func(css chunkSources) (count uint32, err error) { 238 for _, haver := range css { 239 thisCount, err := haver.count() 240 241 if err != nil { 242 return 0, err 243 } 244 245 count += thisCount 246 } 247 return 248 } 249 250 novelCount, err := f(ts.novel) 251 252 if err != nil { 253 return 0, err 254 } 255 256 upCount, err := f(ts.upstream) 257 258 if err != nil { 259 return 0, err 260 } 261 262 return novelCount + upCount, nil 263 } 264 265 func (ts tableSet) uncompressedLen() (uint64, error) { 266 f := func(css chunkSources) (data uint64, err error) { 267 for _, haver := range css { 268 uncmpLen, err := haver.uncompressedLen() 269 270 if err != nil { 271 return 0, err 272 } 273 274 data += uncmpLen 275 } 276 return 277 } 278 279 novelCount, err := f(ts.novel) 280 281 if err != nil { 282 return 0, err 283 } 284 285 upCount, err := f(ts.upstream) 286 287 if err != nil { 288 return 0, err 289 } 290 291 return novelCount + upCount, nil 292 } 293 294 func (ts tableSet) physicalLen() (uint64, error) { 295 f := func(css chunkSources) (data uint64, err error) { 296 for _, haver := range css { 297 index, err := haver.index() 298 if err != nil { 299 return 0, err 300 } 301 data += index.TableFileSize() 302 } 303 return 304 } 305 306 lenNovel, err := f(ts.novel) 307 if err != nil { 308 return 0, err 309 } 310 311 lenUp, err := f(ts.upstream) 312 if err != nil { 313 return 0, err 314 } 315 316 return lenNovel + lenUp, nil 317 } 318 319 func (ts tableSet) Close() error { 320 var firstErr error 321 for _, t := range ts.novel { 322 err := t.Close() 323 if err != nil && firstErr == nil { 324 firstErr = err 325 } 326 } 327 for _, t := range ts.upstream { 328 err := t.Close() 329 if err != nil && firstErr == nil { 330 firstErr = err 331 } 332 } 333 return firstErr 334 } 335 336 // Size returns the number of tables in this tableSet. 337 func (ts tableSet) Size() int { 338 return len(ts.novel) + len(ts.upstream) 339 } 340 341 // Novel returns the number of tables containing novel chunks in this 342 // tableSet. 343 func (ts tableSet) Novel() int { 344 return len(ts.novel) 345 } 346 347 // Upstream returns the number of known-persisted tables in this tableSet. 348 func (ts tableSet) Upstream() int { 349 return len(ts.upstream) 350 } 351 352 // Prepend adds a memTable to an existing tableSet, compacting |mt| and 353 // returning a new tableSet with newly compacted table added. 354 func (ts tableSet) Prepend(ctx context.Context, mt *memTable, stats *Stats) tableSet { 355 newTs := tableSet{ 356 novel: make(chunkSources, len(ts.novel)+1), 357 upstream: make(chunkSources, len(ts.upstream)), 358 p: ts.p, 359 rl: ts.rl, 360 } 361 newTs.novel[0] = newPersistingChunkSource(ctx, mt, ts, ts.p, ts.rl, stats) 362 copy(newTs.novel[1:], ts.novel) 363 copy(newTs.upstream, ts.upstream) 364 return newTs 365 } 366 367 func (ts tableSet) extract(ctx context.Context, chunks chan<- extractRecord) error { 368 // Since new tables are _prepended_ to a tableSet, extracting chunks in insertOrder requires iterating ts.upstream back to front, followed by ts.novel. 369 for i := len(ts.upstream) - 1; i >= 0; i-- { 370 err := ts.upstream[i].extract(ctx, chunks) 371 372 if err != nil { 373 return err 374 } 375 } 376 for i := len(ts.novel) - 1; i >= 0; i-- { 377 err := ts.novel[i].extract(ctx, chunks) 378 379 if err != nil { 380 return err 381 } 382 } 383 384 return nil 385 } 386 387 // Flatten returns a new tableSet with |upstream| set to the union of ts.novel 388 // and ts.upstream. 389 func (ts tableSet) Flatten() (tableSet, error) { 390 flattened := tableSet{ 391 upstream: make(chunkSources, 0, ts.Size()), 392 p: ts.p, 393 rl: ts.rl, 394 } 395 396 for _, src := range ts.novel { 397 cnt, err := src.count() 398 399 if err != nil { 400 return tableSet{}, err 401 } 402 403 if cnt > 0 { 404 flattened.upstream = append(flattened.upstream, src) 405 } 406 } 407 408 flattened.upstream = append(flattened.upstream, ts.upstream...) 409 return flattened, nil 410 } 411 412 // Rebase returns a new tableSet holding the novel tables managed by |ts| and 413 // those specified by |specs|. 414 func (ts tableSet) Rebase(ctx context.Context, specs []tableSpec, stats *Stats) (tableSet, error) { 415 merged := tableSet{ 416 novel: make(chunkSources, 0, len(ts.novel)), 417 upstream: make(chunkSources, 0, len(specs)), 418 p: ts.p, 419 rl: ts.rl, 420 } 421 422 // Rebase the novel tables, skipping those that are actually empty (usually due to de-duping during table compaction) 423 for _, t := range ts.novel { 424 cnt, err := t.count() 425 426 if err != nil { 427 return tableSet{}, err 428 } 429 430 if cnt > 0 { 431 merged.novel = append(merged.novel, t.Clone()) 432 } 433 } 434 435 // Create a list of tables to open so we can open them in parallel. 436 tablesToOpen := []tableSpec{} // keep specs in order to play nicely with manifest appendix optimization 437 presents := map[addr]tableSpec{} 438 for _, spec := range specs { 439 if _, present := presents[spec.name]; !present { // Filter out dups 440 tablesToOpen = append(tablesToOpen, spec) 441 presents[spec.name] = spec 442 } 443 } 444 445 // Open all the new upstream tables concurrently 446 var rp atomic.Value 447 ae := atomicerr.New() 448 merged.upstream = make(chunkSources, len(tablesToOpen)) 449 wg := &sync.WaitGroup{} 450 wg.Add(len(tablesToOpen)) 451 for i, spec := range tablesToOpen { 452 go func(idx int, spec tableSpec) { 453 defer wg.Done() 454 defer func() { 455 if r := recover(); r != nil { 456 rp.Store(r) 457 } 458 }() 459 if !ae.IsSet() { 460 var err error 461 for _, existing := range ts.upstream { 462 h, err := existing.hash() 463 if err != nil { 464 ae.SetIfError(err) 465 return 466 } 467 if spec.name == h { 468 merged.upstream[idx] = existing.Clone() 469 return 470 } 471 } 472 merged.upstream[idx], err = ts.p.Open(ctx, spec.name, spec.chunkCount, stats) 473 ae.SetIfError(err) 474 } 475 }(i, spec) 476 } 477 wg.Wait() 478 479 if r := rp.Load(); r != nil { 480 panic(r) 481 } 482 483 if err := ae.Get(); err != nil { 484 return tableSet{}, err 485 } 486 487 return merged, nil 488 } 489 490 func (ts tableSet) ToSpecs() ([]tableSpec, error) { 491 tableSpecs := make([]tableSpec, 0, ts.Size()) 492 for _, src := range ts.novel { 493 cnt, err := src.count() 494 495 if err != nil { 496 return nil, err 497 } 498 499 if cnt > 0 { 500 h, err := src.hash() 501 502 if err != nil { 503 return nil, err 504 } 505 506 tableSpecs = append(tableSpecs, tableSpec{h, cnt}) 507 } 508 } 509 for _, src := range ts.upstream { 510 cnt, err := src.count() 511 512 if err != nil { 513 return nil, err 514 } 515 516 if cnt <= 0 { 517 return nil, errors.New("no upstream chunks") 518 } 519 520 h, err := src.hash() 521 522 if err != nil { 523 return nil, err 524 } 525 526 tableSpecs = append(tableSpecs, tableSpec{h, cnt}) 527 } 528 return tableSpecs, nil 529 }