github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/file_table_persister.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // This file incorporates work covered by the following copyright and 16 // permission notice: 17 // 18 // Copyright 2016 Attic Labs, Inc. All rights reserved. 19 // Licensed under the Apache License, version 2.0: 20 // http://www.apache.org/licenses/LICENSE-2.0 21 22 package nbs 23 24 import ( 25 "bytes" 26 "context" 27 "errors" 28 "io" 29 "io/fs" 30 "os" 31 "path" 32 "path/filepath" 33 "strings" 34 "sync" 35 "time" 36 37 "github.com/dolthub/dolt/go/libraries/utils/file" 38 "github.com/dolthub/dolt/go/store/chunks" 39 "github.com/dolthub/dolt/go/store/hash" 40 "github.com/dolthub/dolt/go/store/util/tempfiles" 41 ) 42 43 const tempTablePrefix = "nbs_table_" 44 45 func newFSTablePersister(dir string, q MemoryQuotaProvider) tablePersister { 46 return &fsTablePersister{dir, q, sync.Mutex{}, nil, make(map[string]struct{})} 47 } 48 49 type fsTablePersister struct { 50 dir string 51 q MemoryQuotaProvider 52 53 // Protects the following two maps. 54 removeMu sync.Mutex 55 // While we are running PruneTableFiles, any newly created table files are 56 // added to this map. The file delete loop will never delete anything which 57 // appears in this map. Files should be added to this map before they are 58 // written. 59 toKeep map[string]struct{} 60 // Any temp files we are currently writing are always present in this map. 61 // The logic should be taken before we generate the new temp file, and the 62 // new temp file should be added to this map. Care should be taken to always 63 // remove the entry from this map when we are done processing the temp file 64 // or else this map will grow without bound. 65 curTmps map[string]struct{} 66 } 67 68 var _ tablePersister = &fsTablePersister{} 69 var _ tableFilePersister = &fsTablePersister{} 70 71 func (ftp *fsTablePersister) Open(ctx context.Context, name hash.Hash, chunkCount uint32, stats *Stats) (chunkSource, error) { 72 return newFileTableReader(ctx, ftp.dir, name, chunkCount, ftp.q) 73 } 74 75 func (ftp *fsTablePersister) Exists(ctx context.Context, name hash.Hash, chunkCount uint32, stats *Stats) (bool, error) { 76 ftp.removeMu.Lock() 77 defer ftp.removeMu.Unlock() 78 if ftp.toKeep != nil { 79 ftp.toKeep[filepath.Join(ftp.dir, name.String())] = struct{}{} 80 } 81 return tableFileExists(ctx, ftp.dir, name) 82 } 83 84 func (ftp *fsTablePersister) Persist(ctx context.Context, mt *memTable, haver chunkReader, stats *Stats) (chunkSource, error) { 85 t1 := time.Now() 86 defer stats.PersistLatency.SampleTimeSince(t1) 87 88 name, data, chunkCount, err := mt.write(haver, stats) 89 if err != nil { 90 return emptyChunkSource{}, err 91 } 92 93 return ftp.persistTable(ctx, name, data, chunkCount, stats) 94 } 95 96 func (ftp *fsTablePersister) Path() string { 97 return ftp.dir 98 } 99 100 func (ftp *fsTablePersister) CopyTableFile(ctx context.Context, r io.Reader, fileId string, fileSz uint64, chunkCount uint32) error { 101 tn, f, err := func() (n string, cleanup func(), err error) { 102 ftp.removeMu.Lock() 103 var temp *os.File 104 temp, err = tempfiles.MovableTempFileProvider.NewFile(ftp.dir, tempTablePrefix) 105 if err != nil { 106 ftp.removeMu.Unlock() 107 return "", func() {}, err 108 } 109 ftp.curTmps[filepath.Clean(temp.Name())] = struct{}{} 110 ftp.removeMu.Unlock() 111 112 cleanup = func() { 113 ftp.removeMu.Lock() 114 delete(ftp.curTmps, filepath.Clean(temp.Name())) 115 ftp.removeMu.Unlock() 116 } 117 118 defer func() { 119 cerr := temp.Close() 120 if err == nil { 121 err = cerr 122 } 123 }() 124 125 _, err = io.Copy(temp, r) 126 if err != nil { 127 return "", cleanup, err 128 } 129 130 err = temp.Sync() 131 if err != nil { 132 return "", cleanup, err 133 } 134 135 return temp.Name(), cleanup, nil 136 }() 137 defer f() 138 if err != nil { 139 return err 140 } 141 142 path := filepath.Join(ftp.dir, fileId) 143 ftp.removeMu.Lock() 144 if ftp.toKeep != nil { 145 ftp.toKeep[filepath.Clean(path)] = struct{}{} 146 } 147 defer ftp.removeMu.Unlock() 148 return file.Rename(tn, path) 149 } 150 151 func (ftp *fsTablePersister) TryMoveCmpChunkTableWriter(ctx context.Context, filename string, w *CmpChunkTableWriter) error { 152 path := filepath.Join(ftp.dir, filename) 153 ftp.removeMu.Lock() 154 if ftp.toKeep != nil { 155 ftp.toKeep[filepath.Clean(path)] = struct{}{} 156 } 157 defer ftp.removeMu.Unlock() 158 return w.FlushToFile(path) 159 } 160 161 func (ftp *fsTablePersister) persistTable(ctx context.Context, name hash.Hash, data []byte, chunkCount uint32, stats *Stats) (cs chunkSource, err error) { 162 if chunkCount == 0 { 163 return emptyChunkSource{}, nil 164 } 165 166 tempName, f, err := func() (tempName string, cleanup func(), ferr error) { 167 ftp.removeMu.Lock() 168 var temp *os.File 169 temp, ferr = tempfiles.MovableTempFileProvider.NewFile(ftp.dir, tempTablePrefix) 170 if ferr != nil { 171 ftp.removeMu.Unlock() 172 return "", func() {}, ferr 173 } 174 ftp.curTmps[filepath.Clean(temp.Name())] = struct{}{} 175 ftp.removeMu.Unlock() 176 177 cleanup = func() { 178 ftp.removeMu.Lock() 179 delete(ftp.curTmps, filepath.Clean(temp.Name())) 180 ftp.removeMu.Unlock() 181 } 182 183 defer func() { 184 closeErr := temp.Close() 185 if ferr == nil { 186 ferr = closeErr 187 } 188 }() 189 190 _, ferr = io.Copy(temp, bytes.NewReader(data)) 191 if ferr != nil { 192 return "", cleanup, ferr 193 } 194 195 ferr = temp.Sync() 196 if ferr != nil { 197 return "", cleanup, ferr 198 } 199 200 return temp.Name(), cleanup, nil 201 }() 202 defer f() 203 if err != nil { 204 return nil, err 205 } 206 207 newName := filepath.Join(ftp.dir, name.String()) 208 ftp.removeMu.Lock() 209 if ftp.toKeep != nil { 210 ftp.toKeep[filepath.Clean(newName)] = struct{}{} 211 } 212 err = file.Rename(tempName, newName) 213 ftp.removeMu.Unlock() 214 if err != nil { 215 return nil, err 216 } 217 218 return ftp.Open(ctx, name, chunkCount, stats) 219 } 220 221 func (ftp *fsTablePersister) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, cleanupFunc, error) { 222 plan, err := planRangeCopyConjoin(sources, stats) 223 if err != nil { 224 return emptyChunkSource{}, nil, err 225 } 226 227 if plan.chunkCount == 0 { 228 return emptyChunkSource{}, func() {}, nil 229 } 230 231 name := nameFromSuffixes(plan.suffixes()) 232 tempName, f, err := func() (tempName string, cleanup func(), ferr error) { 233 ftp.removeMu.Lock() 234 var temp *os.File 235 temp, ferr = tempfiles.MovableTempFileProvider.NewFile(ftp.dir, tempTablePrefix) 236 if ferr != nil { 237 ftp.removeMu.Unlock() 238 return "", func() {}, ferr 239 } 240 ftp.curTmps[filepath.Clean(temp.Name())] = struct{}{} 241 ftp.removeMu.Unlock() 242 243 cleanup = func() { 244 ftp.removeMu.Lock() 245 delete(ftp.curTmps, filepath.Clean(temp.Name())) 246 ftp.removeMu.Unlock() 247 } 248 249 defer func() { 250 closeErr := temp.Close() 251 if ferr == nil { 252 ferr = closeErr 253 } 254 }() 255 256 for _, sws := range plan.sources.sws { 257 var r io.ReadCloser 258 r, _, ferr = sws.source.reader(ctx) 259 if ferr != nil { 260 return "", cleanup, ferr 261 } 262 263 n, ferr := io.CopyN(temp, r, int64(sws.dataLen)) 264 if ferr != nil { 265 r.Close() 266 return "", cleanup, ferr 267 } 268 269 if uint64(n) != sws.dataLen { 270 r.Close() 271 return "", cleanup, errors.New("failed to copy all data") 272 } 273 274 err := r.Close() 275 if err != nil { 276 return "", cleanup, err 277 } 278 } 279 280 _, ferr = temp.Write(plan.mergedIndex) 281 282 if ferr != nil { 283 return "", cleanup, ferr 284 } 285 286 ferr = temp.Sync() 287 if ferr != nil { 288 return "", cleanup, ferr 289 } 290 291 return temp.Name(), cleanup, nil 292 }() 293 defer f() 294 if err != nil { 295 return nil, nil, err 296 } 297 298 path := filepath.Join(ftp.dir, name.String()) 299 ftp.removeMu.Lock() 300 if ftp.toKeep != nil { 301 ftp.toKeep[filepath.Clean(path)] = struct{}{} 302 } 303 err = file.Rename(tempName, path) 304 if err != nil { 305 return nil, nil, err 306 } 307 ftp.removeMu.Unlock() 308 309 cs, err := ftp.Open(ctx, name, plan.chunkCount, stats) 310 if err != nil { 311 return nil, nil, err 312 } 313 return cs, func() { 314 for _, s := range sources { 315 file.Remove(filepath.Join(ftp.dir, s.hash().String())) 316 } 317 }, nil 318 } 319 320 func (ftp *fsTablePersister) PruneTableFiles(ctx context.Context, keeper func() []hash.Hash, mtime time.Time) error { 321 ftp.removeMu.Lock() 322 if ftp.toKeep != nil { 323 ftp.removeMu.Unlock() 324 return errors.New("shallow gc already in progress") 325 } 326 ftp.toKeep = make(map[string]struct{}) 327 ftp.removeMu.Unlock() 328 329 defer func() { 330 ftp.removeMu.Lock() 331 ftp.toKeep = nil 332 ftp.removeMu.Unlock() 333 }() 334 335 toKeep := make(map[string]struct{}) 336 for _, k := range keeper() { 337 toKeep[filepath.Clean(filepath.Join(ftp.dir, k.String()))] = struct{}{} 338 } 339 340 ftp.removeMu.Lock() 341 for f := range toKeep { 342 ftp.toKeep[f] = struct{}{} 343 } 344 ftp.removeMu.Unlock() 345 346 fileInfos, err := os.ReadDir(ftp.dir) 347 if err != nil { 348 return err 349 } 350 351 ea := make(gcErrAccum) 352 353 unfilteredTableFiles := make([]string, 0) 354 unfilteredTempFiles := make([]string, 0) 355 356 for _, info := range fileInfos { 357 if info.IsDir() { 358 continue 359 } 360 361 filePath := path.Join(ftp.dir, info.Name()) 362 363 if strings.HasPrefix(info.Name(), tempTablePrefix) { 364 unfilteredTempFiles = append(unfilteredTempFiles, filePath) 365 continue 366 } 367 368 if len(info.Name()) != 32 { 369 continue // not a table file 370 } 371 372 if _, ok := hash.MaybeParse(info.Name()); !ok { 373 continue // not a table file 374 } 375 376 i, err := info.Info() 377 if err != nil { 378 ea.add(filePath, err) 379 continue 380 } 381 382 ctime := i.ModTime() 383 if ctime.After(mtime) { 384 continue // file has been updated more recently than our cutoff time 385 } 386 387 unfilteredTableFiles = append(unfilteredTableFiles, filePath) 388 } 389 390 for _, p := range unfilteredTempFiles { 391 ftp.removeMu.Lock() 392 if _, ok := ftp.curTmps[filepath.Clean(p)]; !ok { 393 err := file.Remove(p) 394 if err != nil && !errors.Is(err, fs.ErrNotExist) { 395 ea.add(p, err) 396 } 397 } 398 ftp.removeMu.Unlock() 399 } 400 401 for _, p := range unfilteredTableFiles { 402 ftp.removeMu.Lock() 403 if _, ok := ftp.toKeep[filepath.Clean(p)]; !ok { 404 err := file.Remove(p) 405 if err != nil && !errors.Is(err, fs.ErrNotExist) { 406 ea.add(p, err) 407 } 408 } 409 ftp.removeMu.Unlock() 410 } 411 412 if !ea.isEmpty() { 413 return ea 414 } 415 416 return nil 417 } 418 419 func (ftp *fsTablePersister) Close() error { 420 return nil 421 } 422 423 func (ftp *fsTablePersister) AccessMode() chunks.ExclusiveAccessMode { 424 return chunks.ExclusiveAccessMode_Shared 425 }