github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_sideload_disk.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package kvserver 12 13 import ( 14 "context" 15 "fmt" 16 "os" 17 "path/filepath" 18 "strconv" 19 "strings" 20 21 "github.com/cockroachdb/cockroach/pkg/roachpb" 22 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 23 "github.com/cockroachdb/cockroach/pkg/storage" 24 "github.com/cockroachdb/errors" 25 "golang.org/x/time/rate" 26 ) 27 28 var _ SideloadStorage = &diskSideloadStorage{} 29 30 type diskSideloadStorage struct { 31 st *cluster.Settings 32 limiter *rate.Limiter 33 dir string 34 dirCreated bool 35 eng storage.Engine 36 } 37 38 func deprecatedSideloadedPath( 39 baseDir string, rangeID roachpb.RangeID, replicaID roachpb.ReplicaID, 40 ) string { 41 return filepath.Join( 42 baseDir, 43 "sideloading", 44 fmt.Sprintf("%d", rangeID%1000), // sharding 45 fmt.Sprintf("%d.%d", rangeID, replicaID), 46 ) 47 } 48 49 func sideloadedPath(baseDir string, rangeID roachpb.RangeID) string { 50 // Use one level of sharding to avoid too many items per directory. For 51 // example, ext3 and older ext4 support only 32k and 64k subdirectories 52 // per directory, respectively. Newer FS typically have no such limitation, 53 // but still. 54 // 55 // For example, r1828 will end up in baseDir/r1XXX/r1828. 56 return filepath.Join( 57 baseDir, 58 "sideloading", 59 fmt.Sprintf("r%dXXXX", rangeID/10000), // sharding 60 fmt.Sprintf("r%d", rangeID), 61 ) 62 } 63 64 func exists(path string) (bool, error) { 65 _, err := os.Stat(path) 66 if err == nil { 67 return true, nil 68 } 69 if os.IsNotExist(err) { 70 return false, nil 71 } 72 return false, err 73 } 74 75 func newDiskSideloadStorage( 76 st *cluster.Settings, 77 rangeID roachpb.RangeID, 78 replicaID roachpb.ReplicaID, 79 baseDir string, 80 limiter *rate.Limiter, 81 eng storage.Engine, 82 ) (*diskSideloadStorage, error) { 83 path := deprecatedSideloadedPath(baseDir, rangeID, replicaID) 84 newPath := sideloadedPath(baseDir, rangeID) 85 // NB: this call to exists() is in the hot path when the server starts 86 // as it will be called once for each replica. However, during steady 87 // state (i.e. when the version variable hasn't *just* flipped), we're 88 // expecting `path` to not exist (since it refers to the legacy path at 89 // the moment). A stat call for a directory that doesn't exist isn't 90 // very expensive (on the order of 1000s of ns). For example, on a 2017 91 // MacBook Pro, this case averages ~3245ns and on a gceworker it's 92 // ~1200ns. At 50k replicas, that's on the order of a tenth of a second; 93 // not enough to matter. 94 // 95 // On the other hand, successful (i.e. directory found) calls take ~23k 96 // ns on my laptop, but only around 2.2k ns on the gceworker. Still, 97 // even on the laptop, 50k replicas would only add 1.2s which is also 98 // acceptable given that it'll happen only once. 99 exists, err := exists(path) 100 if err != nil { 101 return nil, errors.Wrap(err, "checking pre-migration sideloaded directory") 102 } 103 if exists { 104 if err := os.MkdirAll(filepath.Dir(newPath), 0755); err != nil { 105 return nil, errors.Wrap(err, "creating migrated sideloaded directory") 106 } 107 if err := os.Rename(path, newPath); err != nil { 108 return nil, errors.Wrap(err, "while migrating sideloaded directory") 109 } 110 } 111 path = newPath 112 113 ss := &diskSideloadStorage{ 114 dir: path, 115 eng: eng, 116 st: st, 117 limiter: limiter, 118 } 119 return ss, nil 120 } 121 122 func (ss *diskSideloadStorage) createDir() error { 123 err := os.MkdirAll(ss.dir, 0755) 124 ss.dirCreated = ss.dirCreated || err == nil 125 return err 126 } 127 128 // Dir implements SideloadStorage. 129 func (ss *diskSideloadStorage) Dir() string { 130 return ss.dir 131 } 132 133 // Put implements SideloadStorage. 134 func (ss *diskSideloadStorage) Put(ctx context.Context, index, term uint64, contents []byte) error { 135 filename := ss.filename(ctx, index, term) 136 // There's a chance the whole path is missing (for example after Clear()), 137 // in which case handle that transparently. 138 for { 139 // Use 0644 since that's what RocksDB uses: 140 // https://github.com/facebook/rocksdb/blob/56656e12d67d8a63f1e4c4214da9feeec2bd442b/env/env_posix.cc#L171 141 if err := writeFileSyncing(ctx, filename, contents, ss.eng, 0644, ss.st, ss.limiter); err == nil { 142 return nil 143 } else if !os.IsNotExist(err) { 144 return err 145 } 146 // createDir() ensures ss.dir exists but will not create any subdirectories 147 // within ss.dir because filename() does not make subdirectories in ss.dir. 148 if err := ss.createDir(); err != nil { 149 return err 150 } 151 continue 152 } 153 } 154 155 // Get implements SideloadStorage. 156 func (ss *diskSideloadStorage) Get(ctx context.Context, index, term uint64) ([]byte, error) { 157 filename := ss.filename(ctx, index, term) 158 b, err := ss.eng.ReadFile(filename) 159 if os.IsNotExist(err) { 160 return nil, errSideloadedFileNotFound 161 } 162 return b, err 163 } 164 165 // Filename implements SideloadStorage. 166 func (ss *diskSideloadStorage) Filename(ctx context.Context, index, term uint64) (string, error) { 167 return ss.filename(ctx, index, term), nil 168 } 169 170 func (ss *diskSideloadStorage) filename(ctx context.Context, index, term uint64) string { 171 return filepath.Join(ss.dir, fmt.Sprintf("i%d.t%d", index, term)) 172 } 173 174 // Purge implements SideloadStorage. 175 func (ss *diskSideloadStorage) Purge(ctx context.Context, index, term uint64) (int64, error) { 176 return ss.purgeFile(ctx, ss.filename(ctx, index, term)) 177 } 178 179 func (ss *diskSideloadStorage) fileSize(filename string) (int64, error) { 180 // TODO(tschottdorf): this should all be done through the env. As written, 181 // the sizes returned here will be wrong if encryption is on. We want the 182 // size of the unencrypted payload. 183 // 184 // See #31913. 185 info, err := os.Stat(filename) 186 if err != nil { 187 if os.IsNotExist(err) { 188 return 0, errSideloadedFileNotFound 189 } 190 return 0, err 191 } 192 return info.Size(), nil 193 } 194 195 func (ss *diskSideloadStorage) purgeFile(ctx context.Context, filename string) (int64, error) { 196 size, err := ss.fileSize(filename) 197 if err != nil { 198 return 0, err 199 } 200 if err := ss.eng.Remove(filename); err != nil { 201 if os.IsNotExist(err) { 202 return 0, errSideloadedFileNotFound 203 } 204 return 0, err 205 } 206 return size, nil 207 } 208 209 // Clear implements SideloadStorage. 210 func (ss *diskSideloadStorage) Clear(_ context.Context) error { 211 // TODO(jackson): Update this and the rest of `os.` filesystem calls in 212 // this impl to use ss.eng. 213 err := os.RemoveAll(ss.dir) 214 ss.dirCreated = ss.dirCreated && err != nil 215 return err 216 } 217 218 // TruncateTo implements SideloadStorage. 219 func (ss *diskSideloadStorage) TruncateTo( 220 ctx context.Context, firstIndex uint64, 221 ) (bytesFreed, bytesRetained int64, _ error) { 222 deletedAll := true 223 if err := ss.forEach(ctx, func(index uint64, filename string) error { 224 if index >= firstIndex { 225 size, err := ss.fileSize(filename) 226 if err != nil { 227 return err 228 } 229 bytesRetained += size 230 deletedAll = false 231 return nil 232 } 233 fileSize, err := ss.purgeFile(ctx, filename) 234 if err != nil { 235 return err 236 } 237 bytesFreed += fileSize 238 return nil 239 }); err != nil { 240 return 0, 0, err 241 } 242 243 if deletedAll { 244 // The directory may not exist, or it may exist and have been empty. 245 // Not worth trying to figure out which one, just try to delete. 246 err := os.Remove(ss.dir) 247 if !os.IsNotExist(err) { 248 return bytesFreed, 0, errors.Wrapf(err, "while purging %q", ss.dir) 249 } 250 } 251 return bytesFreed, bytesRetained, nil 252 } 253 254 func (ss *diskSideloadStorage) forEach( 255 ctx context.Context, visit func(index uint64, filename string) error, 256 ) error { 257 matches, err := filepath.Glob(filepath.Join(ss.dir, "i*.t*")) 258 if err != nil { 259 return err 260 } 261 for _, match := range matches { 262 base := filepath.Base(match) 263 if len(base) < 1 || base[0] != 'i' { 264 continue 265 } 266 base = base[1:] 267 upToDot := strings.SplitN(base, ".", 2) 268 logIdx, err := strconv.ParseUint(upToDot[0], 10, 64) 269 if err != nil { 270 return errors.Wrapf(err, "while parsing %q during TruncateTo", match) 271 } 272 if err := visit(logIdx, match); err != nil { 273 return errors.Wrapf(err, "matching pattern %q", match) 274 } 275 } 276 return nil 277 } 278 279 // String lists the files in the storage without guaranteeing an ordering. 280 func (ss *diskSideloadStorage) String() string { 281 var buf strings.Builder 282 var count int 283 if err := ss.forEach(context.Background(), func(_ uint64, filename string) error { 284 count++ 285 _, _ = fmt.Fprintln(&buf, filename) 286 return nil 287 }); err != nil { 288 return err.Error() 289 } 290 fmt.Fprintf(&buf, "(%d files)\n", count) 291 return buf.String() 292 }