// github.com/m3db/m3@v1.5.0/src/cmd/tools/split_index_shards/main/main.go
//
// Copyright (c) 2021 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
20 21 package main 22 23 import ( 24 "fmt" 25 iofs "io/fs" 26 "io/ioutil" 27 "log" 28 "os" 29 "path/filepath" 30 "regexp" 31 "strconv" 32 "strings" 33 "time" 34 35 "github.com/pborman/getopt" 36 "go.uber.org/zap" 37 38 "github.com/m3db/m3/src/dbnode/digest" 39 "github.com/m3db/m3/src/dbnode/generated/proto/index" 40 "github.com/m3db/m3/src/dbnode/persist/fs" 41 xerrors "github.com/m3db/m3/src/x/errors" 42 "github.com/m3db/m3/src/x/ident" 43 xos "github.com/m3db/m3/src/x/os" 44 xtime "github.com/m3db/m3/src/x/time" 45 ) 46 47 var ( 48 checkpointPattern = regexp.MustCompile(`/index/data/(\w+)/fileset-([0-9]+)-([0-9]+)-checkpoint\.db$`) 49 genericPattern = regexp.MustCompile(`/index/data/(\w+)/fileset-([0-9]+)-([0-9]+)-.*\.db$`) 50 ) 51 52 func main() { 53 var ( 54 optPath = getopt.StringLong("path", 'p', "", "Index path [e.g. /temp/lib/m3db/index]") 55 optBlockUntil = getopt.Int64Long("block-until", 'b', 0, "Block Until Time, exclusive [in nsec]") 56 optSrcShards = getopt.Uint32Long("src-shards", 'h', 0, "Original (source) number of shards") 57 optFactor = getopt.IntLong("factor", 'f', 0, "Integer factor to increase the number of shards by") 58 ) 59 getopt.Parse() 60 61 rawLogger, err := zap.NewDevelopment() 62 if err != nil { 63 log.Fatalf("unable to create logger: %+v", err) 64 } 65 logger := rawLogger.Sugar() 66 67 if *optPath == "" || 68 *optBlockUntil <= 0 || 69 *optSrcShards == 0 || 70 *optFactor <= 0 { 71 getopt.Usage() 72 os.Exit(1) 73 } 74 75 var ( 76 filesetLocation = dropIndexSuffix(*optPath) 77 fsOpts = fs.NewOptions().SetFilePathPrefix(filesetLocation) 78 ) 79 80 start := time.Now() 81 82 // Delete filesets of all blocks starting from optBlockUntil (inclusive). 
83 if err := filepath.WalkDir(*optPath, func(path string, d iofs.DirEntry, err error) error { 84 if err != nil || d.IsDir() { 85 return err 86 } 87 pathParts := genericPattern.FindStringSubmatch(path) 88 if len(pathParts) != 4 { 89 return fmt.Errorf("failed to parse path %s", path) 90 } 91 92 blockStart, err := strconv.Atoi(pathParts[2]) 93 if err != nil { 94 return err 95 } 96 97 if blockStart >= int(*optBlockUntil) { 98 fmt.Printf("%s - deleting too recent %s\n", time.Now().Local(), path) // nolint: forbidigo 99 return os.Remove(path) 100 } 101 102 return nil 103 }); err != nil { 104 logger.Fatalf("unable to walk the source dir: %+v", err) 105 } 106 107 // Update info files of all blocks before optBlockUntil. 108 if err := filepath.WalkDir(*optPath, func(path string, d iofs.DirEntry, err error) error { 109 if err != nil || d.IsDir() || !strings.HasSuffix(d.Name(), "-checkpoint.db") { 110 return err 111 } 112 fmt.Printf("%s - updating fileset of %s\n", time.Now().Local(), path) // nolint: forbidigo 113 pathParts := checkpointPattern.FindStringSubmatch(path) 114 if len(pathParts) != 4 { 115 return fmt.Errorf("failed to parse path %s", path) 116 } 117 118 var ( 119 namespace = pathParts[1] 120 blockStart, err1 = strconv.Atoi(pathParts[2]) 121 volume, err2 = strconv.Atoi(pathParts[3]) 122 ) 123 if err = xerrors.FirstError(err1, err2); err != nil { 124 return err 125 } 126 127 if blockStart >= int(*optBlockUntil) { 128 return fmt.Errorf( 129 "encountered fileset too recent to split (should have been deleted in previous step): %s", 130 path) 131 } 132 133 namespaceDir := fs.NamespaceIndexDataDirPath(filesetLocation, ident.StringID(namespace)) 134 135 if err = updateIndexInfoFile( 136 namespaceDir, xtime.UnixNano(blockStart), volume, *optSrcShards, *optFactor, fsOpts); err != nil { 137 if strings.Contains(err.Error(), "no such file or directory") { 138 fmt.Println(" - skip (incomplete fileset)") // nolint: forbidigo 139 return nil 140 } 141 return err 142 } 143 144 
return err 145 }); err != nil { 146 logger.Fatalf("unable to walk the source dir: %+v", err) 147 } 148 149 runTime := time.Since(start) 150 fmt.Printf("Running time: %s\n", runTime) // nolint: forbidigo 151 } 152 153 func updateIndexInfoFile( 154 namespaceDir string, 155 blockStart xtime.UnixNano, 156 volume int, 157 srcNumShards uint32, 158 factor int, 159 fsOpts fs.Options, 160 ) error { 161 var ( 162 infoFilePath = fs.FilesetPathFromTimeAndIndex(namespaceDir, blockStart, volume, fs.InfoFileSuffix) 163 digestFilePath = fs.FilesetPathFromTimeAndIndex(namespaceDir, blockStart, volume, fs.DigestFileSuffix) 164 checkpointFilePath = fs.FilesetPathFromTimeAndIndex(namespaceDir, blockStart, volume, fs.CheckpointFileSuffix) 165 166 info = index.IndexVolumeInfo{} 167 digests = index.IndexDigests{} 168 ) 169 170 digestsData, err := ioutil.ReadFile(digestFilePath) // nolint: gosec 171 if err != nil { 172 return err 173 } 174 if err = digests.Unmarshal(digestsData); err != nil { 175 return err 176 } 177 178 infoData, err := ioutil.ReadFile(infoFilePath) // nolint: gosec 179 if err != nil { 180 return err 181 } 182 if err = info.Unmarshal(infoData); err != nil { 183 return err 184 } 185 186 var newShards []uint32 187 for _, srcShard := range info.Shards { 188 if srcShard >= srcNumShards { 189 return fmt.Errorf("unexpected source shard ID %d (must be under %d)", srcShard, srcNumShards) 190 } 191 for i := 0; i < factor; i++ { 192 dstShard := mapToDstShard(srcNumShards, i, srcShard) 193 newShards = append(newShards, dstShard) 194 } 195 } 196 info.Shards = newShards 197 198 newInfoData, err := info.Marshal() 199 if err != nil { 200 return err 201 } 202 203 if err = xos.WriteFileSync(infoFilePath, newInfoData, fsOpts.NewFileMode()); err != nil { 204 return err 205 } 206 207 digests.InfoDigest = digest.Checksum(newInfoData) 208 newDigestsData, err := digests.Marshal() 209 if err != nil { 210 return err 211 } 212 213 if err = ioutil.WriteFile(digestFilePath, newDigestsData, 
fsOpts.NewFileMode()); err != nil { 214 return err 215 } 216 217 digestBuffer := digest.NewBuffer() 218 digestBuffer.WriteDigest(digest.Checksum(newDigestsData)) 219 220 return ioutil.WriteFile(checkpointFilePath, digestBuffer, fsOpts.NewFileMode()) 221 } 222 223 func mapToDstShard(srcNumShards uint32, i int, srcShard uint32) uint32 { 224 return srcNumShards*uint32(i) + srcShard 225 } 226 227 func dropIndexSuffix(path string) string { 228 idx := strings.LastIndex(path, "/index") 229 if idx < 0 { 230 return path 231 } 232 return path[:idx] 233 }