github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/cmd/tools/verify_data_files/main/main.go (about) 1 // Copyright (c) 2019 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package main 22 23 import ( 24 "fmt" 25 "io" 26 golog "log" 27 "os" 28 "path" 29 "sort" 30 "strconv" 31 "unicode/utf8" 32 33 "github.com/m3db/m3/src/cmd/tools" 34 "github.com/m3db/m3/src/dbnode/digest" 35 "github.com/m3db/m3/src/dbnode/persist" 36 "github.com/m3db/m3/src/dbnode/persist/fs" 37 "github.com/m3db/m3/src/dbnode/storage/index/convert" 38 "github.com/m3db/m3/src/x/checked" 39 "github.com/m3db/m3/src/x/ident" 40 "github.com/m3db/m3/src/x/pool" 41 42 "github.com/pborman/getopt" 43 "go.uber.org/zap" 44 "go.uber.org/zap/zapcore" 45 ) 46 47 func main() { 48 var ( 49 optPathPrefix = getopt.StringLong("path-prefix", 'p', "/var/lib/m3db", "Path prefix [e.g. /var/lib/m3db]") 50 optFailFast = getopt.BoolLong("fail-fast", 'f', "Fail fast will bail on first failure") 51 optFixDir = getopt.StringLong("fix-path-prefix", 'o', "/tmp/m3db", "Fix output path file prefix for fixed files [e.g. /tmp/m3db]") 52 optFixInvalidIDs = getopt.BoolLong("fix-invalid-ids", 'i', "Fix invalid IDs will remove entries with IDs that have non-UTF8 chars") 53 optFixInvalidTags = getopt.BoolLong("fix-invalid-tags", 't', "Fix invalid tags will remove entries with tags that have name/values non-UTF8 chars") 54 optFixInvalidChecksums = getopt.BoolLong("fix-invalid-checksums", 'c', "Fix invalid checksums will remove entries with bad checksums") 55 optDebugLog = getopt.BoolLong("debug", 'd', "Enable debug log level") 56 ) 57 getopt.Parse() 58 59 logLevel := zapcore.InfoLevel 60 if *optDebugLog { 61 logLevel = zapcore.DebugLevel 62 } 63 64 logConfig := zap.NewDevelopmentConfig() 65 logConfig.Level = zap.NewAtomicLevelAt(logLevel) 66 log, err := logConfig.Build() 67 if err != nil { 68 golog.Fatalf("unable to create logger: %+v", err) 69 } 70 71 if *optPathPrefix == "" { 72 getopt.Usage() 73 os.Exit(1) 74 } 75 76 log.Info("creating bytes pool") 77 bytesPool := tools.NewCheckedBytesPool() 78 bytesPool.Init() 79 80 run(runOptions{ 81 filePathPrefix: *optPathPrefix, 82 failFast: *optFailFast, 83 fixDir: *optFixDir, 84 fixInvalidIDs: *optFixInvalidIDs, 85 fixInvalidTags: *optFixInvalidTags, 86 fixInvalidChecksums: *optFixInvalidChecksums, 87 bytesPool: bytesPool, 88 log: log, 89 }) 90 } 91 92 type runOptions struct { 93 filePathPrefix string 94 failFast bool 95 fixDir string 96 fixInvalidIDs bool 97 fixInvalidTags bool 98 fixInvalidChecksums bool 99 bytesPool pool.CheckedBytesPool 100 log *zap.Logger 101 } 102 103 func run(opts runOptions) { 104 filePathPrefix := opts.filePathPrefix 105 bytesPool := opts.bytesPool 106 log := opts.log 107 108 dataDirPath := fs.DataDirPath(filePathPrefix) 109 110 namespaces, err := dirFiles(dataDirPath) 111 if err != nil { 112 log.Fatal("could not read namespaces", zap.Error(err)) 113 } 114 115 // Get all fileset files. 116 log.Info("discovering file sets", 117 zap.Strings("namespaces", namespaces)) 118 var fileSetFiles []fs.FileSetFile 119 for _, namespace := range namespaces { 120 namespacePath := path.Join(dataDirPath, namespace) 121 shards, err := dirFiles(namespacePath) 122 if err != nil { 123 log.Fatal("could not read shards for namespace", 124 zap.String("namespacePath", namespacePath), 125 zap.Error(err)) 126 } 127 128 log.Debug("discovered shards", 129 zap.String("namespace", namespace), 130 zap.String("namespacePath", namespacePath), 131 zap.Strings("shards", shards)) 132 for _, shard := range shards { 133 shardPath := path.Join(namespacePath, shard) 134 shardID, err := strconv.Atoi(shard) 135 if err != nil { 136 log.Fatal("could not parse shard dir as int", 137 zap.String("shardPath", shardPath), zap.Error(err)) 138 } 139 140 shardFileSets, err := fs.DataFiles(filePathPrefix, 141 ident.StringID(namespace), uint32(shardID)) 142 if err != nil { 143 log.Fatal("could not list shard dir file setes", 144 zap.String("shardPath", shardPath), zap.Error(err)) 145 } 146 147 log.Debug("discovered shard file sets", 148 zap.String("namespace", namespace), 149 zap.String("namespacePath", namespacePath), 150 zap.Int("shardID", shardID), 151 zap.Any("fileSets", shardFileSets)) 152 fileSetFiles = append(fileSetFiles, shardFileSets...) 153 } 154 } 155 156 // Sort by time in reverse (usually want to fix latest files first and 157 // can stop once done with fail-fast). 158 log.Info("sorting file sets", zap.Int("numFileSets", len(fileSetFiles))) 159 sort.Slice(fileSetFiles, func(i, j int) bool { 160 return fileSetFiles[i].ID.BlockStart.After(fileSetFiles[j].ID.BlockStart) 161 }) 162 163 log.Info("verifying file sets", zap.Int("numFileSets", len(fileSetFiles))) 164 for _, fileSet := range fileSetFiles { 165 if !fileSet.HasCompleteCheckpointFile() { 166 continue // Don't validate file sets without checkpoint file. 167 } 168 169 log.Info("verifying file set file", zap.Any("fileSet", fileSet)) 170 if err := verifyFileSet(verifyFileSetOptions{ 171 filePathPrefix: filePathPrefix, 172 bytesPool: bytesPool, 173 fileSet: fileSet, 174 fixDir: opts.fixDir, 175 fixInvalidIDs: opts.fixInvalidIDs, 176 fixInvalidTags: opts.fixInvalidTags, 177 fixInvalidChecksums: opts.fixInvalidChecksums, 178 }, log); err != nil { 179 log.Error("file set file failed verification", 180 zap.Error(err), 181 zap.Any("fileSet", fileSet)) 182 183 if opts.failFast { 184 log.Fatal("aborting due to fail fast set") 185 } 186 } 187 } 188 } 189 190 func dirFiles(dirPath string) ([]string, error) { 191 dir, err := os.Open(dirPath) 192 if err != nil { 193 return nil, fmt.Errorf("could not open dir: %v", err) 194 } 195 196 defer dir.Close() 197 198 stat, err := dir.Stat() 199 if err != nil { 200 return nil, fmt.Errorf("could not stat dir: %v", err) 201 } 202 if !stat.IsDir() { 203 return nil, fmt.Errorf("path is not a directory: %s", dirPath) 204 } 205 206 entries, err := dir.Readdirnames(-1) 207 if err != nil { 208 return nil, fmt.Errorf("could not read dir names: %v", err) 209 } 210 211 results := entries[:0] 212 for _, p := range entries { 213 if p == "." || p == ".." || p == "./.." || p == "./" || p == "../" || p == "./../" { 214 continue 215 } 216 results = append(results, p) 217 } 218 return results, nil 219 } 220 221 type verifyFileSetOptions struct { 222 filePathPrefix string 223 bytesPool pool.CheckedBytesPool 224 fileSet fs.FileSetFile 225 226 fixDir string 227 fixInvalidIDs bool 228 fixInvalidTags bool 229 fixInvalidChecksums bool 230 } 231 232 func verifyFileSet( 233 opts verifyFileSetOptions, 234 log *zap.Logger, 235 ) error { 236 fsOpts := fs.NewOptions().SetFilePathPrefix(opts.filePathPrefix) 237 reader, err := fs.NewReader(opts.bytesPool, fsOpts) 238 if err != nil { 239 return err 240 } 241 242 fileSet := opts.fileSet 243 244 openOpts := fs.DataReaderOpenOptions{ 245 Identifier: fileSet.ID, 246 FileSetType: persist.FileSetFlushType, 247 } 248 249 err = reader.Open(openOpts) 250 if err != nil { 251 return err 252 } 253 254 defer reader.Close() 255 256 for { 257 id, tags, data, checksum, err := reader.Read() 258 if err == io.EOF { 259 break 260 } 261 if err != nil { 262 return err 263 } 264 265 check, err := readEntry(id, tags, data, checksum) 266 data.Finalize() // Always finalize data. 267 if err == nil { 268 continue 269 } 270 271 shouldFixInvalidID := check.invalidID && opts.fixInvalidIDs 272 shouldFixInvalidTags := check.invalidTags && opts.fixInvalidTags 273 shouldFixInvalidChecksum := check.invalidChecksum && opts.fixInvalidChecksums 274 if !shouldFixInvalidID && !shouldFixInvalidTags && !shouldFixInvalidChecksum { 275 return err 276 } 277 278 log.Info("starting to fix file set", zap.Any("fileSet", fileSet)) 279 fixErr := fixFileSet(opts, log) 280 if fixErr != nil { 281 log.Error("could not fix file set", 282 zap.Any("fileSet", fileSet), zap.Error(fixErr)) 283 return err 284 } 285 286 log.Info("fixed file set", zap.Any("fileSet", fileSet)) 287 return err 288 } 289 290 return nil 291 } 292 293 type readEntryResult struct { 294 invalidID bool 295 invalidTags bool 296 invalidChecksum bool 297 } 298 299 func readEntry( 300 id ident.ID, 301 tags ident.TagIterator, 302 data checked.Bytes, 303 checksum uint32, 304 ) (readEntryResult, error) { 305 idValue := id.Bytes() 306 if len(idValue) == 0 { 307 return readEntryResult{invalidID: true}, 308 fmt.Errorf("invalid id: err=%s, as_string=%s, as_hex=%x", 309 "empty", idValue, idValue) 310 } 311 if !utf8.Valid(idValue) { 312 return readEntryResult{invalidID: true}, 313 fmt.Errorf("invalid id: err=%s, as_string=%s, as_hex=%x", 314 "non-utf8", idValue, idValue) 315 } 316 317 for tags.Next() { 318 tag := tags.Current() 319 if err := convert.ValidateSeriesTag(tag); err != nil { 320 return readEntryResult{invalidTags: true}, 321 fmt.Errorf("invalid tag: err=%v, "+ 322 "name_as_string=%s, name_as_hex=%s"+ 323 "value_as_string=%s, value_as_hex=%s", 324 err, 325 tag.Name.Bytes(), tag.Name.Bytes(), 326 tag.Value.Bytes(), tag.Value.Bytes()) 327 } 328 } 329 330 data.IncRef() 331 calculatedChecksum := digest.Checksum(data.Bytes()) 332 data.DecRef() 333 334 if calculatedChecksum != checksum { 335 return readEntryResult{invalidChecksum: true}, 336 fmt.Errorf("data checksum invalid: actual=%v, expected=%v", 337 calculatedChecksum, checksum) 338 } 339 return readEntryResult{}, nil 340 } 341 342 func fixFileSet( 343 opts verifyFileSetOptions, 344 log *zap.Logger, 345 ) error { 346 fsOpts := fs.NewOptions().SetFilePathPrefix(opts.filePathPrefix) 347 reader, err := fs.NewReader(opts.bytesPool, fsOpts) 348 if err != nil { 349 return err 350 } 351 352 fileSet := opts.fileSet 353 354 openOpts := fs.DataReaderOpenOptions{ 355 Identifier: fileSet.ID, 356 FileSetType: persist.FileSetFlushType, 357 } 358 359 err = reader.Open(openOpts) 360 if err != nil { 361 return err 362 } 363 364 defer reader.Close() 365 366 // NOTE: we output to a new directory so that we don't clobber files. 367 writeFsOpts := fsOpts.SetFilePathPrefix(opts.fixDir) 368 writer, err := fs.NewWriter(writeFsOpts) 369 if err != nil { 370 return err 371 } 372 373 err = writer.Open(fs.DataWriterOpenOptions{ 374 FileSetType: persist.FileSetFlushType, 375 FileSetContentType: fileSet.ID.FileSetContentType, 376 Identifier: fileSet.ID, 377 BlockSize: reader.Status().BlockSize, 378 }) 379 if err != nil { 380 return err 381 } 382 383 success := false 384 defer func() { 385 if !success { 386 writer.Close() 387 } 388 }() 389 390 var ( 391 removedIDs int 392 removedTags int 393 copies []checked.Bytes 394 ) 395 for { 396 id, tags, data, checksum, err := reader.Read() 397 if err == io.EOF { 398 break 399 } 400 if err != nil { 401 return err 402 } 403 404 tagsCopy := tags.Duplicate() 405 406 check, err := readEntry(id, tags, data, checksum) 407 if err != nil { 408 shouldFixInvalidID := check.invalidID && opts.fixInvalidIDs 409 shouldFixInvalidTags := check.invalidTags && opts.fixInvalidTags 410 shouldFixInvalidChecksum := check.invalidChecksum && opts.fixInvalidChecksums 411 log.Info("read entry for fix", 412 zap.Bool("shouldFixInvalidID", shouldFixInvalidID), 413 zap.Bool("shouldFixInvalidTags", shouldFixInvalidTags), 414 zap.Bool("shouldFixInvalidChecksum", shouldFixInvalidChecksum)) 415 416 if shouldFixInvalidID || shouldFixInvalidTags || shouldFixInvalidChecksum { 417 // Skip this entry being written to the target volume. 418 removedIDs++ 419 continue 420 } 421 422 return fmt.Errorf("encountered an error not enabled to fix: %v", err) 423 } 424 425 metadata := persist.NewMetadataFromIDAndTagIterator(id, tagsCopy, 426 persist.MetadataOptions{ 427 FinalizeID: true, 428 FinalizeTagIterator: true, 429 }) 430 431 data.IncRef() 432 err = writer.Write(metadata, data, checksum) 433 data.DecRef() 434 if err != nil { 435 return fmt.Errorf("could not write fixed file set entry: %v", err) 436 } 437 438 // Finalize data to release back to pool. 439 data.Finalize() 440 441 // Release our copies back to pool. 442 for _, copy := range copies { 443 copy.DecRef() 444 copy.Finalize() 445 } 446 copies = copies[:0] 447 } 448 449 log.Info("finished fixing file set", 450 zap.Any("fileSet", fileSet), 451 zap.Int("removedIDs", removedIDs), 452 zap.Int("removedTags", removedTags)) 453 454 success = true 455 return writer.Close() 456 }