github.com/m3db/m3@v1.5.0/src/cmd/tools/read_index_segments/main/main.go (about) 1 // Copyright (c) 2020 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package main 22 23 import ( 24 "fmt" 25 "io" 26 "io/ioutil" 27 golog "log" 28 "math" 29 "os" 30 "runtime" 31 "sync" 32 "time" 33 34 "github.com/m3db/m3/src/dbnode/persist" 35 "github.com/m3db/m3/src/dbnode/persist/fs" 36 "github.com/m3db/m3/src/query/util/json" 37 "github.com/m3db/m3/src/x/ident" 38 xsync "github.com/m3db/m3/src/x/sync" 39 "github.com/m3db/m3/src/x/unsafe" 40 41 "github.com/pborman/getopt" 42 "go.uber.org/zap" 43 ) 44 45 var ( 46 halfCPUs = int(math.Max(float64(runtime.GOMAXPROCS(0)/2), 1)) 47 endlineBytes = []byte("\n") 48 ) 49 50 func main() { 51 var ( 52 optPathPrefix = getopt.StringLong("path-prefix", 'p', "/var/lib/m3db", "Path prefix [e.g. /var/lib/m3db]") 53 optOutputFile = getopt.StringLong("output-file", 'o', "", "Output JSON file of line delimited JSON objects for each segment") 54 optValidate = getopt.BoolLong("validate", 'v', "Validate the segments, do not print out metadata") 55 optValidateConcurrency = getopt.IntLong("validate-concurrency", 'c', halfCPUs, "Validation concurrency") 56 ) 57 getopt.Parse() 58 59 logConfig := zap.NewDevelopmentConfig() 60 log, err := logConfig.Build() 61 if err != nil { 62 golog.Fatalf("unable to create logger: %+v", err) 63 } 64 65 if *optOutputFile != "" && *optValidate { 66 log.Error("cannot write output and validate, do not set output file if validating") 67 getopt.Usage() 68 os.Exit(1) 69 } 70 71 if *optPathPrefix == "" || (*optOutputFile == "" && !*optValidate) { 72 getopt.Usage() 73 os.Exit(1) 74 } 75 76 run(runOptions{ 77 filePathPrefix: *optPathPrefix, 78 outputFilePath: *optOutputFile, 79 validate: *optValidate, 80 validateConcurrency: *optValidateConcurrency, 81 log: log, 82 }) 83 } 84 85 type runOptions struct { 86 filePathPrefix string 87 outputFilePath string 88 validate bool 89 validateConcurrency int 90 log *zap.Logger 91 } 92 93 func run(opts runOptions) { 94 log := opts.log 95 96 fsOpts := fs.NewOptions(). 97 SetFilePathPrefix(opts.filePathPrefix). 98 // Always validate checksums before reading and/or validating contents 99 // regardless of whether this is a validation run or just reading 100 // the raw files. 101 SetIndexReaderAutovalidateIndexSegments(true) 102 103 indexDirPath := fs.IndexDataDirPath(opts.filePathPrefix) 104 105 namespaces, err := dirFiles(indexDirPath) 106 if err != nil { 107 log.Fatal("could not read namespaces", zap.Error(err)) 108 } 109 110 // Get all fileset files. 111 log.Info("discovered namespaces", zap.Strings("namespaces", namespaces)) 112 113 var ( 114 out io.Writer 115 validateWorkerPool xsync.WorkerPool 116 ) 117 if opts.validate { 118 // Only validating, output to dev null. 119 out = ioutil.Discard 120 validateWorkerPool = xsync.NewWorkerPool(opts.validateConcurrency) 121 validateWorkerPool.Init() 122 log.Info("validating segment files", 123 zap.Int("concurrency", opts.validateConcurrency)) 124 } else { 125 // Output to file. 126 out, err = os.Create(opts.outputFilePath) 127 if err != nil { 128 log.Fatal("unable to create output file", 129 zap.String("file", opts.outputFilePath), 130 zap.Error(err)) 131 } 132 log.Info("writing output JSON line delimited", 133 zap.String("path", opts.outputFilePath)) 134 } 135 136 for _, namespace := range namespaces { 137 log.Info("reading segments", zap.String("namespace", namespace)) 138 ns := ident.StringID(namespace) 139 140 readNamespaceSegments(out, opts.validate, validateWorkerPool, 141 ns, fsOpts, log) 142 143 // Separate by endline. 144 if _, err := out.Write(endlineBytes); err != nil { 145 log.Fatal("could not write endline", zap.Error(err)) 146 } 147 } 148 } 149 150 func readNamespaceSegments( 151 out io.Writer, 152 validate bool, 153 validateWorkerPool xsync.WorkerPool, 154 nsID ident.ID, 155 fsOpts fs.Options, 156 log *zap.Logger, 157 ) { 158 var ( 159 infoFiles = fs.ReadIndexInfoFiles(fs.ReadIndexInfoFilesOptions{ 160 FilePathPrefix: fsOpts.FilePathPrefix(), 161 Namespace: nsID, 162 ReaderBufferSize: fsOpts.InfoReaderBufferSize(), 163 }) 164 wg sync.WaitGroup 165 ) 166 167 for _, infoFile := range infoFiles { 168 if err := infoFile.Err.Error(); err != nil { 169 log.Error("unable to read index info file", 170 zap.Stringer("namespace", nsID), 171 zap.Error(err), 172 zap.String("filepath", infoFile.Err.Filepath()), 173 ) 174 continue 175 } 176 177 if !validate { 178 readBlockSegments(out, nsID, infoFile, fsOpts, log) 179 continue 180 } 181 182 // Validating, so use validation concurrency. 183 wg.Add(1) 184 validateWorkerPool.Go(func() { 185 defer wg.Done() 186 readBlockSegments(out, nsID, infoFile, fsOpts, log) 187 }) 188 } 189 190 // Wait for any concurrent validation. 191 wg.Wait() 192 } 193 194 func readBlockSegments( 195 out io.Writer, 196 nsID ident.ID, 197 infoFile fs.ReadIndexInfoFileResult, 198 fsOpts fs.Options, 199 log *zap.Logger, 200 ) { 201 // Make sure if we fatal or error out the exact block is known. 202 log = log.With( 203 zap.String("namespace", nsID.String()), 204 zap.String("blockStart", infoFile.ID.BlockStart.String()), 205 zap.Int64("blockStartUnixNano", int64(infoFile.ID.BlockStart)), 206 zap.Int("volumeIndex", infoFile.ID.VolumeIndex), 207 zap.Strings("files", infoFile.AbsoluteFilePaths)) 208 209 log.Info("reading block segments") 210 211 readResult, err := fs.ReadIndexSegments(fs.ReadIndexSegmentsOptions{ 212 ReaderOptions: fs.IndexReaderOpenOptions{ 213 Identifier: infoFile.ID, 214 FileSetType: persist.FileSetFlushType, 215 }, 216 FilesystemOptions: fsOpts, 217 }) 218 if err != nil { 219 log.Error("unable to read segments from index fileset", zap.Error(err)) 220 return 221 } 222 223 if readResult.Validated { 224 log.Info("validated segments") 225 } else { 226 log.Error("expected to validate segments but did not validate") 227 } 228 229 for i, seg := range readResult.Segments { 230 jw := json.NewWriter(out) 231 jw.BeginObject() 232 233 jw.BeginObjectField("namespace") 234 jw.WriteString(nsID.String()) 235 236 jw.BeginObjectField("blockStart") 237 jw.WriteString(time.Unix(0, infoFile.Info.BlockStart).Format(time.RFC3339)) 238 239 jw.BeginObjectField("volumeIndex") 240 jw.WriteInt(infoFile.ID.VolumeIndex) 241 242 jw.BeginObjectField("segmentIndex") 243 jw.WriteInt(i) 244 245 reader, err := seg.Reader() 246 if err != nil { 247 log.Fatal("unable to create segment reader", zap.Error(err)) 248 } 249 250 iter, err := reader.AllDocs() 251 if err != nil { 252 log.Fatal("unable to iterate segment docs", zap.Error(err)) 253 } 254 255 jw.BeginObjectField("documents") 256 jw.BeginArray() 257 for postingsID := 0; iter.Next(); postingsID++ { 258 d := iter.Current() 259 jw.BeginObject() 260 261 jw.BeginObjectField("postingsID") 262 jw.WriteInt(postingsID) 263 264 jw.BeginObjectField("id") 265 unsafe.WithString(d.ID, func(str string) { 266 jw.WriteString(str) 267 }) 268 269 jw.BeginObjectField("fields") 270 271 jw.BeginArray() 272 for _, field := range d.Fields { 273 jw.BeginObject() 274 275 jw.BeginObjectField("name") 276 unsafe.WithString(field.Name, func(str string) { 277 jw.WriteString(str) 278 }) 279 280 jw.BeginObjectField("value") 281 unsafe.WithString(field.Name, func(str string) { 282 jw.WriteString(str) 283 }) 284 285 jw.EndObject() 286 } 287 jw.EndArray() 288 289 jw.EndObject() 290 } 291 jw.EndArray() 292 293 if err := iter.Err(); err != nil { 294 log.Fatal("doc iterator error", zap.Error(err)) 295 } 296 if err := iter.Close(); err != nil { 297 log.Fatal("doc iterator close error", zap.Error(err)) 298 } 299 300 fieldsIter, err := seg.FieldsIterable().Fields() 301 if err != nil { 302 log.Fatal("could not create fields iterator", zap.Error(err)) 303 } 304 305 jw.BeginObjectField("fields") 306 jw.BeginArray() 307 for fieldsIter.Next() { 308 field := fieldsIter.Current() 309 310 jw.BeginObject() 311 jw.BeginObjectField("field") 312 unsafe.WithString(field, func(str string) { 313 jw.WriteString(str) 314 }) 315 316 termsIter, err := seg.TermsIterable().Terms(field) 317 if err != nil { 318 log.Fatal("could not create terms iterator", zap.Error(err)) 319 } 320 321 jw.BeginObjectField("terms") 322 jw.BeginArray() 323 for termsIter.Next() { 324 term, postingsList := termsIter.Current() 325 326 jw.BeginObject() 327 jw.BeginObjectField("term") 328 unsafe.WithString(term, func(str string) { 329 jw.WriteString(str) 330 }) 331 332 postingsIter := postingsList.Iterator() 333 334 jw.BeginObjectField("postings") 335 jw.BeginArray() 336 for postingsIter.Next() { 337 postingsID := postingsIter.Current() 338 jw.WriteInt(int(postingsID)) 339 } 340 jw.EndArray() 341 jw.EndObject() 342 343 if err := postingsIter.Err(); err != nil { 344 log.Fatal("postings iterator error", zap.Error(err)) 345 } 346 347 if err := postingsIter.Close(); err != nil { 348 log.Fatal("postings iterator close error", zap.Error(err)) 349 } 350 } 351 jw.EndArray() 352 jw.EndObject() 353 354 if err := termsIter.Err(); err != nil { 355 log.Fatal("field iterator error", zap.Error(err)) 356 } 357 358 if err := termsIter.Close(); err != nil { 359 log.Fatal("field iterator close error", zap.Error(err)) 360 } 361 } 362 jw.EndArray() 363 364 if err := fieldsIter.Err(); err != nil { 365 log.Fatal("field iterator error", zap.Error(err)) 366 } 367 368 if err := fieldsIter.Close(); err != nil { 369 log.Fatal("field iterator close error", zap.Error(err)) 370 } 371 372 jw.EndObject() 373 374 if err := jw.Flush(); err != nil { 375 log.Fatal("could not flush JSON writer", zap.Error(err)) 376 } 377 if err := jw.Close(); err != nil { 378 log.Fatal("could not close JSON writer", zap.Error(err)) 379 } 380 } 381 } 382 383 func dirFiles(dirPath string) ([]string, error) { 384 dir, err := os.Open(dirPath) 385 if err != nil { 386 return nil, fmt.Errorf("could not open dir: %v", err) 387 } 388 389 defer dir.Close() 390 391 stat, err := dir.Stat() 392 if err != nil { 393 return nil, fmt.Errorf("could not stat dir: %v", err) 394 } 395 if !stat.IsDir() { 396 return nil, fmt.Errorf("path is not a directory: %s", dirPath) 397 } 398 399 entries, err := dir.Readdirnames(-1) 400 if err != nil { 401 return nil, fmt.Errorf("could not read dir names: %v", err) 402 } 403 404 results := entries[:0] 405 for _, p := range entries { 406 if p == "." || p == ".." || p == "./.." || p == "./" || p == "../" || p == "./../" { 407 continue 408 } 409 results = append(results, p) 410 } 411 return results, nil 412 }