github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/cmd/tools/read_index_files/main/main.go (about) 1 // Copyright (c) 2018 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package main 22 23 import ( 24 "bufio" 25 "bytes" 26 "fmt" 27 "io" 28 "log" 29 "os" 30 "regexp" 31 "sort" 32 "strconv" 33 34 "github.com/m3db/m3/src/dbnode/persist" 35 "github.com/m3db/m3/src/dbnode/persist/fs" 36 "github.com/m3db/m3/src/m3ninx/doc" 37 m3ninxpersist "github.com/m3db/m3/src/m3ninx/persist" 38 "github.com/m3db/m3/src/x/ident" 39 xtime "github.com/m3db/m3/src/x/time" 40 41 "github.com/pborman/getopt" 42 "go.uber.org/zap" 43 ) 44 45 func main() { 46 var ( 47 optPathPrefix = getopt.StringLong("path-prefix", 'p', "/var/lib/m3db", "Path prefix [e.g. /var/lib/m3db]") 48 optNamespace = getopt.StringLong("namespace", 'n', "metrics", "Namespace [e.g. metrics]") 49 optBlockstart = getopt.Int64Long("block-start", 'b', 0, "Block Start Time [in nsec]") 50 optVolumeIndex = getopt.Int64Long("volume-index", 'v', 0, "Volume index") 51 optLargeFieldLimit = getopt.Int64Long("large-field-limit", 'l', 0, "Large Field Limit (non-zero to display fields with num terms > limit)") 52 optOutputIdsPrefix = getopt.StringLong("output-ids-prefix", 'o', "", "If set, it emits all terms for the _m3ninx_id field.") 53 optSkipValidateIntegrity = getopt.BoolLong("skip-validate-integrity", 's', "If set will skip integrity validation on segment open") 54 ) 55 getopt.Parse() 56 57 rawLogger, err := zap.NewDevelopment() 58 if err != nil { 59 log.Fatalf("unable to create logger: %+v", err) 60 } 61 log := rawLogger.Sugar() 62 63 if *optPathPrefix == "" || 64 *optNamespace == "" || 65 *optBlockstart <= 0 { 66 getopt.Usage() 67 os.Exit(1) 68 } 69 70 fsOpts := fs.NewOptions(). 71 SetFilePathPrefix(*optPathPrefix). 72 SetIndexReaderAutovalidateIndexSegments(!*optSkipValidateIntegrity) 73 reader, err := fs.NewIndexReader(fsOpts) 74 if err != nil { 75 log.Fatalf("could not create new index reader: %v", err) 76 } 77 78 openOpts := fs.IndexReaderOpenOptions{ 79 Identifier: fs.FileSetFileIdentifier{ 80 FileSetContentType: persist.FileSetIndexContentType, 81 Namespace: ident.StringID(*optNamespace), 82 BlockStart: xtime.UnixNano(*optBlockstart), 83 VolumeIndex: int(*optVolumeIndex), 84 }, 85 } 86 87 result, err := reader.Open(openOpts) 88 if err != nil { 89 log.Fatalf("unable to open reader: %v", err) 90 } 91 92 shards := make([]int, 0, len(result.Shards)) 93 for shard := range result.Shards { 94 shards = append(shards, int(shard)) 95 } 96 sort.Ints(shards) 97 98 log.Infof("shards: %v, volumeType: %s", shards, reader.IndexVolumeType()) 99 100 i := 0 101 for { 102 i++ 103 log.Info("opening index segment file set") 104 fileset, err := reader.ReadSegmentFileSet() 105 if err == io.EOF { 106 break 107 } 108 if err != nil { 109 log.Fatalf("unable to retrieve fileset: %v", err) 110 } 111 112 log.Info("validating index segment file set") 113 if err := reader.Validate(); err != nil { 114 log.Fatalf("error validating segment file set: %v", err) 115 } 116 117 seg, err := m3ninxpersist.NewSegment(fileset, fsOpts.FSTOptions()) 118 if err != nil { 119 log.Fatalf("unable to open segment reader: %v", err) 120 } 121 defer seg.Close() 122 123 var ( 124 idsFile *os.File 125 idsWriter *bufio.Writer 126 ) 127 if *optOutputIdsPrefix != "" { 128 idsFile, err = os.Create(fmt.Sprintf("%s-ids-segment-%d.out", *optOutputIdsPrefix, i)) 129 if err != nil { 130 log.Fatalf("unable to create output ids file: %v", err) 131 } 132 idsWriter = bufio.NewWriter(idsFile) 133 defer func() { 134 idsWriter.Flush() 135 idsFile.Sync() 136 if err := idsFile.Close(); err != nil { 137 log.Fatalf("error closing ids file: %v", err) 138 } 139 }() 140 } 141 142 fields, err := seg.FieldsIterable().Fields() 143 if err != nil { 144 log.Fatalf("unable to retrieve segment fields: %v", err) 145 } 146 147 type largeField struct { 148 field string 149 numTerms int 150 } 151 var largeFields []largeField 152 var termLens ints 153 var numFields int 154 for fields.Next() { 155 numFields++ 156 157 f := fields.Current() 158 terms, err := seg.TermsIterable().Terms(f) 159 if err != nil { 160 log.Fatalf("unable to retrieve segment term: %v", err) 161 } 162 163 numTerms := 0 164 isNameField := bytes.Equal(doc.IDReservedFieldName, f) 165 for terms.Next() { 166 numTerms++ 167 168 if isNameField && idsWriter != nil { 169 // ids output 170 t, _ := terms.Current() 171 idsWriter.Write(t) 172 idsWriter.WriteByte('\n') 173 } 174 } 175 176 // large field output 177 if *optLargeFieldLimit > 0 && numTerms > int(*optLargeFieldLimit) { 178 largeFields = append(largeFields, largeField{ 179 field: string(f), 180 numTerms: numTerms, 181 }) 182 } 183 termLens = append(termLens, numTerms) 184 } 185 186 summary := termLens.summary() 187 log.Infof("Segment: [%v], Size: [%v], NumFields: [%v], Num Terms: [%+v]", i, 188 formatCommas(int(seg.Size())), formatCommas(numFields), summary) 189 if *optLargeFieldLimit > 0 { 190 log.Infof("Large fields: %+v", largeFields) 191 } 192 } 193 } 194 195 type summaryStats struct { 196 max float64 197 min float64 198 average float64 199 median float64 200 } 201 202 type ints []int 203 204 func (vals ints) summary() summaryStats { 205 res := summaryStats{} 206 sort.Ints(vals) 207 if len(vals)%2 == 1 { 208 res.median = float64(vals[len(vals)/2]) 209 } else { 210 res.median = (float64(vals[len(vals)/2]) + float64(vals[(1+len(vals))/2])) / 2 211 } 212 res.min = float64(vals[0]) 213 res.max = float64(vals[len(vals)-1]) 214 sum := 0 215 for _, val := range vals { 216 sum += val 217 } 218 res.average = float64(sum) / float64(len(vals)) 219 return res 220 } 221 222 func formatCommas(num int) string { 223 str := strconv.Itoa(num) 224 re := regexp.MustCompile(`(\d+)(\d{3})`) 225 for i := 0; i < (len(str)-1)/3; i++ { 226 str = re.ReplaceAllString(str, "$1,$2") 227 } 228 return str 229 }