github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/tools/blocksconvert/builder/series.go (about) 1 package builder 2 3 import ( 4 "encoding/gob" 5 "fmt" 6 "os" 7 "path/filepath" 8 "sort" 9 "sync" 10 11 "github.com/golang/snappy" 12 "github.com/prometheus/prometheus/pkg/labels" 13 "github.com/prometheus/prometheus/tsdb/chunks" 14 tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" 15 ) 16 17 type series struct { 18 // All fields must be exported for serialization to work properly. 19 Metric labels.Labels 20 Chunks []chunks.Meta 21 MinTime int64 22 MaxTime int64 23 Samples uint64 24 } 25 26 // Keeps series in memory until limit is reached. Then series are sorted, and written to the file. 27 // Each batch goes to different file. 28 // When series are iterated, all files are merged (which is easy to do, as they are already sorted). 29 // Symbols are written to different set of files, they are also sorted, merged and deduplicated on iteration. 30 type seriesList struct { 31 limit int 32 dir string 33 34 mu sync.Mutex 35 sers []series 36 seriesFiles []string 37 symbolsFiles []string 38 } 39 40 func newSeriesList(limit int, dir string) *seriesList { 41 return &seriesList{ 42 limit: limit, 43 dir: dir, 44 } 45 } 46 47 func (sl *seriesList) addSeries(m labels.Labels, cs []chunks.Meta, samples uint64, minTime, maxTime int64) error { 48 sl.mu.Lock() 49 defer sl.mu.Unlock() 50 51 sl.sers = append(sl.sers, series{ 52 Metric: m, 53 Chunks: cs, 54 MinTime: minTime, 55 MaxTime: maxTime, 56 Samples: samples, 57 }) 58 59 return sl.flushSeriesNoLock(false) 60 } 61 62 func (sl *seriesList) unflushedSeries() int { 63 sl.mu.Lock() 64 defer sl.mu.Unlock() 65 66 return len(sl.sers) 67 } 68 69 func (sl *seriesList) flushSeries() error { 70 sl.mu.Lock() 71 defer sl.mu.Unlock() 72 73 return sl.flushSeriesNoLock(true) 74 } 75 76 func (sl *seriesList) flushSeriesNoLock(force bool) error { 77 if !force && len(sl.sers) < sl.limit { 78 return nil 79 } 80 81 // Sort series by labels first. 82 sort.Slice(sl.sers, func(i, j int) bool { 83 return labels.Compare(sl.sers[i].Metric, sl.sers[j].Metric) < 0 84 }) 85 86 seriesFile := filepath.Join(sl.dir, fmt.Sprintf("series_%d", len(sl.seriesFiles))) 87 symbols, err := writeSeries(seriesFile, sl.sers) 88 if err != nil { 89 return err 90 } 91 92 sl.sers = nil 93 sl.seriesFiles = append(sl.seriesFiles, seriesFile) 94 95 // No error so far, let's write symbols too. 96 sortedSymbols := make([]string, 0, len(symbols)) 97 for k := range symbols { 98 sortedSymbols = append(sortedSymbols, k) 99 } 100 101 sort.Strings(sortedSymbols) 102 103 symbolsFile := filepath.Join(sl.dir, fmt.Sprintf("symbols_%d", len(sl.symbolsFiles))) 104 err = writeSymbols(symbolsFile, sortedSymbols) 105 if err == nil { 106 sl.symbolsFiles = append(sl.symbolsFiles, symbolsFile) 107 } 108 109 return err 110 } 111 112 func writeSymbols(filename string, symbols []string) error { 113 f, err := os.Create(filename) 114 if err != nil { 115 return err 116 } 117 118 sn := snappy.NewBufferedWriter(f) 119 enc := gob.NewEncoder(sn) 120 121 errs := tsdb_errors.NewMulti() 122 123 for _, s := range symbols { 124 err := enc.Encode(s) 125 if err != nil { 126 errs.Add(err) 127 break 128 } 129 } 130 131 errs.Add(sn.Close()) 132 errs.Add(f.Close()) 133 return errs.Err() 134 } 135 136 func writeSeries(filename string, sers []series) (map[string]struct{}, error) { 137 f, err := os.Create(filename) 138 if err != nil { 139 return nil, err 140 } 141 142 symbols := map[string]struct{}{} 143 144 errs := tsdb_errors.NewMulti() 145 146 sn := snappy.NewBufferedWriter(f) 147 enc := gob.NewEncoder(sn) 148 149 // Write each series as a separate object, so that we can read them back individually. 150 for _, ser := range sers { 151 for _, sym := range ser.Metric { 152 symbols[sym.Name] = struct{}{} 153 symbols[sym.Value] = struct{}{} 154 } 155 156 err := enc.Encode(ser) 157 if err != nil { 158 errs.Add(err) 159 break 160 } 161 } 162 163 errs.Add(sn.Close()) 164 errs.Add(f.Close()) 165 166 return symbols, errs.Err() 167 } 168 169 // Returns iterator over sorted list of symbols. Each symbol is returned once. 170 func (sl *seriesList) symbolsIterator() (*symbolsIterator, error) { 171 sl.mu.Lock() 172 filenames := append([]string(nil), sl.symbolsFiles...) 173 sl.mu.Unlock() 174 175 files, err := openFiles(filenames) 176 if err != nil { 177 return nil, err 178 } 179 180 var result []*symbolsFile 181 for _, f := range files { 182 result = append(result, newSymbolsFile(f)) 183 } 184 185 return newSymbolsIterator(result), nil 186 } 187 188 // Returns iterator over sorted list of series. 189 func (sl *seriesList) seriesIterator() (*seriesIterator, error) { 190 sl.mu.Lock() 191 filenames := append([]string(nil), sl.seriesFiles...) 192 sl.mu.Unlock() 193 194 files, err := openFiles(filenames) 195 if err != nil { 196 return nil, err 197 } 198 199 var result []*seriesFile 200 for _, f := range files { 201 result = append(result, newSeriesFile(f)) 202 } 203 204 return newSeriesIterator(result), nil 205 } 206 207 func openFiles(filenames []string) ([]*os.File, error) { 208 var result []*os.File 209 210 for _, fn := range filenames { 211 f, err := os.Open(fn) 212 213 if err != nil { 214 // Close opened files so far. 215 for _, sf := range result { 216 _ = sf.Close() 217 } 218 return nil, err 219 } 220 221 result = append(result, f) 222 } 223 return result, nil 224 }