github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/tools/blocksconvert/builder/series.go (about)

     1  package builder
     2  
     3  import (
     4  	"encoding/gob"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"sort"
     9  	"sync"
    10  
    11  	"github.com/golang/snappy"
    12  	"github.com/prometheus/prometheus/pkg/labels"
    13  	"github.com/prometheus/prometheus/tsdb/chunks"
    14  	tsdb_errors "github.com/prometheus/prometheus/tsdb/errors"
    15  )
    16  
    17  type series struct {
    18  	// All fields must be exported for serialization to work properly.
    19  	Metric  labels.Labels
    20  	Chunks  []chunks.Meta
    21  	MinTime int64
    22  	MaxTime int64
    23  	Samples uint64
    24  }
    25  
    26  // Keeps series in memory until limit is reached. Then series are sorted, and written to the file.
    27  // Each batch goes to different file.
    28  // When series are iterated, all files are merged (which is easy to do, as they are already sorted).
    29  // Symbols are written to different set of files, they are also sorted, merged and deduplicated on iteration.
    30  type seriesList struct {
    31  	limit int
    32  	dir   string
    33  
    34  	mu           sync.Mutex
    35  	sers         []series
    36  	seriesFiles  []string
    37  	symbolsFiles []string
    38  }
    39  
    40  func newSeriesList(limit int, dir string) *seriesList {
    41  	return &seriesList{
    42  		limit: limit,
    43  		dir:   dir,
    44  	}
    45  }
    46  
    47  func (sl *seriesList) addSeries(m labels.Labels, cs []chunks.Meta, samples uint64, minTime, maxTime int64) error {
    48  	sl.mu.Lock()
    49  	defer sl.mu.Unlock()
    50  
    51  	sl.sers = append(sl.sers, series{
    52  		Metric:  m,
    53  		Chunks:  cs,
    54  		MinTime: minTime,
    55  		MaxTime: maxTime,
    56  		Samples: samples,
    57  	})
    58  
    59  	return sl.flushSeriesNoLock(false)
    60  }
    61  
    62  func (sl *seriesList) unflushedSeries() int {
    63  	sl.mu.Lock()
    64  	defer sl.mu.Unlock()
    65  
    66  	return len(sl.sers)
    67  }
    68  
    69  func (sl *seriesList) flushSeries() error {
    70  	sl.mu.Lock()
    71  	defer sl.mu.Unlock()
    72  
    73  	return sl.flushSeriesNoLock(true)
    74  }
    75  
    76  func (sl *seriesList) flushSeriesNoLock(force bool) error {
    77  	if !force && len(sl.sers) < sl.limit {
    78  		return nil
    79  	}
    80  
    81  	// Sort series by labels first.
    82  	sort.Slice(sl.sers, func(i, j int) bool {
    83  		return labels.Compare(sl.sers[i].Metric, sl.sers[j].Metric) < 0
    84  	})
    85  
    86  	seriesFile := filepath.Join(sl.dir, fmt.Sprintf("series_%d", len(sl.seriesFiles)))
    87  	symbols, err := writeSeries(seriesFile, sl.sers)
    88  	if err != nil {
    89  		return err
    90  	}
    91  
    92  	sl.sers = nil
    93  	sl.seriesFiles = append(sl.seriesFiles, seriesFile)
    94  
    95  	// No error so far, let's write symbols too.
    96  	sortedSymbols := make([]string, 0, len(symbols))
    97  	for k := range symbols {
    98  		sortedSymbols = append(sortedSymbols, k)
    99  	}
   100  
   101  	sort.Strings(sortedSymbols)
   102  
   103  	symbolsFile := filepath.Join(sl.dir, fmt.Sprintf("symbols_%d", len(sl.symbolsFiles)))
   104  	err = writeSymbols(symbolsFile, sortedSymbols)
   105  	if err == nil {
   106  		sl.symbolsFiles = append(sl.symbolsFiles, symbolsFile)
   107  	}
   108  
   109  	return err
   110  }
   111  
   112  func writeSymbols(filename string, symbols []string) error {
   113  	f, err := os.Create(filename)
   114  	if err != nil {
   115  		return err
   116  	}
   117  
   118  	sn := snappy.NewBufferedWriter(f)
   119  	enc := gob.NewEncoder(sn)
   120  
   121  	errs := tsdb_errors.NewMulti()
   122  
   123  	for _, s := range symbols {
   124  		err := enc.Encode(s)
   125  		if err != nil {
   126  			errs.Add(err)
   127  			break
   128  		}
   129  	}
   130  
   131  	errs.Add(sn.Close())
   132  	errs.Add(f.Close())
   133  	return errs.Err()
   134  }
   135  
   136  func writeSeries(filename string, sers []series) (map[string]struct{}, error) {
   137  	f, err := os.Create(filename)
   138  	if err != nil {
   139  		return nil, err
   140  	}
   141  
   142  	symbols := map[string]struct{}{}
   143  
   144  	errs := tsdb_errors.NewMulti()
   145  
   146  	sn := snappy.NewBufferedWriter(f)
   147  	enc := gob.NewEncoder(sn)
   148  
   149  	// Write each series as a separate object, so that we can read them back individually.
   150  	for _, ser := range sers {
   151  		for _, sym := range ser.Metric {
   152  			symbols[sym.Name] = struct{}{}
   153  			symbols[sym.Value] = struct{}{}
   154  		}
   155  
   156  		err := enc.Encode(ser)
   157  		if err != nil {
   158  			errs.Add(err)
   159  			break
   160  		}
   161  	}
   162  
   163  	errs.Add(sn.Close())
   164  	errs.Add(f.Close())
   165  
   166  	return symbols, errs.Err()
   167  }
   168  
   169  // Returns iterator over sorted list of symbols. Each symbol is returned once.
   170  func (sl *seriesList) symbolsIterator() (*symbolsIterator, error) {
   171  	sl.mu.Lock()
   172  	filenames := append([]string(nil), sl.symbolsFiles...)
   173  	sl.mu.Unlock()
   174  
   175  	files, err := openFiles(filenames)
   176  	if err != nil {
   177  		return nil, err
   178  	}
   179  
   180  	var result []*symbolsFile
   181  	for _, f := range files {
   182  		result = append(result, newSymbolsFile(f))
   183  	}
   184  
   185  	return newSymbolsIterator(result), nil
   186  }
   187  
   188  // Returns iterator over sorted list of series.
   189  func (sl *seriesList) seriesIterator() (*seriesIterator, error) {
   190  	sl.mu.Lock()
   191  	filenames := append([]string(nil), sl.seriesFiles...)
   192  	sl.mu.Unlock()
   193  
   194  	files, err := openFiles(filenames)
   195  	if err != nil {
   196  		return nil, err
   197  	}
   198  
   199  	var result []*seriesFile
   200  	for _, f := range files {
   201  		result = append(result, newSeriesFile(f))
   202  	}
   203  
   204  	return newSeriesIterator(result), nil
   205  }
   206  
   207  func openFiles(filenames []string) ([]*os.File, error) {
   208  	var result []*os.File
   209  
   210  	for _, fn := range filenames {
   211  		f, err := os.Open(fn)
   212  
   213  		if err != nil {
   214  			// Close opened files so far.
   215  			for _, sf := range result {
   216  				_ = sf.Close()
   217  			}
   218  			return nil, err
   219  		}
   220  
   221  		result = append(result, f)
   222  	}
   223  	return result, nil
   224  }