github.com/thanos-io/thanos@v0.32.5/pkg/store/local.go

// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package store

import (
	"bufio"
	"bytes"
	"context"
	"io"
	"math"
	"sort"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/gogo/protobuf/jsonpb"
	"github.com/pkg/errors"
	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/tsdb/fileutil"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"

	"github.com/thanos-io/thanos/pkg/component"
	"github.com/thanos-io/thanos/pkg/runutil"
	"github.com/thanos-io/thanos/pkg/store/labelpb"
	"github.com/thanos-io/thanos/pkg/store/storepb"
)

// LocalStore implements the store API against a single file containing a stream of proto-based SeriesResponses in JSON format.
// It is an inefficient implementation intended for a quick StoreAPI view.
// Chunk order is exactly the same as in the given file.
type LocalStore struct {
	logger    log.Logger
	extLabels labels.Labels

	info *storepb.InfoResponse
	c    io.Closer

	// TODO(bwplotka): This is a very naive in-memory DB. We can support much larger files by
	// indexing labels, symbolizing strings, and keeping only chunk refs instead of storing protobufs in memory.
	// For small debug purposes, this is good enough.
	series       []*storepb.Series
	sortedChunks [][]int
}

// TODO(bwplotka): Add remote read so Prometheus users can use this. Potentially after streaming is added
// https://github.com/prometheus/prometheus/issues/5926.
// TODO(bwplotka): Consider a non-mmapped version of this, as well as different versions.
func NewLocalStoreFromJSONMmappableFile(
	logger log.Logger,
	component component.StoreAPI,
	extLabels labels.Labels,
	path string,
	split bufio.SplitFunc,
) (*LocalStore, error) {
	f, err := fileutil.OpenMmapFile(path)
	if err != nil {
		return nil, err
	}
	defer func() {
		if err != nil {
			runutil.CloseWithErrCapture(&err, f, "json file %s close", path)
		}
	}()

	s := &LocalStore{
		logger:    logger,
		extLabels: extLabels,
		c:         f,
		info: &storepb.InfoResponse{
			LabelSets: []labelpb.ZLabelSet{
				{Labels: labelpb.ZLabelsFromPromLabels(extLabels)},
			},
			StoreType: component.ToProto(),
			MinTime:   math.MaxInt64,
			MaxTime:   math.MinInt64,
		},
	}

	// Do a quick pass to build the in-memory index.
	content := f.Bytes()
	contentStart := bytes.Index(content, []byte("{"))
	if contentStart != -1 {
		content = content[contentStart:]
	}

	if idx := bytes.LastIndex(content, []byte("}")); idx != -1 {
		content = content[:idx+1]
	}

	skanner := NewNoCopyScanner(content, split)
	resp := &storepb.SeriesResponse{}
	for skanner.Scan() {
		if err := jsonpb.Unmarshal(bytes.NewReader(skanner.Bytes()), resp); err != nil {
			return nil, errors.Wrapf(err, "unmarshal storepb.SeriesResponse frame for file %s", path)
		}
		series := resp.GetSeries()
		if series == nil {
			level.Warn(logger).Log("msg", "not a valid series", "frame", resp.String())
			continue
		}
		chks := make([]int, 0, len(series.Chunks))
		// Sort chunks in a separate slice by MinTime for easier lookup. Find the global max and min.
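		// Only chunk indices are collected and sorted below, so series.Chunks itself keeps the original file order.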
		for ci, c := range series.Chunks {
			if s.info.MinTime > c.MinTime {
				s.info.MinTime = c.MinTime
			}
			if s.info.MaxTime < c.MaxTime {
				s.info.MaxTime = c.MaxTime
			}
			chks = append(chks, ci)
		}

		sort.Slice(chks, func(i, j int) bool {
			return series.Chunks[chks[i]].MinTime < series.Chunks[chks[j]].MinTime
		})
		s.series = append(s.series, series)
		s.sortedChunks = append(s.sortedChunks, chks)
	}

	if err := skanner.Err(); err != nil {
		return nil, errors.Wrapf(err, "scanning file %s", path)
	}
	level.Info(logger).Log("msg", "loading JSON file succeeded", "file", path, "info", s.info.String(), "series", len(s.series))
	return s, nil
}

// ScanGRPCCurlProtoStreamMessages tokenizes each streamed gRPC message produced by the grpcurl tool.
func ScanGRPCCurlProtoStreamMessages(data []byte, atEOF bool) (advance int, token []byte, err error) {
	var delim = []byte(`}
{`)
	if atEOF && len(data) == 0 {
		return 0, nil, nil
	}
	if idx := bytes.LastIndex(data, delim); idx != -1 {
		return idx + 2, data[:idx+1], nil
	}
	// If we're at EOF, let's return all.
	if atEOF {
		return len(data), data, nil
	}
	// Incomplete; get more bytes.
	return len(delim), nil, nil
}

// Info returns information about this store.
func (s *LocalStore) Info(_ context.Context, _ *storepb.InfoRequest) (*storepb.InfoResponse, error) {
	return s.info, nil
}

// Series returns all series for a requested time range and label matcher. The returned data may
// exceed the requested time bounds.
func (s *LocalStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesServer) error {
	match, matchers, err := matchesExternalLabels(r.Matchers, s.extLabels)
	if err != nil {
		return status.Error(codes.InvalidArgument, err.Error())
	}
	if !match {
		return nil
	}
	if len(matchers) == 0 {
		return status.Error(codes.InvalidArgument, errors.New("no matchers specified (excluding external labels)").Error())
	}

	var chosen []int
	for si, series := range s.series {
		lbls := labelpb.ZLabelsToPromLabels(series.Labels)
		var noMatch bool
		for _, m := range matchers {
			extValue := lbls.Get(m.Name)
			if extValue == "" {
				continue
			}
			if !m.Matches(extValue) {
				noMatch = true
				break
			}
		}
		if noMatch {
			continue
		}

		chosen = chosen[:0]
		resp := &storepb.Series{
			// Copy labels, as in-process clients like the proxy tend to work on the same memory for labels.
			Labels: labelpb.DeepCopy(series.Labels),
			Chunks: make([]storepb.AggrChunk, 0, len(s.sortedChunks[si])),
		}

		for _, ci := range s.sortedChunks[si] {
			if series.Chunks[ci].MaxTime < r.MinTime {
				continue
			}
			if series.Chunks[ci].MinTime > r.MaxTime {
				continue
			}
			chosen = append(chosen, ci)
		}

		sort.Ints(chosen)
		for _, ci := range chosen {
			resp.Chunks = append(resp.Chunks, series.Chunks[ci])
		}

		if err := srv.Send(storepb.NewSeriesResponse(resp)); err != nil {
			return status.Error(codes.Aborted, err.Error())
		}
	}
	return nil
}

// LabelNames returns all known label names.
func (s *LocalStore) LabelNames(_ context.Context, _ *storepb.LabelNamesRequest) (
	*storepb.LabelNamesResponse, error,
) {
	// TODO(bwplotka): Consider precomputing.
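	// Build the union of label names across all series loaded from the file.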
	names := map[string]struct{}{}
	for _, series := range s.series {
		for _, l := range series.Labels {
			names[l.Name] = struct{}{}
		}
	}
	resp := &storepb.LabelNamesResponse{}
	for n := range names {
		resp.Names = append(resp.Names, n)
	}
	return resp, nil
}

// LabelValues returns all known label values for a given label name.
func (s *LocalStore) LabelValues(_ context.Context, r *storepb.LabelValuesRequest) (
	*storepb.LabelValuesResponse, error,
) {
	vals := map[string]struct{}{}
	for _, series := range s.series {
		lbls := labelpb.ZLabelsToPromLabels(series.Labels)
		val := lbls.Get(r.Label)
		if val == "" {
			continue
		}
		vals[val] = struct{}{}
	}
	resp := &storepb.LabelValuesResponse{}
	for val := range vals {
		resp.Values = append(resp.Values, val)
	}
	return resp, nil
}

func (s *LocalStore) Close() (err error) {
	return s.c.Close()
}

type noCopyScanner struct {
	b         []byte
	splitFunc bufio.SplitFunc

	start, end int
	err        error

	token []byte
}

// NewNoCopyScanner returns a bufio.Scanner-like scanner meant to be used on an already allocated
// (or mmapped) byte slice. Returned tokens are shared with the underlying slice.
func NewNoCopyScanner(b []byte, splitFunc bufio.SplitFunc) *noCopyScanner {
	return &noCopyScanner{
		b:         b,
		splitFunc: splitFunc,
		start:     0,
		end:       0,
	}
}

func (s *noCopyScanner) Scan() bool {
	if s.start >= len(s.b) {
		return false
	}

	advance := 1
	for s.end+advance < len(s.b) {
		s.end += advance

		advance, s.token, s.err = s.splitFunc(s.b[s.start:s.end], false)
		if s.err != nil {
			return false
		}

		if len(s.token) > 0 {
			s.start += advance
			s.end = s.start
			return true
		}
	}

	_, s.token, s.err = s.splitFunc(s.b[s.start:], true)
	if s.err != nil {
		return false
	}
	s.start = len(s.b)
	return len(s.token) > 0
}

func (s *noCopyScanner) Bytes() []byte {
	return s.token
}

func (s *noCopyScanner) Err() error {
	return s.err
}
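
// The sketch below is illustrative and not part of the upstream file. It shows how a
// grpcurl-captured stream of SeriesResponse frames could be loaded with this store and
// inspected in process. The path "dump.json" and the choice of component.Debug are
// assumptions for the example; any readable dump and any component.StoreAPI should do.
//
//	package main
//
//	import (
//		"context"
//		"fmt"
//		"os"
//
//		"github.com/go-kit/log"
//		"github.com/prometheus/prometheus/model/labels"
//
//		"github.com/thanos-io/thanos/pkg/component"
//		"github.com/thanos-io/thanos/pkg/store"
//		"github.com/thanos-io/thanos/pkg/store/storepb"
//	)
//
//	func main() {
//		logger := log.NewLogfmtLogger(os.Stderr)
//
//		// "dump.json" is a hypothetical file holding the JSON frames emitted by grpcurl
//		// when calling the Series RPC of a Thanos StoreAPI.
//		s, err := store.NewLocalStoreFromJSONMmappableFile(
//			logger,
//			component.Debug, // Assumed component for the example; any StoreAPI component is accepted.
//			labels.FromStrings("replica", "debug"),
//			"dump.json",
//			store.ScanGRPCCurlProtoStreamMessages,
//		)
//		if err != nil {
//			panic(err)
//		}
//		defer s.Close()
//
//		// Inspect which label names the dump contains.
//		names, err := s.LabelNames(context.Background(), &storepb.LabelNamesRequest{})
//		if err != nil {
//			panic(err)
//		}
//		fmt.Println(names.Names)
//	}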