github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/cmd/migrate/main.go

package main

import (
	"context"
	"flag"
	"fmt"
	"log"
	"net/http"
	_ "net/http/pprof"
	"os"
	"sort"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/model"

	"github.com/prometheus/prometheus/model/labels"
	"github.com/weaveworks/common/user"

	"github.com/grafana/loki/pkg/logql/syntax"
	"github.com/grafana/loki/pkg/loki"
	"github.com/grafana/loki/pkg/storage"
	"github.com/grafana/loki/pkg/storage/chunk"
	"github.com/grafana/loki/pkg/storage/config"
	"github.com/grafana/loki/pkg/util/cfg"
	util_log "github.com/grafana/loki/pkg/util/log"
	"github.com/grafana/loki/pkg/validation"
)

// syncRange is a single shard of the overall migration window, expressed as Unix-nanosecond bounds.
type syncRange struct {
	number int
	from   int64
	to     int64
}

func main() {
	var defaultsConfig loki.Config

	from := flag.String("from", "", "Start Time RFC3339Nano 2006-01-02T15:04:05.999999999Z07:00")
	to := flag.String("to", "", "End Time RFC3339Nano 2006-01-02T15:04:05.999999999Z07:00")
	sf := flag.String("source.config.file", "", "source datasource config")
	df := flag.String("dest.config.file", "", "dest datasource config")
	source := flag.String("source.tenant", "fake", "Source tenant identifier, default is `fake` for single tenant Loki")
	dest := flag.String("dest.tenant", "fake", "Destination tenant identifier, default is `fake` for single tenant Loki")
	match := flag.String("match", "", "Optional label match")

	batch := flag.Int("batchLen", 500, "Specify how many chunks to read/write in one batch")
	shardBy := flag.Duration("shardBy", 6*time.Hour, "Break down the total interval into shards of this size, making this too small can lead to syncing a lot of duplicate chunks")
	parallel := flag.Int("parallel", 8, "How many parallel threads to process each shard")
	flag.Parse()

	// Serve pprof (registered via the net/http/pprof import) for profiling.
	go func() {
		log.Println(http.ListenAndServe("localhost:8080", nil))
	}()

	// Create a set of defaults
	if err := cfg.Unmarshal(&defaultsConfig, cfg.Defaults(flag.CommandLine)); err != nil {
		log.Println("Failed parsing defaults config:", err)
		os.Exit(1)
	}

	var sourceConfig loki.ConfigWrapper
	srcArgs := []string{"-config.file=" + *sf}
	if err := cfg.DynamicUnmarshal(&sourceConfig, srcArgs, flag.NewFlagSet("config-file-loader", flag.ContinueOnError)); err != nil {
		fmt.Fprintf(os.Stderr, "failed parsing config: %v\n", err)
		os.Exit(1)
	}

	var destConfig loki.ConfigWrapper
	destArgs := []string{"-config.file=" + *df}
	if err := cfg.DynamicUnmarshal(&destConfig, destArgs, flag.NewFlagSet("config-file-loader", flag.ContinueOnError)); err != nil {
		fmt.Fprintf(os.Stderr, "failed parsing config: %v\n", err)
		os.Exit(1)
	}

	// This is a little brittle: if we add a new cache it may easily get missed here, but it's important to disable
	// any of the chunk caches to save on memory, because we write chunks to the cache when we call Put operations on the store.
	sourceConfig.ChunkStoreConfig.ChunkCacheConfig.EnableFifoCache = false
	sourceConfig.ChunkStoreConfig.ChunkCacheConfig.MemcacheClient = defaultsConfig.ChunkStoreConfig.ChunkCacheConfig.MemcacheClient
	sourceConfig.ChunkStoreConfig.ChunkCacheConfig.Redis = defaultsConfig.ChunkStoreConfig.ChunkCacheConfig.Redis
	sourceConfig.ChunkStoreConfig.WriteDedupeCacheConfig.EnableFifoCache = false
	sourceConfig.ChunkStoreConfig.WriteDedupeCacheConfig.MemcacheClient = defaultsConfig.ChunkStoreConfig.WriteDedupeCacheConfig.MemcacheClient
	sourceConfig.ChunkStoreConfig.WriteDedupeCacheConfig.Redis = defaultsConfig.ChunkStoreConfig.WriteDedupeCacheConfig.Redis

	destConfig.ChunkStoreConfig.ChunkCacheConfig.EnableFifoCache = false
	destConfig.ChunkStoreConfig.ChunkCacheConfig.MemcacheClient = defaultsConfig.ChunkStoreConfig.ChunkCacheConfig.MemcacheClient
	destConfig.ChunkStoreConfig.ChunkCacheConfig.Redis = defaultsConfig.ChunkStoreConfig.ChunkCacheConfig.Redis
	destConfig.ChunkStoreConfig.WriteDedupeCacheConfig.EnableFifoCache = false
	destConfig.ChunkStoreConfig.WriteDedupeCacheConfig.MemcacheClient = defaultsConfig.ChunkStoreConfig.WriteDedupeCacheConfig.MemcacheClient
	destConfig.ChunkStoreConfig.WriteDedupeCacheConfig.Redis = defaultsConfig.ChunkStoreConfig.WriteDedupeCacheConfig.Redis

	// Don't keep fetched index files for very long
	sourceConfig.StorageConfig.BoltDBShipperConfig.CacheTTL = 30 * time.Minute

	// Shorten these timers up so we resync a little faster and clear index files a little quicker
	destConfig.StorageConfig.IndexCacheValidity = 1 * time.Minute
	destConfig.StorageConfig.BoltDBShipperConfig.ResyncInterval = 1 * time.Minute

	// These queries can span very long ranges, so stretch the cardinality limit and remove the query length limit
	sourceConfig.LimitsConfig.CardinalityLimit = 1e9
	sourceConfig.LimitsConfig.MaxQueryLength = 0
	limits, err := validation.NewOverrides(sourceConfig.LimitsConfig, nil)
	if err != nil {
		log.Println("Failed to create limit overrides:", err)
		os.Exit(1)
	}
	err = sourceConfig.Validate()
	if err != nil {
		log.Println("Failed to validate source store config:", err)
		os.Exit(1)
	}
	err = destConfig.Validate()
	if err != nil {
		log.Println("Failed to validate dest store config:", err)
		os.Exit(1)
	}
	// Create a new registerer to avoid registering duplicate metrics
	prometheus.DefaultRegisterer = prometheus.NewRegistry()
	clientMetrics := storage.NewClientMetrics()
	s, err := storage.NewStore(sourceConfig.StorageConfig, sourceConfig.ChunkStoreConfig, sourceConfig.SchemaConfig, limits, clientMetrics, prometheus.DefaultRegisterer, util_log.Logger)
	if err != nil {
		log.Println("Failed to create source store:", err)
		os.Exit(1)
	}

	// Create a new registerer to avoid registering duplicate metrics
	prometheus.DefaultRegisterer = prometheus.NewRegistry()

	d, err := storage.NewStore(destConfig.StorageConfig, destConfig.ChunkStoreConfig, destConfig.SchemaConfig, limits, clientMetrics, prometheus.DefaultRegisterer, util_log.Logger)
	if err != nil {
		log.Println("Failed to create destination store:", err)
		os.Exit(1)
	}

	nameLabelMatcher, err := labels.NewMatcher(labels.MatchEqual, labels.MetricName, "logs")
	if err != nil {
		log.Println("Failed to create label matcher:", err)
		os.Exit(1)
	}

	matchers := []*labels.Matcher{nameLabelMatcher}

	if *match != "" {
		m, err := syntax.ParseMatchers(*match)
		if err != nil {
			log.Println("Failed to parse log matcher:", err)
			os.Exit(1)
		}
		matchers = append(matchers, m...)
	}

	ctx := context.Background()
	// This is a little weird but it was the easiest way to guarantee the userID is in the right format
	ctx = user.InjectOrgID(ctx, *source)

	parsedFrom := mustParse(*from)
	parsedTo := mustParse(*to)

	start := time.Now()

	shardByNs := *shardBy
	syncRanges := calcSyncRanges(parsedFrom.UnixNano(), parsedTo.UnixNano(), shardByNs.Nanoseconds())
	log.Printf("With a shard duration of %v, %v ranges have been calculated.\n", shardByNs, len(syncRanges)-1)

	// Pass the dest schema config; the destination determines the new chunk external keys using a potentially different schema config.
	cm := newChunkMover(ctx, destConfig.SchemaConfig, s, d, *source, *dest, matchers, *batch, len(syncRanges)-1)
	syncChan := make(chan *syncRange)
	errorChan := make(chan error)
	statsChan := make(chan stats)

	// Start the parallel processors
	var wg sync.WaitGroup
	cancelContext, cancelFunc := context.WithCancel(ctx)
	for i := 0; i < *parallel; i++ {
		wg.Add(1)
		go func(threadId int) {
			defer wg.Done()
			cm.moveChunks(cancelContext, threadId, syncChan, errorChan, statsChan)
		}(i)
	}

	// Launch a thread to dispatch requests:
	go func() {
		i := 0
		length := len(syncRanges)
		for i < length {
			//log.Printf("Dispatching sync range %v of %v\n", i+1, length)
			syncChan <- syncRanges[i]
			i++
		}
		// Everything processed, exit
		cancelFunc()
	}()

	var processedChunks uint64
	var processedBytes uint64

	// Launch a thread to track stats
	go func() {
		for stat := range statsChan {
			processedChunks += stat.totalChunks
			processedBytes += stat.totalBytes
		}
		log.Printf("Transferred %v chunks totalling %s in %v for an average throughput of %s/second\n", processedChunks, ByteCountDecimal(processedBytes), time.Since(start), ByteCountDecimal(uint64(float64(processedBytes)/time.Since(start).Seconds())))
		log.Println("Exiting stats thread")
	}()

	// Wait for an error or the context to be canceled
	select {
	case <-cancelContext.Done():
		log.Println("Received done call")
	case err := <-errorChan:
		log.Println("Received an error from processing thread, shutting down: ", err)
		cancelFunc()
	}
	log.Println("Waiting for threads to exit")
	wg.Wait()
	close(statsChan)
	log.Println("All threads finished, stopping destination store (uploading index files for boltdb-shipper)")

	// For boltdb shipper this is important as it will upload all the index files.
	d.Stop()

	log.Println("Going to sleep....")
	for {
		time.Sleep(100 * time.Second)
	}
}

// calcSyncRanges splits the interval [from, to] (Unix nanoseconds) into consecutive ranges of at most shardBy nanoseconds.
func calcSyncRanges(from, to int64, shardBy int64) []*syncRange {
	// Calculate the sync ranges
	syncRanges := []*syncRange{}
	// diff := to - from
	// shards := diff / shardBy
	currentFrom := from
	// currentTo := from
	currentTo := from + shardBy
	number := 0
	for currentFrom < to && currentTo <= to {
		s := &syncRange{
			number: number,
			from:   currentFrom,
			to:     currentTo,
		}
		syncRanges = append(syncRanges, s)
		number++

		currentFrom = currentTo + 1
		currentTo = currentTo + shardBy

		if currentTo > to {
			currentTo = to
		}
	}
	return syncRanges
}

// stats accumulates per-range transfer totals.
type stats struct {
	totalChunks uint64
	totalBytes  uint64
}

// chunkMover copies chunks for a tenant from a source store to a destination store, one syncRange at a time.
type chunkMover struct {
	ctx        context.Context
	schema     config.SchemaConfig
	source     storage.Store
	dest       storage.Store
	sourceUser string
	destUser   string
	matchers   []*labels.Matcher
	batch      int
	syncRanges int
}

func newChunkMover(ctx context.Context, s config.SchemaConfig, source, dest storage.Store, sourceUser, destUser string, matchers []*labels.Matcher, batch int, syncRanges int) *chunkMover {
	cm := &chunkMover{
		ctx:        ctx,
		schema:     s,
		source:     source,
		dest:       dest,
		sourceUser: sourceUser,
		destUser:   destUser,
		matchers:   matchers,
		batch:      batch,
		syncRanges: syncRanges,
	}
	return cm
}

// moveChunks consumes sync ranges from syncRangeCh, fetches the matching chunks from the source store
// and writes them to the destination store, until the context is cancelled or an error is reported on errCh.
func (m *chunkMover) moveChunks(ctx context.Context, threadID int, syncRangeCh <-chan *syncRange, errCh chan<- error, statsCh chan<- stats) {
	for {
		select {
		case <-ctx.Done():
			log.Println(threadID, "Requested to be done, context cancelled, quitting.")
			return
		case sr := <-syncRangeCh:
			start := time.Now()
			var totalBytes uint64
			var totalChunks uint64
			//log.Printf("%d processing sync range %d - Start: %v, End: %v\n", threadID, sr.number, time.Unix(0, sr.from).UTC(), time.Unix(0, sr.to).UTC())
			schemaGroups, fetchers, err := m.source.GetChunkRefs(m.ctx, m.sourceUser, model.TimeFromUnixNano(sr.from), model.TimeFromUnixNano(sr.to), m.matchers...)
			if err != nil {
				log.Println(threadID, "Error querying index for chunk refs:", err)
				errCh <- err
				return
			}
			for i, f := range fetchers {
				//log.Printf("%v Processing Schema %v which contains %v chunks\n", threadID, i, len(schemaGroups[i]))

				// Slice up into batches
				for j := 0; j < len(schemaGroups[i]); j += m.batch {
					k := j + m.batch
					if k > len(schemaGroups[i]) {
						k = len(schemaGroups[i])
					}

					chunks := schemaGroups[i][j:k]
					//log.Printf("%v Processing chunks %v-%v of %v\n", threadID, j, k, len(schemaGroups[i]))

					keys := make([]string, 0, len(chunks))
					chks := make([]chunk.Chunk, 0, len(chunks))

					// FetchChunks requires chunks to be ordered by external key.
					sort.Slice(chunks, func(x, y int) bool {
						return m.schema.ExternalKey(chunks[x].ChunkRef) < m.schema.ExternalKey(chunks[y].ChunkRef)
					})
					for _, chk := range chunks {
						key := m.schema.ExternalKey(chk.ChunkRef)
						keys = append(keys, key)
						chks = append(chks, chk)
					}
					for retry := 10; retry >= 0; retry-- {
						chks, err = f.FetchChunks(m.ctx, chks, keys)
						if err != nil {
							if retry == 0 {
								log.Println(threadID, "Final error retrieving chunks, giving up:", err)
								errCh <- err
								return
							}
							log.Println(threadID, "Error fetching chunks, will retry:", err)
							time.Sleep(5 * time.Second)
						} else {
							break
						}
					}

					totalChunks += uint64(len(chks))

					output := make([]chunk.Chunk, 0, len(chks))

					// Calculate some size stats and change the tenant ID if necessary
					for i, chk := range chks {
						if enc, err := chk.Encoded(); err == nil {
							totalBytes += uint64(len(enc))
						} else {
							log.Println(threadID, "Error encoding a chunk:", err)
							errCh <- err
							return
						}
						if m.sourceUser != m.destUser {
							// Because the incoming chunks are already encoded, to change the tenant we have to make a new chunk
							nc := chunk.NewChunk(m.destUser, chk.FingerprintModel(), chk.Metric, chk.Data, chk.From, chk.Through)
							err := nc.Encode()
							if err != nil {
								log.Println(threadID, "Failed to encode new chunk with new user:", err)
								errCh <- err
								return
							}
							output = append(output, nc)
						} else {
							output = append(output, chks[i])
						}

					}
					for retry := 4; retry >= 0; retry-- {
						err = m.dest.Put(m.ctx, output)
						if err != nil {
							if retry == 0 {
								log.Println(threadID, "Final error sending chunks to new store, giving up:", err)
								errCh <- err
								return
							}
							log.Println(threadID, "Error sending chunks to new store, will retry:", err)
						} else {
							break
						}
					}
					//log.Println(threadID, "Batch sent successfully")
				}
			}
			log.Printf("%d Finished processing sync range %d of %d - Start: %v, End: %v, %v chunks, %s in %.1f seconds %s/second\n", threadID, sr.number, m.syncRanges, time.Unix(0, sr.from).UTC(), time.Unix(0, sr.to).UTC(), totalChunks, ByteCountDecimal(totalBytes), time.Since(start).Seconds(), ByteCountDecimal(uint64(float64(totalBytes)/time.Since(start).Seconds())))
			statsCh <- stats{
				totalChunks: totalChunks,
				totalBytes:  totalBytes,
			}
		}
	}
}

// mustParse parses t as RFC3339Nano and exits the program if it is invalid.
func mustParse(t string) time.Time {
	ret, err := time.Parse(time.RFC3339Nano, t)
	if err != nil {
		log.Fatalf("Unable to parse time %v", err)
	}

	return ret
}

// ByteCountDecimal formats b as a human-readable decimal (SI) byte count, e.g. 1500 -> "1.5 kB".
func ByteCountDecimal(b uint64) string {
	const unit = 1000
	if b < unit {
		return fmt.Sprintf("%d B", b)
	}
	div, exp := uint64(unit), 0
	for n := b / unit; n >= unit; n /= unit {
		div *= unit
		exp++
	}
	return fmt.Sprintf("%.1f %cB", float64(b)/float64(div), "kMGTPE"[exp])
}