github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/index/compaction/plan.go

// Copyright (c) 2018 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package compaction

import (
	"errors"
	"fmt"
	"sort"

	"github.com/m3db/m3/src/dbnode/storage/index/segments"
)

var (
	errMutableCompactionAgeNegative = errors.New("mutable compaction age must be non-negative")
	errLevelsUndefined              = errors.New("compaction levels are undefined")
)

var (
	// DefaultLevels are the default Level(s) used for compaction options.
	DefaultLevels = []Level{ // i.e. tiers for compaction [0, 262K)
		{
			MinSizeInclusive: 0,
			MaxSizeExclusive: 1 << 18,
		},
	}

	// DefaultOptions are the default compaction PlannerOptions.
	DefaultOptions = PlannerOptions{
		MutableSegmentSizeThreshold:   0,                                  // any mutable segment is eligible for compaction
		MutableCompactionAgeThreshold: 0,                                  // any mutable segment is eligible for compaction
		Levels:                        DefaultLevels,                      // sizes defined above
		OrderBy:                       TasksOrderedByOldestMutableAndSize, // compact mutable segments first
	}
)

// NewPlan returns a new compaction.Plan per the rules above and the knobs provided.
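//
// A minimal usage sketch (segment values are illustrative only; any other
// Segment fields are omitted):
//
//	segs := []compaction.Segment{
//		{Size: 16, Type: segments.MutableType},
//		{Size: 1 << 17, Type: segments.FSTType},
//	}
//	plan, err := compaction.NewPlan(segs, compaction.DefaultOptions)
//	if err != nil {
//		// handle error
//	}
//	for _, task := range plan.Tasks {
//		// task.Segments are intended to be compacted together
//	}
//	// plan.UnusedSegments require no compaction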
func NewPlan(compactableSegments []Segment, opts PlannerOptions) (*Plan, error) {
	if err := opts.Validate(); err != nil {
		return nil, err
	}

	// NB: making a copy of levels to ensure we don't modify any input vars.
	levels := make([]Level, len(opts.Levels))
	copy(levels, opts.Levels)
	sort.Sort(ByMinSize(levels))

	// if we don't have any compactable segments, we can terminate early
	if len(compactableSegments) == 0 {
		return &Plan{}, nil
	}

	// initialise to avoid allocs as much as possible
	plan := &Plan{
		OrderBy:        opts.OrderBy,
		UnusedSegments: make([]Segment, 0, len(compactableSegments)),
	}

	// Come up with a logical plan for all compactable segments using the following steps:
	//  (a) Group the segments into the given levels (compactions can only be performed on
	//      segments within the same level). In addition, any mutable segment outside known
	//      levels can still be compacted.
	//  (b) For each level:
	//  (b1) Accumulate segments until the cumulative size reaches the max of the current level.
	//  (b2) Add a Task comprising the segments from (b1) to the Plan.
	//  (b3) Continue (b1) until the level is empty.
	//  (c) Prioritize Tasks with "compactable" Mutable Segments over all others.
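	//
	// For example (sizes are illustrative), with DefaultLevels ([0, 1<<18)) and three FST
	// segments of sizes 100000, 200000 and 200000: all three fall into the single level;
	// after sorting by size, 100000+200000 reaches the level max and becomes one Task,
	// while the single leftover FST segment is marked unused. A single leftover mutable
	// segment would instead get its own Task so it can still be converted to an FST.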

	var (
		// group segments into levels (a)
		segmentsByLevel = make(map[Level][]Segment, len(levels))
		// mutable segments which don't fit a known level are still considered compactable
		catchAllMutableSegmentTask Task
	)
	for _, seg := range compactableSegments {
		var (
			level      Level
			levelFound bool
		)
		for _, b := range levels {
			if b.MinSizeInclusive <= seg.Size && seg.Size < b.MaxSizeExclusive {
				level = b
				levelFound = true
				break
			}
		}
		if levelFound {
			segmentsByLevel[level] = append(segmentsByLevel[level], seg)
			continue
		}
		// we need to compact mutable segments regardless of whether they belong to a known level.
		if seg.Type == segments.MutableType {
			catchAllMutableSegmentTask.Segments = append(catchAllMutableSegmentTask.Segments, seg)
			continue
		}
		// in all other situations, we simply mark the segment unused and move on
		plan.UnusedSegments = append(plan.UnusedSegments, seg)
	}

	// add a task for any mutable segments that didn't fit a known level
	if len(catchAllMutableSegmentTask.Segments) != 0 {
		plan.Tasks = append(plan.Tasks, Task{
			Segments: catchAllMutableSegmentTask.Segments,
		})
	}

	// for each level, sub-group segments into tiered sizes (b)
	for level, levelSegments := range segmentsByLevel {
		var (
			task            Task
			accumulatedSize int64
		)
		sort.Slice(levelSegments, func(i, j int) bool {
			return levelSegments[i].Size < levelSegments[j].Size
		})
		for _, seg := range levelSegments {
			accumulatedSize += seg.Size
			task.Segments = append(task.Segments, seg)
			if accumulatedSize >= level.MaxSizeExclusive {
				plan.Tasks = append(plan.Tasks, task)
				task = Task{}
				accumulatedSize = 0
			}
		}
		// fall-through case: nothing accumulated, so we're done with this level
		if len(task.Segments) == 0 || accumulatedSize == 0 {
			continue
		}

		// if we never reached the level's max size but have 2 or more segments, we should still compact them
		if len(task.Segments) > 1 {
			plan.Tasks = append(plan.Tasks, task)
			continue
		}

		// even if we only have a single segment, if it's a mutable segment, we should compact it to convert it into an FST
		if task.Segments[0].Type == segments.MutableType {
			plan.Tasks = append(plan.Tasks, task)
			continue
		}

		// at this point, we have a single FST segment that doesn't need compaction; mark it as unused
		plan.UnusedSegments = append(plan.UnusedSegments, task.Segments[0])
	}

	// now that we have the plan, we prioritise the tasks as requested in the opts. (c)
	sort.Stable(plan)
	return plan, nil
}

func (p *Plan) Len() int      { return len(p.Tasks) }
func (p *Plan) Swap(i, j int) { p.Tasks[i], p.Tasks[j] = p.Tasks[j], p.Tasks[i] }
func (p *Plan) Less(i, j int) bool {
	switch p.OrderBy {
	case TasksOrderedByOldestMutableAndSize:
		fallthrough
	default:
		// NB: the intent with the conditions below is to optimise for e2e ingest latency first,
		// which is why we prefer to compact older mutable segments first, then any larger ones,
		// after which, we fall back to the graceful plan of compacting smaller segments over
		// larger ones to reduce total compactions required.
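		// For example, a task containing any mutable segments always sorts before an
		// all-FST task, regardless of their relative cumulative sizes.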
		taskSummaryi, taskSummaryj := p.Tasks[i].Summary(), p.Tasks[j].Summary()
		if taskSummaryi.CumulativeMutableAge != taskSummaryj.CumulativeMutableAge {
			// i.e. put those tasks which have cumulative age greater first
			return taskSummaryi.CumulativeMutableAge > taskSummaryj.CumulativeMutableAge
		}
		if taskSummaryi.NumMutable != taskSummaryj.NumMutable {
			// i.e. put those tasks with more mutable segments first
			return taskSummaryi.NumMutable > taskSummaryj.NumMutable
		}
		// i.e. smaller tasks over bigger ones
		return taskSummaryi.CumulativeSize < taskSummaryj.CumulativeSize
	}
}

// Validate ensures the receiver PlannerOptions specify valid values
// for each of the knobs.
func (o PlannerOptions) Validate() error {
	if o.MutableCompactionAgeThreshold < 0 {
		return errMutableCompactionAgeNegative
	}
	if len(o.Levels) == 0 {
		return errLevelsUndefined
	}
	sort.Sort(ByMinSize(o.Levels))
	for i := 0; i < len(o.Levels); i++ {
		current := o.Levels[i]
		if current.MaxSizeExclusive <= current.MinSizeInclusive {
			return fmt.Errorf("illegal size levels definition, MaxSize <= MinSize (%+v)", current)
		}
	}
	return nil
}

// ByMinSize orders a []Level by MinSize in ascending order.
type ByMinSize []Level

func (a ByMinSize) Len() int           { return len(a) }
func (a ByMinSize) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a ByMinSize) Less(i, j int) bool { return a[i].MinSizeInclusive < a[j].MinSizeInclusive }

// Summary returns the TaskSummary for the given task.
func (t Task) Summary() TaskSummary {
	ts := TaskSummary{}
	for _, s := range t.Segments {
		ts.CumulativeSize += s.Size
		if s.Type == segments.MutableType {
			ts.NumMutable++
			ts.CumulativeMutableAge += s.Age
		} else if s.Type == segments.FSTType {
			ts.NumFST++
		}
	}
	return ts
}