github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/index/compaction/plan.go (about) 1 // Copyright (c) 2018 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package compaction 22 23 import ( 24 "errors" 25 "fmt" 26 "sort" 27 28 "github.com/m3db/m3/src/dbnode/storage/index/segments" 29 ) 30 31 var ( 32 errMutableCompactionAgeNegative = errors.New("mutable compaction age must be positive") 33 errLevelsUndefined = errors.New("compaction levels are undefined") 34 ) 35 36 var ( 37 // DefaultLevels are the default Level(s) used for compaction options. 38 DefaultLevels = []Level{ // i.e. tiers for compaction [0, 262K) 39 { 40 MinSizeInclusive: 0, 41 MaxSizeExclusive: 1 << 18, 42 }, 43 } 44 45 // DefaultOptions are the default compaction PlannerOptions. 46 DefaultOptions = PlannerOptions{ 47 MutableSegmentSizeThreshold: 0, // any mutable segment is eligible for compactions 48 MutableCompactionAgeThreshold: 0, // any mutable segment is eligible for compactions 49 Levels: DefaultLevels, // sizes defined above 50 OrderBy: TasksOrderedByOldestMutableAndSize, // compact mutable segments first 51 } 52 ) 53 54 // NewPlan returns a new compaction.Plan per the rules above and the knobs provided. 55 func NewPlan(compactableSegments []Segment, opts PlannerOptions) (*Plan, error) { 56 if err := opts.Validate(); err != nil { 57 return nil, err 58 } 59 60 // NB: making a copy of levels to ensure we don't modify any input vars. 61 levels := make([]Level, len(opts.Levels)) 62 copy(levels, opts.Levels) 63 sort.Sort(ByMinSize(levels)) 64 65 // if we don't have any compactable segments, we can early terminate 66 if len(compactableSegments) == 0 { 67 return &Plan{}, nil 68 } 69 70 // initialise to avoid allocs as much as possible 71 plan := &Plan{ 72 OrderBy: opts.OrderBy, 73 UnusedSegments: make([]Segment, 0, len(compactableSegments)), 74 } 75 76 // Come up with a logical plan for all compactable segments using the following steps: 77 // (a) Group the segments into given levels (compactions can only be performed for 78 // segments within the same level). In addition, any mutable segment outside known 79 // levels can still be compacted. 80 // (b) For each level: 81 // (b1) Accumulate segments until cumulative size is over the max of the current level. 82 // (b2) Add a Task which comprises segments from (b1) to the Plan. 83 // (b3) Continue (b1) until the level is empty. 84 // (c) Priotize Tasks w/ "compactable" Mutable Segments over all others 85 86 var ( 87 // group segments into levels (a) 88 segementsByLevel = make(map[Level][]Segment, len(levels)) 89 // mutable segment which don't fit a known level are still considered compactable 90 catchAllMutableSegmentTask Task 91 ) 92 for _, seg := range compactableSegments { 93 var ( 94 level Level 95 levelFound bool 96 ) 97 for _, b := range levels { 98 if b.MinSizeInclusive <= seg.Size && seg.Size < b.MaxSizeExclusive { 99 level = b 100 levelFound = true 101 break 102 } 103 } 104 if levelFound { 105 segementsByLevel[level] = append(segementsByLevel[level], seg) 106 continue 107 } 108 // we need to compact mutable segments regardless of whether they belong to a known level. 109 if seg.Type == segments.MutableType { 110 catchAllMutableSegmentTask.Segments = append(catchAllMutableSegmentTask.Segments, seg) 111 continue 112 } 113 // in all other situations, we simply mark the segment unused and move on 114 plan.UnusedSegments = append(plan.UnusedSegments, seg) 115 } 116 117 // any segments that don't fit any known buckets 118 if len(catchAllMutableSegmentTask.Segments) != 0 { 119 plan.Tasks = append(plan.Tasks, Task{ 120 Segments: catchAllMutableSegmentTask.Segments, 121 }) 122 } 123 124 // for each level, sub-group segments into tier'd sizes (b) 125 for level, levelSegments := range segementsByLevel { 126 var ( 127 task Task 128 accumulatedSize int64 129 ) 130 sort.Slice(levelSegments, func(i, j int) bool { 131 return levelSegments[i].Size < levelSegments[j].Size 132 }) 133 for _, seg := range levelSegments { 134 accumulatedSize += seg.Size 135 task.Segments = append(task.Segments, seg) 136 if accumulatedSize >= level.MaxSizeExclusive { 137 plan.Tasks = append(plan.Tasks, task) 138 task = Task{} 139 accumulatedSize = 0 140 } 141 } 142 // fall thru cases: no accumulation, so we're good 143 if len(task.Segments) == 0 || accumulatedSize == 0 { 144 continue 145 } 146 147 // in case we never went over accumulated size, but have 2 or more segments, we should still compact them 148 if len(task.Segments) > 1 { 149 plan.Tasks = append(plan.Tasks, task) 150 continue 151 } 152 153 // even if we only have a single segment, if its a mutable segment, we should compact it to convert into a FST 154 if task.Segments[0].Type == segments.MutableType { 155 plan.Tasks = append(plan.Tasks, task) 156 continue 157 } 158 159 // at this point, we have a single FST segment but don't need to compact it; so mark it as such 160 plan.UnusedSegments = append(plan.UnusedSegments, task.Segments[0]) 161 } 162 163 // now that we have the plan, we priortise the tasks as requested in the opts. (c) 164 sort.Stable(plan) 165 return plan, nil 166 } 167 168 func (p *Plan) Len() int { return len(p.Tasks) } 169 func (p *Plan) Swap(i, j int) { p.Tasks[i], p.Tasks[j] = p.Tasks[j], p.Tasks[i] } 170 func (p *Plan) Less(i, j int) bool { 171 switch p.OrderBy { 172 case TasksOrderedByOldestMutableAndSize: 173 fallthrough 174 default: 175 // NB: the intent with the conditions below is to optimise for e2e ingest latency first, 176 // which is why we prefer to compact older mutable segments first, then any larger ones, 177 // after which, we fall back to the graceful plan of compacting smaller segments over 178 // larger ones to reduce total compactions required. 179 taskSummaryi, taskSummaryj := p.Tasks[i].Summary(), p.Tasks[j].Summary() 180 if taskSummaryi.CumulativeMutableAge != taskSummaryj.CumulativeMutableAge { 181 // i.e. put those tasks which have cumulative age greater first 182 return taskSummaryi.CumulativeMutableAge > taskSummaryj.CumulativeMutableAge 183 } 184 if taskSummaryi.NumMutable != taskSummaryj.NumMutable { 185 // i.e. put those tasks with more mutable segments first 186 return taskSummaryi.NumMutable > taskSummaryj.NumMutable 187 } 188 // i.e. smaller tasks over bigger ones 189 return taskSummaryi.CumulativeSize < taskSummaryj.CumulativeSize 190 } 191 } 192 193 // Validate ensures the receiver PlannerOptions specify valid values 194 // for each of the knobs. 195 func (o PlannerOptions) Validate() error { 196 if o.MutableCompactionAgeThreshold < 0 { 197 return errMutableCompactionAgeNegative 198 } 199 if len(o.Levels) == 0 { 200 return errLevelsUndefined 201 } 202 sort.Sort(ByMinSize(o.Levels)) 203 for i := 0; i < len(o.Levels); i++ { 204 current := o.Levels[i] 205 if current.MaxSizeExclusive <= current.MinSizeInclusive { 206 return fmt.Errorf("illegal size levels definition, MaxSize <= MinSize (%+v)", current) 207 } 208 } 209 return nil 210 } 211 212 // ByMinSize orders a []Level by MinSize in ascending order. 213 type ByMinSize []Level 214 215 func (a ByMinSize) Len() int { return len(a) } 216 func (a ByMinSize) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 217 func (a ByMinSize) Less(i, j int) bool { return a[i].MinSizeInclusive < a[j].MinSizeInclusive } 218 219 // Summary returns the TaskSummary for the given task. 220 func (t Task) Summary() TaskSummary { 221 ts := TaskSummary{} 222 for _, s := range t.Segments { 223 ts.CumulativeSize += s.Size 224 if s.Type == segments.MutableType { 225 ts.NumMutable++ 226 ts.CumulativeMutableAge += s.Age 227 } else if s.Type == segments.FSTType { 228 ts.NumFST++ 229 } 230 } 231 return ts 232 }