github.com/flower-corp/rosedb@v1.1.2-0.20230117132829-21dc4f7b319a/discard.go (about) 1 package rosedb 2 3 import ( 4 "encoding/binary" 5 "errors" 6 "io" 7 "path/filepath" 8 "sort" 9 "sync" 10 11 "github.com/flower-corp/rosedb/ioselector" 12 "github.com/flower-corp/rosedb/logfile" 13 "github.com/flower-corp/rosedb/logger" 14 ) 15 16 const ( 17 discardRecordSize = 12 18 // 8kb, contains mostly 682 records in file. 19 discardFileSize int64 = 2 << 12 20 discardFileName = "discard" 21 ) 22 23 // ErrDiscardNoSpace no enough space for discard file. 24 var ErrDiscardNoSpace = errors.New("not enough space can be allocated for the discard file") 25 26 // Discard is used to record total size and discarded size in a log file. 27 // Mainly for log files compaction. 28 type discard struct { 29 sync.Mutex 30 once *sync.Once 31 valChan chan *indexNode 32 file ioselector.IOSelector 33 freeList []int64 // contains file offset that can be allocated 34 location map[uint32]int64 // offset of each fid 35 } 36 37 func newDiscard(path, name string, bufferSize int) (*discard, error) { 38 fname := filepath.Join(path, name) 39 file, err := ioselector.NewMMapSelector(fname, discardFileSize) 40 if err != nil { 41 return nil, err 42 } 43 44 var freeList []int64 45 var offset int64 46 location := make(map[uint32]int64) 47 for { 48 // read fid and total is enough. 49 buf := make([]byte, 8) 50 if _, err := file.Read(buf, offset); err != nil { 51 if err == io.EOF || err == logfile.ErrEndOfEntry { 52 break 53 } 54 return nil, err 55 } 56 fid := binary.LittleEndian.Uint32(buf[:4]) 57 total := binary.LittleEndian.Uint32(buf[4:8]) 58 if fid == 0 && total == 0 { 59 freeList = append(freeList, offset) 60 } else { 61 location[fid] = offset 62 } 63 offset += discardRecordSize 64 } 65 66 d := &discard{ 67 valChan: make(chan *indexNode, bufferSize), 68 once: new(sync.Once), 69 file: file, 70 freeList: freeList, 71 location: location, 72 } 73 go d.listenUpdates() 74 return d, nil 75 } 76 77 func (d *discard) sync() error { 78 return d.file.Sync() 79 } 80 81 func (d *discard) close() error { 82 return d.file.Close() 83 } 84 85 // CCL means compaction cnadidate list. 86 // iterate and find the file with most discarded data, 87 // there are 682 records at most, no need to worry about the performance. 88 func (d *discard) getCCL(activeFid uint32, ratio float64) ([]uint32, error) { 89 var offset int64 90 var ccl []uint32 91 d.Lock() 92 defer d.Unlock() 93 for { 94 buf := make([]byte, discardRecordSize) 95 _, err := d.file.Read(buf, offset) 96 if err != nil { 97 if err == io.EOF || err == logfile.ErrEndOfEntry { 98 break 99 } 100 return nil, err 101 } 102 offset += discardRecordSize 103 104 fid := binary.LittleEndian.Uint32(buf[:4]) 105 total := binary.LittleEndian.Uint32(buf[4:8]) 106 discard := binary.LittleEndian.Uint32(buf[8:12]) 107 var curRatio float64 108 if total != 0 && discard != 0 { 109 curRatio = float64(discard) / float64(total) 110 } 111 if curRatio >= ratio && fid != activeFid { 112 ccl = append(ccl, fid) 113 } 114 } 115 116 // sort in ascending order, guarantee the older file will compact firstly. 117 sort.Slice(ccl, func(i, j int) bool { 118 return ccl[i] < ccl[j] 119 }) 120 return ccl, nil 121 } 122 123 func (d *discard) listenUpdates() { 124 for { 125 select { 126 case idxNode, ok := <-d.valChan: 127 if !ok { 128 if err := d.file.Close(); err != nil { 129 logger.Errorf("close discard file err: %v", err) 130 } 131 return 132 } 133 d.incrDiscard(idxNode.fid, idxNode.entrySize) 134 } 135 } 136 } 137 138 func (d *discard) closeChan() { 139 d.once.Do(func() { close(d.valChan) }) 140 } 141 142 func (d *discard) setTotal(fid uint32, totalSize uint32) { 143 d.Lock() 144 defer d.Unlock() 145 146 if _, ok := d.location[fid]; ok { 147 return 148 } 149 offset, err := d.alloc(fid) 150 if err != nil { 151 logger.Errorf("discard file allocate err: %+v", err) 152 return 153 } 154 155 buf := make([]byte, 8) 156 binary.LittleEndian.PutUint32(buf[:4], fid) 157 binary.LittleEndian.PutUint32(buf[4:8], totalSize) 158 if _, err = d.file.Write(buf, offset); err != nil { 159 logger.Errorf("incr value in discard err: %v", err) 160 return 161 } 162 } 163 164 func (d *discard) clear(fid uint32) { 165 d.incr(fid, -1) 166 d.Lock() 167 if offset, ok := d.location[fid]; ok { 168 d.freeList = append(d.freeList, offset) 169 delete(d.location, fid) 170 } 171 d.Unlock() 172 } 173 174 func (d *discard) incrDiscard(fid uint32, delta int) { 175 if delta > 0 { 176 d.incr(fid, delta) 177 } 178 } 179 180 // format of discard file` record: 181 // +-------+--------------+----------------+ +-------+--------------+----------------+ 182 // | fid | total size | discarded size | | fid | total size | discarded size | 183 // +-------+--------------+----------------+ +-------+--------------+----------------+ 184 // 0-------4--------------8---------------12 12------16------------20----------------24 185 func (d *discard) incr(fid uint32, delta int) { 186 d.Lock() 187 defer d.Unlock() 188 189 offset, err := d.alloc(fid) 190 if err != nil { 191 logger.Errorf("discard file allocate err: %+v", err) 192 return 193 } 194 195 var buf []byte 196 if delta > 0 { 197 buf = make([]byte, 4) 198 offset += 8 199 if _, err := d.file.Read(buf, offset); err != nil { 200 logger.Errorf("incr value in discard err:%v", err) 201 return 202 } 203 204 v := binary.LittleEndian.Uint32(buf) 205 binary.LittleEndian.PutUint32(buf, v+uint32(delta)) 206 } else { 207 buf = make([]byte, discardRecordSize) 208 } 209 210 if _, err := d.file.Write(buf, offset); err != nil { 211 logger.Errorf("incr value in discard err:%v", err) 212 return 213 } 214 } 215 216 // must hold the lock before invoking 217 func (d *discard) alloc(fid uint32) (int64, error) { 218 if offset, ok := d.location[fid]; ok { 219 return offset, nil 220 } 221 if len(d.freeList) == 0 { 222 return 0, ErrDiscardNoSpace 223 } 224 225 offset := d.freeList[len(d.freeList)-1] 226 d.freeList = d.freeList[:len(d.freeList)-1] 227 d.location[fid] = offset 228 return offset, nil 229 }