github.com/matrixorigin/matrixone@v1.2.0/pkg/util/fault/fault.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // A very simple fault injection tool. 16 package fault 17 18 import ( 19 "context" 20 "math" 21 "math/rand" 22 "strconv" 23 "strings" 24 "sync" 25 "sync/atomic" 26 "time" 27 28 "github.com/matrixorigin/matrixone/pkg/common/moerr" 29 ) 30 31 const ( 32 STOP = iota 33 LOOKUP 34 ADD 35 REMOVE 36 TRIGGER 37 ) 38 39 const ( 40 RETURN = iota 41 GETCOUNT 42 SLEEP 43 WAIT 44 GETWAITERS 45 NOTIFY 46 NOTIFYALL 47 PANIC 48 ECHO 49 ) 50 51 // faultEntry describes how we shall fail 52 type faultEntry struct { 53 cmd int // command 54 name string // name of the fault 55 cnt int // count how many times we run into this 56 start, end, skip int // start, end, skip 57 prob float64 // probability of failure 58 action int 59 iarg int64 // int arg 60 sarg string // string arg 61 62 nWaiters int 63 mutex sync.Mutex 64 cond *sync.Cond 65 } 66 67 type faultMap struct { 68 faultPoints map[string]*faultEntry 69 chIn chan *faultEntry 70 chOut chan *faultEntry 71 } 72 73 var enabled atomic.Value 74 var gfm *faultMap 75 76 func (fm *faultMap) run() { 77 for { 78 e := <-fm.chIn 79 switch e.cmd { 80 case STOP: 81 return 82 case ADD: 83 if _, ok := fm.faultPoints[e.name]; ok { 84 fm.chOut <- nil 85 } else { 86 fm.faultPoints[e.name] = e 87 fm.chOut <- e 88 } 89 case REMOVE: 90 if v, ok := fm.faultPoints[e.name]; ok { 91 delete(fm.faultPoints, e.name) 92 fm.chOut <- v 93 } else { 94 fm.chOut <- nil 95 } 96 case TRIGGER: 97 var out *faultEntry 98 if v, ok := fm.faultPoints[e.name]; ok { 99 v.cnt += 1 100 if v.cnt >= v.start && v.cnt <= v.end && (v.cnt-v.start)%v.skip == 0 { 101 if v.prob == 1 || rand.Float64() < v.prob { 102 out = v 103 } 104 } 105 } 106 fm.chOut <- out 107 case LOOKUP: 108 fm.chOut <- fm.faultPoints[e.sarg] 109 default: 110 fm.chOut <- nil 111 } 112 } 113 } 114 115 func (e *faultEntry) do() (int64, string) { 116 switch e.action { 117 case RETURN: // no op 118 case SLEEP: 119 time.Sleep(time.Duration(e.iarg) * time.Second) 120 case GETCOUNT: 121 if ee := lookup(e.sarg); ee != nil { 122 return int64(ee.cnt), "" 123 } 124 case WAIT: 125 e.mutex.Lock() 126 e.nWaiters += 1 127 e.cond.Wait() 128 e.nWaiters -= 1 129 e.mutex.Unlock() 130 case GETWAITERS: 131 if ee := lookup(e.sarg); ee != nil { 132 ee.mutex.Lock() 133 nw := ee.nWaiters 134 ee.mutex.Unlock() 135 return int64(nw), "" 136 } 137 case NOTIFY: 138 if ee := lookup(e.sarg); ee != nil { 139 ee.cond.Signal() 140 } 141 case NOTIFYALL: 142 if ee := lookup(e.sarg); ee != nil { 143 ee.cond.Broadcast() 144 } 145 case PANIC: 146 panic(e.sarg) 147 case ECHO: 148 return e.iarg, e.sarg 149 } 150 return 0, "" 151 } 152 153 func startFaultMap() { 154 gfm = new(faultMap) 155 gfm.faultPoints = make(map[string]*faultEntry) 156 gfm.chIn = make(chan *faultEntry) 157 gfm.chOut = make(chan *faultEntry) 158 go gfm.run() 159 } 160 161 func stopFaultMap() { 162 var msg faultEntry 163 msg.cmd = STOP 164 gfm.chIn <- &msg 165 gfm = nil 166 } 167 168 // Enable fault injection 169 func Enable() { 170 if !IsEnabled() { 171 startFaultMap() 172 enabled.Store(gfm) 173 } 174 } 175 176 // Disable fault injection 177 func Disable() { 178 if IsEnabled() { 179 stopFaultMap() 180 enabled.Store(gfm) 181 } 182 } 183 184 func IsEnabled() bool { 185 ld := enabled.Load() 186 if ld == nil { 187 return false 188 } 189 return ld.(*faultMap) != nil 190 } 191 192 // Trigger a fault point. 193 func TriggerFault(name string) (iret int64, sret string, exist bool) { 194 if !IsEnabled() { 195 return 196 } 197 var msg faultEntry 198 msg.cmd = TRIGGER 199 msg.name = name 200 gfm.chIn <- &msg 201 out := <-gfm.chOut 202 203 if out == nil { 204 return 205 } 206 exist = true 207 iret, sret = out.do() 208 return 209 } 210 211 func AddFaultPoint(ctx context.Context, name string, freq string, action string, iarg int64, sarg string) error { 212 if !IsEnabled() { 213 return moerr.NewInternalError(ctx, "add fault point not enabled") 214 } 215 216 var err error 217 218 // Build msg from input. 219 var msg faultEntry 220 msg.cmd = ADD 221 msg.name = name 222 223 // freq is start:end:skip:prob 224 sesp := strings.Split(freq, ":") 225 if len(sesp) != 4 { 226 return moerr.NewInvalidArg(ctx, "fault point freq", freq) 227 } 228 229 if sesp[0] == "" { 230 msg.start = 1 231 } else { 232 msg.start, err = strconv.Atoi(sesp[0]) 233 if err != nil { 234 return moerr.NewInvalidArg(ctx, "fault point freq", freq) 235 } 236 } 237 if sesp[1] == "" { 238 msg.end = math.MaxInt 239 } else { 240 msg.end, err = strconv.Atoi(sesp[1]) 241 if err != nil || msg.end < msg.start { 242 return moerr.NewInvalidArg(ctx, "fault point freq", freq) 243 } 244 } 245 if sesp[2] == "" { 246 msg.skip = 1 247 } else { 248 msg.skip, err = strconv.Atoi(sesp[2]) 249 if err != nil || msg.skip <= 0 { 250 return moerr.NewInvalidArg(ctx, "fault point freq", freq) 251 } 252 } 253 if sesp[3] == "" { 254 msg.prob = 1.0 255 } else { 256 msg.prob, err = strconv.ParseFloat(sesp[3], 64) 257 if err != nil || msg.prob <= 0 || msg.prob >= 1 { 258 return moerr.NewInvalidArg(ctx, "fault point freq", freq) 259 } 260 } 261 262 // Action 263 switch strings.ToUpper(action) { 264 case "RETURN": 265 msg.action = RETURN 266 case "SLEEP": 267 msg.action = SLEEP 268 case "GETCOUNT": 269 msg.action = GETCOUNT 270 case "WAIT": 271 msg.action = WAIT 272 case "GETWAITERS": 273 msg.action = GETWAITERS 274 case "NOTIFY": 275 msg.action = NOTIFY 276 case "NOTIFYALL": 277 msg.action = NOTIFYALL 278 case "PANIC": 279 msg.action = PANIC 280 case "ECHO": 281 msg.action = ECHO 282 default: 283 return moerr.NewInvalidArg(ctx, "fault action", action) 284 } 285 286 msg.iarg = iarg 287 msg.sarg = sarg 288 289 if msg.action == WAIT { 290 msg.cond = sync.NewCond(&msg.mutex) 291 } 292 293 gfm.chIn <- &msg 294 out := <-gfm.chOut 295 if out == nil { 296 return moerr.NewInternalError(ctx, "add fault injection point failed.") 297 } 298 return nil 299 } 300 301 func RemoveFaultPoint(ctx context.Context, name string) error { 302 if !IsEnabled() { 303 return moerr.NewInternalError(ctx, "add fault injection point not enabled.") 304 } 305 306 var msg faultEntry 307 msg.cmd = REMOVE 308 msg.name = name 309 gfm.chIn <- &msg 310 out := <-gfm.chOut 311 if out == nil { 312 return moerr.NewInvalidInput(ctx, "invalid injection point %s", name) 313 } 314 return nil 315 } 316 317 func lookup(name string) *faultEntry { 318 if !IsEnabled() { 319 return nil 320 } 321 322 var msg faultEntry 323 msg.cmd = LOOKUP 324 msg.sarg = name 325 gfm.chIn <- &msg 326 out := <-gfm.chOut 327 return out 328 }