github.com/matrixorigin/matrixone@v1.2.0/pkg/util/fault/fault.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
// Package fault provides a very simple fault injection tool.
    16  package fault
    17  
    18  import (
    19  	"context"
    20  	"math"
    21  	"math/rand"
    22  	"strconv"
    23  	"strings"
    24  	"sync"
    25  	"sync/atomic"
    26  	"time"
    27  
    28  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    29  )
    30  
// Commands carried in faultEntry.cmd and dispatched by faultMap.run.
const (
	STOP    = iota // make the run loop return
	LOOKUP         // fetch the fault point whose name is in sarg
	ADD            // register the entry under its name, unless the name is taken
	REMOVE         // delete the fault point with the given name
	TRIGGER        // count a hit on the named fault point and decide whether it fires
)
    38  
// Actions stored in faultEntry.action and executed by faultEntry.do.
const (
	RETURN     = iota // no-op; do returns (0, "")
	GETCOUNT          // return the hit count of the fault point named by sarg
	SLEEP             // sleep for iarg seconds
	WAIT              // block on the entry's condition variable until notified
	GETWAITERS        // return nWaiters of the fault point named by sarg
	NOTIFY            // Signal the condition variable of the fault point named by sarg
	NOTIFYALL         // Broadcast on the condition variable of the fault point named by sarg
	PANIC             // panic with sarg as the panic value
	ECHO              // return (iarg, sarg) unchanged
)
    50  
// faultEntry describes how we shall fail: when a fault point fires and what
// it does when it fires. The same struct doubles as the request message sent
// to faultMap.run, in which case run reads cmd plus name (or sarg for LOOKUP).
type faultEntry struct {
	cmd              int     // command for faultMap.run (STOP, LOOKUP, ADD, REMOVE, TRIGGER)
	name             string  // name of the fault
	cnt              int     // count how many times we run into this
	start, end, skip int     // fires when start <= cnt <= end and (cnt-start) is a multiple of skip
	prob             float64 // probability of failure
	action           int     // what do() performs (RETURN, SLEEP, WAIT, ...)
	iarg             int64   // int arg
	sarg             string  // string arg

	nWaiters int        // number of callers currently blocked in a WAIT action
	mutex    sync.Mutex // guards nWaiters; also the Locker behind cond
	cond     *sync.Cond // allocated by AddFaultPoint only when action is WAIT
}
    66  
// faultMap holds the registered fault points together with the request and
// reply channels used to talk to the run goroutine, which is the only code
// in this file that touches faultPoints.
type faultMap struct {
	faultPoints map[string]*faultEntry // registered fault points, keyed by name
	chIn        chan *faultEntry       // requests sent to run
	chOut       chan *faultEntry       // replies from run; nil means "not found / did not fire / rejected"
}
    72  
// enabled stores the current *faultMap; IsEnabled reports true only when a
// non-nil pointer has been stored.
var enabled atomic.Value

// gfm is the package-wide fault map. startFaultMap assigns it and
// stopFaultMap resets it to nil.
var gfm *faultMap
    75  
    76  func (fm *faultMap) run() {
    77  	for {
    78  		e := <-fm.chIn
    79  		switch e.cmd {
    80  		case STOP:
    81  			return
    82  		case ADD:
    83  			if _, ok := fm.faultPoints[e.name]; ok {
    84  				fm.chOut <- nil
    85  			} else {
    86  				fm.faultPoints[e.name] = e
    87  				fm.chOut <- e
    88  			}
    89  		case REMOVE:
    90  			if v, ok := fm.faultPoints[e.name]; ok {
    91  				delete(fm.faultPoints, e.name)
    92  				fm.chOut <- v
    93  			} else {
    94  				fm.chOut <- nil
    95  			}
    96  		case TRIGGER:
    97  			var out *faultEntry
    98  			if v, ok := fm.faultPoints[e.name]; ok {
    99  				v.cnt += 1
   100  				if v.cnt >= v.start && v.cnt <= v.end && (v.cnt-v.start)%v.skip == 0 {
   101  					if v.prob == 1 || rand.Float64() < v.prob {
   102  						out = v
   103  					}
   104  				}
   105  			}
   106  			fm.chOut <- out
   107  		case LOOKUP:
   108  			fm.chOut <- fm.faultPoints[e.sarg]
   109  		default:
   110  			fm.chOut <- nil
   111  		}
   112  	}
   113  }
   114  
   115  func (e *faultEntry) do() (int64, string) {
   116  	switch e.action {
   117  	case RETURN: // no op
   118  	case SLEEP:
   119  		time.Sleep(time.Duration(e.iarg) * time.Second)
   120  	case GETCOUNT:
   121  		if ee := lookup(e.sarg); ee != nil {
   122  			return int64(ee.cnt), ""
   123  		}
   124  	case WAIT:
   125  		e.mutex.Lock()
   126  		e.nWaiters += 1
   127  		e.cond.Wait()
   128  		e.nWaiters -= 1
   129  		e.mutex.Unlock()
   130  	case GETWAITERS:
   131  		if ee := lookup(e.sarg); ee != nil {
   132  			ee.mutex.Lock()
   133  			nw := ee.nWaiters
   134  			ee.mutex.Unlock()
   135  			return int64(nw), ""
   136  		}
   137  	case NOTIFY:
   138  		if ee := lookup(e.sarg); ee != nil {
   139  			ee.cond.Signal()
   140  		}
   141  	case NOTIFYALL:
   142  		if ee := lookup(e.sarg); ee != nil {
   143  			ee.cond.Broadcast()
   144  		}
   145  	case PANIC:
   146  		panic(e.sarg)
   147  	case ECHO:
   148  		return e.iarg, e.sarg
   149  	}
   150  	return 0, ""
   151  }
   152  
   153  func startFaultMap() {
   154  	gfm = new(faultMap)
   155  	gfm.faultPoints = make(map[string]*faultEntry)
   156  	gfm.chIn = make(chan *faultEntry)
   157  	gfm.chOut = make(chan *faultEntry)
   158  	go gfm.run()
   159  }
   160  
   161  func stopFaultMap() {
   162  	var msg faultEntry
   163  	msg.cmd = STOP
   164  	gfm.chIn <- &msg
   165  	gfm = nil
   166  }
   167  
   168  // Enable fault injection
   169  func Enable() {
   170  	if !IsEnabled() {
   171  		startFaultMap()
   172  		enabled.Store(gfm)
   173  	}
   174  }
   175  
   176  // Disable fault injection
   177  func Disable() {
   178  	if IsEnabled() {
   179  		stopFaultMap()
   180  		enabled.Store(gfm)
   181  	}
   182  }
   183  
   184  func IsEnabled() bool {
   185  	ld := enabled.Load()
   186  	if ld == nil {
   187  		return false
   188  	}
   189  	return ld.(*faultMap) != nil
   190  }
   191  
   192  // Trigger a fault point.
   193  func TriggerFault(name string) (iret int64, sret string, exist bool) {
   194  	if !IsEnabled() {
   195  		return
   196  	}
   197  	var msg faultEntry
   198  	msg.cmd = TRIGGER
   199  	msg.name = name
   200  	gfm.chIn <- &msg
   201  	out := <-gfm.chOut
   202  
   203  	if out == nil {
   204  		return
   205  	}
   206  	exist = true
   207  	iret, sret = out.do()
   208  	return
   209  }
   210  
   211  func AddFaultPoint(ctx context.Context, name string, freq string, action string, iarg int64, sarg string) error {
   212  	if !IsEnabled() {
   213  		return moerr.NewInternalError(ctx, "add fault point not enabled")
   214  	}
   215  
   216  	var err error
   217  
   218  	// Build msg from input.
   219  	var msg faultEntry
   220  	msg.cmd = ADD
   221  	msg.name = name
   222  
   223  	// freq is start:end:skip:prob
   224  	sesp := strings.Split(freq, ":")
   225  	if len(sesp) != 4 {
   226  		return moerr.NewInvalidArg(ctx, "fault point freq", freq)
   227  	}
   228  
   229  	if sesp[0] == "" {
   230  		msg.start = 1
   231  	} else {
   232  		msg.start, err = strconv.Atoi(sesp[0])
   233  		if err != nil {
   234  			return moerr.NewInvalidArg(ctx, "fault point freq", freq)
   235  		}
   236  	}
   237  	if sesp[1] == "" {
   238  		msg.end = math.MaxInt
   239  	} else {
   240  		msg.end, err = strconv.Atoi(sesp[1])
   241  		if err != nil || msg.end < msg.start {
   242  			return moerr.NewInvalidArg(ctx, "fault point freq", freq)
   243  		}
   244  	}
   245  	if sesp[2] == "" {
   246  		msg.skip = 1
   247  	} else {
   248  		msg.skip, err = strconv.Atoi(sesp[2])
   249  		if err != nil || msg.skip <= 0 {
   250  			return moerr.NewInvalidArg(ctx, "fault point freq", freq)
   251  		}
   252  	}
   253  	if sesp[3] == "" {
   254  		msg.prob = 1.0
   255  	} else {
   256  		msg.prob, err = strconv.ParseFloat(sesp[3], 64)
   257  		if err != nil || msg.prob <= 0 || msg.prob >= 1 {
   258  			return moerr.NewInvalidArg(ctx, "fault point freq", freq)
   259  		}
   260  	}
   261  
   262  	// Action
   263  	switch strings.ToUpper(action) {
   264  	case "RETURN":
   265  		msg.action = RETURN
   266  	case "SLEEP":
   267  		msg.action = SLEEP
   268  	case "GETCOUNT":
   269  		msg.action = GETCOUNT
   270  	case "WAIT":
   271  		msg.action = WAIT
   272  	case "GETWAITERS":
   273  		msg.action = GETWAITERS
   274  	case "NOTIFY":
   275  		msg.action = NOTIFY
   276  	case "NOTIFYALL":
   277  		msg.action = NOTIFYALL
   278  	case "PANIC":
   279  		msg.action = PANIC
   280  	case "ECHO":
   281  		msg.action = ECHO
   282  	default:
   283  		return moerr.NewInvalidArg(ctx, "fault action", action)
   284  	}
   285  
   286  	msg.iarg = iarg
   287  	msg.sarg = sarg
   288  
   289  	if msg.action == WAIT {
   290  		msg.cond = sync.NewCond(&msg.mutex)
   291  	}
   292  
   293  	gfm.chIn <- &msg
   294  	out := <-gfm.chOut
   295  	if out == nil {
   296  		return moerr.NewInternalError(ctx, "add fault injection point failed.")
   297  	}
   298  	return nil
   299  }
   300  
   301  func RemoveFaultPoint(ctx context.Context, name string) error {
   302  	if !IsEnabled() {
   303  		return moerr.NewInternalError(ctx, "add fault injection point not enabled.")
   304  	}
   305  
   306  	var msg faultEntry
   307  	msg.cmd = REMOVE
   308  	msg.name = name
   309  	gfm.chIn <- &msg
   310  	out := <-gfm.chOut
   311  	if out == nil {
   312  		return moerr.NewInvalidInput(ctx, "invalid injection point %s", name)
   313  	}
   314  	return nil
   315  }
   316  
   317  func lookup(name string) *faultEntry {
   318  	if !IsEnabled() {
   319  		return nil
   320  	}
   321  
   322  	var msg faultEntry
   323  	msg.cmd = LOOKUP
   324  	msg.sarg = name
   325  	gfm.chIn <- &msg
   326  	out := <-gfm.chOut
   327  	return out
   328  }