go.etcd.io/etcd@v3.3.27+incompatible/functional/tester/case_failpoints.go (about)

     1  // Copyright 2018 The etcd Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tester
    16  
    17  import (
    18  	"fmt"
    19  	"io/ioutil"
    20  	"net/http"
    21  	"strings"
    22  	"sync"
    23  
    24  	"github.com/coreos/etcd/functional/rpcpb"
    25  )
    26  
    27  type failpointStats struct {
    28  	mu sync.Mutex
    29  	// crashes counts the number of crashes for a failpoint
    30  	crashes map[string]int
    31  }
    32  
    33  var fpStats failpointStats
    34  
    35  func failpointFailures(clus *Cluster) (ret []Case, err error) {
    36  	var fps []string
    37  	fps, err = failpointPaths(clus.Members[0].FailpointHTTPAddr)
    38  	if err != nil {
    39  		return nil, err
    40  	}
    41  	// create failure objects for all failpoints
    42  	for _, fp := range fps {
    43  		if len(fp) == 0 {
    44  			continue
    45  		}
    46  
    47  		fpFails := casesFromFailpoint(fp, clus.Tester.FailpointCommands)
    48  
    49  		// wrap in delays so failpoint has time to trigger
    50  		for i, fpf := range fpFails {
    51  			if strings.Contains(fp, "Snap") {
    52  				// hack to trigger snapshot failpoints
    53  				fpFails[i] = &caseUntilSnapshot{
    54  					desc:      fpf.Desc(),
    55  					rpcpbCase: rpcpb.Case_FAILPOINTS,
    56  					Case:      fpf,
    57  				}
    58  			} else {
    59  				fpFails[i] = &caseDelay{
    60  					Case:          fpf,
    61  					delayDuration: clus.GetCaseDelayDuration(),
    62  				}
    63  			}
    64  		}
    65  		ret = append(ret, fpFails...)
    66  	}
    67  	fpStats.crashes = make(map[string]int)
    68  	return ret, err
    69  }
    70  
    71  func failpointPaths(endpoint string) ([]string, error) {
    72  	resp, err := http.Get(endpoint)
    73  	if err != nil {
    74  		return nil, err
    75  	}
    76  	defer resp.Body.Close()
    77  	body, rerr := ioutil.ReadAll(resp.Body)
    78  	if rerr != nil {
    79  		return nil, rerr
    80  	}
    81  	var fps []string
    82  	for _, l := range strings.Split(string(body), "\n") {
    83  		fp := strings.Split(l, "=")[0]
    84  		fps = append(fps, fp)
    85  	}
    86  	return fps, nil
    87  }
    88  
    89  // failpoints follows FreeBSD FAIL_POINT syntax.
    90  // e.g. panic("etcd-tester"),1*sleep(1000)->panic("etcd-tester")
    91  func casesFromFailpoint(fp string, failpointCommands []string) (fs []Case) {
    92  	recov := makeRecoverFailpoint(fp)
    93  	for _, fcmd := range failpointCommands {
    94  		inject := makeInjectFailpoint(fp, fcmd)
    95  		fs = append(fs, []Case{
    96  			&caseFollower{
    97  				caseByFunc: caseByFunc{
    98  					desc:          fmt.Sprintf("failpoint %q (one: %q)", fp, fcmd),
    99  					rpcpbCase:     rpcpb.Case_FAILPOINTS,
   100  					injectMember:  inject,
   101  					recoverMember: recov,
   102  				},
   103  				last: -1,
   104  				lead: -1,
   105  			},
   106  			&caseLeader{
   107  				caseByFunc: caseByFunc{
   108  					desc:          fmt.Sprintf("failpoint %q (leader: %q)", fp, fcmd),
   109  					rpcpbCase:     rpcpb.Case_FAILPOINTS,
   110  					injectMember:  inject,
   111  					recoverMember: recov,
   112  				},
   113  				last: -1,
   114  				lead: -1,
   115  			},
   116  			&caseQuorum{
   117  				caseByFunc: caseByFunc{
   118  					desc:          fmt.Sprintf("failpoint %q (quorum: %q)", fp, fcmd),
   119  					rpcpbCase:     rpcpb.Case_FAILPOINTS,
   120  					injectMember:  inject,
   121  					recoverMember: recov,
   122  				},
   123  				injected: make(map[int]struct{}),
   124  			},
   125  			&caseAll{
   126  				desc:          fmt.Sprintf("failpoint %q (all: %q)", fp, fcmd),
   127  				rpcpbCase:     rpcpb.Case_FAILPOINTS,
   128  				injectMember:  inject,
   129  				recoverMember: recov,
   130  			},
   131  		}...)
   132  	}
   133  	return fs
   134  }
   135  
   136  func makeInjectFailpoint(fp, val string) injectMemberFunc {
   137  	return func(clus *Cluster, idx int) (err error) {
   138  		return putFailpoint(clus.Members[idx].FailpointHTTPAddr, fp, val)
   139  	}
   140  }
   141  
   142  func makeRecoverFailpoint(fp string) recoverMemberFunc {
   143  	return func(clus *Cluster, idx int) error {
   144  		if err := delFailpoint(clus.Members[idx].FailpointHTTPAddr, fp); err == nil {
   145  			return nil
   146  		}
   147  		// node not responding, likely dead from fp panic; restart
   148  		fpStats.mu.Lock()
   149  		fpStats.crashes[fp]++
   150  		fpStats.mu.Unlock()
   151  		return recover_SIGTERM_ETCD(clus, idx)
   152  	}
   153  }
   154  
   155  func putFailpoint(ep, fp, val string) error {
   156  	req, _ := http.NewRequest(http.MethodPut, ep+"/"+fp, strings.NewReader(val))
   157  	c := http.Client{}
   158  	resp, err := c.Do(req)
   159  	if err != nil {
   160  		return err
   161  	}
   162  	resp.Body.Close()
   163  	if resp.StatusCode/100 != 2 {
   164  		return fmt.Errorf("failed to PUT %s=%s at %s (%v)", fp, val, ep, resp.Status)
   165  	}
   166  	return nil
   167  }
   168  
   169  func delFailpoint(ep, fp string) error {
   170  	req, _ := http.NewRequest(http.MethodDelete, ep+"/"+fp, strings.NewReader(""))
   171  	c := http.Client{}
   172  	resp, err := c.Do(req)
   173  	if err != nil {
   174  		return err
   175  	}
   176  	resp.Body.Close()
   177  	if resp.StatusCode/100 != 2 {
   178  		return fmt.Errorf("failed to DELETE %s at %s (%v)", fp, ep, resp.Status)
   179  	}
   180  	return nil
   181  }