vitess.io/vitess@v0.16.2/go/test/endtoend/cluster/topo_process.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package cluster
    18  
    19  import (
    20  	"encoding/json"
    21  	"fmt"
    22  	"net/http"
    23  	"os"
    24  	"os/exec"
    25  	"path"
    26  	"strings"
    27  	"syscall"
    28  	"time"
    29  
    30  	"vitess.io/vitess/go/vt/log"
    31  )
    32  
    33  // TopoProcess is a generic handle for a running Topo service .
    34  // It can be spawned manually
    35  type TopoProcess struct {
    36  	Name               string
    37  	Binary             string
    38  	DataDirectory      string
    39  	LogDirectory       string
    40  	ListenClientURL    string
    41  	AdvertiseClientURL string
    42  	Port               int
    43  	Host               string
    44  	VerifyURL          string
    45  	PeerURL            string
    46  	ZKPorts            string
    47  
    48  	proc *exec.Cmd
    49  	exit chan error
    50  }
    51  
    52  // Setup starts a new topo service
    53  func (topo *TopoProcess) Setup(topoFlavor string, cluster *LocalProcessCluster) (err error) {
    54  	switch topoFlavor {
    55  	case "zk2":
    56  		return topo.SetupZookeeper(cluster)
    57  	case "consul":
    58  		return topo.SetupConsul(cluster)
    59  	default:
    60  		// We still rely on the etcd v2 API for things like mkdir.
    61  		// If this ENV var is not set then some tests may fail with etcd 3.4+
    62  		// where the v2 API is disabled by default in both the client and server.
    63  		os.Setenv("ETCDCTL_API", "2")
    64  		return topo.SetupEtcd()
    65  	}
    66  }
    67  
    68  // SetupEtcd spawns a new etcd service and initializes it with the defaults.
    69  // The service is kept running in the background until TearDown() is called.
    70  func (topo *TopoProcess) SetupEtcd() (err error) {
    71  	topo.proc = exec.Command(
    72  		topo.Binary,
    73  		"--name", topo.Name,
    74  		"--data-dir", topo.DataDirectory,
    75  		"--listen-client-urls", topo.ListenClientURL,
    76  		"--advertise-client-urls", topo.AdvertiseClientURL,
    77  		"--initial-advertise-peer-urls", topo.PeerURL,
    78  		"--listen-peer-urls", topo.PeerURL,
    79  		"--initial-cluster", fmt.Sprintf("%s=%s", topo.Name, topo.PeerURL),
    80  		"--enable-v2=true",
    81  	)
    82  
    83  	err = createDirectory(topo.DataDirectory, 0700)
    84  	if err != nil && !os.IsExist(err) {
    85  		return err
    86  	}
    87  	errFile, err := os.Create(path.Join(topo.DataDirectory, "topo-stderr.txt"))
    88  	if err != nil {
    89  		return err
    90  	}
    91  
    92  	topo.proc.Stderr = errFile
    93  
    94  	topo.proc.Env = append(topo.proc.Env, os.Environ()...)
    95  
    96  	log.Infof("Starting etcd with command: %v", strings.Join(topo.proc.Args, " "))
    97  
    98  	err = topo.proc.Start()
    99  	if err != nil {
   100  		return
   101  	}
   102  
   103  	topo.exit = make(chan error)
   104  	go func() {
   105  		topo.exit <- topo.proc.Wait()
   106  		close(topo.exit)
   107  	}()
   108  
   109  	timeout := time.Now().Add(60 * time.Second)
   110  	for time.Now().Before(timeout) {
   111  		if topo.IsHealthy() {
   112  			return
   113  		}
   114  		select {
   115  		case err := <-topo.exit:
   116  			return fmt.Errorf("process '%s' exited prematurely (err: %s)", topo.Binary, err)
   117  		default:
   118  			time.Sleep(300 * time.Millisecond)
   119  		}
   120  	}
   121  
   122  	return fmt.Errorf("process '%s' timed out after 60s (err: %s)", topo.Binary, <-topo.exit)
   123  }
   124  
   125  // SetupZookeeper spawns a new zookeeper topo service and initializes it with the defaults.
   126  // The service is kept running in the background until TearDown() is called.
   127  func (topo *TopoProcess) SetupZookeeper(cluster *LocalProcessCluster) (err error) {
   128  
   129  	host, err := os.Hostname()
   130  	if err != nil {
   131  		return
   132  	}
   133  
   134  	topo.ZKPorts = fmt.Sprintf("%d:%d:%d", cluster.GetAndReservePort(), cluster.GetAndReservePort(), topo.Port)
   135  
   136  	topo.proc = exec.Command(
   137  		topo.Binary,
   138  		"--log_dir", topo.LogDirectory,
   139  		"--zk.cfg", fmt.Sprintf("1@%v:%s", host, topo.ZKPorts),
   140  		"init",
   141  	)
   142  
   143  	errFile, _ := os.Create(path.Join(topo.DataDirectory, "topo-stderr.txt"))
   144  	topo.proc.Stderr = errFile
   145  	topo.proc.Env = append(topo.proc.Env, os.Environ()...)
   146  
   147  	log.Infof("Starting zookeeper with args %v", strings.Join(topo.proc.Args, " "))
   148  	err = topo.proc.Run()
   149  	if err != nil {
   150  		return
   151  	}
   152  	return
   153  }
   154  
   155  // ConsulConfigs are the configurations that are added the config files which are used by consul
   156  type ConsulConfigs struct {
   157  	Ports   PortsInfo `json:"ports"`
   158  	DataDir string    `json:"data_dir"`
   159  	LogFile string    `json:"log_file"`
   160  }
   161  
   162  // PortsInfo is the different ports used by consul
   163  type PortsInfo struct {
   164  	DNS     int `json:"dns"`
   165  	HTTP    int `json:"http"`
   166  	SerfLan int `json:"serf_lan"`
   167  	SerfWan int `json:"serf_wan"`
   168  	Server  int `json:"server"`
   169  }
   170  
   171  // SetupConsul spawns a new consul service and initializes it with the defaults.
   172  // The service is kept running in the background until TearDown() is called.
   173  func (topo *TopoProcess) SetupConsul(cluster *LocalProcessCluster) (err error) {
   174  
   175  	topo.VerifyURL = fmt.Sprintf("http://%s:%d/v1/kv/?keys", topo.Host, topo.Port)
   176  
   177  	_ = os.MkdirAll(topo.LogDirectory, os.ModePerm)
   178  	_ = os.MkdirAll(topo.DataDirectory, os.ModePerm)
   179  
   180  	configFile := path.Join(os.Getenv("VTDATAROOT"), "consul.json")
   181  
   182  	logFile := path.Join(topo.LogDirectory, "/consul.log")
   183  	_, _ = os.Create(logFile)
   184  
   185  	var config []byte
   186  	configs := ConsulConfigs{
   187  		Ports: PortsInfo{
   188  			DNS:     cluster.GetAndReservePort(),
   189  			HTTP:    topo.Port,
   190  			SerfLan: cluster.GetAndReservePort(),
   191  			SerfWan: cluster.GetAndReservePort(),
   192  			Server:  cluster.GetAndReservePort(),
   193  		},
   194  		DataDir: topo.DataDirectory,
   195  		LogFile: logFile,
   196  	}
   197  	config, err = json.Marshal(configs)
   198  	if err != nil {
   199  		log.Error(err.Error())
   200  		return
   201  	}
   202  
   203  	err = os.WriteFile(configFile, config, 0666)
   204  	if err != nil {
   205  		return
   206  	}
   207  
   208  	topo.proc = exec.Command(
   209  		topo.Binary, "agent",
   210  		"-server",
   211  		"-ui",
   212  		"-bootstrap-expect", "1",
   213  		"-bind", "127.0.0.1",
   214  		"-config-file", configFile,
   215  	)
   216  
   217  	errFile, _ := os.Create(path.Join(topo.DataDirectory, "topo-stderr.txt"))
   218  	topo.proc.Stderr = errFile
   219  
   220  	topo.proc.Env = append(topo.proc.Env, os.Environ()...)
   221  
   222  	log.Errorf("Starting consul with args %v", strings.Join(topo.proc.Args, " "))
   223  	err = topo.proc.Start()
   224  	if err != nil {
   225  		return
   226  	}
   227  
   228  	topo.exit = make(chan error)
   229  	go func() {
   230  		topo.exit <- topo.proc.Wait()
   231  		close(topo.exit)
   232  	}()
   233  
   234  	timeout := time.Now().Add(60 * time.Second)
   235  	for time.Now().Before(timeout) {
   236  		if topo.IsHealthy() {
   237  			return
   238  		}
   239  		select {
   240  		case err := <-topo.exit:
   241  			return fmt.Errorf("process '%s' exited prematurely (err: %s)", topo.Binary, err)
   242  		default:
   243  			time.Sleep(300 * time.Millisecond)
   244  		}
   245  	}
   246  
   247  	return fmt.Errorf("process '%s' timed out after 60s (err: %s)", topo.Binary, <-topo.exit)
   248  }
   249  
   250  // TearDown shutdowns the running topo service
   251  func (topo *TopoProcess) TearDown(Cell string, originalVtRoot string, currentRoot string, keepdata bool, topoFlavor string) error {
   252  
   253  	if topoFlavor == "zk2" {
   254  		cmd := "shutdown"
   255  		if keepdata {
   256  			cmd = "teardown"
   257  		}
   258  		topo.proc = exec.Command(
   259  			topo.Binary,
   260  			"--log_dir", topo.LogDirectory,
   261  			"--zk.cfg", fmt.Sprintf("1@%v:%s", topo.Host, topo.ZKPorts),
   262  			cmd,
   263  		)
   264  
   265  		err := topo.proc.Run()
   266  		if err != nil {
   267  			return err
   268  		}
   269  	} else {
   270  		if topo.proc == nil || topo.exit == nil {
   271  			return nil
   272  		}
   273  
   274  		if !(*keepData || keepdata) {
   275  			topo.removeTopoDirectories(Cell)
   276  		}
   277  
   278  		// Attempt graceful shutdown with SIGTERM first
   279  		_ = topo.proc.Process.Signal(syscall.SIGTERM)
   280  
   281  		if !(*keepData || keepdata) {
   282  			_ = os.RemoveAll(topo.DataDirectory)
   283  			_ = os.RemoveAll(currentRoot)
   284  			_ = os.Setenv("VTDATAROOT", originalVtRoot)
   285  		}
   286  
   287  		select {
   288  		case <-topo.exit:
   289  			topo.proc = nil
   290  			return nil
   291  
   292  		case <-time.After(10 * time.Second):
   293  			topo.proc.Process.Kill()
   294  			err := <-topo.exit
   295  			topo.proc = nil
   296  			return err
   297  		}
   298  	}
   299  
   300  	return nil
   301  }
   302  
   303  // IsHealthy function checks if topo server is up and running
   304  func (topo *TopoProcess) IsHealthy() bool {
   305  	resp, err := http.Get(topo.VerifyURL)
   306  	if err != nil {
   307  		return false
   308  	}
   309  	defer resp.Body.Close()
   310  	return resp.StatusCode == 200
   311  }
   312  
   313  func (topo *TopoProcess) removeTopoDirectories(Cell string) {
   314  	if err := topo.ManageTopoDir("rmdir", "/vitess/global"); err != nil {
   315  		log.Errorf("Failed to remove global topo directory: %v", err)
   316  	}
   317  	if err := topo.ManageTopoDir("rmdir", "/vitess/"+Cell); err != nil {
   318  		log.Errorf("Failed to remove local topo directory: %v", err)
   319  	}
   320  }
   321  
   322  // ManageTopoDir creates global and zone in etcd2
   323  func (topo *TopoProcess) ManageTopoDir(command string, directory string) (err error) {
   324  	url := topo.VerifyURL + directory
   325  	payload := strings.NewReader(`{"dir":"true"}`)
   326  	if command == "mkdir" {
   327  		req, _ := http.NewRequest("PUT", url, payload)
   328  		req.Header.Add("content-type", "application/json")
   329  		resp, err := http.DefaultClient.Do(req)
   330  		if err == nil {
   331  			defer resp.Body.Close()
   332  		}
   333  		return err
   334  	} else if command == "rmdir" {
   335  		req, _ := http.NewRequest("DELETE", url+"?dir=true", payload)
   336  		resp, err := http.DefaultClient.Do(req)
   337  		if err == nil {
   338  			defer resp.Body.Close()
   339  		}
   340  		return err
   341  	} else {
   342  		return nil
   343  	}
   344  }
   345  
   346  // TopoProcessInstance returns a TopoProcess handle for a etcd sevice,
   347  // configured with the given Config.
   348  // The process must be manually started by calling setup()
   349  func TopoProcessInstance(port int, peerPort int, hostname string, flavor string, name string) *TopoProcess {
   350  	binary := "etcd"
   351  	if flavor == "zk2" {
   352  		binary = "zkctl"
   353  	}
   354  	if flavor == "consul" {
   355  		binary = "consul"
   356  	}
   357  
   358  	topo := &TopoProcess{
   359  		Name:   name,
   360  		Binary: binary,
   361  		Port:   port,
   362  		Host:   hostname,
   363  	}
   364  
   365  	topo.AdvertiseClientURL = fmt.Sprintf("http://%s:%d", topo.Host, topo.Port)
   366  	topo.ListenClientURL = fmt.Sprintf("http://%s:%d", topo.Host, topo.Port)
   367  	topo.DataDirectory = path.Join(os.Getenv("VTDATAROOT"), fmt.Sprintf("%s_%d", "topo", port))
   368  	topo.LogDirectory = path.Join(os.Getenv("VTDATAROOT"), fmt.Sprintf("%s_%d", "topo", port), "logs")
   369  	topo.VerifyURL = fmt.Sprintf("http://%s:%d/v2/keys", topo.Host, topo.Port)
   370  	topo.PeerURL = fmt.Sprintf("http://%s:%d", hostname, peerPort)
   371  	return topo
   372  }