vitess.io/vitess@v0.16.2/go/test/endtoend/cluster/topo_process.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package cluster 18 19 import ( 20 "encoding/json" 21 "fmt" 22 "net/http" 23 "os" 24 "os/exec" 25 "path" 26 "strings" 27 "syscall" 28 "time" 29 30 "vitess.io/vitess/go/vt/log" 31 ) 32 33 // TopoProcess is a generic handle for a running Topo service . 34 // It can be spawned manually 35 type TopoProcess struct { 36 Name string 37 Binary string 38 DataDirectory string 39 LogDirectory string 40 ListenClientURL string 41 AdvertiseClientURL string 42 Port int 43 Host string 44 VerifyURL string 45 PeerURL string 46 ZKPorts string 47 48 proc *exec.Cmd 49 exit chan error 50 } 51 52 // Setup starts a new topo service 53 func (topo *TopoProcess) Setup(topoFlavor string, cluster *LocalProcessCluster) (err error) { 54 switch topoFlavor { 55 case "zk2": 56 return topo.SetupZookeeper(cluster) 57 case "consul": 58 return topo.SetupConsul(cluster) 59 default: 60 // We still rely on the etcd v2 API for things like mkdir. 61 // If this ENV var is not set then some tests may fail with etcd 3.4+ 62 // where the v2 API is disabled by default in both the client and server. 63 os.Setenv("ETCDCTL_API", "2") 64 return topo.SetupEtcd() 65 } 66 } 67 68 // SetupEtcd spawns a new etcd service and initializes it with the defaults. 69 // The service is kept running in the background until TearDown() is called. 70 func (topo *TopoProcess) SetupEtcd() (err error) { 71 topo.proc = exec.Command( 72 topo.Binary, 73 "--name", topo.Name, 74 "--data-dir", topo.DataDirectory, 75 "--listen-client-urls", topo.ListenClientURL, 76 "--advertise-client-urls", topo.AdvertiseClientURL, 77 "--initial-advertise-peer-urls", topo.PeerURL, 78 "--listen-peer-urls", topo.PeerURL, 79 "--initial-cluster", fmt.Sprintf("%s=%s", topo.Name, topo.PeerURL), 80 "--enable-v2=true", 81 ) 82 83 err = createDirectory(topo.DataDirectory, 0700) 84 if err != nil && !os.IsExist(err) { 85 return err 86 } 87 errFile, err := os.Create(path.Join(topo.DataDirectory, "topo-stderr.txt")) 88 if err != nil { 89 return err 90 } 91 92 topo.proc.Stderr = errFile 93 94 topo.proc.Env = append(topo.proc.Env, os.Environ()...) 95 96 log.Infof("Starting etcd with command: %v", strings.Join(topo.proc.Args, " ")) 97 98 err = topo.proc.Start() 99 if err != nil { 100 return 101 } 102 103 topo.exit = make(chan error) 104 go func() { 105 topo.exit <- topo.proc.Wait() 106 close(topo.exit) 107 }() 108 109 timeout := time.Now().Add(60 * time.Second) 110 for time.Now().Before(timeout) { 111 if topo.IsHealthy() { 112 return 113 } 114 select { 115 case err := <-topo.exit: 116 return fmt.Errorf("process '%s' exited prematurely (err: %s)", topo.Binary, err) 117 default: 118 time.Sleep(300 * time.Millisecond) 119 } 120 } 121 122 return fmt.Errorf("process '%s' timed out after 60s (err: %s)", topo.Binary, <-topo.exit) 123 } 124 125 // SetupZookeeper spawns a new zookeeper topo service and initializes it with the defaults. 126 // The service is kept running in the background until TearDown() is called. 127 func (topo *TopoProcess) SetupZookeeper(cluster *LocalProcessCluster) (err error) { 128 129 host, err := os.Hostname() 130 if err != nil { 131 return 132 } 133 134 topo.ZKPorts = fmt.Sprintf("%d:%d:%d", cluster.GetAndReservePort(), cluster.GetAndReservePort(), topo.Port) 135 136 topo.proc = exec.Command( 137 topo.Binary, 138 "--log_dir", topo.LogDirectory, 139 "--zk.cfg", fmt.Sprintf("1@%v:%s", host, topo.ZKPorts), 140 "init", 141 ) 142 143 errFile, _ := os.Create(path.Join(topo.DataDirectory, "topo-stderr.txt")) 144 topo.proc.Stderr = errFile 145 topo.proc.Env = append(topo.proc.Env, os.Environ()...) 146 147 log.Infof("Starting zookeeper with args %v", strings.Join(topo.proc.Args, " ")) 148 err = topo.proc.Run() 149 if err != nil { 150 return 151 } 152 return 153 } 154 155 // ConsulConfigs are the configurations that are added the config files which are used by consul 156 type ConsulConfigs struct { 157 Ports PortsInfo `json:"ports"` 158 DataDir string `json:"data_dir"` 159 LogFile string `json:"log_file"` 160 } 161 162 // PortsInfo is the different ports used by consul 163 type PortsInfo struct { 164 DNS int `json:"dns"` 165 HTTP int `json:"http"` 166 SerfLan int `json:"serf_lan"` 167 SerfWan int `json:"serf_wan"` 168 Server int `json:"server"` 169 } 170 171 // SetupConsul spawns a new consul service and initializes it with the defaults. 172 // The service is kept running in the background until TearDown() is called. 173 func (topo *TopoProcess) SetupConsul(cluster *LocalProcessCluster) (err error) { 174 175 topo.VerifyURL = fmt.Sprintf("http://%s:%d/v1/kv/?keys", topo.Host, topo.Port) 176 177 _ = os.MkdirAll(topo.LogDirectory, os.ModePerm) 178 _ = os.MkdirAll(topo.DataDirectory, os.ModePerm) 179 180 configFile := path.Join(os.Getenv("VTDATAROOT"), "consul.json") 181 182 logFile := path.Join(topo.LogDirectory, "/consul.log") 183 _, _ = os.Create(logFile) 184 185 var config []byte 186 configs := ConsulConfigs{ 187 Ports: PortsInfo{ 188 DNS: cluster.GetAndReservePort(), 189 HTTP: topo.Port, 190 SerfLan: cluster.GetAndReservePort(), 191 SerfWan: cluster.GetAndReservePort(), 192 Server: cluster.GetAndReservePort(), 193 }, 194 DataDir: topo.DataDirectory, 195 LogFile: logFile, 196 } 197 config, err = json.Marshal(configs) 198 if err != nil { 199 log.Error(err.Error()) 200 return 201 } 202 203 err = os.WriteFile(configFile, config, 0666) 204 if err != nil { 205 return 206 } 207 208 topo.proc = exec.Command( 209 topo.Binary, "agent", 210 "-server", 211 "-ui", 212 "-bootstrap-expect", "1", 213 "-bind", "127.0.0.1", 214 "-config-file", configFile, 215 ) 216 217 errFile, _ := os.Create(path.Join(topo.DataDirectory, "topo-stderr.txt")) 218 topo.proc.Stderr = errFile 219 220 topo.proc.Env = append(topo.proc.Env, os.Environ()...) 221 222 log.Errorf("Starting consul with args %v", strings.Join(topo.proc.Args, " ")) 223 err = topo.proc.Start() 224 if err != nil { 225 return 226 } 227 228 topo.exit = make(chan error) 229 go func() { 230 topo.exit <- topo.proc.Wait() 231 close(topo.exit) 232 }() 233 234 timeout := time.Now().Add(60 * time.Second) 235 for time.Now().Before(timeout) { 236 if topo.IsHealthy() { 237 return 238 } 239 select { 240 case err := <-topo.exit: 241 return fmt.Errorf("process '%s' exited prematurely (err: %s)", topo.Binary, err) 242 default: 243 time.Sleep(300 * time.Millisecond) 244 } 245 } 246 247 return fmt.Errorf("process '%s' timed out after 60s (err: %s)", topo.Binary, <-topo.exit) 248 } 249 250 // TearDown shutdowns the running topo service 251 func (topo *TopoProcess) TearDown(Cell string, originalVtRoot string, currentRoot string, keepdata bool, topoFlavor string) error { 252 253 if topoFlavor == "zk2" { 254 cmd := "shutdown" 255 if keepdata { 256 cmd = "teardown" 257 } 258 topo.proc = exec.Command( 259 topo.Binary, 260 "--log_dir", topo.LogDirectory, 261 "--zk.cfg", fmt.Sprintf("1@%v:%s", topo.Host, topo.ZKPorts), 262 cmd, 263 ) 264 265 err := topo.proc.Run() 266 if err != nil { 267 return err 268 } 269 } else { 270 if topo.proc == nil || topo.exit == nil { 271 return nil 272 } 273 274 if !(*keepData || keepdata) { 275 topo.removeTopoDirectories(Cell) 276 } 277 278 // Attempt graceful shutdown with SIGTERM first 279 _ = topo.proc.Process.Signal(syscall.SIGTERM) 280 281 if !(*keepData || keepdata) { 282 _ = os.RemoveAll(topo.DataDirectory) 283 _ = os.RemoveAll(currentRoot) 284 _ = os.Setenv("VTDATAROOT", originalVtRoot) 285 } 286 287 select { 288 case <-topo.exit: 289 topo.proc = nil 290 return nil 291 292 case <-time.After(10 * time.Second): 293 topo.proc.Process.Kill() 294 err := <-topo.exit 295 topo.proc = nil 296 return err 297 } 298 } 299 300 return nil 301 } 302 303 // IsHealthy function checks if topo server is up and running 304 func (topo *TopoProcess) IsHealthy() bool { 305 resp, err := http.Get(topo.VerifyURL) 306 if err != nil { 307 return false 308 } 309 defer resp.Body.Close() 310 return resp.StatusCode == 200 311 } 312 313 func (topo *TopoProcess) removeTopoDirectories(Cell string) { 314 if err := topo.ManageTopoDir("rmdir", "/vitess/global"); err != nil { 315 log.Errorf("Failed to remove global topo directory: %v", err) 316 } 317 if err := topo.ManageTopoDir("rmdir", "/vitess/"+Cell); err != nil { 318 log.Errorf("Failed to remove local topo directory: %v", err) 319 } 320 } 321 322 // ManageTopoDir creates global and zone in etcd2 323 func (topo *TopoProcess) ManageTopoDir(command string, directory string) (err error) { 324 url := topo.VerifyURL + directory 325 payload := strings.NewReader(`{"dir":"true"}`) 326 if command == "mkdir" { 327 req, _ := http.NewRequest("PUT", url, payload) 328 req.Header.Add("content-type", "application/json") 329 resp, err := http.DefaultClient.Do(req) 330 if err == nil { 331 defer resp.Body.Close() 332 } 333 return err 334 } else if command == "rmdir" { 335 req, _ := http.NewRequest("DELETE", url+"?dir=true", payload) 336 resp, err := http.DefaultClient.Do(req) 337 if err == nil { 338 defer resp.Body.Close() 339 } 340 return err 341 } else { 342 return nil 343 } 344 } 345 346 // TopoProcessInstance returns a TopoProcess handle for a etcd sevice, 347 // configured with the given Config. 348 // The process must be manually started by calling setup() 349 func TopoProcessInstance(port int, peerPort int, hostname string, flavor string, name string) *TopoProcess { 350 binary := "etcd" 351 if flavor == "zk2" { 352 binary = "zkctl" 353 } 354 if flavor == "consul" { 355 binary = "consul" 356 } 357 358 topo := &TopoProcess{ 359 Name: name, 360 Binary: binary, 361 Port: port, 362 Host: hostname, 363 } 364 365 topo.AdvertiseClientURL = fmt.Sprintf("http://%s:%d", topo.Host, topo.Port) 366 topo.ListenClientURL = fmt.Sprintf("http://%s:%d", topo.Host, topo.Port) 367 topo.DataDirectory = path.Join(os.Getenv("VTDATAROOT"), fmt.Sprintf("%s_%d", "topo", port)) 368 topo.LogDirectory = path.Join(os.Getenv("VTDATAROOT"), fmt.Sprintf("%s_%d", "topo", port), "logs") 369 topo.VerifyURL = fmt.Sprintf("http://%s:%d/v2/keys", topo.Host, topo.Port) 370 topo.PeerURL = fmt.Sprintf("http://%s:%d", hostname, peerPort) 371 return topo 372 }