vitess.io/vitess@v0.16.2/go/vt/zkctl/zkctl.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 /* 18 Commands for controlling an external zookeeper process. 19 */ 20 21 package zkctl 22 23 import ( 24 "bytes" 25 "fmt" 26 "net" 27 "os" 28 "os/exec" 29 "path" 30 "strconv" 31 "syscall" 32 "time" 33 34 zookeeper "github.com/z-division/go-zookeeper/zk" 35 36 "vitess.io/vitess/go/vt/env" 37 "vitess.io/vitess/go/vt/log" 38 ) 39 40 const ( 41 // startWaitTime is how long to wait at Start. 42 startWaitTime = 30 * time.Second 43 // shutdownWaitTime is how long to wait at Shutdown. 44 shutdownWaitTime = 20 * time.Second 45 ) 46 47 // Zkd manages the running of ZooKeeper servers. 48 type Zkd struct { 49 config *ZkConfig 50 done chan struct{} 51 } 52 53 // NewZkd creates a Zkd. 54 func NewZkd(config *ZkConfig) *Zkd { 55 return &Zkd{config: config} 56 } 57 58 // Done returns a channel that is closed when the underlying process started 59 // by this Zkd has terminated. If the process was started by someone else, this 60 // channel will never be closed. 61 func (zkd *Zkd) Done() <-chan struct{} { 62 return zkd.done 63 } 64 65 /* 66 ZOO_LOG_DIR="" 67 ZOO_CFG="/.../zoo.cfg" 68 ZOOMAIN="org.apache.zookeeper.server.quorum.QuorumPeerMain" 69 java -DZOO_LOG_DIR=${ZOO_LOG_DIR} -cp $CLASSPATH $ZOOMAIN $YT_ZK_CFG 70 */ 71 72 // Start runs an already initialized ZooKeeper server. 73 func (zkd *Zkd) Start() error { 74 log.Infof("zkctl.Start") 75 // NOTE(msolomon) use a script here so we can detach and continue to run 76 // if the wrangler process dies. this pretty much the same as mysqld_safe. 77 args := []string{ 78 zkd.config.LogDir(), 79 zkd.config.ConfigFile(), 80 zkd.config.PidFile(), 81 } 82 root, err := env.VtRoot() 83 if err != nil { 84 return err 85 } 86 dir := path.Join(root, "bin") 87 cmd := exec.Command(path.Join(dir, "zksrv.sh"), args...) 88 cmd.Env = os.Environ() 89 cmd.Dir = dir 90 91 if err = cmd.Start(); err != nil { 92 return err 93 } 94 95 // give it some time to succeed - usually by the time the socket emerges 96 // we are in good shape, but not always. So let's continue to retry until 97 // we get an imok response from the socket or we timeout. 98 timeout := time.Now().Add(startWaitTime) 99 zkAddr := fmt.Sprintf(":%v", zkd.config.ClientPort) 100 for time.Now().Before(timeout) { 101 conn, connErr := net.Dial("tcp", zkAddr) 102 if connErr != nil { 103 err = connErr 104 } else { 105 conn.Write([]byte("ruok")) 106 reply := make([]byte, 4) 107 conn.Read(reply) 108 conn.Close() 109 if string(reply) == "imok" { 110 err = nil 111 break 112 } 113 err = fmt.Errorf("local zk unhealthy: %v %v", zkAddr, reply) 114 } 115 time.Sleep(time.Second) 116 } 117 if err != nil { 118 return err 119 } 120 zkd.done = make(chan struct{}) 121 go func(done chan<- struct{}) { 122 // wait so we don't get a bunch of defunct processes 123 cmd.Wait() 124 close(done) 125 }(zkd.done) 126 return err 127 } 128 129 // Shutdown kills a ZooKeeper server, but keeps its data dir intact. 130 func (zkd *Zkd) Shutdown() error { 131 log.Infof("zkctl.Shutdown") 132 pidData, err := os.ReadFile(zkd.config.PidFile()) 133 if err != nil { 134 return err 135 } 136 pid, err := strconv.Atoi(string(bytes.TrimSpace(pidData))) 137 if err != nil { 138 return err 139 } 140 err = syscall.Kill(pid, syscall.SIGKILL) 141 if err != nil && err != syscall.ESRCH { 142 return err 143 } 144 timeout := time.Now().Add(shutdownWaitTime) 145 for time.Now().Before(timeout) { 146 if syscall.Kill(pid, syscall.SIGKILL) == syscall.ESRCH { 147 return nil 148 } 149 time.Sleep(time.Second) 150 } 151 return fmt.Errorf("Shutdown didn't kill process %v", pid) 152 } 153 154 func (zkd *Zkd) makeCfg() (string, error) { 155 root, err := env.VtRoot() 156 if err != nil { 157 return "", err 158 } 159 cnfTemplatePaths := []string{path.Join(root, "config/zkcfg/zoo.cfg")} 160 return MakeZooCfg(cnfTemplatePaths, zkd.config, "# generated by vt") 161 } 162 163 // Init generates a new config and then starts ZooKeeper. 164 func (zkd *Zkd) Init() error { 165 if zkd.Inited() { 166 return fmt.Errorf("zk already inited") 167 } 168 169 log.Infof("zkd.Init") 170 for _, path := range zkd.config.DirectoryList() { 171 if err := os.MkdirAll(path, 0775); err != nil { 172 log.Errorf("%v", err) 173 return err 174 } 175 // FIXME(msolomon) validate permissions? 176 } 177 178 configData, err := zkd.makeCfg() 179 if err == nil { 180 err = os.WriteFile(zkd.config.ConfigFile(), []byte(configData), 0664) 181 } 182 if err != nil { 183 log.Errorf("failed creating %v: %v", zkd.config.ConfigFile(), err) 184 return err 185 } 186 187 err = zkd.config.WriteMyid() 188 if err != nil { 189 log.Errorf("failed creating %v: %v", zkd.config.MyidFile(), err) 190 return err 191 } 192 193 if err = zkd.Start(); err != nil { 194 log.Errorf("failed starting, check %v", zkd.config.LogDir()) 195 return err 196 } 197 198 var ( 199 zk *zookeeper.Conn 200 session <-chan zookeeper.Event 201 zkAddr = fmt.Sprintf("localhost:%v", zkd.config.ClientPort) 202 ) 203 204 // Let's retry to deal with ephemeral network issues or CI slowness. 205 timeout := time.Now().Add(startWaitTime) 206 for time.Now().Before(timeout) { 207 zk, session, err = zookeeper.Connect([]string{zkAddr}, startWaitTime) 208 if err == nil { 209 break 210 } 211 time.Sleep(1 * time.Second) 212 } 213 if err != nil { 214 return err 215 } 216 event := <-session 217 if event.State != zookeeper.StateConnecting { 218 return event.Err 219 } 220 event = <-session 221 if event.State != zookeeper.StateConnected { 222 return event.Err 223 } 224 defer zk.Close() 225 226 return nil 227 } 228 229 // Teardown shuts down the server and removes its data dir. 230 func (zkd *Zkd) Teardown() error { 231 log.Infof("zkctl.Teardown") 232 if err := zkd.Shutdown(); err != nil { 233 log.Warningf("failed zookeeper shutdown: %v", err.Error()) 234 } 235 var removalErr error 236 for _, dir := range zkd.config.DirectoryList() { 237 log.V(6).Infof("remove data dir %v", dir) 238 if err := os.RemoveAll(dir); err != nil { 239 log.Errorf("failed removing %v: %v", dir, err.Error()) 240 removalErr = err 241 } 242 } 243 return removalErr 244 } 245 246 // Inited returns true if the server config has been initialized. 247 func (zkd *Zkd) Inited() bool { 248 myidFile := zkd.config.MyidFile() 249 _, statErr := os.Stat(myidFile) 250 if statErr == nil { 251 return true 252 } else if statErr.(*os.PathError).Err != syscall.ENOENT { 253 panic("can't access file " + myidFile + ": " + statErr.Error()) 254 } 255 return false 256 }