github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/scripts/leadershipclaimer/leadershipclaimer.go (about) 1 // Copyright 2019 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package main 5 6 import ( 7 "fmt" 8 "math/rand" 9 "net" 10 "os" 11 "strings" 12 "sync" 13 "time" 14 15 "github.com/juju/errors" 16 "github.com/juju/gnuflag" 17 "github.com/juju/loggo" 18 "gopkg.in/juju/names.v2" 19 20 "github.com/juju/juju/api" 21 "github.com/juju/juju/api/base" 22 "github.com/juju/juju/api/leadership" 23 "github.com/juju/juju/apiserver/params" 24 coreleadership "github.com/juju/juju/core/leadership" 25 "github.com/juju/juju/core/lease" 26 ) 27 28 var unit = gnuflag.String("unit", "ubuntu-lite/0", "set the unit name that we will connect as") 29 var password = gnuflag.String("password", "", "the password for this agent") 30 var hosts = gnuflag.String("hosts", "localhost", "the hosts to connect to (comma separated)") 31 var port = gnuflag.Int("port", 17070, "the apiserver port") 32 var uuid = gnuflag.String("uuid", "", "model-uuid to connect to") 33 var claimtime = gnuflag.String("claimtime", "10s", "time that we will request to hold the lease") 34 var renewtime = gnuflag.String("renewtime", "", "how often we will renew the lease (default 1/2 the claim time)") 35 var quiet = gnuflag.Bool("quiet", false, "print only when the leases are claimed") 36 var initSleep = gnuflag.String("sleep", "1s", "time to sleep before starting processing") 37 38 var agentStart time.Time 39 40 func main() { 41 loggo.GetLogger("").SetLogLevel(loggo.INFO) 42 start := time.Now() 43 rand.Seed(int64(start.Nanosecond() + os.Getpid())) 44 gnuflag.Parse(true) 45 // make it a little bit easier to have all of the processes start closer to the same time. 46 // don't start doing any real work for the first second. 47 sleepDuration, err := time.ParseDuration(*initSleep) 48 if err != nil { 49 panic(err) 50 } 51 time.Sleep(sleepDuration) 52 claimDuration, err := time.ParseDuration(*claimtime) 53 if err != nil { 54 panic(err) 55 } 56 renewDuration := claimDuration / 2 57 if *renewtime != "" { 58 renewDuration, err = time.ParseDuration(*renewtime) 59 if err != nil { 60 panic(err) 61 } 62 } 63 if !names.IsValidUnit(*unit) { 64 panic(fmt.Sprintf("must supply a valid unit name, not: %q", *unit)) 65 } 66 modelTag := names.NewModelTag(*uuid) 67 unitTag := names.NewUnitTag(*unit) 68 if err != nil { 69 panic(err) 70 } 71 holders := gnuflag.Args() 72 if len(holders) == 0 { 73 holders = []string{*unit} 74 } 75 holderTags := make([]names.UnitTag, len(holders)) 76 for i := range holders { 77 holderTags[i] = names.NewUnitTag(holders[i]) 78 } 79 hostNames := strings.Split(*hosts, ",") 80 infos := make([]*api.Info, len(hostNames)) 81 for i := range hostNames { 82 info := &api.Info{ 83 Addrs: []string{net.JoinHostPort(hostNames[i], fmt.Sprint(*port))}, 84 ModelTag: modelTag, 85 Tag: unitTag, 86 Password: *password, 87 } 88 infos[i] = info 89 } 90 agentStart = time.Now() 91 var wg sync.WaitGroup 92 for htCount, holderTag := range holderTags { 93 hostCounter := holderTag.Number() % len(infos) 94 info := infos[hostCounter] 95 var conn api.Connection 96 for i := 0; i < 5; i++ { 97 var err error 98 start := time.Now() 99 conn, err = connect(info) 100 sinceStart := time.Since(agentStart).Round(time.Millisecond).Seconds() 101 fmt.Fprintf(os.Stdout, "%9.3fs connected [%6d] %4d %s in %s\n", 102 sinceStart, os.Getpid(), htCount, holderTag.Id(), time.Since(start).Round(time.Millisecond)) 103 delay := time.Second 104 if err == nil { 105 break 106 } else { 107 if strings.Contains(strings.ToLower(err.Error()), "try again") { 108 fmt.Fprintf(os.Stderr, "%d failed to connect to %v for %v (retrying): %v\n", htCount, info, holderTag.Id(), err) 109 if i < 4 { 110 time.Sleep(delay) 111 delay *= 2 112 } 113 continue 114 } 115 // fmt.Fprintf(os.Stderr, "failed to connect to %v for %v: %v\n", info, holderTag.Id(), err) 116 fmt.Fprintf(os.Stdout, "%d failed to connect to %v for %v: %v\n", htCount, info, holderTag.Id(), err) 117 } 118 } 119 if conn == nil { 120 continue 121 } 122 wg.Add(1) 123 go func(tag names.UnitTag, conn api.Connection) { 124 defer wg.Done() 125 defer conn.Close() 126 claimLoop(tag, leadership.NewClient(conn), claimDuration, renewDuration) 127 }(holderTag, conn) 128 } 129 wg.Wait() 130 } 131 132 func connect(info *api.Info) (api.Connection, error) { 133 opts := api.DefaultDialOpts() 134 opts.InsecureSkipVerify = true 135 conn, err := api.Open(info, opts) 136 if err != nil { 137 return nil, err 138 } 139 return conn, nil 140 } 141 142 func leaderSet(facadeCaller base.FacadeCaller, holderTag names.UnitTag, keys map[string]string) error { 143 appId, err := names.UnitApplication(holderTag.Id()) 144 if err != nil { 145 return errors.Trace(err) 146 } 147 applicationTag := names.NewApplicationTag(appId) 148 args := params.MergeLeadershipSettingsBulkParams{ 149 Params: []params.MergeLeadershipSettingsParam{{ 150 ApplicationTag: applicationTag.String(), 151 UnitTag: holderTag.String(), 152 Settings: keys, 153 }}, 154 } 155 var results params.ErrorResults 156 err = facadeCaller.FacadeCall("Merge", args, &results) 157 if err != nil { 158 return errors.Trace(err) 159 } 160 err = results.OneError() 161 if err != nil { 162 return errors.Trace(err) 163 } 164 return nil 165 } 166 167 func claimLoop(holderTag names.UnitTag, claimer coreleadership.Claimer, claimDuration, renewDuration time.Duration) { 168 next := time.After(0) 169 leaseName, err := names.UnitApplication(holderTag.Id()) 170 if err != nil { 171 panic(err) 172 } 173 isLeader := false 174 var isLeaderTime time.Time 175 for { 176 select { 177 case <-next: 178 start := time.Now() 179 err := claimer.ClaimLeadership(leaseName, holderTag.Id(), claimDuration) 180 now := time.Now() 181 sinceStart := now.Sub(agentStart).Round(time.Millisecond).Seconds() 182 reqDuration := now.Sub(start).Round(time.Millisecond) 183 if err == nil { 184 next = time.After(renewDuration) 185 if isLeader { 186 heldFor := now.Sub(isLeaderTime).Round(time.Second) 187 if *quiet { 188 fmt.Fprintf(os.Stdout, "%9.3fs extended %s held for %s in %s\n", 189 sinceStart, holderTag.Id(), heldFor, reqDuration) 190 } else { 191 fmt.Fprintf(os.Stdout, "%9.3fs extended leadership of %q for %q for %v in %s, held for %s, renewing after %v\n", 192 sinceStart, leaseName, holderTag.Id(), claimDuration, reqDuration, heldFor, renewDuration) 193 } 194 } else { 195 if *quiet { 196 fmt.Fprintf(os.Stdout, "%9.3fs claimed %s in %s\n", sinceStart, holderTag.Id(), reqDuration) 197 } else { 198 fmt.Fprintf(os.Stdout, "%9.3fs claimed leadership of %q for %q for %v in %s, renewing after %v\n", 199 sinceStart, leaseName, holderTag.Id(), claimDuration, reqDuration, renewDuration) 200 } 201 isLeaderTime = time.Now() 202 } 203 isLeader = true 204 } else { 205 if errors.Cause(err) == coreleadership.ErrClaimDenied { 206 now := time.Now() 207 sinceStart := now.Sub(agentStart).Round(time.Millisecond).Seconds() 208 if isLeader { 209 heldFor := now.Sub(isLeaderTime).Round(time.Second) 210 if *quiet { 211 fmt.Fprintf(os.Stdout, "%9.3fs lost %s after %s in %s\n", 212 sinceStart, holderTag.Id(), heldFor, reqDuration) 213 } else { 214 fmt.Fprintf(os.Stdout, "%9.3fs lost leadership of %q for %q after %s in %s, blocking until released\n", 215 sinceStart, leaseName, holderTag.Id(), heldFor, reqDuration) 216 } 217 } else { 218 if !*quiet { 219 fmt.Fprintf(os.Stdout, "%9.3fs claim of %q for %q denied in %s, blocking until released\n", 220 sinceStart, leaseName, holderTag.Id(), reqDuration) 221 } 222 } 223 isLeader = false 224 isLeaderTime = time.Time{} 225 // Note: the 'cancel' channel does nothing 226 start := now 227 err := claimer.BlockUntilLeadershipReleased(leaseName, nil) 228 now = time.Now() 229 sinceStart = now.Sub(agentStart).Round(time.Millisecond).Seconds() 230 reqDuration := now.Sub(start).Round(time.Millisecond) 231 if err != nil { 232 fmt.Fprintf(os.Stderr, "%9.3fs blocking for leadership of %q for %q failed in %s with %v\n", 233 sinceStart, leaseName, holderTag.Id(), reqDuration, err) 234 return 235 } 236 if !*quiet { 237 fmt.Fprintf(os.Stdout, "%9.3fs blocking of %q for %q returned after %s, attempting to claim\n", 238 sinceStart, leaseName, holderTag.Id(), reqDuration) 239 } 240 next = time.After(0) 241 } else if errors.Cause(err) == lease.ErrTimeout { 242 fmt.Fprintf(os.Stderr, "%9.3fs claim of %q for %q timed out in %s, retrying\n", 243 sinceStart, leaseName, holderTag.Id(), reqDuration) 244 fmt.Fprintf(os.Stdout, "%9.3fs claim of %q for %q timed out in %s, retrying\n", 245 sinceStart, leaseName, holderTag.Id(), reqDuration) 246 } else { 247 fmt.Fprintf(os.Stderr, "%9.3fs claim of %q for %q failed in %s: %v\n", 248 sinceStart, leaseName, holderTag.Id(), reqDuration, err) 249 return 250 } 251 } 252 } 253 } 254 }