github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/scripts/leadershipclaimer/leadershipclaimer.go (about)

     1  // Copyright 2019 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package main
     5  
     6  import (
     7  	"fmt"
     8  	"math/rand"
     9  	"net"
    10  	"os"
    11  	"strings"
    12  	"sync"
    13  	"time"
    14  
    15  	"github.com/juju/errors"
    16  	"github.com/juju/gnuflag"
    17  	"github.com/juju/loggo"
    18  	"gopkg.in/juju/names.v2"
    19  
    20  	"github.com/juju/juju/api"
    21  	"github.com/juju/juju/api/base"
    22  	"github.com/juju/juju/api/leadership"
    23  	"github.com/juju/juju/apiserver/params"
    24  	coreleadership "github.com/juju/juju/core/leadership"
    25  	"github.com/juju/juju/core/lease"
    26  )
    27  
    28  var unit = gnuflag.String("unit", "ubuntu-lite/0", "set the unit name that we will connect as")
    29  var password = gnuflag.String("password", "", "the password for this agent")
    30  var hosts = gnuflag.String("hosts", "localhost", "the hosts to connect to (comma separated)")
    31  var port = gnuflag.Int("port", 17070, "the apiserver port")
    32  var uuid = gnuflag.String("uuid", "", "model-uuid to connect to")
    33  var claimtime = gnuflag.String("claimtime", "10s", "time that we will request to hold the lease")
    34  var renewtime = gnuflag.String("renewtime", "", "how often we will renew the lease (default 1/2 the claim time)")
    35  var quiet = gnuflag.Bool("quiet", false, "print only when the leases are claimed")
    36  var initSleep = gnuflag.String("sleep", "1s", "time to sleep before starting processing")
    37  
    38  var agentStart time.Time
    39  
    40  func main() {
    41  	loggo.GetLogger("").SetLogLevel(loggo.INFO)
    42  	start := time.Now()
    43  	rand.Seed(int64(start.Nanosecond() + os.Getpid()))
    44  	gnuflag.Parse(true)
    45  	// make it a little bit easier to have all of the processes start closer to the same time.
    46  	// don't start doing any real work for the first second.
    47  	sleepDuration, err := time.ParseDuration(*initSleep)
    48  	if err != nil {
    49  		panic(err)
    50  	}
    51  	time.Sleep(sleepDuration)
    52  	claimDuration, err := time.ParseDuration(*claimtime)
    53  	if err != nil {
    54  		panic(err)
    55  	}
    56  	renewDuration := claimDuration / 2
    57  	if *renewtime != "" {
    58  		renewDuration, err = time.ParseDuration(*renewtime)
    59  		if err != nil {
    60  			panic(err)
    61  		}
    62  	}
    63  	if !names.IsValidUnit(*unit) {
    64  		panic(fmt.Sprintf("must supply a valid unit name, not: %q", *unit))
    65  	}
    66  	modelTag := names.NewModelTag(*uuid)
    67  	unitTag := names.NewUnitTag(*unit)
    68  	if err != nil {
    69  		panic(err)
    70  	}
    71  	holders := gnuflag.Args()
    72  	if len(holders) == 0 {
    73  		holders = []string{*unit}
    74  	}
    75  	holderTags := make([]names.UnitTag, len(holders))
    76  	for i := range holders {
    77  		holderTags[i] = names.NewUnitTag(holders[i])
    78  	}
    79  	hostNames := strings.Split(*hosts, ",")
    80  	infos := make([]*api.Info, len(hostNames))
    81  	for i := range hostNames {
    82  		info := &api.Info{
    83  			Addrs:    []string{net.JoinHostPort(hostNames[i], fmt.Sprint(*port))},
    84  			ModelTag: modelTag,
    85  			Tag:      unitTag,
    86  			Password: *password,
    87  		}
    88  		infos[i] = info
    89  	}
    90  	agentStart = time.Now()
    91  	var wg sync.WaitGroup
    92  	for htCount, holderTag := range holderTags {
    93  		hostCounter := holderTag.Number() % len(infos)
    94  		info := infos[hostCounter]
    95  		var conn api.Connection
    96  		for i := 0; i < 5; i++ {
    97  			var err error
    98  			start := time.Now()
    99  			conn, err = connect(info)
   100  			sinceStart := time.Since(agentStart).Round(time.Millisecond).Seconds()
   101  			fmt.Fprintf(os.Stdout, "%9.3fs connected [%6d] %4d %s in %s\n",
   102  				sinceStart, os.Getpid(), htCount, holderTag.Id(), time.Since(start).Round(time.Millisecond))
   103  			delay := time.Second
   104  			if err == nil {
   105  				break
   106  			} else {
   107  				if strings.Contains(strings.ToLower(err.Error()), "try again") {
   108  					fmt.Fprintf(os.Stderr, "%d failed to connect to %v for %v (retrying): %v\n", htCount, info, holderTag.Id(), err)
   109  					if i < 4 {
   110  						time.Sleep(delay)
   111  						delay *= 2
   112  					}
   113  					continue
   114  				}
   115  				// fmt.Fprintf(os.Stderr, "failed to connect to %v for %v: %v\n", info, holderTag.Id(), err)
   116  				fmt.Fprintf(os.Stdout, "%d failed to connect to %v for %v: %v\n", htCount, info, holderTag.Id(), err)
   117  			}
   118  		}
   119  		if conn == nil {
   120  			continue
   121  		}
   122  		wg.Add(1)
   123  		go func(tag names.UnitTag, conn api.Connection) {
   124  			defer wg.Done()
   125  			defer conn.Close()
   126  			claimLoop(tag, leadership.NewClient(conn), claimDuration, renewDuration)
   127  		}(holderTag, conn)
   128  	}
   129  	wg.Wait()
   130  }
   131  
   132  func connect(info *api.Info) (api.Connection, error) {
   133  	opts := api.DefaultDialOpts()
   134  	opts.InsecureSkipVerify = true
   135  	conn, err := api.Open(info, opts)
   136  	if err != nil {
   137  		return nil, err
   138  	}
   139  	return conn, nil
   140  }
   141  
   142  func leaderSet(facadeCaller base.FacadeCaller, holderTag names.UnitTag, keys map[string]string) error {
   143  	appId, err := names.UnitApplication(holderTag.Id())
   144  	if err != nil {
   145  		return errors.Trace(err)
   146  	}
   147  	applicationTag := names.NewApplicationTag(appId)
   148  	args := params.MergeLeadershipSettingsBulkParams{
   149  		Params: []params.MergeLeadershipSettingsParam{{
   150  			ApplicationTag: applicationTag.String(),
   151  			UnitTag:        holderTag.String(),
   152  			Settings:       keys,
   153  		}},
   154  	}
   155  	var results params.ErrorResults
   156  	err = facadeCaller.FacadeCall("Merge", args, &results)
   157  	if err != nil {
   158  		return errors.Trace(err)
   159  	}
   160  	err = results.OneError()
   161  	if err != nil {
   162  		return errors.Trace(err)
   163  	}
   164  	return nil
   165  }
   166  
   167  func claimLoop(holderTag names.UnitTag, claimer coreleadership.Claimer, claimDuration, renewDuration time.Duration) {
   168  	next := time.After(0)
   169  	leaseName, err := names.UnitApplication(holderTag.Id())
   170  	if err != nil {
   171  		panic(err)
   172  	}
   173  	isLeader := false
   174  	var isLeaderTime time.Time
   175  	for {
   176  		select {
   177  		case <-next:
   178  			start := time.Now()
   179  			err := claimer.ClaimLeadership(leaseName, holderTag.Id(), claimDuration)
   180  			now := time.Now()
   181  			sinceStart := now.Sub(agentStart).Round(time.Millisecond).Seconds()
   182  			reqDuration := now.Sub(start).Round(time.Millisecond)
   183  			if err == nil {
   184  				next = time.After(renewDuration)
   185  				if isLeader {
   186  					heldFor := now.Sub(isLeaderTime).Round(time.Second)
   187  					if *quiet {
   188  						fmt.Fprintf(os.Stdout, "%9.3fs extended %s held for %s in %s\n",
   189  							sinceStart, holderTag.Id(), heldFor, reqDuration)
   190  					} else {
   191  						fmt.Fprintf(os.Stdout, "%9.3fs extended leadership of %q for %q for %v in %s, held for %s, renewing after %v\n",
   192  							sinceStart, leaseName, holderTag.Id(), claimDuration, reqDuration, heldFor, renewDuration)
   193  					}
   194  				} else {
   195  					if *quiet {
   196  						fmt.Fprintf(os.Stdout, "%9.3fs claimed  %s in %s\n", sinceStart, holderTag.Id(), reqDuration)
   197  					} else {
   198  						fmt.Fprintf(os.Stdout, "%9.3fs claimed leadership of %q for %q for %v in %s, renewing after %v\n",
   199  							sinceStart, leaseName, holderTag.Id(), claimDuration, reqDuration, renewDuration)
   200  					}
   201  					isLeaderTime = time.Now()
   202  				}
   203  				isLeader = true
   204  			} else {
   205  				if errors.Cause(err) == coreleadership.ErrClaimDenied {
   206  					now := time.Now()
   207  					sinceStart := now.Sub(agentStart).Round(time.Millisecond).Seconds()
   208  					if isLeader {
   209  						heldFor := now.Sub(isLeaderTime).Round(time.Second)
   210  						if *quiet {
   211  							fmt.Fprintf(os.Stdout, "%9.3fs lost     %s after %s in %s\n",
   212  								sinceStart, holderTag.Id(), heldFor, reqDuration)
   213  						} else {
   214  							fmt.Fprintf(os.Stdout, "%9.3fs lost leadership of %q for %q after %s in %s, blocking until released\n",
   215  								sinceStart, leaseName, holderTag.Id(), heldFor, reqDuration)
   216  						}
   217  					} else {
   218  						if !*quiet {
   219  							fmt.Fprintf(os.Stdout, "%9.3fs claim of %q for %q denied in %s, blocking until released\n",
   220  								sinceStart, leaseName, holderTag.Id(), reqDuration)
   221  						}
   222  					}
   223  					isLeader = false
   224  					isLeaderTime = time.Time{}
   225  					// Note: the 'cancel' channel does nothing
   226  					start := now
   227  					err := claimer.BlockUntilLeadershipReleased(leaseName, nil)
   228  					now = time.Now()
   229  					sinceStart = now.Sub(agentStart).Round(time.Millisecond).Seconds()
   230  					reqDuration := now.Sub(start).Round(time.Millisecond)
   231  					if err != nil {
   232  						fmt.Fprintf(os.Stderr, "%9.3fs blocking for leadership of %q for %q failed in %s with %v\n",
   233  							sinceStart, leaseName, holderTag.Id(), reqDuration, err)
   234  						return
   235  					}
   236  					if !*quiet {
   237  						fmt.Fprintf(os.Stdout, "%9.3fs blocking of %q for %q returned after %s, attempting to claim\n",
   238  							sinceStart, leaseName, holderTag.Id(), reqDuration)
   239  					}
   240  					next = time.After(0)
   241  				} else if errors.Cause(err) == lease.ErrTimeout {
   242  					fmt.Fprintf(os.Stderr, "%9.3fs claim of %q for %q timed out in %s, retrying\n",
   243  						sinceStart, leaseName, holderTag.Id(), reqDuration)
   244  					fmt.Fprintf(os.Stdout, "%9.3fs claim of %q for %q timed out in %s, retrying\n",
   245  						sinceStart, leaseName, holderTag.Id(), reqDuration)
   246  				} else {
   247  					fmt.Fprintf(os.Stderr, "%9.3fs claim of %q for %q failed in %s: %v\n",
   248  						sinceStart, leaseName, holderTag.Id(), reqDuration, err)
   249  					return
   250  				}
   251  			}
   252  		}
   253  	}
   254  }