// Copyright 2016 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Command luci_machine_tokend runs on all machines via cron.
//
// It wakes up each ~10 min, checks whether it needs to refresh existing machine
// token, and refreshes it if necessary.
//
// It also dumps information about its run into a status file (as JSON), that
// can be picked up by sysmon and transformed into ts_mon metrics (most important
// one being "time since last successful token refresh").
package main

import (
	"context"
	"crypto/sha1"
	"encoding/hex"
	"flag"
	"fmt"
	"os"
	"sort"
	"strings"
	"time"

	"go.chromium.org/luci/common/clock"
	"go.chromium.org/luci/common/logging"
	"go.chromium.org/luci/common/logging/gologger"
	"go.chromium.org/luci/common/logging/memlogger"
	"go.chromium.org/luci/common/logging/teelogger"
	"go.chromium.org/luci/common/retry"
	"go.chromium.org/luci/common/system/signals"
	"go.chromium.org/luci/common/tsmon"
	"go.chromium.org/luci/common/tsmon/target"

	tokenserver "go.chromium.org/luci/tokenserver/api"
	"go.chromium.org/luci/tokenserver/api/minter/v1"

	"go.chromium.org/luci/tokenserver/client"
)

// Version identifies the major revision of the tokend code.
53 // 54 // It is put in the status file (and subsequently reported to monitoring). 55 const Version = "1.2" 56 57 // commandLine contains all command line flags. 58 // 59 // See registerFlags() for description of each individual flag. 60 type commandLine struct { 61 PrivateKeyPath string 62 CertificatePath string 63 Backend string 64 TokenFile string 65 StatusFile string 66 Timeout time.Duration 67 ForceRefresh bool 68 } 69 70 func defaults() commandLine { 71 return commandLine{ 72 Timeout: 60 * time.Second, 73 } 74 } 75 76 func (c *commandLine) registerFlags(f *flag.FlagSet) { 77 f.StringVar(&c.PrivateKeyPath, "pkey-pem", c.PrivateKeyPath, "path to a private key file") 78 f.StringVar(&c.CertificatePath, "cert-pem", c.CertificatePath, "path to a certificate file") 79 f.StringVar(&c.Backend, "backend", c.Backend, "hostname of the backend to use") 80 f.StringVar(&c.TokenFile, "token-file", c.TokenFile, "where to put the token file") 81 f.StringVar(&c.StatusFile, "status-file", c.StatusFile, "where to put details about this run (optional)") 82 f.DurationVar(&c.Timeout, "timeout", c.Timeout, "how long to retry on errors before giving up") 83 f.BoolVar(&c.ForceRefresh, "force-refresh", c.ForceRefresh, "forcefully refresh the token even if it is still valid") 84 } 85 86 func (c *commandLine) check() error { 87 if c.PrivateKeyPath == "" { 88 return fmt.Errorf("-pkey-pem is required") 89 } 90 if c.CertificatePath == "" { 91 return fmt.Errorf("-cert-pem is required") 92 } 93 if c.Backend == "" { 94 return fmt.Errorf("-backend is required") 95 } 96 if c.TokenFile == "" { 97 return fmt.Errorf("-token-file is required") 98 } 99 return nil 100 } 101 102 func main() { 103 os.Exit(realMain()) 104 } 105 106 func realMain() int { 107 opts := defaults() 108 opts.registerFlags(flag.CommandLine) 109 110 tsmonFlags := tsmon.NewFlags() 111 tsmonFlags.Target.TargetType = target.TaskType 112 tsmonFlags.Target.TaskServiceName = "luci_machine_tokend" 113 tsmonFlags.Target.TaskJobName = 
"default" 114 tsmonFlags.Flush = "manual" 115 tsmonFlags.Register(flag.CommandLine) 116 117 flag.Parse() 118 119 if err := opts.check(); err != nil { 120 fmt.Fprintln(os.Stderr, err) 121 flag.Usage() 122 return 2 123 } 124 125 clientParams := client.Parameters{ 126 PrivateKeyPath: opts.PrivateKeyPath, 127 CertificatePath: opts.CertificatePath, 128 Backend: opts.Backend, 129 Retry: func() retry.Iterator { 130 return &retry.ExponentialBackoff{ 131 Limited: retry.Limited{ 132 Delay: 200 * time.Millisecond, 133 Retries: 100000, // limit only by time, not number of retries 134 }, 135 MaxDelay: opts.Timeout, 136 Multiplier: 1.5, 137 } 138 }, 139 } 140 if strings.HasPrefix(clientParams.Backend, "localhost:") { 141 clientParams.Insecure = true 142 } 143 144 log := &memlogger.MemLogger{} 145 146 // Write Debug log to both memlogger and gologger. 147 memLogFactory := func(context.Context) logging.Logger { 148 return log 149 } 150 root := teelogger.Use(context.Background(), memLogFactory, gologger.StdConfig.NewLogger) 151 root = logging.SetLevel(root, logging.Debug) 152 153 // Apply tsmon config. A failure here is non-fatal. 154 if err := tsmon.InitializeFromFlags(root, &tsmonFlags); err != nil { 155 logging.Errorf(root, "Failed to initialize tsmon - %s", err) 156 } 157 158 ctx, cancel := context.WithTimeout(root, opts.Timeout) 159 defer cancel() 160 signals.HandleInterrupt(cancel) 161 162 statusReport := StatusReport{ 163 Version: Version, 164 Started: clock.Now(ctx), 165 } 166 defer func() { 167 // Dump the status of this run. It's picked up by monitoring. Ignore errors 168 // here, they are not important compared to 'run' errors. Use root context 169 // to be to flush errors to monitoring even if 'ctx' has expired. 
170 statusReport.Finished = clock.Now(ctx) 171 if err := statusReport.SendMetrics(root); err != nil { 172 logging.Errorf(root, "Failed to send tsmon metrics - %s", err) 173 } 174 if opts.StatusFile != "" { 175 if err := statusReport.SaveToFile(root, log, opts.StatusFile); err != nil { 176 logging.Errorf(root, "Failed to save the status - %s", err) 177 } 178 } 179 }() 180 if err := run(ctx, clientParams, opts, &statusReport); err != nil { 181 return 1 182 } 183 return 0 184 } 185 186 func run(ctx context.Context, clientParams client.Parameters, opts commandLine, status *StatusReport) error { 187 // Read existing token file on disk to check whether we really need to update 188 // it. We update the token if it is missing, close to expiration, or when 189 // parameters change. 190 existingToken, existingState := readTokenFile(ctx, opts.TokenFile) 191 192 // Record the info about existing token in status report, it is useful even if 193 // we fail to refresh the token. 194 status.LastToken = existingToken 195 196 // Initialize the client. It will read private key and certificate file into 197 // memory and validate them. 198 cl, err := client.New(clientParams) 199 if err != nil { 200 logging.Errorf(ctx, "Failed to initialize the client - %s", err) 201 status.FailureError = err 202 status.UpdateOutcome = OutcomeCantReadKey 203 // Fill in some update reason to avoid "" as metric value. 204 if existingToken.NextUpdate == 0 { 205 status.UpdateReason = UpdateReasonNewToken 206 } else { 207 // We successfully updated the token in the past, but now the keys are 208 // suddenly unreadable, they probably changed. 209 status.UpdateReason = UpdateReasonParametersChange 210 } 211 return err 212 } 213 214 // Generate a hash of all input parameters. It is used to detect that we 215 // need to refresh the token file even if the token is still valid. It 216 // happens if we change a key or backend URL. 
217 signer := cl.Signer.(*client.X509Signer) 218 inputsDigest := calcDigest(map[string][]byte{ 219 "forceBump": {1}, // bump this to forcefully regenerate all tokens 220 "pkey": signer.PrivateKeyPEM, 221 "cert": signer.CertificatePEM, 222 "backend": []byte(clientParams.Backend), 223 }) 224 225 // Record a reason for token update (if we need to update the token). 226 now := clock.Now(ctx) 227 switch { 228 case existingToken.NextUpdate == 0: 229 status.UpdateReason = UpdateReasonNewToken 230 case now.After(time.Unix(existingToken.NextUpdate, 0)): 231 status.UpdateReason = UpdateReasonExpiration 232 case existingState.InputsDigest != inputsDigest: 233 status.UpdateReason = UpdateReasonParametersChange 234 case opts.ForceRefresh: 235 status.UpdateReason = UpdateReasonForceRefresh 236 default: 237 logging.Infof(ctx, "The token is valid, skipping the update") 238 status.UpdateReason = UpdateReasonTokenIsGood 239 status.UpdateOutcome = OutcomeTokenIsGood 240 return nil 241 } 242 243 // Grab a new token. MintMachineToken does retries internally, until success 244 // or context deadline. 245 resp, err := cl.MintMachineToken(ctx, &minter.MachineTokenRequest{ 246 TokenType: tokenserver.MachineTokenType_LUCI_MACHINE_TOKEN, 247 }) 248 status.MintTokenDuration = clock.Now(ctx).Sub(now) 249 if err != nil { 250 logging.Errorf(ctx, "Failed to generate a new token - %s", err) 251 status.FailureError = err 252 status.UpdateOutcome = OutcomeFromRPCError(err) 253 if details, ok := err.(client.RPCError); ok { 254 status.ServiceVersion = details.ServiceVersion 255 } 256 return err 257 } 258 status.ServiceVersion = resp.ServiceVersion 259 260 // Grab machine_token field. 
261 var tok *minter.LuciMachineToken 262 if tt, _ := resp.TokenType.(*minter.MachineTokenResponse_LuciMachineToken); tt != nil { 263 tok = tt.LuciMachineToken 264 } 265 if tok == nil { 266 err = fmt.Errorf("bad response, empty luci_machine_token field") 267 logging.Errorf(ctx, "%s", err) 268 status.FailureError = err 269 status.UpdateOutcome = OutcomeMalformedReponse 270 return err 271 } 272 273 now = clock.Now(ctx) 274 expiry := tok.Expiry.AsTime() 275 lifetime := expiry.Sub(now) 276 277 // lifetime should usually be 1h, add a safeguard to avoid hammering 278 // the backend in case the lifetime is unexpectedly wrong. 279 if lifetime < 5*time.Minute { 280 logging.Warningf(ctx, "Returned token lifetime is unexpectedly too short (%s)", lifetime) 281 lifetime = 5 * time.Minute 282 } 283 284 // We start to attempt to refresh the token after half of its lifetime has 285 // passed, to be able survive short (~30 min) backend outages in exchange for 286 // 2x RPC rate. 287 newTokenFile := tokenserver.TokenFile{ 288 LuciMachineToken: tok.MachineToken, 289 Expiry: expiry.Unix(), 290 LastUpdate: now.Unix(), 291 NextUpdate: now.Add(lifetime / 2).Unix(), 292 } 293 newState := stateInToken{ 294 InputsDigest: inputsDigest, 295 Version: Version, 296 } 297 if err = writeTokenFile(ctx, &newTokenFile, &newState, opts.TokenFile); err != nil { 298 logging.Errorf(ctx, "Failed to save token file - %s", err) 299 status.FailureError = err 300 if os.IsPermission(err) { 301 status.UpdateOutcome = OutcomePermissionError 302 } else { 303 status.UpdateOutcome = OutcomeUnknownSaveTokenError 304 } 305 return err 306 } 307 308 status.LastToken = &newTokenFile 309 status.UpdateOutcome = OutcomeUpdateSuccess 310 return nil 311 } 312 313 // calcDigest produces a digest of a given map using some stable serialization. 
//
// Keys are visited in sorted order, so the result does not depend on map
// iteration order. Each entry is fed to SHA-1 as the key, a newline, the
// decimal byte length of the value, a newline, and then the raw value bytes.
//
// The result is the lowercase hex encoding of the digest (40 characters). A nil
// or empty map yields the SHA-1 of empty input.
func calcDigest(inputs map[string][]byte) string {
	keys := make([]string, 0, len(inputs))
	for k := range inputs {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	h := sha1.New()
	for _, k := range keys {
		v := inputs[k]
		// The length prefix keeps entries unambiguous: {"a": "bc"} and
		// {"ab": "c"} serialize differently.
		fmt.Fprintf(h, "%s\n%d\n", k, len(v))
		h.Write(v) // hash.Hash documents that Write never returns an error
	}
	return hex.EncodeToString(h.Sum(nil))
}