go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/luci_notify/notify/tree_status.go (about) 1 // Copyright 2020 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package notify 16 17 import ( 18 "context" 19 "encoding/json" 20 "fmt" 21 "io" 22 "net/http" 23 "net/url" 24 "strconv" 25 "strings" 26 "sync" 27 "time" 28 29 "go.chromium.org/luci/common/data/rand/mathrand" 30 "go.chromium.org/luci/common/data/stringset" 31 "go.chromium.org/luci/common/errors" 32 "go.chromium.org/luci/common/lhttp" 33 "go.chromium.org/luci/common/logging" 34 "go.chromium.org/luci/common/retry/transient" 35 "go.chromium.org/luci/common/sync/parallel" 36 "go.chromium.org/luci/gae/service/datastore" 37 "go.chromium.org/luci/grpc/prpc" 38 "go.chromium.org/luci/server/auth" 39 tspb "go.chromium.org/luci/tree_status/proto/v1" 40 41 "go.chromium.org/luci/luci_notify/config" 42 ) 43 44 const botUsername = "luci-notify@appspot.gserviceaccount.com" 45 const legacyBotUsername = "buildbot@chromium.org" 46 47 type treeStatus struct { 48 username string 49 message string 50 key int64 51 status config.TreeCloserStatus 52 timestamp time.Time 53 } 54 55 type treeStatusClient interface { 56 getStatus(c context.Context, host string) (*treeStatus, error) 57 postStatus(c context.Context, host, message string, prevKey int64, treeName string, status config.TreeCloserStatus) error 58 } 59 60 type httpTreeStatusClient struct { 61 getFunc func(context.Context, string) ([]byte, error) 62 postFunc func(context.Context, string) error 63 client tspb.TreeStatusClient 64 } 65 66 func NewHTTPTreeStatusClient(ctx context.Context, luciTreeStatusHost string) (*httpTreeStatusClient, error) { 67 transport, err := auth.GetRPCTransport(ctx, auth.AsSelf) 68 if err != nil { 69 return nil, err 70 } 71 rpcOpts := prpc.DefaultOptions() 72 rpcOpts.Insecure = lhttp.IsLocalHost(luciTreeStatusHost) 73 prpcClient := &prpc.Client{ 74 C: &http.Client{Transport: transport}, 75 Host: luciTreeStatusHost, 76 Options: rpcOpts, 77 MaxConcurrentRequests: 100, 78 } 79 80 return &httpTreeStatusClient{ 81 getFunc: getHttp, 82 postFunc: postHttp, 83 client: tspb.NewTreeStatusPRPCClient(prpcClient), 84 }, nil 85 } 86 87 func (ts *httpTreeStatusClient) getStatus(c context.Context, host string) (*treeStatus, error) { 88 // TODO(mwarton): transition to the new tree status app RPC after the migration. 89 respJSON, err := ts.getFunc(c, fmt.Sprintf("https://%s/current?format=json", host)) 90 if err != nil { 91 return nil, err 92 } 93 94 var r struct { 95 Username string 96 CanCommitFreely bool `json:"can_commit_freely"` 97 Key int64 98 Date string 99 Message string 100 } 101 if err = json.Unmarshal(respJSON, &r); err != nil { 102 return nil, errors.Annotate(err, "failed to unmarshal JSON").Err() 103 } 104 105 var status config.TreeCloserStatus = config.Closed 106 if r.CanCommitFreely { 107 status = config.Open 108 } 109 110 // Similar to RFC3339, but not quite the same. No time zone is specified, 111 // so this will default to UTC, which is correct here. 112 const dateFormat = "2006-01-02 15:04:05.999999" 113 t, err := time.Parse(dateFormat, r.Date) 114 if err != nil { 115 return nil, errors.Annotate(err, "failed to parse date from tree status").Err() 116 } 117 118 return &treeStatus{ 119 username: r.Username, 120 message: r.Message, 121 key: r.Key, 122 status: status, 123 timestamp: t, 124 }, nil 125 } 126 127 func (ts *httpTreeStatusClient) postStatus(ctx context.Context, host, message string, prevKey int64, treeName string, status config.TreeCloserStatus) error { 128 // During the tree status migration, we will update both the old (HTTP) status 129 // and the new (PRPC) status. We always attempt to update both despite whatever 130 // errors may occur. 131 // TODO(mwarton): Remove the HTTP post after the migration. 132 logging.Infof(ctx, "Updating status for %s: %q", host, message) 133 134 q := url.Values{} 135 q.Add("message", message) 136 q.Add("last_status_key", strconv.FormatInt(prevKey, 10)) 137 u := url.URL{ 138 Host: host, 139 Scheme: "https", 140 Path: "/", 141 RawQuery: q.Encode(), 142 } 143 144 httpErr := ts.postFunc(ctx, u.String()) 145 146 generalState := tspb.GeneralState_OPEN 147 if status == config.Closed { 148 generalState = tspb.GeneralState_CLOSED 149 } 150 request := &tspb.CreateStatusRequest{ 151 Parent: fmt.Sprintf("trees/%s/status", treeName), 152 Status: &tspb.Status{ 153 GeneralState: generalState, 154 Message: message, 155 }, 156 } 157 _, prpcErr := ts.client.CreateStatus(ctx, request) 158 159 // If the PRPC worked, we don't really mind what the status of the HTTP request was, as 160 // we expect it to start failing during the migration. 161 if prpcErr == nil { 162 if httpErr != nil { 163 logging.Infof(ctx, "Error updating status by HTTP: %s", httpErr) 164 } 165 return nil 166 } 167 // Log any PRPC errors, but allow HTTP success to override us during the migration. 168 logging.Errorf(ctx, "Error updating status by PRPC: %s", prpcErr) 169 return httpErr 170 171 } 172 173 func getHttp(c context.Context, url string) ([]byte, error) { 174 response, err := makeHttpRequest(c, url, "GET") 175 if err != nil { 176 return nil, err 177 } 178 179 defer response.Body.Close() 180 bytes, err := io.ReadAll(response.Body) 181 if err != nil { 182 return nil, errors.Annotate(err, "failed to read response body from %q", url).Err() 183 } 184 185 return bytes, nil 186 } 187 188 func postHttp(c context.Context, url string) error { 189 response, err := makeHttpRequest(c, url, "POST") 190 if err != nil { 191 return err 192 } 193 194 response.Body.Close() 195 196 // If the operation succeeded, the status app will apply the update, and 197 // then redirect back to the main page. Let's also check for a 200, as this 198 // is a reasonable response and we don't want to depend too heavily on 199 // particular implementation details. 200 if response.StatusCode == http.StatusFound || response.StatusCode == http.StatusOK { 201 return nil 202 } 203 return fmt.Errorf("POST to %q returned unexpected status code %d", url, response.StatusCode) 204 } 205 206 func makeHttpRequest(c context.Context, url, method string) (*http.Response, error) { 207 transport, err := auth.GetRPCTransport(c, auth.AsSelf) 208 if err != nil { 209 return nil, err 210 } 211 212 req, err := http.NewRequest(method, url, nil) 213 if err != nil { 214 return nil, err 215 } 216 req = req.WithContext(c) 217 218 response, err := (&http.Client{Transport: transport}).Do(req) 219 if err != nil { 220 return nil, errors.Annotate(err, "%s request to %q failed", method, url).Err() 221 } 222 223 return response, nil 224 } 225 226 // UpdateTreeStatus is the HTTP handler triggered by cron when it's time to 227 // check tree closers and update tree status if necessary. 228 func UpdateTreeStatus(ctx context.Context) error { 229 ctx, cancel := context.WithTimeout(ctx, time.Minute) 230 defer cancel() 231 232 settings, err := config.FetchSettings(ctx) 233 if err != nil { 234 return errors.Annotate(err, "fetching settings").Err() 235 } 236 client, err := NewHTTPTreeStatusClient(ctx, settings.LuciTreeStatusHost) 237 if err != nil { 238 return errors.Annotate(err, "creating tree status client").Err() 239 } 240 241 return transient.Tag.Apply(updateTrees(ctx, client)) 242 } 243 244 // updateTrees fetches all TreeClosers from datastore, uses this to determine if 245 // any trees should be opened or closed, and makes the necessary updates. 246 func updateTrees(c context.Context, ts treeStatusClient) error { 247 // The goal here is, for every project, to atomically fetch the config 248 // for that project along with all TreeClosers within it. So if the 249 // project config and the set of TreeClosers are updated at the same 250 // time, we should always see either both updates, or neither. Also, we 251 // want to do it without XG transactions. 252 // 253 // First we fetch keys for all the projects. Second, for every project, 254 // we fetch the full config and all TreeClosers in a transaction. Since 255 // these two steps aren't within a transaction, it's possible that 256 // changes have occurred in between. But all cases are dealt with: 257 // 258 // * Updates to project config or TreeClosers aren't a problem since we 259 // only fetch them in the second step anyway. 260 // * Deletions of projects are fine, since if we don't find them in the 261 // second fetch we just ignore that project and carry on. 262 // * New projects are ignored, and picked up the next time we run. 263 q := datastore.NewQuery("Project").KeysOnly(true) 264 var projects []*config.Project 265 if err := datastore.GetAll(c, q, &projects); err != nil { 266 return errors.Annotate(err, "failed to get project keys").Err() 267 } 268 269 // Guards access to both treeClosers and closingEnabledProjects. 270 mu := sync.Mutex{} 271 var treeClosers []*config.TreeCloser 272 closingEnabledProjects := stringset.New(0) 273 274 err := parallel.WorkPool(32, func(ch chan<- func() error) { 275 for _, project := range projects { 276 project := project 277 ch <- func() error { 278 return datastore.RunInTransaction(c, func(c context.Context) error { 279 switch err := datastore.Get(c, project); { 280 // The project was deleted since the previous time we fetched it just above. 281 // In this case, just move on, since the project is no more. 282 case err == datastore.ErrNoSuchEntity: 283 logging.Infof(c, "Project %s removed between queries, ignoring it", project.Name) 284 return nil 285 case err != nil: 286 return errors.Annotate(err, "failed to get project").Tag(transient.Tag).Err() 287 } 288 289 q := datastore.NewQuery("TreeCloser").Ancestor(datastore.KeyForObj(c, project)) 290 var treeClosersForProject []*config.TreeCloser 291 if err := datastore.GetAll(c, q, &treeClosersForProject); err != nil { 292 return errors.Annotate(err, "failed to get tree closers").Tag(transient.Tag).Err() 293 } 294 295 mu.Lock() 296 defer mu.Unlock() 297 logging.Debugf(c, "Appending tree closers for project: %v", project) 298 treeClosers = append(treeClosers, treeClosersForProject...) 299 if project.TreeClosingEnabled { 300 closingEnabledProjects.Add(project.Name) 301 } 302 303 return nil 304 }, nil) 305 } 306 } 307 }) 308 if err != nil { 309 return err 310 } 311 312 logging.Debugf(c, "closingEnabledProjects: %v", closingEnabledProjects) 313 return parallel.WorkPool(32, func(ch chan<- func() error) { 314 for host, treeClosers := range groupTreeClosers(treeClosers) { 315 host, treeClosers := host, treeClosers 316 ch <- func() error { 317 c := logging.SetField(c, "tree-status-host", host) 318 return updateHost(c, ts, host, treeClosers, closingEnabledProjects, treeNameOrDefault(treeClosers)) 319 } 320 } 321 }) 322 } 323 324 func groupTreeClosers(treeClosers []*config.TreeCloser) map[string][]*config.TreeCloser { 325 byHost := map[string][]*config.TreeCloser{} 326 for _, tc := range treeClosers { 327 byHost[tc.TreeStatusHost] = append(byHost[tc.TreeStatusHost], tc) 328 } 329 330 return byHost 331 } 332 333 func treeNameOrDefault(treeClosers []*config.TreeCloser) string { 334 for _, closer := range treeClosers { 335 if closer.TreeCloser.TreeName != "" { 336 return closer.TreeCloser.TreeName 337 } 338 } 339 for _, closer := range treeClosers { 340 if closer.TreeStatusHost != "" { 341 return strings.TrimSuffix(strings.TrimSuffix(closer.TreeStatusHost, ".appspot.com"), "-status") 342 } 343 if closer.TreeCloser.TreeStatusHost != "" { 344 return strings.TrimSuffix(strings.TrimSuffix(closer.TreeCloser.TreeStatusHost, ".appspot.com"), "-status") 345 } 346 } 347 panic("Should not have gotten here, invalid project configuration contains neither host nor tree name") 348 } 349 350 func tcProject(tc *config.TreeCloser) string { 351 return tc.BuilderKey.Parent().StringID() 352 } 353 354 func updateHost(c context.Context, ts treeStatusClient, host string, treeClosers []*config.TreeCloser, closingEnabledProjects stringset.Set, treeName string) error { 355 treeStatus, err := ts.getStatus(c, host) 356 if err != nil { 357 return err 358 } 359 360 if treeStatus.status == config.Closed && treeStatus.username != botUsername && treeStatus.username != legacyBotUsername { 361 // Don't do anything if the tree was manually closed. 362 logging.Debugf(c, "Tree is closed and last update was from non-bot user %s; not doing anything", treeStatus.username) 363 return nil 364 } 365 366 logging.Debugf(c, "Scanning treeClosers for any belonging to a project with tree closing enabled: %v", treeClosers) 367 anyEnabled := false 368 for _, tc := range treeClosers { 369 if closingEnabledProjects.Has(tcProject(tc)) { 370 logging.Debugf(c, "Found such a treeCloser: %v", tc) 371 anyEnabled = true 372 break 373 } 374 } 375 logging.Debugf(c, "anyEnabled = %v", anyEnabled) 376 377 anyFailingBuild := false 378 anyNewBuild := false 379 var oldestClosed *config.TreeCloser 380 for _, tc := range treeClosers { 381 // If any TreeClosers are from projects with tree closing enabled, 382 // ignore any TreeClosers *not* from such projects. In general we don't 383 // expect different projects to close the same tree, so we're okay with 384 // not seeing dry run logging for these TreeClosers in this rare case. 385 if anyEnabled && !closingEnabledProjects.Has(tcProject(tc)) { 386 continue 387 } 388 389 // For opening the tree, we need to make sure *all* builders are 390 // passing, not just those that have had new builds. Otherwise we'll 391 // open the tree after any new green build, even if the builder that 392 // caused us to close it is still failing. 393 if tc.Status == config.Closed { 394 logging.Debugf(c, "Found failing builder with message: %s", tc.Message) 395 anyFailingBuild = true 396 } 397 398 // Only pay attention to failing builds from after the last update to 399 // the tree. Otherwise we'll close the tree even after people manually 400 // open it. 401 if tc.Timestamp.Before(treeStatus.timestamp) { 402 continue 403 } 404 405 anyNewBuild = true 406 407 if tc.Status == config.Closed && (oldestClosed == nil || tc.Timestamp.Before(oldestClosed.Timestamp)) { 408 logging.Debugf(c, "Updating oldest failing builder") 409 oldestClosed = tc 410 } 411 } 412 413 var newStatus config.TreeCloserStatus 414 if !anyNewBuild { 415 // Don't do anything if all the builds are older than the last update 416 // to the tree - nothing has changed, so there's no reason to take any 417 // action. 418 logging.Debugf(c, "No builds newer than last tree update (%s); not doing anything", 419 treeStatus.timestamp.Format(time.RFC1123Z)) 420 return nil 421 } 422 if !anyFailingBuild { 423 // We can open the tree, as no builders are failing, including builders 424 // that haven't run since the last update to the tree. 425 logging.Debugf(c, "No failing builders; new status is Open") 426 newStatus = config.Open 427 } else if oldestClosed != nil { 428 // We can close the tree, as at least one builder has failed since the 429 // last update to the tree. 430 logging.Debugf(c, "At least one failing builder; new status is Closed") 431 newStatus = config.Closed 432 } else { 433 // Some builders are failing, but they were already failing before the 434 // last update. Don't do anything, so as not to close the tree after a 435 // sheriff has manually opened it. 436 logging.Debugf(c, "At least one failing builder, but there's a more recent update; not doing anything") 437 return nil 438 } 439 440 if treeStatus.status == newStatus { 441 // Don't do anything if the current status is already correct. 442 logging.Debugf(c, "Current status is already correct; not doing anything") 443 return nil 444 } 445 446 var message string 447 if newStatus == config.Open { 448 message = fmt.Sprintf("Tree is open (Automatic: %s)", randomMessage(c)) 449 } else { 450 message = fmt.Sprintf("Tree is closed (Automatic: %s)", oldestClosed.Message) 451 } 452 453 if anyEnabled { 454 return ts.postStatus(c, host, message, treeStatus.key, treeName, newStatus) 455 } 456 logging.Infof(c, "Would update status for %s to %q", host, message) 457 return nil 458 } 459 460 // Want more messages? CLs welcome! 461 var messages = []string{ 462 "('o')", 463 "(。>﹏<。)", 464 "☃", 465 "☀ Tree is open ☀", 466 "٩◔̯◔۶", 467 "☺", 468 "(´・ω・`)", 469 "(`・ω・´)", 470 "(΄◞ิ౪◟ิ‵ )", 471 "(╹◡╹)", 472 "♩‿♩", 473 "(/・ω・)/", 474 " ʅ(◔౪◔ ) ʃ", 475 "ᕙ(`▿´)ᕗ", 476 "ヽ(^o^)丿", 477 "\\(・ω・)/", 478 "\(^o^)/", 479 "キタ━━━━(゚∀゚)━━━━ッ!!", 480 "ヽ(^。^)ノ", 481 "(゚д゚)", 482 "ヽ(´ω`*人*´ω`)ノ", 483 " ゚+。:.゚ヽ(*´∀`)ノ゚.:。+゚", 484 "(゜ー゜*)ネッ!", 485 " ♪d(´▽`)b♪オールオッケィ♪", 486 "(ノ≧∀≦)ノ・‥…", 487 "☆(ゝω・)vキャピ", 488 "ლ(╹◡╹ლ)", 489 "ƪ(•̃͡ε•̃͡)∫ʃ", 490 "(•_•)", 491 "( ་ ⍸ ་ )", 492 "(☉౪ ⊙)", 493 "˙ ͜ʟ˙", 494 "( ఠൠఠ )", 495 "☆.。.:*・゚☆.。.:*・゚☆祝☆゚・*:.。.☆゚・*:.。.☆", 496 "༼ꉺɷꉺ༽", 497 "◉_◉", 498 "ϵ( ‘Θ’ )϶", 499 "ヾ(⌐■_■)ノ♪", 500 "(◡‿◡✿)", 501 "★.:゚+。☆ (●´v`○)bォメデトd(○´v`●)☆.:゚+。★", 502 "(☆.☆)", 503 "オメデトー♪c(*゚ー^)ノ*・'゚☆。.:*:・'☆'・:*:.", 504 "☆.。.:*・°☆.。.:*・°☆", 505 "ʕ •ᴥ•ʔ", 506 "☼.☼", 507 "⊂(・(ェ)・)⊃", 508 "(ノ≧∇≦)ノ ミ ┸━┸", 509 "¯\\_(ツ)_/¯", 510 "UwU", 511 "Paç fat!", 512 "Sretno", 513 "Hodně štěstí!", 514 "Held og lykke!", 515 "Veel geluk!", 516 "Edu!", 517 "lykkyä tykö", 518 "Viel Glück!", 519 "Καλή τύχη!", 520 "Sok szerencsét kivánok!", 521 "Gangi þér vel!", 522 "Go n-éirí an t-ádh leat!", 523 "Buona fortuna!", 524 "Laimīgs gadījums!", 525 "Sėkmės!", 526 "Vill Gléck!", 527 "Со среќа!", 528 "Powodzenia!", 529 "Boa sorte!", 530 "Noroc!", 531 "Срећно", 532 "Veľa šťastia!", 533 "Lycka till!", 534 "Bona sort!", 535 "Zorte on!", 536 "Góða eydnu", 537 "¡Boa fortuna!", 538 "Bona fortuna!", 539 "Xewqat sbieħ", 540 "Aigh vie!", 541 "Pob lwc!", 542 " موفق باشيد", 543 "İyi şanslar!", 544 "Bonŝancon!", 545 "祝你好运!", 546 "祝你好運!", 547 "頑張って!", 548 "សំណាងល្អ ", 549 "행운을 빌어요", 550 "शुभ कामना ", 551 "โชคดี!", 552 "Chúc may mắn!", 553 "بالتوفيق!", 554 "Sterkte!", 555 "Ke o lakaletsa mohlohonolo", 556 "Uve nemhanza yakanaka", 557 "Kila la kheri!", 558 "Amathamsanqa", 559 "Ngikufisela iwela!", 560 "Bonne chance!", 561 "¡Buena suerte!", 562 "Good luck!", 563 "Semoga Beruntung!", 564 "Selamat Maju Jaya!", 565 "Ia manuia", 566 "Suwertehin ka sana", 567 "Հաջողությո'ւն", 568 "Іске сәт", 569 "Амжилт хүсье", 570 "удачі!", 571 "Da legst di nieda!", 572 "Gell, da schaugst?", 573 "Ois Guade", 574 "शुभ कामना!", 575 "நல் வாழ்த்துக்கள் ", 576 "అంతా శుభం కలగాలి! ", 577 ":')", 578 ":'D", 579 "`,;)", 580 "Tree is open (^O^)", 581 "Thượng lộ bình an", 582 "Tree is open now (ง '̀͜ '́ )ง", 583 "ヽ(^o^)ノ", 584 "Ahoy all is good!", 585 "All's right with the world!", 586 "Aloha", 587 } 588 589 func randomMessage(c context.Context) string { 590 message := messages[mathrand.Intn(c, len(messages))] 591 if message[len(message)-1] == ')' { 592 return message + " " 593 } 594 return message 595 }