// golang.org/x/build@v0.0.0-20240506185731-218518f32b70/cmd/fetchlogs/fetchlogs.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Fetchlogs downloads build failure logs from the Go dashboard so
// they can be accessed and searched from the local file system.
//
// It organizes these logs into two directories created in the
// directory specified by the -dir flag (which typically defaults to
// ~/.cache/fetchlogs). The log/ directory contains all log files
// named the same way they are named by the dashboard (which happens
// to be the SHA-1 of their contents). The rev/ directory contains
// symlinks back to these logs named
//
//	rev/<ISO 8601 commit date>-<git revision>/<builder>
//
// Fetchlogs will reuse existing log files and revision symlinks, so
// it only has to download logs that are new since the last time it
// was run.
//
// This makes failures easily searchable with standard tools.
For 22 // example, to list the revisions and builders with a particular 23 // failure, use: 24 // 25 // grep -lR <regexp> rev | sort 26 package main 27 28 import ( 29 "bytes" 30 "context" 31 "encoding/json" 32 "flag" 33 "fmt" 34 "io" 35 "log" 36 "net/http" 37 "net/url" 38 "os" 39 "path/filepath" 40 "sort" 41 "strings" 42 "sync" 43 "time" 44 45 "golang.org/x/build/maintner" 46 "golang.org/x/build/maintner/godata" 47 "golang.org/x/build/repos" 48 "golang.org/x/build/types" 49 ) 50 51 var defaultDir = filepath.Join(xdgCacheDir(), "fetchlogs") 52 53 var ( 54 flagN = flag.Int("n", 300, "limit to most recent `N` commits per repo") 55 flagPar = flag.Int("j", 5, "number of concurrent download `jobs`") 56 flagDir = flag.String("dir", defaultDir, "`directory` to save logs to") 57 flagRepo = flag.String("repo", "go", `comma-separated list of repos to fetch logs for, or "all" for all known repos`) 58 flagBranch = flag.String("branch", "", `comma-separated list of Go repo branches to fetch logs for; default branch if empty`) 59 flagDashboard = flag.String("dashboard", "https://build.golang.org", `the dashboard root url`) 60 ) 61 62 func main() { 63 log.SetPrefix("fetchlogs: ") 64 log.SetFlags(0) 65 66 flag.Parse() 67 if flag.NArg() != 0 { 68 flag.Usage() 69 os.Exit(2) 70 } 71 72 // If the top-level directory is the default XDG cache 73 // directory, make sure it exists. 74 if *flagDir == defaultDir { 75 if err := xdgCreateDir(*flagDir); err != nil { 76 log.Fatal(err) 77 } 78 } 79 80 // Create directory structure. 81 if err := os.Chdir(*flagDir); err != nil { 82 log.Fatal(err) 83 } 84 ensureDir("log") 85 ensureDir("rev") 86 87 // Set up fetchers. 88 fetcher := newFetcher(*flagPar) 89 wg := sync.WaitGroup{} 90 91 // Fetch dashboard pages. 
92 for _, repo := range parseRepoFlag() { 93 for _, branch := range strings.Split(*flagBranch, ",") { 94 project := repo.GoGerritProject 95 haveCommits := 0 96 for page := 0; haveCommits < *flagN; page++ { 97 dashURL := fmt.Sprintf("%s/?mode=json&page=%d", *flagDashboard, page) 98 if project != "go" { 99 dashURL += "&repo=" + url.QueryEscape(repo.ImportPath) 100 } 101 if branch != "" { 102 dashURL += "&branch=" + url.QueryEscape(branch) 103 } 104 index, err := fetcher.get(dashURL) 105 if err != nil { 106 log.Fatal(err) 107 } 108 109 var status types.BuildStatus 110 if err = json.NewDecoder(index).Decode(&status); err != nil { 111 log.Fatal("error unmarshalling result: ", err) 112 } 113 index.Close() 114 115 if len(status.Revisions) == 0 { 116 // We asked for a page of revisions and received a valid reply with none. 117 // Assume that there are no more beyond this. 118 break 119 } 120 121 for _, rev := range status.Revisions { 122 if haveCommits >= *flagN { 123 break 124 } 125 if rev.Repo != project { 126 // The results for the "go" repo (fetched without the "&repo" query 127 // parameter) empirically include some subrepo results for release 128 // branches. 129 // 130 // Those aren't really relevant to the "go" repo — and they should be 131 // included when we fetch the subrepo explicitly anyway — so filter 132 // them out here. 133 continue 134 } 135 haveCommits++ 136 137 // Create a revision directory. This way we 138 // have a record of commits with no failures. 
139 date, err := parseRevDate(rev.Date) 140 if err != nil { 141 log.Fatal("malformed revision date: ", err) 142 } 143 var goDate time.Time 144 if rev.GoRevision != "" { 145 commit, err := goProject(useCached).GitCommit(rev.GoRevision) 146 if err != nil { 147 // A rare race is possible here: if a commit is added to the Go repo 148 // after the initial maintner load, and a dashboard test run completes 149 // for that commit before we're done fetching logs, the maintner data 150 // might not include that commit. To rule out that possibility, refresh 151 // the local maintner data before bailing out. 152 commit, err = goProject(forceRefresh).GitCommit(rev.GoRevision) 153 if err != nil { 154 log.Fatal("invalid GoRevision: ", err) 155 } 156 } 157 goDate = commit.CommitTime 158 } 159 revDir, revDirDepth := revToDir(rev.Revision, date, rev.GoRevision, goDate) 160 ensureDir(revDir) 161 162 if rev.GoRevision != "" { 163 // In October 2021 we started creating a separate subdirectory for 164 // each Go repo commit. (Previously, we overwrote the link for each 165 // subrepo commit when downloading a new Go commit.) Remove the 166 // previous links, if any, so that greplogs won't double-count them. 167 prevRevDir, _ := revToDir(rev.Revision, date, "", time.Time{}) 168 if err := os.RemoveAll(prevRevDir); err != nil { 169 log.Fatal(err) 170 } 171 } 172 173 // Save revision metadata. 174 buf := bytes.Buffer{} 175 enc := json.NewEncoder(&buf) 176 if err = enc.Encode(rev); err != nil { 177 log.Fatal(err) 178 } 179 if err = writeFileAtomic(filepath.Join(revDir, ".rev.json"), &buf); err != nil { 180 log.Fatal("error saving revision metadata: ", err) 181 } 182 183 // Save builders list so Results list can be 184 // interpreted. 185 if err = enc.Encode(status.Builders); err != nil { 186 log.Fatal(err) 187 } 188 if err = writeFileAtomic(filepath.Join(revDir, ".builders.json"), &buf); err != nil { 189 log.Fatal("error saving builders metadata: ", err) 190 } 191 192 // Fetch revision logs. 
193 for i, res := range rev.Results { 194 if res == "" || res == "ok" { 195 continue 196 } 197 198 wg.Add(1) 199 go func(builder, logURL string) { 200 defer wg.Done() 201 logPath := filepath.Join("log", filepath.Base(logURL)) 202 err := fetcher.getFile(logURL, logPath) 203 if err != nil { 204 log.Fatal("error fetching log: ", err) 205 } 206 if err := linkLog(revDir, revDirDepth, builder, logPath); err != nil { 207 log.Fatal("error linking log: ", err) 208 } 209 }(status.Builders[i], res) 210 } 211 } 212 } 213 } 214 } 215 216 wg.Wait() 217 } 218 219 func parseRepoFlag() (rs []*repos.Repo) { 220 if *flagRepo == "all" { 221 for p, repo := range repos.ByGerritProject { 222 if p == "go" || repo.ShowOnDashboard() { 223 rs = append(rs, repo) 224 } 225 } 226 } else { 227 for _, p := range strings.Split(*flagRepo, ",") { 228 p = strings.TrimSpace(p) 229 repo := repos.ByGerritProject[p] 230 if repo == nil { 231 log.Fatalf("unknown repo %s", *flagRepo) 232 } 233 rs = append(rs, repo) 234 } 235 } 236 sort.Slice(rs, func(i, j int) bool { 237 pi := rs[i].GoGerritProject 238 pj := rs[j].GoGerritProject 239 240 // Read "go" first because it doesn't require maintner data. 241 if pj == "go" { 242 return false // Nothing is before "go". 243 } else if pi == "go" { 244 return true // "go" is before everything else. 245 } 246 247 return pi < pj 248 }) 249 250 if len(rs) == 0 { 251 log.Fatal("-repo flag does not contain any repos") 252 } 253 if rs[0].GoGerritProject == "go" && len(rs) > 1 { 254 go func() { 255 // Prefetch maintner data, since we'll likely need it and can hide 256 // some of the latency behind processing the "go" project 257 // (which does not need it). 258 // 259 // If the first repo is not "go", then we'll either need the maintner data 260 // right away (in which case we can't hide any substantial latency) or not 261 // at all (in which case we shouldn't bother churning memory and disk 262 // pages to load it). 
263 _ = goProject(useCached) 264 }() 265 } 266 267 return rs 268 } 269 270 // A fetcher downloads files over HTTP concurrently. It allows 271 // limiting the number of concurrent downloads and correctly handles 272 // multiple (possibly concurrent) fetches from the same URL to the 273 // same file. 274 type fetcher struct { 275 tokens chan struct{} 276 277 pending struct { 278 sync.Mutex 279 m map[string]*pendingFetch 280 } 281 } 282 283 type pendingFetch struct { 284 wchan chan struct{} // closed when fetch completes 285 286 // err is the error, if any, that occurred during this fetch. 287 // It will be set before wchan is closed. 288 err error 289 } 290 291 func newFetcher(jobs int) *fetcher { 292 f := new(fetcher) 293 294 f.tokens = make(chan struct{}, *flagPar) 295 for i := 0; i < jobs; i++ { 296 f.tokens <- struct{}{} 297 } 298 299 f.pending.m = make(map[string]*pendingFetch) 300 301 return f 302 } 303 304 // get performs an HTTP GET for URL and returns the body, while 305 // obeying the job limit on fetcher. 306 func (f *fetcher) get(url string) (io.ReadCloser, error) { 307 <-f.tokens 308 fmt.Println("fetching", url) 309 resp, err := http.Get(url) 310 f.tokens <- struct{}{} 311 if err != nil { 312 return nil, err 313 } 314 if resp.StatusCode != 200 { 315 return nil, fmt.Errorf("GET %s: %v %s", url, resp.StatusCode, http.StatusText(resp.StatusCode)) 316 } 317 318 return resp.Body, nil 319 } 320 321 // getFile performs an HTTP GET for URL and writes it to filename. If 322 // the destination file already exists, this returns immediately. If 323 // another goroutine is currently fetching filename, this blocks until 324 // the fetch is done and then returns. 325 func (f *fetcher) getFile(url string, filename string) error { 326 // Do we already have it? 327 if _, err := os.Stat(filename); err == nil { 328 return nil 329 } else if !os.IsNotExist(err) { 330 return err 331 } 332 333 // Check if another fetcher is working on it. 
334 f.pending.Lock() 335 if p, ok := f.pending.m[filename]; ok { 336 f.pending.Unlock() 337 <-p.wchan 338 return p.err 339 } 340 341 p := &pendingFetch{wchan: make(chan struct{})} 342 f.pending.m[filename] = p 343 f.pending.Unlock() 344 345 r, err := f.get(url) 346 if err == nil { 347 err = writeFileAtomic(filename, r) 348 r.Close() 349 } 350 p.err = err 351 352 close(p.wchan) 353 return p.err 354 } 355 356 var ( 357 goProjectMu sync.Mutex 358 cachedGoProject *maintner.GerritProject 359 goProjectErr error 360 ) 361 362 func getGoProject(ctx context.Context) (*maintner.GerritProject, error) { 363 corpus, err := godata.Get(ctx) 364 if err != nil { 365 return nil, err 366 } 367 368 gp := corpus.Gerrit().Project("go.googlesource.com", "go") 369 if gp == nil { 370 return nil, fmt.Errorf("go.googlesource.com/go Gerrit project not found") 371 } 372 373 return gp, nil 374 } 375 376 func goProject(policy refreshPolicy) *maintner.GerritProject { 377 goProjectMu.Lock() 378 defer goProjectMu.Unlock() 379 if policy == forceRefresh || (cachedGoProject == nil && goProjectErr == nil) { 380 cachedGoProject, goProjectErr = getGoProject(context.Background()) 381 } 382 383 if goProjectErr != nil { 384 log.Fatal(goProjectErr) 385 } 386 return cachedGoProject 387 } 388 389 type refreshPolicy int8 390 391 const ( 392 useCached refreshPolicy = iota 393 forceRefresh 394 ) 395 396 // ensureDir creates directory name if it does not exist. 397 func ensureDir(name string) { 398 err := os.MkdirAll(name, 0777) 399 if err != nil { 400 log.Fatal("error creating directory ", name, ": ", err) 401 } 402 } 403 404 // writeFileAtomic atomically creates a file called filename and 405 // copies the data from r to the file. 
func writeFileAtomic(filename string, r io.Reader) error {
	// Stage the data in a sibling temporary file so that filename never
	// appears with partial contents; it is renamed into place at the end.
	tmpPath := filename + ".tmp"
	f, err := os.Create(tmpPath)
	if err != nil {
		return err
	}

	_, err = io.Copy(f, r)
	if err == nil {
		// Flush the data to disk before the rename publishes the file.
		err = f.Sync()
	}
	// Always close the file, but report the first error encountered.
	if closeErr := f.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		os.Remove(tmpPath)
		return err
	}

	if err := os.Rename(tmpPath, filename); err != nil {
		os.Remove(tmpPath)
		return err
	}
	return nil
}

// linkLog creates a symlink for finding logPath based on its git
// revision and builder. The link is named builder inside revDir and
// points at logPath prefixed with revDirDepth "../" elements. A link
// that already exists is not an error.
func linkLog(revDir string, revDirDepth int, builder, logPath string) error {
	// Create symlink.
	target := strings.Repeat("../", revDirDepth) + logPath
	err := os.Symlink(target, filepath.Join(revDir, builder))
	if err != nil && !os.IsExist(err) {
		return err
	}

	return nil
}

// parseRevDate parses a revision date in RFC3339.
func parseRevDate(date string) (time.Time, error) {
	return time.Parse(time.RFC3339, date)
}

// revToDir returns the path of the revision directory for revision,
// together with the number of path elements in it (always 2).
//
// The directory is named rev/<date>-<revision> or, when goRev is not
// empty, rev/<date>-<revision>-<goRev>. The date used is the later of
// date and goDate, and both revisions are abbreviated to their first
// seven bytes. Revisions shorter than that are used whole.
func revToDir(revision string, date time.Time, goRev string, goDate time.Time) (dir string, depth int) {
	if goDate.After(date) {
		date = goDate
	}
	dateStr := date.Format("2006-01-02T15:04:05")

	// abbrev shortens a revision without slicing past its end, so a
	// malformed (short) revision cannot cause a panic.
	abbrev := func(rev string) string {
		if len(rev) > 7 {
			return rev[:7]
		}
		return rev
	}

	parts := []string{dateStr, abbrev(revision)}
	if goRev != "" {
		parts = append(parts, abbrev(goRev))
	}

	return filepath.Join("rev", strings.Join(parts, "-")), 2
}