golang.org/x/build@v0.0.0-20240506185731-218518f32b70/cmd/fetchlogs/fetchlogs.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Fetchlogs downloads build failure logs from the Go dashboard so
     6  // they can be accessed and searched from the local file system.
     7  //
     8  // It organizes these logs into two directories created in the
     9  // directory specified by the -dir flag (which typically defaults to
    10  // ~/.cache/fetchlogs). The log/ directory contains all log files
    11  // named the same way they are named by the dashboard (which happens
    12  // to be the SHA-1 of their contents). The rev/ directory contains
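// symlinks back to these logs named
//
//	rev/<ISO 8601 commit date>-<git revision>[-<Go revision>]/<builder>
//
// Revisions are abbreviated to 7 characters. The -<Go revision>
// suffix is added only for results that record the Go commit they
// ran against; for those, the date is the later of the two commits'
// dates.
//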
    17  // Fetchlogs will reuse existing log files and revision symlinks, so
    18  // it only has to download logs that are new since the last time it
    19  // was run.
    20  //
    21  // This makes failures easily searchable with standard tools. For
    22  // example, to list the revisions and builders with a particular
    23  // failure, use:
    24  //
    25  //	grep -lR <regexp> rev | sort
    26  package main
    27  
    28  import (
    29  	"bytes"
    30  	"context"
    31  	"encoding/json"
    32  	"flag"
    33  	"fmt"
    34  	"io"
    35  	"log"
    36  	"net/http"
    37  	"net/url"
    38  	"os"
    39  	"path/filepath"
    40  	"sort"
    41  	"strings"
    42  	"sync"
    43  	"time"
    44  
    45  	"golang.org/x/build/maintner"
    46  	"golang.org/x/build/maintner/godata"
    47  	"golang.org/x/build/repos"
    48  	"golang.org/x/build/types"
    49  )
    50  
// defaultDir is the default value of the -dir flag: the "fetchlogs"
// subdirectory of whatever xdgCacheDir returns.
var defaultDir = filepath.Join(xdgCacheDir(), "fetchlogs")

// Command-line flags; each usage string describes its meaning.
var (
	flagN         = flag.Int("n", 300, "limit to most recent `N` commits per repo")
	flagPar       = flag.Int("j", 5, "number of concurrent download `jobs`")
	flagDir       = flag.String("dir", defaultDir, "`directory` to save logs to")
	flagRepo      = flag.String("repo", "go", `comma-separated list of repos to fetch logs for, or "all" for all known repos`)
	flagBranch    = flag.String("branch", "", `comma-separated list of Go repo branches to fetch logs for; default branch if empty`)
	flagDashboard = flag.String("dashboard", "https://build.golang.org", `the dashboard root url`)
)
    61  
// main parses the command-line flags, prepares the log/ and rev/
// directories under -dir, and then walks the dashboard's JSON pages
// for every requested repo and branch. For each revision it records
// metadata in the revision directory and starts one goroutine per
// result that is neither empty nor "ok" to download that log and link
// it into the revision directory.
func main() {
	log.SetPrefix("fetchlogs: ")
	log.SetFlags(0)

	flag.Parse()
	if flag.NArg() != 0 {
		flag.Usage()
		os.Exit(2)
	}

	// If the top-level directory is the default XDG cache
	// directory, make sure it exists.
	if *flagDir == defaultDir {
		if err := xdgCreateDir(*flagDir); err != nil {
			log.Fatal(err)
		}
	}

	// Create directory structure.
	// After the Chdir, every relative path below ("log", "rev", ...)
	// is resolved inside -dir.
	if err := os.Chdir(*flagDir); err != nil {
		log.Fatal(err)
	}
	ensureDir("log")
	ensureDir("rev")

	// Set up fetchers.
	fetcher := newFetcher(*flagPar)
	wg := sync.WaitGroup{}

	// Fetch dashboard pages.
	for _, repo := range parseRepoFlag() {
		// An empty -branch value splits into a single empty string, in
		// which case no "&branch" query parameter is added below.
		for _, branch := range strings.Split(*flagBranch, ",") {
			project := repo.GoGerritProject
			haveCommits := 0
			// Keep requesting pages until -n matching commits have been
			// seen or the dashboard returns an empty page.
			for page := 0; haveCommits < *flagN; page++ {
				dashURL := fmt.Sprintf("%s/?mode=json&page=%d", *flagDashboard, page)
				if project != "go" {
					dashURL += "&repo=" + url.QueryEscape(repo.ImportPath)
				}
				if branch != "" {
					dashURL += "&branch=" + url.QueryEscape(branch)
				}
				index, err := fetcher.get(dashURL)
				if err != nil {
					log.Fatal(err)
				}

				var status types.BuildStatus
				if err = json.NewDecoder(index).Decode(&status); err != nil {
					log.Fatal("error unmarshalling result: ", err)
				}
				index.Close()

				if len(status.Revisions) == 0 {
					// We asked for a page of revisions and received a valid reply with none.
					// Assume that there are no more beyond this.
					break
				}

				for _, rev := range status.Revisions {
					if haveCommits >= *flagN {
						break
					}
					if rev.Repo != project {
						// The results for the "go" repo (fetched without the "&repo" query
						// parameter) empirically include some subrepo results for release
						// branches.
						//
						// Those aren't really relevant to the "go" repo — and they should be
						// included when we fetch the subrepo explicitly anyway — so filter
						// them out here.
						continue
					}
					haveCommits++

					// Create a revision directory. This way we
					// have a record of commits with no failures.
					date, err := parseRevDate(rev.Date)
					if err != nil {
						log.Fatal("malformed revision date: ", err)
					}
					// goDate stays the zero time unless the result names a Go
					// revision, in which case it is that commit's CommitTime
					// as recorded in the maintner data.
					var goDate time.Time
					if rev.GoRevision != "" {
						commit, err := goProject(useCached).GitCommit(rev.GoRevision)
						if err != nil {
							// A rare race is possible here: if a commit is added to the Go repo
							// after the initial maintner load, and a dashboard test run completes
							// for that commit before we're done fetching logs, the maintner data
							// might not include that commit. To rule out that possibility, refresh
							// the local maintner data before bailing out.
							commit, err = goProject(forceRefresh).GitCommit(rev.GoRevision)
							if err != nil {
								log.Fatal("invalid GoRevision: ", err)
							}
						}
						goDate = commit.CommitTime
					}
					revDir, revDirDepth := revToDir(rev.Revision, date, rev.GoRevision, goDate)
					ensureDir(revDir)

					if rev.GoRevision != "" {
						// In October 2021 we started creating a separate subdirectory for
						// each Go repo commit. (Previously, we overwrote the link for each
						// subrepo commit when downloading a new Go commit.) Remove the
						// previous links, if any, so that greplogs won't double-count them.
						prevRevDir, _ := revToDir(rev.Revision, date, "", time.Time{})
						if err := os.RemoveAll(prevRevDir); err != nil {
							log.Fatal(err)
						}
					}

					// Save revision metadata.
					buf := bytes.Buffer{}
					enc := json.NewEncoder(&buf)
					if err = enc.Encode(rev); err != nil {
						log.Fatal(err)
					}
					if err = writeFileAtomic(filepath.Join(revDir, ".rev.json"), &buf); err != nil {
						log.Fatal("error saving revision metadata: ", err)
					}

					// Save builders list so Results list can be
					// interpreted.
					// buf is empty again at this point because writeFileAtomic
					// consumed it with io.Copy, so the same buffer and encoder
					// are reused for the second file.
					if err = enc.Encode(status.Builders); err != nil {
						log.Fatal(err)
					}
					if err = writeFileAtomic(filepath.Join(revDir, ".builders.json"), &buf); err != nil {
						log.Fatal("error saving builders metadata: ", err)
					}

					// Fetch revision logs.
					for i, res := range rev.Results {
						// Empty and "ok" results are skipped; any other value
						// is passed on as the URL of the log to download.
						if res == "" || res == "ok" {
							continue
						}

						// NOTE(review): status.Builders[i] assumes Results never has
						// more entries than Builders; otherwise this index panics.
						// Confirm against the dashboard's JSON contract.
						wg.Add(1)
						go func(builder, logURL string) {
							defer wg.Done()
							// The local file is named after the last path element
							// of the log URL.
							logPath := filepath.Join("log", filepath.Base(logURL))
							err := fetcher.getFile(logURL, logPath)
							if err != nil {
								// log.Fatal exits the whole process, not just
								// this goroutine.
								log.Fatal("error fetching log: ", err)
							}
							if err := linkLog(revDir, revDirDepth, builder, logPath); err != nil {
								log.Fatal("error linking log: ", err)
							}
						}(status.Builders[i], res)
					}
				}
			}
		}
	}

	// Wait for every outstanding log download to finish.
	wg.Wait()
}
   218  
   219  func parseRepoFlag() (rs []*repos.Repo) {
   220  	if *flagRepo == "all" {
   221  		for p, repo := range repos.ByGerritProject {
   222  			if p == "go" || repo.ShowOnDashboard() {
   223  				rs = append(rs, repo)
   224  			}
   225  		}
   226  	} else {
   227  		for _, p := range strings.Split(*flagRepo, ",") {
   228  			p = strings.TrimSpace(p)
   229  			repo := repos.ByGerritProject[p]
   230  			if repo == nil {
   231  				log.Fatalf("unknown repo %s", *flagRepo)
   232  			}
   233  			rs = append(rs, repo)
   234  		}
   235  	}
   236  	sort.Slice(rs, func(i, j int) bool {
   237  		pi := rs[i].GoGerritProject
   238  		pj := rs[j].GoGerritProject
   239  
   240  		// Read "go" first because it doesn't require maintner data.
   241  		if pj == "go" {
   242  			return false // Nothing is before "go".
   243  		} else if pi == "go" {
   244  			return true // "go" is before everything else.
   245  		}
   246  
   247  		return pi < pj
   248  	})
   249  
   250  	if len(rs) == 0 {
   251  		log.Fatal("-repo flag does not contain any repos")
   252  	}
   253  	if rs[0].GoGerritProject == "go" && len(rs) > 1 {
   254  		go func() {
   255  			// Prefetch maintner data, since we'll likely need it and can hide
   256  			// some of the latency behind processing the "go" project
   257  			// (which does not need it).
   258  			//
   259  			// If the first repo is not "go", then we'll either need the maintner data
   260  			// right away (in which case we can't hide any substantial latency) or not
   261  			// at all (in which case we shouldn't bother churning memory and disk
   262  			// pages to load it).
   263  			_ = goProject(useCached)
   264  		}()
   265  	}
   266  
   267  	return rs
   268  }
   269  
// A fetcher downloads files over HTTP concurrently. It allows
// limiting the number of concurrent downloads and correctly handles
// multiple (possibly concurrent) fetches from the same URL to the
// same file.
type fetcher struct {
	// tokens holds the request tokens. get takes one before issuing
	// an HTTP request and puts it back once the request has returned.
	tokens chan struct{}

	// pending records the downloads started by getFile, keyed by
	// destination filename. The embedded Mutex guards m.
	pending struct {
		sync.Mutex
		m map[string]*pendingFetch
	}
}
   282  
// A pendingFetch is the state of one download started by getFile. It
// lets other getFile callers asking for the same file wait for that
// download and observe its result.
type pendingFetch struct {
	wchan chan struct{} // closed when fetch completes

	// err is the error, if any, that occurred during this fetch.
	// It will be set before wchan is closed.
	err error
}
   290  
   291  func newFetcher(jobs int) *fetcher {
   292  	f := new(fetcher)
   293  
   294  	f.tokens = make(chan struct{}, *flagPar)
   295  	for i := 0; i < jobs; i++ {
   296  		f.tokens <- struct{}{}
   297  	}
   298  
   299  	f.pending.m = make(map[string]*pendingFetch)
   300  
   301  	return f
   302  }
   303  
   304  // get performs an HTTP GET for URL and returns the body, while
   305  // obeying the job limit on fetcher.
   306  func (f *fetcher) get(url string) (io.ReadCloser, error) {
   307  	<-f.tokens
   308  	fmt.Println("fetching", url)
   309  	resp, err := http.Get(url)
   310  	f.tokens <- struct{}{}
   311  	if err != nil {
   312  		return nil, err
   313  	}
   314  	if resp.StatusCode != 200 {
   315  		return nil, fmt.Errorf("GET %s: %v %s", url, resp.StatusCode, http.StatusText(resp.StatusCode))
   316  	}
   317  
   318  	return resp.Body, nil
   319  }
   320  
   321  // getFile performs an HTTP GET for URL and writes it to filename. If
   322  // the destination file already exists, this returns immediately. If
   323  // another goroutine is currently fetching filename, this blocks until
   324  // the fetch is done and then returns.
   325  func (f *fetcher) getFile(url string, filename string) error {
   326  	// Do we already have it?
   327  	if _, err := os.Stat(filename); err == nil {
   328  		return nil
   329  	} else if !os.IsNotExist(err) {
   330  		return err
   331  	}
   332  
   333  	// Check if another fetcher is working on it.
   334  	f.pending.Lock()
   335  	if p, ok := f.pending.m[filename]; ok {
   336  		f.pending.Unlock()
   337  		<-p.wchan
   338  		return p.err
   339  	}
   340  
   341  	p := &pendingFetch{wchan: make(chan struct{})}
   342  	f.pending.m[filename] = p
   343  	f.pending.Unlock()
   344  
   345  	r, err := f.get(url)
   346  	if err == nil {
   347  		err = writeFileAtomic(filename, r)
   348  		r.Close()
   349  	}
   350  	p.err = err
   351  
   352  	close(p.wchan)
   353  	return p.err
   354  }
   355  
// State shared by goProject. goProjectMu is held by goProject while it
// reads or replaces cachedGoProject and goProjectErr, which hold the
// two results of the most recent getGoProject call.
var (
	goProjectMu     sync.Mutex
	cachedGoProject *maintner.GerritProject
	goProjectErr    error
)
   361  
   362  func getGoProject(ctx context.Context) (*maintner.GerritProject, error) {
   363  	corpus, err := godata.Get(ctx)
   364  	if err != nil {
   365  		return nil, err
   366  	}
   367  
   368  	gp := corpus.Gerrit().Project("go.googlesource.com", "go")
   369  	if gp == nil {
   370  		return nil, fmt.Errorf("go.googlesource.com/go Gerrit project not found")
   371  	}
   372  
   373  	return gp, nil
   374  }
   375  
   376  func goProject(policy refreshPolicy) *maintner.GerritProject {
   377  	goProjectMu.Lock()
   378  	defer goProjectMu.Unlock()
   379  	if policy == forceRefresh || (cachedGoProject == nil && goProjectErr == nil) {
   380  		cachedGoProject, goProjectErr = getGoProject(context.Background())
   381  	}
   382  
   383  	if goProjectErr != nil {
   384  		log.Fatal(goProjectErr)
   385  	}
   386  	return cachedGoProject
   387  }
   388  
// refreshPolicy tells goProject whether previously loaded maintner
// data may be returned.
type refreshPolicy int8

const (
	// useCached lets goProject return already-loaded data, loading it
	// only if no load has been attempted yet.
	useCached refreshPolicy = iota
	// forceRefresh makes goProject call getGoProject again even if
	// data is already cached.
	forceRefresh
)
   395  
   396  // ensureDir creates directory name if it does not exist.
   397  func ensureDir(name string) {
   398  	err := os.MkdirAll(name, 0777)
   399  	if err != nil {
   400  		log.Fatal("error creating directory ", name, ": ", err)
   401  	}
   402  }
   403  
   404  // writeFileAtomic atomically creates a file called filename and
   405  // copies the data from r to the file.
   406  func writeFileAtomic(filename string, r io.Reader) error {
   407  	tmpPath := filename + ".tmp"
   408  	if f, err := os.Create(tmpPath); err != nil {
   409  		return err
   410  	} else {
   411  		_, err := io.Copy(f, r)
   412  		if err == nil {
   413  			err = f.Sync()
   414  		}
   415  		err2 := f.Close()
   416  		if err == nil {
   417  			err = err2
   418  		}
   419  		if err != nil {
   420  			os.Remove(tmpPath)
   421  			return err
   422  		}
   423  	}
   424  	if err := os.Rename(tmpPath, filename); err != nil {
   425  		os.Remove(tmpPath)
   426  		return err
   427  	}
   428  	return nil
   429  }
   430  
   431  // linkLog creates a symlink for finding logPath based on its git
   432  // revision and builder.
   433  func linkLog(revDir string, revDirDepth int, builder, logPath string) error {
   434  	// Create symlink.
   435  	err := os.Symlink(strings.Repeat("../", revDirDepth)+logPath, filepath.Join(revDir, builder))
   436  	if err != nil && !os.IsExist(err) {
   437  		return err
   438  	}
   439  
   440  	return nil
   441  }
   442  
   443  // parseRevDate parses a revision date in RFC3339.
   444  func parseRevDate(date string) (time.Time, error) {
   445  	return time.Parse(time.RFC3339, date)
   446  }
   447  
   448  // revToDir returns the path of the revision directory for revision.
   449  func revToDir(revision string, date time.Time, goRev string, goDate time.Time) (dir string, depth int) {
   450  	if goDate.After(date) {
   451  		date = goDate
   452  	}
   453  	dateStr := date.Format("2006-01-02T15:04:05")
   454  
   455  	parts := []string{dateStr, revision[:7]}
   456  	if goRev != "" {
   457  		parts = append(parts, goRev[:7])
   458  	}
   459  
   460  	return filepath.Join("rev", strings.Join(parts, "-")), 2
   461  }