golang.org/x/build@v0.0.0-20240506185731-218518f32b70/maintner/maintnerd/maintnerd.go (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // The maintnerd command serves project maintainer data from Git,
     6  // Github, and/or Gerrit.
     7  package main
     8  
     9  import (
    10  	"context"
    11  	"flag"
    12  	"fmt"
    13  	"io"
    14  	"log"
    15  	"net/http"
    16  	"os"
    17  	"path/filepath"
    18  	"runtime"
    19  	"sort"
    20  	"strings"
    21  	"time"
    22  
    23  	"cloud.google.com/go/compute/metadata"
    24  	"golang.org/x/build/internal/gitauth"
    25  	"golang.org/x/build/internal/https"
    26  	"golang.org/x/build/internal/secret"
    27  	"golang.org/x/build/maintner"
    28  	"golang.org/x/build/maintner/godata"
    29  	"golang.org/x/build/maintner/maintnerd/apipb"
    30  	"golang.org/x/build/maintner/maintnerd/gcslog"
    31  	"golang.org/x/build/maintner/maintnerd/maintapi"
    32  	"golang.org/x/build/repos"
    33  	"golang.org/x/crypto/acme/autocert"
    34  	"golang.org/x/time/rate"
    35  	"google.golang.org/grpc"
    36  )
    37  
// Command-line flags. main reads them after flag.Parse; setGoConfig and
// setGodataConfig overwrite some of them when a pre-defined --config is used.
var (
	// General behavior: what to watch, where to store data, and whether to
	// run as a server or exit after a one-shot operation.
	syncQuit        = flag.Bool("sync-and-quit", false, "sync once and quit; don't run a server")
	initQuit        = flag.Bool("init-and-quit", false, "load the mutation log and quit; don't run a server")
	verbose         = flag.Bool("verbose", false, "enable verbose debug output")
	genMut          = flag.Bool("generate-mutations", true, "whether this instance should read from upstream git/gerrit/github and generate new mutations to the end of the log. This requires network access and only one instance can be generating mutation")
	watchGithub     = flag.String("watch-github", "", "Comma-separated list of owner/repo pairs to slurp")
	watchGerrit     = flag.String("watch-gerrit", "", `Comma-separated list of Gerrit projects to watch, each of form "hostname/project" (e.g. "go.googlesource.com/go")`)
	pubsub          = flag.String("pubsub", "", "If non-empty, the golang.org/x/build/cmd/pubsubhelper URL scheme and hostname, without path")
	config          = flag.String("config", "", "If non-empty, the name of a pre-defined config. Valid options are 'go' to be the primary Go server; 'godata' to run the server locally using the godata package, and 'devgo' to act like 'go', but mirror from godata at start-up.")
	dataDir         = flag.String("data-dir", "", "Local directory to write protobuf files to (default $HOME/var/maintnerd)")
	debug           = flag.Bool("debug", false, "Print debug logging information")
	githubRateLimit = flag.Int("github-rate", 10, "Rate to limit GitHub requests (in queries per second, 0 is treated as unlimited)")

	// Google Cloud Storage log storage. When bucket is empty, main uses a
	// disk-based mutation logger rooted at dataDir instead.
	bucket         = flag.String("bucket", "", "if non-empty, Google Cloud Storage bucket to use for log storage. If the bucket name contains a \"/\", the part after the slash will be a prefix for the segments.")
	migrateGCSFlag = flag.Bool("migrate-disk-to-gcs", false, "[dev] If true, migrate from disk-based logs to GCS logs on start-up, then quit.")
)
    54  
    55  func init() {
    56  	flag.Usage = func() {
    57  		os.Stderr.WriteString(`Maintner mirrors, searches, syncs, and serves data from Gerrit, Github, and Git repos.
    58  
    59  Maintner gathers data about projects that you want to watch and holds it all in
    60  memory. This way it's easy and fast to search, and you don't have to worry about
    61  retrieving that data from remote APIs.
    62  
    63  Maintner is short for "maintainer."
    64  
    65  `)
    66  		flag.PrintDefaults()
    67  	}
    68  }
    69  
// autocertManager is a package-level *autocert.Manager.
// NOTE(review): nothing in the visible portion of this file assigns or reads
// it; it may be a leftover from an earlier TLS setup — confirm against the
// rest of the package before removing.
var autocertManager *autocert.Manager
    71  
    72  func main() {
    73  	https.RegisterFlags(flag.CommandLine)
    74  	flag.Parse()
    75  	ctx := context.Background()
    76  
    77  	if *dataDir == "" {
    78  		*dataDir = filepath.Join(os.Getenv("HOME"), "var", "maintnerd")
    79  		if *bucket == "" {
    80  			if err := os.MkdirAll(*dataDir, 0755); err != nil {
    81  				log.Fatal(err)
    82  			}
    83  			log.Printf("Storing data in implicit directory %s", *dataDir)
    84  		}
    85  	}
    86  	if *migrateGCSFlag && *bucket == "" {
    87  		log.Fatalf("--bucket flag required with --migrate-disk-to-gcs")
    88  	}
    89  
    90  	type storage interface {
    91  		maintner.MutationSource
    92  		maintner.MutationLogger
    93  	}
    94  	var logger storage
    95  
    96  	corpus := new(maintner.Corpus)
    97  	switch *config {
    98  	case "":
    99  		// Nothing
   100  	case "devgo":
   101  		dir := godata.Dir()
   102  		if err := os.MkdirAll(dir, 0700); err != nil {
   103  			log.Fatal(err)
   104  		}
   105  		log.Printf("Syncing from https://maintner.golang.org/logs to %s", dir)
   106  		mutSrc := maintner.NewNetworkMutationSource("https://maintner.golang.org/logs", dir)
   107  		for evt := range mutSrc.GetMutations(ctx) {
   108  			if evt.Err != nil {
   109  				log.Fatal(evt.Err)
   110  			}
   111  			if evt.End {
   112  				break
   113  			}
   114  		}
   115  		syncProdToDevMutationLogs()
   116  		log.Printf("Synced from https://maintner.golang.org/logs.")
   117  		setGoConfig()
   118  	case "go":
   119  		if err := gitauth.Init(); err != nil {
   120  			log.Fatalf("gitauth: %v", err)
   121  		}
   122  		setGoConfig()
   123  	case "godata":
   124  		setGodataConfig()
   125  		var err error
   126  		log.Printf("Using godata corpus...")
   127  		corpus, err = godata.Get(ctx)
   128  		if err != nil {
   129  			log.Fatal(err)
   130  		}
   131  	default:
   132  		log.Fatalf("unknown --config=%s", *config)
   133  	}
   134  	if *genMut {
   135  		if *bucket != "" {
   136  			ctx := context.Background()
   137  			gl, err := gcslog.NewGCSLog(ctx, *bucket)
   138  			if err != nil {
   139  				log.Fatalf("newGCSLog: %v", err)
   140  			}
   141  			gl.SetDebug(*debug)
   142  			gl.RegisterHandlers(http.DefaultServeMux)
   143  			if *migrateGCSFlag {
   144  				diskLog := maintner.NewDiskMutationLogger(*dataDir)
   145  				if err := gl.CopyFrom(diskLog); err != nil {
   146  					log.Fatalf("migrate: %v", err)
   147  				}
   148  				log.Printf("Success.")
   149  				return
   150  			}
   151  			logger = gl
   152  		} else {
   153  			logger = maintner.NewDiskMutationLogger(*dataDir)
   154  		}
   155  		corpus.EnableLeaderMode(logger, *dataDir)
   156  	}
   157  	if *debug {
   158  		corpus.SetDebug()
   159  	}
   160  	corpus.SetVerbose(*verbose)
   161  
   162  	if *watchGithub != "" {
   163  		if *githubRateLimit > 0 {
   164  			limit := rate.Every(time.Second / time.Duration(*githubRateLimit))
   165  			corpus.SetGitHubLimiter(rate.NewLimiter(limit, *githubRateLimit))
   166  		}
   167  		for _, pair := range strings.Split(*watchGithub, ",") {
   168  			splits := strings.SplitN(pair, "/", 2)
   169  			if len(splits) != 2 || splits[1] == "" {
   170  				log.Fatalf("Invalid github repo: %s. Should be 'owner/repo,owner2/repo2'", pair)
   171  			}
   172  			token, err := getGithubToken(ctx)
   173  			if err != nil {
   174  				log.Fatalf("getting github token: %v", err)
   175  			}
   176  			corpus.TrackGitHub(splits[0], splits[1], token)
   177  		}
   178  	}
   179  	if *watchGerrit != "" {
   180  		for _, project := range strings.Split(*watchGerrit, ",") {
   181  			// token may be empty, that's OK.
   182  			corpus.TrackGerrit(project)
   183  		}
   184  	}
   185  
   186  	ctx, cancel := context.WithCancel(context.Background())
   187  	defer cancel()
   188  	t0 := time.Now()
   189  
   190  	if logger != nil {
   191  		if err := corpus.Initialize(ctx, logger); err != nil {
   192  			// TODO: if Initialize only partially syncs the data, we need to delete
   193  			// whatever files it created, since Github returns events newest first
   194  			// and we use the issue updated dates to check whether we need to keep
   195  			// syncing.
   196  			log.Fatal(err)
   197  		}
   198  		initDur := time.Since(t0)
   199  
   200  		runtime.GC()
   201  		var ms runtime.MemStats
   202  		runtime.ReadMemStats(&ms)
   203  		log.Printf("Loaded data in %v. Memory: %v MB (%v bytes)", initDur, ms.HeapAlloc>>20, ms.HeapAlloc)
   204  	}
   205  	if *initQuit {
   206  		return
   207  	}
   208  
   209  	if *syncQuit {
   210  		if err := corpus.Sync(ctx); err != nil {
   211  			log.Fatalf("corpus.Sync = %v", err)
   212  		}
   213  		if err := corpus.Check(); err != nil {
   214  			log.Fatalf("post-Sync Corpus.Check = %v", err)
   215  		}
   216  		return
   217  	}
   218  
   219  	if *pubsub != "" {
   220  		corpus.StartPubSubHelperSubscribe(*pubsub)
   221  	}
   222  
   223  	grpcServer := grpc.NewServer()
   224  	apipb.RegisterMaintnerServiceServer(grpcServer, maintapi.NewAPIService(corpus))
   225  	http.Handle("/apipb.MaintnerService/", grpcServer)
   226  
   227  	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
   228  		if strings.HasPrefix(r.Header.Get("Content-Type"), "application/grpc") {
   229  			grpcServer.ServeHTTP(w, r)
   230  			return
   231  		}
   232  		if r.URL.Path != "/" {
   233  			http.NotFound(w, r)
   234  			return
   235  		}
   236  		io.WriteString(w, `<html>
   237  <body>
   238  <p>
   239    This is <a href='https://godoc.org/golang.org/x/build/maintner/maintnerd'>maintnerd</a>,
   240    the <a href='https://godoc.org/golang.org/x/build/maintner'>maintner</a> server.
   241    See the <a href='https://godoc.org/golang.org/x/build/maintner/godata'>godata package</a> for
   242    a client.
   243  </p>
   244  <ul>
   245     <li><a href='/logs'>/logs</a>
   246  </ul>
   247  </body></html>
   248  `)
   249  	})
   250  
   251  	if *genMut {
   252  		go func() { log.Fatalf("Corpus.SyncLoop = %v", corpus.SyncLoop(ctx)) }()
   253  	}
   254  	log.Fatalln(https.ListenAndServe(ctx, http.DefaultServeMux))
   255  }
   256  
   257  func setGoConfig() {
   258  	if *watchGithub != "" {
   259  		log.Fatalf("can't set both --config and --watch-github")
   260  	}
   261  	if *watchGerrit != "" {
   262  		log.Fatalf("can't set both --config and --watch-gerrit")
   263  	}
   264  	*pubsub = "https://pubsubhelper.golang.org"
   265  	*watchGithub = strings.Join(goGitHubProjects(), ",")
   266  	*watchGerrit = strings.Join(goGerritProjects(), ",")
   267  }
   268  
   269  // goGitHubProjects returns the GitHub repos to track in --config=go.
   270  // The strings are of form "<org-or-user>/<repo>".
   271  func goGitHubProjects() []string {
   272  	var ret []string
   273  	for _, r := range repos.ByGerritProject {
   274  		if gr := r.GitHubRepo; gr != "" {
   275  			ret = append(ret, gr)
   276  		}
   277  	}
   278  	sort.Strings(ret)
   279  	return ret
   280  }
   281  
   282  // goGerritProjects returns the Gerrit projects to track in --config=go.
   283  // The strings are of the form "<hostname>/<proj>".
   284  func goGerritProjects() []string {
   285  	var ret []string
   286  	// TODO: add these to the repos package at some point? Or
   287  	// maybe just stop maintaining them in maintner if nothing's
   288  	// using them? I think the only thing that uses them is the
   289  	// stats tooling, to see where gophers are working. That's
   290  	// probably enough reason to keep them in. So just keep hard-coding
   291  	// them here for now.
   292  	ret = append(ret,
   293  		"code.googlesource.com/gocloud",
   294  		"code.googlesource.com/google-api-go-client",
   295  	)
   296  	for p := range repos.ByGerritProject {
   297  		ret = append(ret, "go.googlesource.com/"+p)
   298  	}
   299  	sort.Strings(ret)
   300  	return ret
   301  }
   302  
   303  func setGodataConfig() {
   304  	if *watchGithub != "" {
   305  		log.Fatalf("can't set both --config and --watch-github")
   306  	}
   307  	if *watchGerrit != "" {
   308  		log.Fatalf("can't set both --config and --watch-gerrit")
   309  	}
   310  	*genMut = false
   311  }
   312  
   313  func getGithubToken(ctx context.Context) (string, error) {
   314  	if metadata.OnGCE() {
   315  		sc := secret.MustNewClient()
   316  
   317  		ctxSc, cancel := context.WithTimeout(ctx, 10*time.Second)
   318  		defer cancel()
   319  
   320  		token, err := sc.Retrieve(ctxSc, secret.NameMaintnerGitHubToken)
   321  		if err == nil {
   322  			return token, nil
   323  		}
   324  		log.Printf("unable to retrieve secret manager %q: %v", secret.NameMaintnerGitHubToken, err)
   325  		log.Printf("falling back to github token from file.")
   326  	}
   327  
   328  	tokenFile := filepath.Join(os.Getenv("HOME"), ".github-issue-token")
   329  	slurp, err := os.ReadFile(tokenFile)
   330  	if err != nil {
   331  		return "", err
   332  	}
   333  	f := strings.SplitN(strings.TrimSpace(string(slurp)), ":", 2)
   334  	if len(f) != 2 || f[0] == "" || f[1] == "" {
   335  		return "", fmt.Errorf("Expected token file %s to be of form <username>:<token>", tokenFile)
   336  	}
   337  	token := f[1]
   338  	return token, nil
   339  }
   340  
   341  func syncProdToDevMutationLogs() {
   342  	src := godata.Dir()
   343  	dst := *dataDir
   344  
   345  	want := map[string]int64{} // basename => size
   346  
   347  	srcDEs, err := os.ReadDir(src)
   348  	if err != nil {
   349  		log.Fatal(err)
   350  	}
   351  	dstDEs, err := os.ReadDir(dst)
   352  	if err != nil {
   353  		log.Fatal(err)
   354  	}
   355  
   356  	for _, de := range srcDEs {
   357  		name := de.Name()
   358  		if !strings.HasSuffix(name, ".mutlog") {
   359  			continue
   360  		}
   361  		fi, err := de.Info()
   362  		if err != nil {
   363  			log.Fatal(err)
   364  		}
   365  		// The DiskMutationLogger (as we'l use in the dst dir)
   366  		// prepends "maintner-".  So prepend that here ahead
   367  		// of time, even though the network mutation source's
   368  		// cache doesn't.
   369  		want["maintner-"+name] = fi.Size()
   370  	}
   371  
   372  	for _, de := range dstDEs {
   373  		name := de.Name()
   374  		if !strings.HasSuffix(name, ".mutlog") {
   375  			continue
   376  		}
   377  		fi, err := de.Info()
   378  		if err != nil {
   379  			log.Fatal(err)
   380  		}
   381  		if want[name] == fi.Size() {
   382  			delete(want, name)
   383  			continue
   384  		}
   385  		log.Printf("dst file %q unwanted", name)
   386  		if err := os.Remove(filepath.Join(dst, name)); err != nil {
   387  			log.Fatal(err)
   388  		}
   389  	}
   390  
   391  	for name := range want {
   392  		log.Printf("syncing %s from %s to %s", name, src, dst)
   393  		slurp, err := os.ReadFile(filepath.Join(src, strings.TrimPrefix(name, "maintner-")))
   394  		if err != nil {
   395  			log.Fatal(err)
   396  		}
   397  		if err := os.WriteFile(filepath.Join(dst, name), slurp, 0644); err != nil {
   398  			log.Fatal(err)
   399  		}
   400  	}
   401  }