golang.org/x/build@v0.0.0-20240506185731-218518f32b70/maintner/maintnerd/maintnerd.go (about) 1 // Copyright 2017 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // The maintnerd command serves project maintainer data from Git, 6 // Github, and/or Gerrit. 7 package main 8 9 import ( 10 "context" 11 "flag" 12 "fmt" 13 "io" 14 "log" 15 "net/http" 16 "os" 17 "path/filepath" 18 "runtime" 19 "sort" 20 "strings" 21 "time" 22 23 "cloud.google.com/go/compute/metadata" 24 "golang.org/x/build/internal/gitauth" 25 "golang.org/x/build/internal/https" 26 "golang.org/x/build/internal/secret" 27 "golang.org/x/build/maintner" 28 "golang.org/x/build/maintner/godata" 29 "golang.org/x/build/maintner/maintnerd/apipb" 30 "golang.org/x/build/maintner/maintnerd/gcslog" 31 "golang.org/x/build/maintner/maintnerd/maintapi" 32 "golang.org/x/build/repos" 33 "golang.org/x/crypto/acme/autocert" 34 "golang.org/x/time/rate" 35 "google.golang.org/grpc" 36 ) 37 38 var ( 39 syncQuit = flag.Bool("sync-and-quit", false, "sync once and quit; don't run a server") 40 initQuit = flag.Bool("init-and-quit", false, "load the mutation log and quit; don't run a server") 41 verbose = flag.Bool("verbose", false, "enable verbose debug output") 42 genMut = flag.Bool("generate-mutations", true, "whether this instance should read from upstream git/gerrit/github and generate new mutations to the end of the log. This requires network access and only one instance can be generating mutation") 43 watchGithub = flag.String("watch-github", "", "Comma-separated list of owner/repo pairs to slurp") 44 watchGerrit = flag.String("watch-gerrit", "", `Comma-separated list of Gerrit projects to watch, each of form "hostname/project" (e.g. "go.googlesource.com/go")`) 45 pubsub = flag.String("pubsub", "", "If non-empty, the golang.org/x/build/cmd/pubsubhelper URL scheme and hostname, without path") 46 config = flag.String("config", "", "If non-empty, the name of a pre-defined config. Valid options are 'go' to be the primary Go server; 'godata' to run the server locally using the godata package, and 'devgo' to act like 'go', but mirror from godata at start-up.") 47 dataDir = flag.String("data-dir", "", "Local directory to write protobuf files to (default $HOME/var/maintnerd)") 48 debug = flag.Bool("debug", false, "Print debug logging information") 49 githubRateLimit = flag.Int("github-rate", 10, "Rate to limit GitHub requests (in queries per second, 0 is treated as unlimited)") 50 51 bucket = flag.String("bucket", "", "if non-empty, Google Cloud Storage bucket to use for log storage. If the bucket name contains a \"/\", the part after the slash will be a prefix for the segments.") 52 migrateGCSFlag = flag.Bool("migrate-disk-to-gcs", false, "[dev] If true, migrate from disk-based logs to GCS logs on start-up, then quit.") 53 ) 54 55 func init() { 56 flag.Usage = func() { 57 os.Stderr.WriteString(`Maintner mirrors, searches, syncs, and serves data from Gerrit, Github, and Git repos. 58 59 Maintner gathers data about projects that you want to watch and holds it all in 60 memory. This way it's easy and fast to search, and you don't have to worry about 61 retrieving that data from remote APIs. 62 63 Maintner is short for "maintainer." 64 65 `) 66 flag.PrintDefaults() 67 } 68 } 69 70 var autocertManager *autocert.Manager 71 72 func main() { 73 https.RegisterFlags(flag.CommandLine) 74 flag.Parse() 75 ctx := context.Background() 76 77 if *dataDir == "" { 78 *dataDir = filepath.Join(os.Getenv("HOME"), "var", "maintnerd") 79 if *bucket == "" { 80 if err := os.MkdirAll(*dataDir, 0755); err != nil { 81 log.Fatal(err) 82 } 83 log.Printf("Storing data in implicit directory %s", *dataDir) 84 } 85 } 86 if *migrateGCSFlag && *bucket == "" { 87 log.Fatalf("--bucket flag required with --migrate-disk-to-gcs") 88 } 89 90 type storage interface { 91 maintner.MutationSource 92 maintner.MutationLogger 93 } 94 var logger storage 95 96 corpus := new(maintner.Corpus) 97 switch *config { 98 case "": 99 // Nothing 100 case "devgo": 101 dir := godata.Dir() 102 if err := os.MkdirAll(dir, 0700); err != nil { 103 log.Fatal(err) 104 } 105 log.Printf("Syncing from https://maintner.golang.org/logs to %s", dir) 106 mutSrc := maintner.NewNetworkMutationSource("https://maintner.golang.org/logs", dir) 107 for evt := range mutSrc.GetMutations(ctx) { 108 if evt.Err != nil { 109 log.Fatal(evt.Err) 110 } 111 if evt.End { 112 break 113 } 114 } 115 syncProdToDevMutationLogs() 116 log.Printf("Synced from https://maintner.golang.org/logs.") 117 setGoConfig() 118 case "go": 119 if err := gitauth.Init(); err != nil { 120 log.Fatalf("gitauth: %v", err) 121 } 122 setGoConfig() 123 case "godata": 124 setGodataConfig() 125 var err error 126 log.Printf("Using godata corpus...") 127 corpus, err = godata.Get(ctx) 128 if err != nil { 129 log.Fatal(err) 130 } 131 default: 132 log.Fatalf("unknown --config=%s", *config) 133 } 134 if *genMut { 135 if *bucket != "" { 136 ctx := context.Background() 137 gl, err := gcslog.NewGCSLog(ctx, *bucket) 138 if err != nil { 139 log.Fatalf("newGCSLog: %v", err) 140 } 141 gl.SetDebug(*debug) 142 gl.RegisterHandlers(http.DefaultServeMux) 143 if *migrateGCSFlag { 144 diskLog := maintner.NewDiskMutationLogger(*dataDir) 145 if err := gl.CopyFrom(diskLog); err != nil { 146 log.Fatalf("migrate: %v", err) 147 } 148 log.Printf("Success.") 149 return 150 } 151 logger = gl 152 } else { 153 logger = maintner.NewDiskMutationLogger(*dataDir) 154 } 155 corpus.EnableLeaderMode(logger, *dataDir) 156 } 157 if *debug { 158 corpus.SetDebug() 159 } 160 corpus.SetVerbose(*verbose) 161 162 if *watchGithub != "" { 163 if *githubRateLimit > 0 { 164 limit := rate.Every(time.Second / time.Duration(*githubRateLimit)) 165 corpus.SetGitHubLimiter(rate.NewLimiter(limit, *githubRateLimit)) 166 } 167 for _, pair := range strings.Split(*watchGithub, ",") { 168 splits := strings.SplitN(pair, "/", 2) 169 if len(splits) != 2 || splits[1] == "" { 170 log.Fatalf("Invalid github repo: %s. Should be 'owner/repo,owner2/repo2'", pair) 171 } 172 token, err := getGithubToken(ctx) 173 if err != nil { 174 log.Fatalf("getting github token: %v", err) 175 } 176 corpus.TrackGitHub(splits[0], splits[1], token) 177 } 178 } 179 if *watchGerrit != "" { 180 for _, project := range strings.Split(*watchGerrit, ",") { 181 // token may be empty, that's OK. 182 corpus.TrackGerrit(project) 183 } 184 } 185 186 ctx, cancel := context.WithCancel(context.Background()) 187 defer cancel() 188 t0 := time.Now() 189 190 if logger != nil { 191 if err := corpus.Initialize(ctx, logger); err != nil { 192 // TODO: if Initialize only partially syncs the data, we need to delete 193 // whatever files it created, since Github returns events newest first 194 // and we use the issue updated dates to check whether we need to keep 195 // syncing. 196 log.Fatal(err) 197 } 198 initDur := time.Since(t0) 199 200 runtime.GC() 201 var ms runtime.MemStats 202 runtime.ReadMemStats(&ms) 203 log.Printf("Loaded data in %v. Memory: %v MB (%v bytes)", initDur, ms.HeapAlloc>>20, ms.HeapAlloc) 204 } 205 if *initQuit { 206 return 207 } 208 209 if *syncQuit { 210 if err := corpus.Sync(ctx); err != nil { 211 log.Fatalf("corpus.Sync = %v", err) 212 } 213 if err := corpus.Check(); err != nil { 214 log.Fatalf("post-Sync Corpus.Check = %v", err) 215 } 216 return 217 } 218 219 if *pubsub != "" { 220 corpus.StartPubSubHelperSubscribe(*pubsub) 221 } 222 223 grpcServer := grpc.NewServer() 224 apipb.RegisterMaintnerServiceServer(grpcServer, maintapi.NewAPIService(corpus)) 225 http.Handle("/apipb.MaintnerService/", grpcServer) 226 227 http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { 228 if strings.HasPrefix(r.Header.Get("Content-Type"), "application/grpc") { 229 grpcServer.ServeHTTP(w, r) 230 return 231 } 232 if r.URL.Path != "/" { 233 http.NotFound(w, r) 234 return 235 } 236 io.WriteString(w, `<html> 237 <body> 238 <p> 239 This is <a href='https://godoc.org/golang.org/x/build/maintner/maintnerd'>maintnerd</a>, 240 the <a href='https://godoc.org/golang.org/x/build/maintner'>maintner</a> server. 241 See the <a href='https://godoc.org/golang.org/x/build/maintner/godata'>godata package</a> for 242 a client. 243 </p> 244 <ul> 245 <li><a href='/logs'>/logs</a> 246 </ul> 247 </body></html> 248 `) 249 }) 250 251 if *genMut { 252 go func() { log.Fatalf("Corpus.SyncLoop = %v", corpus.SyncLoop(ctx)) }() 253 } 254 log.Fatalln(https.ListenAndServe(ctx, http.DefaultServeMux)) 255 } 256 257 func setGoConfig() { 258 if *watchGithub != "" { 259 log.Fatalf("can't set both --config and --watch-github") 260 } 261 if *watchGerrit != "" { 262 log.Fatalf("can't set both --config and --watch-gerrit") 263 } 264 *pubsub = "https://pubsubhelper.golang.org" 265 *watchGithub = strings.Join(goGitHubProjects(), ",") 266 *watchGerrit = strings.Join(goGerritProjects(), ",") 267 } 268 269 // goGitHubProjects returns the GitHub repos to track in --config=go. 270 // The strings are of form "<org-or-user>/<repo>". 271 func goGitHubProjects() []string { 272 var ret []string 273 for _, r := range repos.ByGerritProject { 274 if gr := r.GitHubRepo; gr != "" { 275 ret = append(ret, gr) 276 } 277 } 278 sort.Strings(ret) 279 return ret 280 } 281 282 // goGerritProjects returns the Gerrit projects to track in --config=go. 283 // The strings are of the form "<hostname>/<proj>". 284 func goGerritProjects() []string { 285 var ret []string 286 // TODO: add these to the repos package at some point? Or 287 // maybe just stop maintaining them in maintner if nothing's 288 // using them? I think the only thing that uses them is the 289 // stats tooling, to see where gophers are working. That's 290 // probably enough reason to keep them in. So just keep hard-coding 291 // them here for now. 292 ret = append(ret, 293 "code.googlesource.com/gocloud", 294 "code.googlesource.com/google-api-go-client", 295 ) 296 for p := range repos.ByGerritProject { 297 ret = append(ret, "go.googlesource.com/"+p) 298 } 299 sort.Strings(ret) 300 return ret 301 } 302 303 func setGodataConfig() { 304 if *watchGithub != "" { 305 log.Fatalf("can't set both --config and --watch-github") 306 } 307 if *watchGerrit != "" { 308 log.Fatalf("can't set both --config and --watch-gerrit") 309 } 310 *genMut = false 311 } 312 313 func getGithubToken(ctx context.Context) (string, error) { 314 if metadata.OnGCE() { 315 sc := secret.MustNewClient() 316 317 ctxSc, cancel := context.WithTimeout(ctx, 10*time.Second) 318 defer cancel() 319 320 token, err := sc.Retrieve(ctxSc, secret.NameMaintnerGitHubToken) 321 if err == nil { 322 return token, nil 323 } 324 log.Printf("unable to retrieve secret manager %q: %v", secret.NameMaintnerGitHubToken, err) 325 log.Printf("falling back to github token from file.") 326 } 327 328 tokenFile := filepath.Join(os.Getenv("HOME"), ".github-issue-token") 329 slurp, err := os.ReadFile(tokenFile) 330 if err != nil { 331 return "", err 332 } 333 f := strings.SplitN(strings.TrimSpace(string(slurp)), ":", 2) 334 if len(f) != 2 || f[0] == "" || f[1] == "" { 335 return "", fmt.Errorf("Expected token file %s to be of form <username>:<token>", tokenFile) 336 } 337 token := f[1] 338 return token, nil 339 } 340 341 func syncProdToDevMutationLogs() { 342 src := godata.Dir() 343 dst := *dataDir 344 345 want := map[string]int64{} // basename => size 346 347 srcDEs, err := os.ReadDir(src) 348 if err != nil { 349 log.Fatal(err) 350 } 351 dstDEs, err := os.ReadDir(dst) 352 if err != nil { 353 log.Fatal(err) 354 } 355 356 for _, de := range srcDEs { 357 name := de.Name() 358 if !strings.HasSuffix(name, ".mutlog") { 359 continue 360 } 361 fi, err := de.Info() 362 if err != nil { 363 log.Fatal(err) 364 } 365 // The DiskMutationLogger (as we'l use in the dst dir) 366 // prepends "maintner-". So prepend that here ahead 367 // of time, even though the network mutation source's 368 // cache doesn't. 369 want["maintner-"+name] = fi.Size() 370 } 371 372 for _, de := range dstDEs { 373 name := de.Name() 374 if !strings.HasSuffix(name, ".mutlog") { 375 continue 376 } 377 fi, err := de.Info() 378 if err != nil { 379 log.Fatal(err) 380 } 381 if want[name] == fi.Size() { 382 delete(want, name) 383 continue 384 } 385 log.Printf("dst file %q unwanted", name) 386 if err := os.Remove(filepath.Join(dst, name)); err != nil { 387 log.Fatal(err) 388 } 389 } 390 391 for name := range want { 392 log.Printf("syncing %s from %s to %s", name, src, dst) 393 slurp, err := os.ReadFile(filepath.Join(src, strings.TrimPrefix(name, "maintner-"))) 394 if err != nil { 395 log.Fatal(err) 396 } 397 if err := os.WriteFile(filepath.Join(dst, name), slurp, 0644); err != nil { 398 log.Fatal(err) 399 } 400 } 401 }