github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/tools/syz-lore/query_lkml.go

// Copyright 2023 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package main

import (
	"bytes"
	"context"
	"encoding/json"
	"flag"
	"log"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"sync"

	"github.com/google/syzkaller/dashboard/dashapi"
	"github.com/google/syzkaller/pkg/email"
	"github.com/google/syzkaller/pkg/email/lore"
	"github.com/google/syzkaller/pkg/hash"
	"github.com/google/syzkaller/pkg/osutil"
	"github.com/google/syzkaller/pkg/tool"
	"golang.org/x/sync/errgroup"
)

// The syz-lore tool can parse Lore archives and extract syzbot-related conversations from there.
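//
// An example invocation (the paths and values below are illustrative placeholders,
// not taken from a real deployment):
//
//	syz-lore -archives /path/to/lore/git-archives -emails bot@example.com \
//		-domains example.com -out_dir /tmp/discussions -v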

var (
	flagArchives  = flag.String("archives", "", "path to the folder with git archives")
	flagEmails    = flag.String("emails", "", "comma-separated list of own emails")
	flagDomains   = flag.String("domains", "", "comma-separated list of own domains")
	flagOutDir    = flag.String("out_dir", "", "a directory to save discussions as JSON files")
	flagDashboard = flag.String("dashboard", "", "dashboard address")
	flagAPIClient = flag.String("client", "", "the name of the API client")
	flagAPIKey    = flag.String("key", "", "api key")
	flagVerbose   = flag.Bool("v", false, "print more debug info")
)

func main() {
	defer tool.Init()()
	if !osutil.IsDir(*flagArchives) {
		tool.Failf("the archives parameter must be a valid directory")
	}
	emails := strings.Split(*flagEmails, ",")
	domains := strings.Split(*flagDomains, ",")
	threads := processArchives(*flagArchives, emails, domains)
	for i, thread := range threads {
		messages := []dashapi.DiscussionMessage{}
		for _, m := range thread.Messages {
			messages = append(messages, dashapi.DiscussionMessage{
				ID:       m.MessageID,
				External: !m.OwnEmail,
				Time:     m.Date,
				Email:    m.Author,
			})
		}
		discussion := &dashapi.Discussion{
			ID:       thread.MessageID,
			Source:   dashapi.DiscussionLore,
			Type:     thread.Type,
			Subject:  thread.Subject,
			BugIDs:   thread.BugIDs,
			Messages: messages,
		}
		log.Printf("saving %d/%d", i+1, len(threads))
		err := saveDiscussion(discussion)
		if err != nil {
			tool.Fail(err)
		}
	}
}

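// dash is initialized lazily on the first saveDiscussion() call when -dashboard is set.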
var dash *dashapi.Dashboard

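// saveDiscussion persists a single discussion: if -out_dir is set, it is written there as
// a JSON file named after the hash of the discussion ID; if -dashboard is set, it is also
// uploaded via the dashboard API.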
func saveDiscussion(d *dashapi.Discussion) error {
	var err error
	if *flagDashboard != "" && dash == nil {
		dash, err = dashapi.New(*flagAPIClient, *flagDashboard, *flagAPIKey)
		if err != nil {
			tool.Failf("dashapi failed: %v", err)
		}
	}
	if *flagOutDir != "" {
		data, err := json.Marshal(d)
		if err != nil {
			return err
		}
		path := filepath.Join(*flagOutDir, hash.String([]byte(d.ID))+".json")
		err = osutil.WriteFile(path, data)
		if err != nil {
			return err
		}
	}
	if dash != nil {
		return dash.SaveDiscussion(&dashapi.SaveDiscussionReq{
			Discussion: d,
		})
	}
	return nil
}

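// processArchives reads all git archives under dir in parallel, parses the individual
// messages and groups them into threads. Only threads with non-nil BugIDs (i.e. related
// to syzbot) are returned.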
func processArchives(dir string, emails, domains []string) []*lore.Thread {
	entries, err := os.ReadDir(dir)
	if err != nil {
		tool.Failf("failed to read directory: %v", err)
	}
	threads := runtime.NumCPU()
	messages := make(chan *lore.EmailReader, threads*2)
	wg := sync.WaitGroup{}
	g, _ := errgroup.WithContext(context.Background())

	// Generate per-email jobs.
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		path := filepath.Join(dir, entry.Name())
		log.Printf("reading %s", path)
		wg.Add(1)
		g.Go(func() error {
			defer wg.Done()
			return lore.ReadArchive(path, messages)
		})
	}

	// Set up some worker threads.
	var repoEmails []*email.Email
	var mu sync.Mutex
	for i := 0; i < threads; i++ {
		g.Go(func() error {
			for rawMsg := range messages {
				body, err := rawMsg.Extract()
				if err != nil {
					continue
				}
				msg, err := email.Parse(bytes.NewReader(body), emails, nil, domains)
				if err != nil {
					continue
				}
				// Keep memory consumption low.
				msg.Body = ""
				msg.Patch = ""

				mu.Lock()
				repoEmails = append(repoEmails, msg)
				mu.Unlock()
			}
			return nil
		})
	}

	// Once all jobs are generated, close the processing channel.
	wg.Wait()
	close(messages)
	if err := g.Wait(); err != nil {
		tool.Failf("%s", err)
	}

	list := lore.Threads(repoEmails)
	log.Printf("collected %d email threads", len(list))

	ret := []*lore.Thread{}
	for _, d := range list {
		if d.BugIDs == nil {
			continue
		}
		ret = append(ret, d)
		if *flagVerbose {
			log.Printf("discussion ID=%s BugIDs=%s Type=%s Subject=%s Messages=%d",
				d.MessageID, d.BugIDs, d.Type, d.Subject, len(d.Messages))
		}
	}
	log.Printf("%d threads are related to syzbot", len(ret))
	return ret
}