github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/tools/syz-lore/query_lkml.go (about)

     1  // Copyright 2023 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package main
     5  
import (
	"context"
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/google/syzkaller/dashboard/dashapi"
	"github.com/google/syzkaller/pkg/email/lore"
	"github.com/google/syzkaller/pkg/hash"
	"github.com/google/syzkaller/pkg/osutil"
	"github.com/google/syzkaller/pkg/tool"
	"github.com/google/syzkaller/pkg/vcs"
	"golang.org/x/sync/errgroup"
)
    26  
    27  // The syz-lore tool can parse Lore archives and extract syzbot-related conversations from there.
    28  
    29  var (
    30  	flagEmails    = flag.String("emails", "", "comma-separated list of own emails")
    31  	flagDomains   = flag.String("domains", "", "comma-separated list of own domains")
    32  	flagOutDir    = flag.String("out_dir", "", "a directory to save discussions as JSON files")
    33  	flagDashboard = flag.String("dashboard", "", "dashboard address")
    34  	flagAPIClient = flag.String("client", "", "the name of the API client")
    35  	flagAPIKey    = flag.String("key", "", "api key")
    36  	flagVerbose   = flag.Bool("v", false, "print more debug info")
    37  )
    38  
    39  func main() {
    40  	defer tool.Init()()
    41  	if len(flag.Args()) == 0 {
    42  		tool.Failf("format: syz-lore [flags] dir1 [dir2 ...]")
    43  	}
    44  	emails := strings.Split(*flagEmails, ",")
    45  	domains := strings.Split(*flagDomains, ",")
    46  	threads := processArchives(flag.Args(), emails, domains)
    47  	for i, thread := range threads {
    48  		messages := []dashapi.DiscussionMessage{}
    49  		for _, m := range thread.Messages {
    50  			messages = append(messages, dashapi.DiscussionMessage{
    51  				ID:       m.MessageID,
    52  				External: !m.OwnEmail,
    53  				Time:     m.Date,
    54  				Email:    m.Author,
    55  			})
    56  		}
    57  		discussion := &dashapi.Discussion{
    58  			ID:       thread.MessageID,
    59  			Source:   dashapi.DiscussionLore,
    60  			Type:     thread.Type,
    61  			Subject:  thread.Subject,
    62  			BugIDs:   thread.BugIDs,
    63  			Messages: messages,
    64  		}
    65  		log.Printf("saving %d/%d", i+1, len(threads))
    66  		err := saveDiscussion(discussion)
    67  		if err != nil {
    68  			tool.Fail(err)
    69  		}
    70  	}
    71  }
    72  
    73  var dash *dashapi.Dashboard
    74  
    75  func saveDiscussion(d *dashapi.Discussion) error {
    76  	var err error
    77  	if *flagDashboard != "" && dash == nil {
    78  		dash, err = dashapi.New(*flagAPIClient, *flagDashboard, *flagAPIKey)
    79  		if err != nil {
    80  			tool.Failf("dashapi failed: %v", err)
    81  		}
    82  	}
    83  	if *flagOutDir != "" {
    84  		bytes, err := json.Marshal(d)
    85  		if err != nil {
    86  			return err
    87  		}
    88  		path := filepath.Join(*flagOutDir, hash.String([]byte(d.ID))+".json")
    89  		err = osutil.WriteFile(path, bytes)
    90  		if err != nil {
    91  			return err
    92  		}
    93  	}
    94  	if dash != nil {
    95  		return dash.SaveDiscussion(&dashapi.SaveDiscussionReq{
    96  			Discussion: d,
    97  		})
    98  	}
    99  	return nil
   100  }
   101  
   102  func processArchives(paths, emails, domains []string) []*lore.Thread {
   103  	threads := runtime.NumCPU()
   104  	messages := make(chan lore.EmailReader, threads*2)
   105  	wg := sync.WaitGroup{}
   106  	g, _ := errgroup.WithContext(context.Background())
   107  
   108  	// Generate per-email jobs.
   109  	for _, path := range paths {
   110  		log.Printf("reading %s", path)
   111  		wg.Add(1)
   112  		g.Go(func() error {
   113  			defer wg.Done()
   114  			repo := vcs.NewLKMLRepo(path)
   115  			list, err := lore.ReadArchive(repo, "", time.Time{})
   116  			if err != nil {
   117  				return err
   118  			}
   119  			for _, reader := range list {
   120  				messages <- reader
   121  			}
   122  			return nil
   123  		})
   124  	}
   125  
   126  	// Set up some worker threads.
   127  	var repoEmails []*lore.Email
   128  	var mu sync.Mutex
   129  	var skipped atomic.Int64
   130  	for i := 0; i < threads; i++ {
   131  		g.Go(func() error {
   132  			for rawMsg := range messages {
   133  				msg, err := rawMsg.Parse(emails, domains)
   134  				if err != nil {
   135  					// There are many broken messages in LKML,
   136  					// no sense to print them all each time.
   137  					skipped.Add(1)
   138  					continue
   139  				}
   140  				mu.Lock()
   141  				repoEmails = append(repoEmails, msg)
   142  				mu.Unlock()
   143  			}
   144  			return nil
   145  		})
   146  	}
   147  
   148  	// Once all jobs are generated, close the processing channel.
   149  	wg.Wait()
   150  	close(messages)
   151  	if err := g.Wait(); err != nil {
   152  		tool.Failf("%s", err)
   153  	}
   154  	if cnt := skipped.Load(); cnt > 0 {
   155  		log.Printf("skipped %d messages because of parsing errors", cnt)
   156  	}
   157  
   158  	list := lore.Threads(repoEmails)
   159  	log.Printf("collected %d email threads", len(list))
   160  
   161  	ret := []*lore.Thread{}
   162  	for _, d := range list {
   163  		if d.BugIDs == nil {
   164  			continue
   165  		}
   166  		ret = append(ret, d)
   167  		if *flagVerbose {
   168  			log.Printf("discussion ID=%s BugID=%s Type=%s Subject=%s Messages=%d",
   169  				d.MessageID, d.BugIDs, d.Subject, d.Type, len(d.Messages))
   170  		}
   171  	}
   172  	log.Printf("%d threads are related to syzbot", len(ret))
   173  	return ret
   174  }