github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/syz-cluster/email-reporter/stream.go (about)

     1  // Copyright 2025 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package main
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"log"
    10  	"strings"
    11  	"time"
    12  
    13  	"github.com/google/syzkaller/pkg/email/lore"
    14  	"github.com/google/syzkaller/pkg/vcs"
    15  	"github.com/google/syzkaller/syz-cluster/pkg/api"
    16  	"github.com/google/syzkaller/syz-cluster/pkg/app"
    17  )
    18  
    19  type LKMLEmailStream struct {
    20  	cfg            *app.EmailConfig
    21  	ownEmails      []string
    22  	reporterName   string
    23  	repoFolder     string
    24  	client         *api.ReporterClient
    25  	newMessages    chan *lore.Email
    26  	lastCommitDate time.Time
    27  	lastCommit     string
    28  }
    29  
    30  func NewLKMLEmailStream(repoFolder string, client *api.ReporterClient,
    31  	cfg *app.EmailConfig, writeTo chan *lore.Email) *LKMLEmailStream {
    32  	var ownEmails []string
    33  	if cfg.Dashapi != nil {
    34  		ownEmails = append(ownEmails, cfg.Dashapi.From)
    35  	}
    36  	if cfg.SMTP != nil {
    37  		ownEmails = append(ownEmails, cfg.SMTP.From)
    38  	}
    39  	return &LKMLEmailStream{
    40  		cfg:          cfg,
    41  		ownEmails:    ownEmails,
    42  		reporterName: api.LKMLReporter,
    43  		repoFolder:   repoFolder,
    44  		client:       client,
    45  		newMessages:  writeTo,
    46  	}
    47  }
    48  
    49  const (
    50  	// Don't consider older replies.
    51  	relevantPeriod = 7 * 24 * time.Hour
    52  )
    53  
    54  func (s *LKMLEmailStream) Loop(ctx context.Context, pollPeriod time.Duration) error {
    55  	defer log.Printf("lore archive polling aborted")
    56  	log.Printf("lore archive %s polling started", s.cfg.LoreArchiveURL)
    57  
    58  	last, err := s.client.LastReply(ctx, s.reporterName)
    59  	if err != nil {
    60  		return fmt.Errorf("failed to query the last reply: %w", err)
    61  	}
    62  	// We assume that the archive mostly consists of relevant emails, so after the restart
    63  	// we just start with the last saved message's date.
    64  	s.lastCommitDate = time.Now().Add(-relevantPeriod)
    65  	if last != nil && last.Time.After(s.lastCommitDate) {
    66  		s.lastCommitDate = last.Time
    67  	}
    68  	for {
    69  		err := s.fetchMessages(ctx)
    70  		if err != nil {
    71  			// Occasional errors are fine.
    72  			log.Printf("failed to poll the lore archive messages: %v", err)
    73  		}
    74  		select {
    75  		case <-ctx.Done():
    76  			return nil
    77  		case <-time.After(pollPeriod):
    78  		}
    79  	}
    80  }
    81  
    82  func (s *LKMLEmailStream) fetchMessages(ctx context.Context) error {
    83  	gitRepo := vcs.NewLKMLRepo(s.repoFolder)
    84  	_, err := gitRepo.Poll(s.cfg.LoreArchiveURL, "master")
    85  	if err != nil {
    86  		return err
    87  	}
    88  	var messages []lore.EmailReader
    89  	if s.lastCommit != "" {
    90  		// If it's not the first iteration, it's better to rely on the last commit hash.
    91  		messages, err = lore.ReadArchive(gitRepo, s.lastCommit, time.Time{})
    92  	} else {
    93  		messages, err = lore.ReadArchive(gitRepo, "", s.lastCommitDate)
    94  	}
    95  	if err != nil {
    96  		return err
    97  	}
    98  	// From oldest to newest.
    99  	for i := len(messages) - 1; i >= 0; i-- {
   100  		msg := messages[i]
   101  		parsed, err := msg.Parse(s.ownEmails, nil)
   102  		if err != nil || parsed == nil {
   103  			log.Printf("failed to parse the email from hash %q: %v", msg.Hash, err)
   104  			continue
   105  		}
   106  		if msg.CommitDate.After(s.lastCommitDate) {
   107  			s.lastCommitDate = msg.CommitDate
   108  		}
   109  		s.lastCommit = msg.Hash
   110  
   111  		// We cannot fully trust the date specified in the message itself, so let's sanitize it
   112  		// using the commit date. It will at least help us prevent weird client.lastReply() responses.
   113  		messageDate := parsed.Date
   114  		if messageDate.After(msg.CommitDate) {
   115  			messageDate = msg.CommitDate
   116  		}
   117  		resp, err := s.client.RecordReply(ctx, &api.RecordReplyReq{
   118  			MessageID: parsed.MessageID,
   119  			ReportID:  s.extractMessageID(parsed),
   120  			InReplyTo: parsed.InReplyTo,
   121  			Reporter:  s.reporterName,
   122  			Time:      messageDate,
   123  		})
   124  		if err != nil || resp == nil {
   125  			// TODO: retry?
   126  			app.Errorf("failed to report email %q: %v", parsed.MessageID, err)
   127  			continue
   128  		} else if resp.ReportID != "" {
   129  			if !resp.New {
   130  				continue
   131  			}
   132  			parsed.BugIDs = []string{resp.ReportID}
   133  		}
   134  		select {
   135  		case s.newMessages <- parsed:
   136  		case <-ctx.Done():
   137  		}
   138  	}
   139  	return nil
   140  }
   141  
   142  // If the message was sent via the dashapi sender, the report ID wil be a part of the email address.
   143  func (s *LKMLEmailStream) extractMessageID(msg *lore.Email) string {
   144  	if s.cfg.Dashapi == nil {
   145  		// The mode is not configured.
   146  		return ""
   147  	}
   148  	for _, id := range msg.BugIDs {
   149  		if strings.HasPrefix(id, s.cfg.Dashapi.ContextPrefix) {
   150  			return strings.TrimPrefix(id, s.cfg.Dashapi.ContextPrefix)
   151  		}
   152  	}
   153  	return ""
   154  }