github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/email/lore/parse.go (about)

     1  // Copyright 2023 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package lore
     5  
     6  import (
     7  	"fmt"
     8  	"regexp"
     9  	"sort"
    10  	"strconv"
    11  	"strings"
    12  
    13  	"github.com/google/syzkaller/dashboard/dashapi"
    14  	"github.com/google/syzkaller/pkg/email"
    15  )
    16  
// Thread is a generic representation of a single discussion in the mailing list.
//
// Subject, MessageID and Type are taken from the message that started the thread.
// BugIDs is the sorted list of the unique bug IDs of all the thread's messages.
// Messages holds the emails attached to the thread, beginning with the one that
// started it.
type Thread struct {
	Subject   string
	MessageID string
	Type      dashapi.DiscussionType
	BugIDs    []string
	Messages  []*Email
}
    25  
// Series represents a single patch series sent over email.
//
// As filled in by PatchSeries: Subject is the title parsed from the subject of the
// thread's first message (the text after the bracketed "[... PATCH ...]" prefix),
// MessageID is the ID of that thread, Version defaults to 1 when the subject has no
// version, and Tags are the remaining (sorted) words from the bracketed prefix.
// Patches are ordered by Seq unless Corrupted is set, in which case the list may be
// incomplete and unordered.
type Series struct {
	Subject   string
	MessageID string
	Version   int
	Corrupted string // If non-empty, contains a reason why the series better be ignored.
	Tags      []string
	Patches   []Patch
}
    35  
// Patch is a single patch email that belongs to a Series.
//
// Seq is the patch's sequence number parsed from its subject; PatchSeries uses 1
// when the subject carries no "Seq/Total" part.
type Patch struct {
	Seq int
	*Email
}
    40  
    41  // Threads extracts individual threads from a list of emails.
    42  func Threads(emails []*Email) []*Thread {
    43  	return listThreads(emails, 0)
    44  }
    45  
    46  func listThreads(emails []*Email, maxDepth int) []*Thread {
    47  	ctx := &parseCtx{
    48  		maxDepth: maxDepth,
    49  		messages: map[string]*Email{},
    50  		next:     map[*Email][]*Email{},
    51  	}
    52  	for _, email := range emails {
    53  		ctx.record(email)
    54  	}
    55  	ctx.process()
    56  	return ctx.threads
    57  }
    58  
    59  // PatchSeries is similar to Threads, but returns only the patch series submitted to the mailing lists.
    60  func PatchSeries(emails []*Email) []*Series {
    61  	var ret []*Series
    62  	// Normally, all following series patches are sent in response to the first email sent.
    63  	// So there's no sense to look at deeper replies.
    64  	for _, thread := range listThreads(emails, 1) {
    65  		if thread.Type != dashapi.DiscussionPatch {
    66  			continue
    67  		}
    68  		patch, ok := parsePatchSubject(thread.Subject)
    69  		if !ok {
    70  			// It must never be happening.
    71  			panic("DiscussionPatch is set, but we fail to parse the thread subject")
    72  		}
    73  		total := patch.Total.ValueOr(1)
    74  		series := &Series{
    75  			Subject:   patch.Title,
    76  			MessageID: thread.MessageID,
    77  			Version:   patch.Version.ValueOr(1),
    78  			Tags:      patch.Tags,
    79  		}
    80  		ret = append(ret, series)
    81  		if patch.Seq.IsSet() && patch.Seq.Value() > 1 {
    82  			series.Corrupted = "the first patch has seq>1"
    83  			continue
    84  		}
    85  		hasSeq := map[int]bool{}
    86  		for _, email := range thread.Messages {
    87  			patch, ok := parsePatchSubject(email.Subject)
    88  			if !ok {
    89  				continue
    90  			}
    91  			seq := patch.Seq.ValueOr(1)
    92  			if seq == 0 {
    93  				// The cover email is not of interest.
    94  				continue
    95  			}
    96  			if !email.HasPatch {
    97  				// Sometimes users reply to the series keeping the original subject.
    98  				// Ignore such messages.
    99  				continue
   100  			}
   101  			if hasSeq[seq] {
   102  				// It's weird if that really happens, but let's skip for now.
   103  				continue
   104  			}
   105  			hasSeq[seq] = true
   106  			series.Patches = append(series.Patches, Patch{
   107  				Seq:   seq,
   108  				Email: email,
   109  			})
   110  		}
   111  		if len(hasSeq) != total {
   112  			series.Corrupted = fmt.Sprintf("the subject mentions %d patches, %d are found",
   113  				total, len(hasSeq))
   114  			continue
   115  		}
   116  		if len(series.Patches) == 0 {
   117  			series.Corrupted = "0 patches"
   118  			continue
   119  		}
   120  		sort.Slice(series.Patches, func(i, j int) bool {
   121  			return series.Patches[i].Seq < series.Patches[j].Seq
   122  		})
   123  	}
   124  	return ret
   125  }
   126  
   127  // DiscussionType extracts the specific discussion type from an email.
   128  func DiscussionType(msg *email.Email) dashapi.DiscussionType {
   129  	discType := dashapi.DiscussionMention
   130  	if msg.OwnEmail {
   131  		discType = dashapi.DiscussionReport
   132  	}
   133  	// This is very crude, but should work for now.
   134  	if _, ok := parsePatchSubject(msg.Subject); ok {
   135  		discType = dashapi.DiscussionPatch
   136  	} else if strings.Contains(msg.Subject, "Monthly") {
   137  		discType = dashapi.DiscussionReminder
   138  	}
   139  	return discType
   140  }
   141  
// PatchSubject is the information that parsePatchSubject extracts from an email
// subject with a bracketed "[... PATCH ...]" prefix. Title is the text that follows
// the prefix; Version is the number of a "vN" word inside the brackets, if any.
type PatchSubject struct {
	Title   string
	Tags    []string // Sometimes there's e.g. "net" or "net-next" in the subject.
	Version Optional[int]
	Seq     Optional[int] // The "Seq/Total" part.
	Total   Optional[int]
}
   149  
// patchSubjectRe matches, case-insensitively, a line that begins with a bracketed
// "[... PATCH ...]" prefix. Group 1 holds the words preceding "PATCH" and group 2 the
// words following it; groups 3 and 4 are the Seq and Total parts of an optional
// "Seq/Total" suffix (leading zeroes of Seq are not captured); group 5 is the rest of
// the line after the closing bracket and any whitespace.
// nolint: lll
var patchSubjectRe = regexp.MustCompile(`(?mi)^\[(?:([\w\s-]+)\s)?PATCH(?:\s([\w\s-]+))??(?:\s0*(\d+)\/(\d+))?\]\s*(.+)`)
   152  
   153  func parsePatchSubject(subject string) (PatchSubject, bool) {
   154  	var ret PatchSubject
   155  	groups := patchSubjectRe.FindStringSubmatch(subject)
   156  	if len(groups) == 0 {
   157  		return ret, false
   158  	}
   159  	tags := strings.Fields(groups[1])
   160  	for _, tag := range append(tags, strings.Fields(groups[2])...) {
   161  		if strings.HasPrefix(tag, "v") {
   162  			val, err := strconv.Atoi(strings.TrimPrefix(tag, "v"))
   163  			if err == nil {
   164  				ret.Version.Set(val)
   165  				continue
   166  			}
   167  		}
   168  		ret.Tags = append(ret.Tags, tag)
   169  	}
   170  	sort.Strings(ret.Tags)
   171  	if groups[3] != "" {
   172  		if val, err := strconv.Atoi(groups[3]); err == nil {
   173  			ret.Seq.Set(val)
   174  		}
   175  	}
   176  	if groups[4] != "" {
   177  		if val, err := strconv.Atoi(groups[4]); err == nil {
   178  			ret.Total.Set(val)
   179  		}
   180  	}
   181  	ret.Title = groups[5]
   182  	return ret, true
   183  }
   184  
// parseCtx holds the state used while splitting a set of emails into threads.
//
// maxDepth limits how deep below a root message replies are followed; 0 means no
// limit. threads accumulates the resulting threads. messages indexes the recorded
// emails by MessageID. next maps an email to the recorded emails that directly
// reply to it.
type parseCtx struct {
	maxDepth int
	threads  []*Thread
	messages map[string]*Email
	next     map[*Email][]*Email
}
   191  
   192  func (c *parseCtx) record(msg *Email) {
   193  	c.messages[msg.MessageID] = msg
   194  }
   195  
   196  func (c *parseCtx) process() {
   197  	// List messages for which we dont't have ancestors.
   198  	nodes := []*Email{}
   199  	for _, msg := range c.messages {
   200  		if msg.InReplyTo == "" || c.messages[msg.InReplyTo] == nil {
   201  			nodes = append(nodes, msg)
   202  		} else {
   203  			parent := c.messages[msg.InReplyTo]
   204  			c.next[parent] = append(c.next[parent], msg)
   205  		}
   206  	}
   207  	// Iterate starting from these tree nodes.
   208  	for _, node := range nodes {
   209  		c.visit(node, nil, 0)
   210  	}
   211  	// Collect BugIDs.
   212  	for _, thread := range c.threads {
   213  		unique := map[string]struct{}{}
   214  		for _, msg := range thread.Messages {
   215  			for _, id := range msg.BugIDs {
   216  				unique[id] = struct{}{}
   217  			}
   218  		}
   219  		var ids []string
   220  		for id := range unique {
   221  			ids = append(ids, id)
   222  		}
   223  		sort.Strings(ids)
   224  		thread.BugIDs = ids
   225  	}
   226  }
   227  
   228  func (c *parseCtx) visit(msg *Email, thread *Thread, depth int) {
   229  	var oldInfo *email.OldThreadInfo
   230  	if thread != nil {
   231  		oldInfo = &email.OldThreadInfo{
   232  			ThreadType: thread.Type,
   233  		}
   234  	}
   235  	msgType := DiscussionType(msg.Email)
   236  	switch email.NewMessageAction(msg.Email, msgType, oldInfo) {
   237  	case email.ActionIgnore:
   238  		thread = nil
   239  	case email.ActionAppend:
   240  		thread.Messages = append(thread.Messages, msg)
   241  	case email.ActionNewThread:
   242  		thread = &Thread{
   243  			MessageID: msg.MessageID,
   244  			Subject:   msg.Subject,
   245  			Type:      msgType,
   246  			Messages:  []*Email{msg},
   247  		}
   248  		c.threads = append(c.threads, thread)
   249  	}
   250  	if c.maxDepth == 0 || depth < c.maxDepth {
   251  		for _, nextMsg := range c.next[msg] {
   252  			c.visit(nextMsg, thread, depth+1)
   253  		}
   254  	}
   255  }
   256  
   257  type Optional[T any] struct {
   258  	val T
   259  	set bool
   260  }
   261  
   262  func value[T any](val T) Optional[T] {
   263  	return Optional[T]{val: val, set: true}
   264  }
   265  
   266  func (o Optional[T]) IsSet() bool {
   267  	return o.set
   268  }
   269  
   270  func (o Optional[T]) Value() T {
   271  	return o.val
   272  }
   273  
   274  func (o Optional[T]) ValueOr(def T) T {
   275  	if o.set {
   276  		return o.val
   277  	}
   278  	return def
   279  }
   280  
   281  func (o *Optional[T]) Set(val T) {
   282  	o.val = val
   283  	o.set = true
   284  }