github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/email/lore/parse.go (about) 1 // Copyright 2023 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package lore 5 6 import ( 7 "fmt" 8 "regexp" 9 "sort" 10 "strconv" 11 "strings" 12 13 "github.com/google/syzkaller/dashboard/dashapi" 14 "github.com/google/syzkaller/pkg/email" 15 ) 16 17 // Thread is a generic representation of a single discussion in the mailing list. 18 type Thread struct { 19 Subject string 20 MessageID string 21 Type dashapi.DiscussionType 22 BugIDs []string 23 Messages []*Email 24 } 25 26 // Series represents a single patch series sent over email. 27 type Series struct { 28 Subject string 29 MessageID string 30 Version int 31 Corrupted string // If non-empty, contains a reason why the series better be ignored. 32 Tags []string 33 Patches []Patch 34 } 35 36 type Patch struct { 37 Seq int 38 *Email 39 } 40 41 // Threads extracts individual threads from a list of emails. 42 func Threads(emails []*Email) []*Thread { 43 return listThreads(emails, 0) 44 } 45 46 func listThreads(emails []*Email, maxDepth int) []*Thread { 47 ctx := &parseCtx{ 48 maxDepth: maxDepth, 49 messages: map[string]*Email{}, 50 next: map[*Email][]*Email{}, 51 } 52 for _, email := range emails { 53 ctx.record(email) 54 } 55 ctx.process() 56 return ctx.threads 57 } 58 59 // PatchSeries is similar to Threads, but returns only the patch series submitted to the mailing lists. 60 func PatchSeries(emails []*Email) []*Series { 61 var ret []*Series 62 // Normally, all following series patches are sent in response to the first email sent. 63 // So there's no sense to look at deeper replies. 64 for _, thread := range listThreads(emails, 1) { 65 if thread.Type != dashapi.DiscussionPatch { 66 continue 67 } 68 patch, ok := parsePatchSubject(thread.Subject) 69 if !ok { 70 // It must never be happening. 71 panic("DiscussionPatch is set, but we fail to parse the thread subject") 72 } 73 total := patch.Total.ValueOr(1) 74 series := &Series{ 75 Subject: patch.Title, 76 MessageID: thread.MessageID, 77 Version: patch.Version.ValueOr(1), 78 Tags: patch.Tags, 79 } 80 ret = append(ret, series) 81 if patch.Seq.IsSet() && patch.Seq.Value() > 1 { 82 series.Corrupted = "the first patch has seq>1" 83 continue 84 } 85 hasSeq := map[int]bool{} 86 for _, email := range thread.Messages { 87 patch, ok := parsePatchSubject(email.Subject) 88 if !ok { 89 continue 90 } 91 seq := patch.Seq.ValueOr(1) 92 if seq == 0 { 93 // The cover email is not of interest. 94 continue 95 } 96 if !email.HasPatch { 97 // Sometimes users reply to the series keeping the original subject. 98 // Ignore such messages. 99 continue 100 } 101 if hasSeq[seq] { 102 // It's weird if that really happens, but let's skip for now. 103 continue 104 } 105 hasSeq[seq] = true 106 series.Patches = append(series.Patches, Patch{ 107 Seq: seq, 108 Email: email, 109 }) 110 } 111 if len(hasSeq) != total { 112 series.Corrupted = fmt.Sprintf("the subject mentions %d patches, %d are found", 113 total, len(hasSeq)) 114 continue 115 } 116 if len(series.Patches) == 0 { 117 series.Corrupted = "0 patches" 118 continue 119 } 120 sort.Slice(series.Patches, func(i, j int) bool { 121 return series.Patches[i].Seq < series.Patches[j].Seq 122 }) 123 } 124 return ret 125 } 126 127 // DiscussionType extracts the specific discussion type from an email. 128 func DiscussionType(msg *email.Email) dashapi.DiscussionType { 129 discType := dashapi.DiscussionMention 130 if msg.OwnEmail { 131 discType = dashapi.DiscussionReport 132 } 133 // This is very crude, but should work for now. 134 if _, ok := parsePatchSubject(msg.Subject); ok { 135 discType = dashapi.DiscussionPatch 136 } else if strings.Contains(msg.Subject, "Monthly") { 137 discType = dashapi.DiscussionReminder 138 } 139 return discType 140 } 141 142 type PatchSubject struct { 143 Title string 144 Tags []string // Sometimes there's e.g. "net" or "next-next" in the subject. 145 Version Optional[int] 146 Seq Optional[int] // The "Seq/Total" part. 147 Total Optional[int] 148 } 149 150 // nolint: lll 151 var patchSubjectRe = regexp.MustCompile(`(?mi)^\[(?:([\w\s-]+)\s)?PATCH(?:\s([\w\s-]+))??(?:\s0*(\d+)\/(\d+))?\]\s*(.+)`) 152 153 func parsePatchSubject(subject string) (PatchSubject, bool) { 154 var ret PatchSubject 155 groups := patchSubjectRe.FindStringSubmatch(subject) 156 if len(groups) == 0 { 157 return ret, false 158 } 159 tags := strings.Fields(groups[1]) 160 for _, tag := range append(tags, strings.Fields(groups[2])...) { 161 if strings.HasPrefix(tag, "v") { 162 val, err := strconv.Atoi(strings.TrimPrefix(tag, "v")) 163 if err == nil { 164 ret.Version.Set(val) 165 continue 166 } 167 } 168 ret.Tags = append(ret.Tags, tag) 169 } 170 sort.Strings(ret.Tags) 171 if groups[3] != "" { 172 if val, err := strconv.Atoi(groups[3]); err == nil { 173 ret.Seq.Set(val) 174 } 175 } 176 if groups[4] != "" { 177 if val, err := strconv.Atoi(groups[4]); err == nil { 178 ret.Total.Set(val) 179 } 180 } 181 ret.Title = groups[5] 182 return ret, true 183 } 184 185 type parseCtx struct { 186 maxDepth int 187 threads []*Thread 188 messages map[string]*Email 189 next map[*Email][]*Email 190 } 191 192 func (c *parseCtx) record(msg *Email) { 193 c.messages[msg.MessageID] = msg 194 } 195 196 func (c *parseCtx) process() { 197 // List messages for which we dont't have ancestors. 198 nodes := []*Email{} 199 for _, msg := range c.messages { 200 if msg.InReplyTo == "" || c.messages[msg.InReplyTo] == nil { 201 nodes = append(nodes, msg) 202 } else { 203 parent := c.messages[msg.InReplyTo] 204 c.next[parent] = append(c.next[parent], msg) 205 } 206 } 207 // Iterate starting from these tree nodes. 208 for _, node := range nodes { 209 c.visit(node, nil, 0) 210 } 211 // Collect BugIDs. 212 for _, thread := range c.threads { 213 unique := map[string]struct{}{} 214 for _, msg := range thread.Messages { 215 for _, id := range msg.BugIDs { 216 unique[id] = struct{}{} 217 } 218 } 219 var ids []string 220 for id := range unique { 221 ids = append(ids, id) 222 } 223 sort.Strings(ids) 224 thread.BugIDs = ids 225 } 226 } 227 228 func (c *parseCtx) visit(msg *Email, thread *Thread, depth int) { 229 var oldInfo *email.OldThreadInfo 230 if thread != nil { 231 oldInfo = &email.OldThreadInfo{ 232 ThreadType: thread.Type, 233 } 234 } 235 msgType := DiscussionType(msg.Email) 236 switch email.NewMessageAction(msg.Email, msgType, oldInfo) { 237 case email.ActionIgnore: 238 thread = nil 239 case email.ActionAppend: 240 thread.Messages = append(thread.Messages, msg) 241 case email.ActionNewThread: 242 thread = &Thread{ 243 MessageID: msg.MessageID, 244 Subject: msg.Subject, 245 Type: msgType, 246 Messages: []*Email{msg}, 247 } 248 c.threads = append(c.threads, thread) 249 } 250 if c.maxDepth == 0 || depth < c.maxDepth { 251 for _, nextMsg := range c.next[msg] { 252 c.visit(nextMsg, thread, depth+1) 253 } 254 } 255 } 256 257 type Optional[T any] struct { 258 val T 259 set bool 260 } 261 262 func value[T any](val T) Optional[T] { 263 return Optional[T]{val: val, set: true} 264 } 265 266 func (o Optional[T]) IsSet() bool { 267 return o.set 268 } 269 270 func (o Optional[T]) Value() T { 271 return o.val 272 } 273 274 func (o Optional[T]) ValueOr(def T) T { 275 if o.set { 276 return o.val 277 } 278 return def 279 } 280 281 func (o *Optional[T]) Set(val T) { 282 o.val = val 283 o.set = true 284 }