github.com/psiphon-labs/psiphon-tunnel-core@v2.0.28+incompatible/Server/logging/analysis/analysis.go (about) 1 /* 2 * Copyright (c) 2018, Psiphon Inc. 3 * All rights reserved. 4 * 5 * This program is free software: you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation, either version 3 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 // Package analysis implements heuristical frequency analysis of Psiphon Tunnel 21 // Core server logs. Log lines are parsed into 3 distinct log types: message, 22 // metrics and unknown. Under these log types the number of logs of each unique 23 // identifier is counted. The unique identifiers are as follows: 24 // message: "msg" field 25 // metrics: "event_name" field 26 // unknown: key graph 27 package analysis 28 29 import ( 30 "bufio" 31 "encoding/json" 32 "fmt" 33 "log" 34 "os" 35 "reflect" 36 "regexp" 37 "sort" 38 39 "github.com/sirupsen/logrus" 40 ) 41 42 type LogLevel int 43 44 const ( 45 LOG_LEVEL_UNKNOWN = -1 46 LOG_LEVEL_DEBUG LogLevel = iota 47 LOG_LEVEL_INFO 48 LOG_LEVEL_WARNING 49 LOG_LEVEL_ERROR 50 ) 51 52 func (l LogLevel) String() string { 53 switch l { 54 default: 55 return "Unknown" 56 case LOG_LEVEL_UNKNOWN: 57 return "Unknown" 58 case LOG_LEVEL_DEBUG: 59 return "Debug" 60 case LOG_LEVEL_INFO: 61 return "Info" 62 case LOG_LEVEL_WARNING: 63 return "Warning" 64 case LOG_LEVEL_ERROR: 65 return "Error" 66 } 67 } 68 69 type MetricsLogEventName string 70 type MessageLogKey string 71 type MessageLogName string 72 type MessageLogContext string 73 type MessageLogError string 74 type LogFields logrus.Fields 75 type node map[string]interface{} 76 77 // Models for each psiphond log type 78 79 type LogModel interface { 80 JsonString() string 81 Print(bool, bool) 82 } 83 84 type BaseLogModel struct { 85 Example string 86 Node node 87 } 88 89 type MessageLogModel struct { 90 BaseLogModel 91 Msg MessageLogName 92 Level LogLevel 93 MessageLogContext *MessageLogContext 94 MessageLogError *MessageLogError 95 } 96 97 type MetricsLogModel struct { 98 BaseLogModel 99 Event MetricsLogEventName 100 } 101 102 type UnknownLogModel struct { 103 BaseLogModel 104 } 105 106 func (a *BaseLogModel) equal(b BaseLogModel) bool { 107 return a.Node.equal(b.Node) 108 } 109 110 func (a *MessageLogModel) key() MessageLogKey { 111 var errorString string 112 var context string 113 114 if a.MessageLogError != nil { 115 errorString = string(*a.MessageLogError) 116 } 117 if a.MessageLogContext != nil { 118 context = string(*a.MessageLogContext) 119 } 120 121 return MessageLogKey(fmt.Sprintf("(%s,%d, %s,%s)", a.Msg, a.Level, errorString, context)) 122 } 123 124 func (a *MessageLogContext) equal(b *MessageLogContext) bool { 125 if a != nil && b != nil { 126 return *a == *b 127 } else if a == nil && b == nil { 128 return true 129 } 130 return false 131 } 132 133 func (a *MessageLogError) equal(b *MessageLogError) bool { 134 if a != nil && b != nil { 135 return *a == *b 136 } else if a == nil && b == nil { 137 return true 138 } 139 return false 140 } 141 142 func (a *MessageLogModel) equal(b MessageLogModel) bool { 143 if a.Msg != b.Msg { 144 return false 145 } else if a.Level != b.Level { 146 return false 147 } 148 149 return a.MessageLogContext.equal(b.MessageLogContext) && a.MessageLogError.equal(b.MessageLogError) 150 } 151 152 func (a *MetricsLogModel) equal(b MetricsLogModel) bool { 153 return a.Event == b.Event 154 } 155 156 func (a *UnknownLogModel) equal(b UnknownLogModel) bool { 157 return a.Node.equal(b.Node) 158 } 159 160 // equal returns true if both nodes have the same key graphs. 161 func (a *node) equal(b node) bool { 162 for k, v := range *a { 163 if val, ok := b[k]; ok { 164 if reflect.TypeOf(v) != reflect.TypeOf(val) { 165 return false 166 } 167 switch m := val.(type) { 168 case nil: 169 return true 170 case node: 171 vNode := v.(node) 172 return vNode.equal(m) 173 case []node: 174 vNode := v.([]node) 175 if len(vNode) != len(m) { 176 return false 177 } 178 for i := range m { 179 if !vNode[i].equal(m[i]) { 180 return false 181 } 182 } 183 default: 184 log.Fatalf("Unexpected val.(type) of %v\n", reflect.TypeOf(val)) 185 } 186 } else { 187 return false 188 } 189 } 190 return true 191 } 192 193 func (a *BaseLogModel) JsonString() string { 194 b, err := json.Marshal(a.Node) 195 if err != nil { 196 log.Fatal(err) 197 } 198 return string(b) 199 } 200 201 func (a *BaseLogModel) Print(printStructure, printExample bool) { 202 if printStructure { 203 fmt.Printf("Structure: %s\n", a.JsonString()) 204 } 205 if printExample { 206 fmt.Println("ExampleText: ", a.Example) 207 } 208 } 209 210 func (a *MessageLogModel) Print(printStructure, printExample bool) { 211 fmt.Printf("MessageLog\n") 212 fmt.Printf("MessageLogName: %s\n", a.Msg) 213 214 if a.MessageLogError != nil { 215 fmt.Printf("MessageLogError: %s\n", *a.MessageLogError) 216 } 217 if a.MessageLogContext != nil { 218 fmt.Printf("MessageLogContext: %s\n", *a.MessageLogContext) 219 } 220 221 if printStructure { 222 fmt.Printf("Structure: %s\n", a.JsonString()) 223 } 224 if printExample { 225 fmt.Println("ExampleText: ", a.Example) 226 } 227 } 228 229 func (a *MetricsLogModel) Print(printStructure, printExample bool) { 230 fmt.Printf("MetricsLog\n") 231 fmt.Printf("MetricsLogEventName: %s\n", a.Event) 232 if printStructure { 233 fmt.Printf("Structure: %s\n", a.JsonString()) 234 } 235 if printExample { 236 fmt.Println("ExampleText: ", a.Example) 237 } 238 } 239 240 func (a *UnknownLogModel) Print(printStructure, printExample bool) { 241 fmt.Printf("UnknownLog\n") 242 fmt.Printf("Structure: %s\n", a.JsonString()) 243 if printExample { 244 fmt.Println("ExampleText: ", a.Example) 245 } 246 } 247 248 // Stats for each log model 249 250 type LogModelStatsMetrics interface { 251 NumLogs() uint 252 } 253 254 type LogModelStats struct { 255 Count uint 256 } 257 258 type MessageLogModelStats struct { 259 LogModelStats 260 MessageLogModel 261 } 262 263 type MetricsLogModelStats struct { 264 LogModelStats 265 MetricsLogModel 266 } 267 268 type UnknownLogModelStats struct { 269 LogModelStats 270 UnknownLogModel 271 } 272 273 func (a MessageLogModelStats) NumLogs() uint { 274 return a.Count 275 } 276 277 func (a MetricsLogModelStats) NumLogs() uint { 278 return a.Count 279 } 280 281 func (a UnknownLogModelStats) NumLogs() uint { 282 return a.Count 283 } 284 285 func (a *MessageLogModelStats) Print(printStructure, printExample bool) { 286 a.MessageLogModel.Print(printStructure, printExample) 287 } 288 289 func (a *MetricsLogModelStats) Print(printStructure, printExample bool) { 290 a.MetricsLogModel.Print(printStructure, printExample) 291 } 292 293 func (a *UnknownLogModelStats) Print(printExample bool) { 294 a.UnknownLogModel.Print(true, printExample) 295 } 296 297 func safeDivide(a, b float64) float64 { 298 if b != 0 { 299 return a / b 300 } 301 return 0 302 } 303 304 func (a *MessageLogModelStats) PrintWithRelativePercent(count uint, printStructure, printExample bool) { 305 a.Print(printStructure, printExample) 306 fmt.Printf("Count: %d of %d\n", a.Count, count) 307 fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count))) 308 fmt.Printf("\n") 309 } 310 311 func (a *MetricsLogModelStats) PrintWithRelativePercent(count uint, printStructure, printExample bool) { 312 a.Print(printStructure, printExample) 313 fmt.Printf("Count: %d of %d\n", a.Count, count) 314 fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count))) 315 fmt.Printf("\n") 316 } 317 318 func (a *UnknownLogModelStats) PrintWithRelativePercent(count uint, printExample bool) { 319 a.Print(printExample) 320 fmt.Printf("Count: %d of %d\n", a.Count, count) 321 fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count))) 322 fmt.Printf("\n") 323 } 324 325 // Log type stats 326 // Aggregate log models by log type 327 328 type LogTypeStats struct { 329 Count uint 330 } 331 332 type MessageLogStats struct { 333 LogTypeStats 334 modelStats map[MessageLogKey]*MessageLogModelStats 335 } 336 337 type MetricsLogStats struct { 338 LogTypeStats 339 modelStats map[MetricsLogEventName]*MetricsLogModelStats 340 } 341 342 type UnknownLogStats struct { 343 LogTypeStats 344 modelStats []UnknownLogModelStats 345 } 346 347 func (a *MessageLogStats) Print() { 348 for _, v := range a.Sort() { 349 v.PrintWithRelativePercent(a.Count, false, false) 350 } 351 } 352 353 func (a *MetricsLogStats) Print() { 354 for _, v := range a.Sort() { 355 v.PrintWithRelativePercent(a.Count, false, false) 356 } 357 } 358 359 func (a *UnknownLogStats) Print() { 360 for _, v := range a.Sort() { 361 v.PrintWithRelativePercent(a.Count, true) 362 } 363 } 364 365 func (a *MessageLogStats) Sort() []MessageLogModelStats { 366 var s []MessageLogModelStats 367 for _, v := range a.modelStats { 368 if v != nil { 369 s = append(s, *v) 370 } 371 } 372 373 sort.Slice(s, func(i, j int) bool { 374 return s[j].Count > s[i].Count 375 }) 376 377 return s 378 } 379 380 func (a *MetricsLogStats) Sort() []MetricsLogModelStats { 381 var s []MetricsLogModelStats 382 for _, v := range a.modelStats { 383 if v != nil { 384 s = append(s, *v) 385 } 386 } 387 388 sort.Slice(s, func(i, j int) bool { 389 return s[j].Count > s[i].Count 390 }) 391 return s 392 } 393 394 func (a *UnknownLogStats) Sort() []UnknownLogModelStats { 395 var s []UnknownLogModelStats 396 s = append(s, a.modelStats...) 397 398 sort.Slice(s, func(i, j int) bool { 399 return s[j].Count > s[i].Count 400 }) 401 return s 402 } 403 404 // Log file stats 405 406 type LogStats struct { 407 MessageLogModels MessageLogStats 408 MetricsLogModels MetricsLogStats 409 UnknownLogModels UnknownLogStats 410 } 411 412 // NewLogStats initializes a new LogStats structure. 413 func NewLogStats() (l *LogStats) { 414 l = &LogStats{ 415 MessageLogModels: MessageLogStats{ 416 modelStats: make(map[MessageLogKey]*MessageLogModelStats), 417 }, 418 MetricsLogModels: MetricsLogStats{ 419 modelStats: make(map[MetricsLogEventName]*MetricsLogModelStats), 420 }, 421 UnknownLogModels: UnknownLogStats{ 422 modelStats: nil, 423 }, 424 } 425 426 return l 427 } 428 429 func NewLogStatsFromFiles(files []string) (l *LogStats, err error) { 430 l = NewLogStats() 431 432 for _, file := range files { 433 err = l.ParseFile(file) 434 if err != nil { 435 return nil, err 436 } 437 } 438 439 return l, nil 440 } 441 442 // ParseFile takes a psiphond log file as input, parses the log lines into log 443 // models and updates the LogStats structure. 444 func (l *LogStats) ParseFile(filename string) error { 445 file, err := os.Open(filename) 446 if err != nil { 447 return err 448 } 449 defer file.Close() 450 451 scanner := bufio.NewScanner(file) 452 for scanner.Scan() { 453 err := l.ParseLogLine(scanner.Text()) 454 if err != nil { 455 return err 456 } 457 } 458 459 if err := scanner.Err(); err != nil { 460 return err 461 } 462 463 return nil 464 } 465 466 // ParseLogLine attempts to parse a log line into a log model and then updates the 467 // LogStats structure. 468 func (l *LogStats) ParseLogLine(log string) error { 469 MessageLogModels := &l.MessageLogModels 470 MetricsLogModels := &l.MetricsLogModels 471 472 logModel, err := parseLogModel(log) 473 if err != nil { 474 return err 475 } 476 477 switch v := logModel.(type) { 478 case *MessageLogModel: 479 MessageLogModels.Count += 1 480 481 if m, ok := MessageLogModels.modelStats[v.key()]; ok { 482 m.Count += 1 483 } else { 484 MessageLogModels.modelStats[v.key()] = &MessageLogModelStats{LogModelStats{1}, *v} 485 } 486 case *MetricsLogModel: 487 l.MetricsLogModels.Count += 1 488 if m, ok := l.MetricsLogModels.modelStats[v.Event]; ok { 489 m.Count += 1 490 } else { 491 MetricsLogModels.modelStats[v.Event] = &MetricsLogModelStats{LogModelStats{1}, *v} 492 } 493 case *UnknownLogModel: 494 l.UnknownLogModels.Count += 1 495 found := false 496 for i := range l.UnknownLogModels.modelStats { 497 if l.UnknownLogModels.modelStats[i].UnknownLogModel.equal(*v) { 498 l.UnknownLogModels.modelStats[i].Count += 1 499 found = true 500 break 501 } 502 } 503 if !found { 504 l.UnknownLogModels.modelStats = append(l.UnknownLogModels.modelStats, UnknownLogModelStats{LogModelStats{1}, *v}) 505 } 506 default: 507 return fmt.Errorf("unexpected model type of %v", reflect.TypeOf(v)) 508 } 509 510 return nil 511 } 512 513 func redactIpAddressesAndPorts(a string) string { 514 ipAddressWithOptionalPort := regexp.MustCompile(`(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}(:(6553[0-5]|655[0-2][0-9]\d|65[0-4](\d){2}|6[0-4](\d){3}|[1-5](\d){4}|[1-9](\d){0,3}))?`) 515 return ipAddressWithOptionalPort.ReplaceAllString(a, "<redacted>") 516 } 517 518 // parseLogModel attempts to parse a string into a log model. It is expected 519 // that the provided string is valid JSON. 520 func parseLogModel(s string) (LogModel, error) { 521 var m LogFields 522 err := json.Unmarshal([]byte(s), &m) 523 if err != nil { 524 return nil, fmt.Errorf("failed to parse log line into JSON: %s", err) 525 } 526 527 var l LogModel 528 var b BaseLogModel 529 b.Example = s 530 b.Node = parseNode(&m) 531 532 if m["event_name"] != nil { 533 l = &MetricsLogModel{ 534 BaseLogModel: b, 535 Event: MetricsLogEventName(m["event_name"].(string)), 536 } 537 } else if m["msg"] != nil && m["level"] != nil { 538 var level LogLevel 539 switch m["level"].(string) { 540 case "debug": 541 level = LOG_LEVEL_DEBUG 542 case "info": 543 level = LOG_LEVEL_INFO 544 case "warning": 545 level = LOG_LEVEL_WARNING 546 case "error": 547 level = LOG_LEVEL_ERROR 548 default: 549 return nil, fmt.Errorf("unexpected log level: %s", m["level"].(string)) 550 } 551 552 var context *MessageLogContext 553 var err *MessageLogError 554 555 if val, ok := m["context"]; ok { 556 c := MessageLogContext(val.(string)) 557 context = &c 558 } 559 560 if val, ok := m["error"]; ok { 561 errorWithIpsRedacted := redactIpAddressesAndPorts(val.(string)) 562 e := MessageLogError(errorWithIpsRedacted) 563 err = &e 564 } 565 566 l = &MessageLogModel{ 567 BaseLogModel: b, 568 Msg: MessageLogName(m["msg"].(string)), 569 Level: level, 570 MessageLogContext: context, 571 MessageLogError: err, 572 } 573 } else { 574 l = &UnknownLogModel{ 575 BaseLogModel: b, 576 } 577 } 578 579 return l, nil 580 } 581 582 // parseNode takes a map and transforms it into a graph which represents its 583 // structure. 584 func parseNode(m *LogFields) node { 585 n := make(node) 586 for k, v := range *m { 587 i := parseInterface(v) 588 n[k] = i 589 } 590 return n 591 } 592 593 // parseInterface takes an interface and transforms it into a graph of its 594 // structure. 595 func parseInterface(i interface{}) interface{} { 596 switch v := i.(type) { 597 default: 598 return nil 599 case map[string]interface{}: 600 l := LogFields(v) 601 return parseNode(&l) 602 case []interface{}: 603 n := make([]node, 1) 604 for i := range v { 605 switch p := parseInterface(v[i]).(type) { 606 case node: 607 n = append(n, p) 608 } 609 } 610 return n 611 } 612 } 613 614 // sortLogModelsDescending merges all log models of different types and then 615 // sorts them in ascending order by the number times each occurs. Returns the 616 // sorted list and the total number of logs represented by each log model in 617 // the list. 618 func (l *LogStats) SortLogModels(messages, metrics, unknown bool) (models []interface{}, numLogs uint) { 619 var messagesSort []MessageLogModelStats 620 var metricsSort []MetricsLogModelStats 621 var unknownSort []UnknownLogModelStats 622 623 if messages { 624 messagesSort = l.MessageLogModels.Sort() 625 messages := make([]interface{}, len(messagesSort)) 626 for i, v := range messagesSort { 627 messages[i] = v 628 } 629 models = append(models, messages...) 630 numLogs += l.MessageLogModels.Count 631 } 632 633 if metrics { 634 metricsSort = l.MetricsLogModels.Sort() 635 metrics := make([]interface{}, len(metricsSort)) 636 for i, v := range metricsSort { 637 metrics[i] = v 638 } 639 models = append(models, metrics...) 640 numLogs += l.MetricsLogModels.Count 641 } 642 643 if unknown { 644 unknownSort = l.UnknownLogModels.Sort() 645 unknown := make([]interface{}, len(unknownSort)) 646 for i, v := range unknownSort { 647 unknown[i] = v 648 } 649 models = append(models, unknown...) 650 numLogs += l.UnknownLogModels.Count 651 } 652 653 sort.Slice(models, func(i, j int) bool { 654 a := models[i].(LogModelStatsMetrics) 655 b := models[j].(LogModelStatsMetrics) 656 657 return b.NumLogs() > a.NumLogs() 658 }) 659 660 return models, numLogs 661 } 662 663 // NumDistinctLogs returns the number of unique log models contained within the 664 // LogStats structure. 665 func (l *LogStats) NumDistinctLogs() uint { 666 return uint(len(l.MessageLogModels.modelStats) + len(l.MetricsLogModels.modelStats) + len(l.UnknownLogModels.modelStats)) 667 } 668 669 func (l *LogStats) Print(messages, metrics, unknown, printStructure, printExample bool) { 670 logs, numLogs := l.SortLogModels(messages, metrics, unknown) 671 672 for _, x := range logs { 673 switch v := x.(type) { 674 case MessageLogModelStats: 675 v.PrintWithRelativePercent(numLogs, printStructure, printExample) 676 case MetricsLogModelStats: 677 v.PrintWithRelativePercent(numLogs, printStructure, printExample) 678 case UnknownLogModelStats: 679 v.PrintWithRelativePercent(numLogs, printExample) 680 } 681 } 682 }