github.com/psiphon-labs/psiphon-tunnel-core@v2.0.28+incompatible/Server/logging/analysis/analysis.go (about)

     1  /*
     2   * Copyright (c) 2018, Psiphon Inc.
     3   * All rights reserved.
     4   *
     5   * This program is free software: you can redistribute it and/or modify
     6   * it under the terms of the GNU General Public License as published by
     7   * the Free Software Foundation, either version 3 of the License, or
     8   * (at your option) any later version.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  // Package analysis implements heuristical frequency analysis of Psiphon Tunnel
    21  // Core server logs. Log lines are parsed into 3 distinct log types: message,
    22  // metrics and unknown. Under these log types the number of logs of each unique
    23  // identifier is counted. The unique identifiers are as follows:
    24  // message: "msg" field
    25  // metrics: "event_name" field
    26  // unknown: key graph
    27  package analysis
    28  
    29  import (
    30  	"bufio"
    31  	"encoding/json"
    32  	"fmt"
    33  	"log"
    34  	"os"
    35  	"reflect"
    36  	"regexp"
    37  	"sort"
    38  
    39  	"github.com/sirupsen/logrus"
    40  )
    41  
    42  type LogLevel int
    43  
    44  const (
    45  	LOG_LEVEL_UNKNOWN          = -1
    46  	LOG_LEVEL_DEBUG   LogLevel = iota
    47  	LOG_LEVEL_INFO
    48  	LOG_LEVEL_WARNING
    49  	LOG_LEVEL_ERROR
    50  )
    51  
    52  func (l LogLevel) String() string {
    53  	switch l {
    54  	default:
    55  		return "Unknown"
    56  	case LOG_LEVEL_UNKNOWN:
    57  		return "Unknown"
    58  	case LOG_LEVEL_DEBUG:
    59  		return "Debug"
    60  	case LOG_LEVEL_INFO:
    61  		return "Info"
    62  	case LOG_LEVEL_WARNING:
    63  		return "Warning"
    64  	case LOG_LEVEL_ERROR:
    65  		return "Error"
    66  	}
    67  }
    68  
    69  type MetricsLogEventName string
    70  type MessageLogKey string
    71  type MessageLogName string
    72  type MessageLogContext string
    73  type MessageLogError string
    74  type LogFields logrus.Fields
    75  type node map[string]interface{}
    76  
    77  // Models for each psiphond log type
    78  
    79  type LogModel interface {
    80  	JsonString() string
    81  	Print(bool, bool)
    82  }
    83  
    84  type BaseLogModel struct {
    85  	Example string
    86  	Node    node
    87  }
    88  
    89  type MessageLogModel struct {
    90  	BaseLogModel
    91  	Msg               MessageLogName
    92  	Level             LogLevel
    93  	MessageLogContext *MessageLogContext
    94  	MessageLogError   *MessageLogError
    95  }
    96  
    97  type MetricsLogModel struct {
    98  	BaseLogModel
    99  	Event MetricsLogEventName
   100  }
   101  
   102  type UnknownLogModel struct {
   103  	BaseLogModel
   104  }
   105  
   106  func (a *BaseLogModel) equal(b BaseLogModel) bool {
   107  	return a.Node.equal(b.Node)
   108  }
   109  
   110  func (a *MessageLogModel) key() MessageLogKey {
   111  	var errorString string
   112  	var context string
   113  
   114  	if a.MessageLogError != nil {
   115  		errorString = string(*a.MessageLogError)
   116  	}
   117  	if a.MessageLogContext != nil {
   118  		context = string(*a.MessageLogContext)
   119  	}
   120  
   121  	return MessageLogKey(fmt.Sprintf("(%s,%d, %s,%s)", a.Msg, a.Level, errorString, context))
   122  }
   123  
   124  func (a *MessageLogContext) equal(b *MessageLogContext) bool {
   125  	if a != nil && b != nil {
   126  		return *a == *b
   127  	} else if a == nil && b == nil {
   128  		return true
   129  	}
   130  	return false
   131  }
   132  
   133  func (a *MessageLogError) equal(b *MessageLogError) bool {
   134  	if a != nil && b != nil {
   135  		return *a == *b
   136  	} else if a == nil && b == nil {
   137  		return true
   138  	}
   139  	return false
   140  }
   141  
   142  func (a *MessageLogModel) equal(b MessageLogModel) bool {
   143  	if a.Msg != b.Msg {
   144  		return false
   145  	} else if a.Level != b.Level {
   146  		return false
   147  	}
   148  
   149  	return a.MessageLogContext.equal(b.MessageLogContext) && a.MessageLogError.equal(b.MessageLogError)
   150  }
   151  
   152  func (a *MetricsLogModel) equal(b MetricsLogModel) bool {
   153  	return a.Event == b.Event
   154  }
   155  
   156  func (a *UnknownLogModel) equal(b UnknownLogModel) bool {
   157  	return a.Node.equal(b.Node)
   158  }
   159  
   160  // equal returns true if both nodes have the same key graphs.
   161  func (a *node) equal(b node) bool {
   162  	for k, v := range *a {
   163  		if val, ok := b[k]; ok {
   164  			if reflect.TypeOf(v) != reflect.TypeOf(val) {
   165  				return false
   166  			}
   167  			switch m := val.(type) {
   168  			case nil:
   169  				return true
   170  			case node:
   171  				vNode := v.(node)
   172  				return vNode.equal(m)
   173  			case []node:
   174  				vNode := v.([]node)
   175  				if len(vNode) != len(m) {
   176  					return false
   177  				}
   178  				for i := range m {
   179  					if !vNode[i].equal(m[i]) {
   180  						return false
   181  					}
   182  				}
   183  			default:
   184  				log.Fatalf("Unexpected val.(type) of %v\n", reflect.TypeOf(val))
   185  			}
   186  		} else {
   187  			return false
   188  		}
   189  	}
   190  	return true
   191  }
   192  
   193  func (a *BaseLogModel) JsonString() string {
   194  	b, err := json.Marshal(a.Node)
   195  	if err != nil {
   196  		log.Fatal(err)
   197  	}
   198  	return string(b)
   199  }
   200  
   201  func (a *BaseLogModel) Print(printStructure, printExample bool) {
   202  	if printStructure {
   203  		fmt.Printf("Structure: %s\n", a.JsonString())
   204  	}
   205  	if printExample {
   206  		fmt.Println("ExampleText: ", a.Example)
   207  	}
   208  }
   209  
   210  func (a *MessageLogModel) Print(printStructure, printExample bool) {
   211  	fmt.Printf("MessageLog\n")
   212  	fmt.Printf("MessageLogName: %s\n", a.Msg)
   213  
   214  	if a.MessageLogError != nil {
   215  		fmt.Printf("MessageLogError: %s\n", *a.MessageLogError)
   216  	}
   217  	if a.MessageLogContext != nil {
   218  		fmt.Printf("MessageLogContext: %s\n", *a.MessageLogContext)
   219  	}
   220  
   221  	if printStructure {
   222  		fmt.Printf("Structure: %s\n", a.JsonString())
   223  	}
   224  	if printExample {
   225  		fmt.Println("ExampleText: ", a.Example)
   226  	}
   227  }
   228  
   229  func (a *MetricsLogModel) Print(printStructure, printExample bool) {
   230  	fmt.Printf("MetricsLog\n")
   231  	fmt.Printf("MetricsLogEventName: %s\n", a.Event)
   232  	if printStructure {
   233  		fmt.Printf("Structure: %s\n", a.JsonString())
   234  	}
   235  	if printExample {
   236  		fmt.Println("ExampleText: ", a.Example)
   237  	}
   238  }
   239  
   240  func (a *UnknownLogModel) Print(printStructure, printExample bool) {
   241  	fmt.Printf("UnknownLog\n")
   242  	fmt.Printf("Structure: %s\n", a.JsonString())
   243  	if printExample {
   244  		fmt.Println("ExampleText: ", a.Example)
   245  	}
   246  }
   247  
   248  // Stats for each log model
   249  
   250  type LogModelStatsMetrics interface {
   251  	NumLogs() uint
   252  }
   253  
   254  type LogModelStats struct {
   255  	Count uint
   256  }
   257  
   258  type MessageLogModelStats struct {
   259  	LogModelStats
   260  	MessageLogModel
   261  }
   262  
   263  type MetricsLogModelStats struct {
   264  	LogModelStats
   265  	MetricsLogModel
   266  }
   267  
   268  type UnknownLogModelStats struct {
   269  	LogModelStats
   270  	UnknownLogModel
   271  }
   272  
   273  func (a MessageLogModelStats) NumLogs() uint {
   274  	return a.Count
   275  }
   276  
   277  func (a MetricsLogModelStats) NumLogs() uint {
   278  	return a.Count
   279  }
   280  
   281  func (a UnknownLogModelStats) NumLogs() uint {
   282  	return a.Count
   283  }
   284  
   285  func (a *MessageLogModelStats) Print(printStructure, printExample bool) {
   286  	a.MessageLogModel.Print(printStructure, printExample)
   287  }
   288  
   289  func (a *MetricsLogModelStats) Print(printStructure, printExample bool) {
   290  	a.MetricsLogModel.Print(printStructure, printExample)
   291  }
   292  
   293  func (a *UnknownLogModelStats) Print(printExample bool) {
   294  	a.UnknownLogModel.Print(true, printExample)
   295  }
   296  
   297  func safeDivide(a, b float64) float64 {
   298  	if b != 0 {
   299  		return a / b
   300  	}
   301  	return 0
   302  }
   303  
   304  func (a *MessageLogModelStats) PrintWithRelativePercent(count uint, printStructure, printExample bool) {
   305  	a.Print(printStructure, printExample)
   306  	fmt.Printf("Count: %d of %d\n", a.Count, count)
   307  	fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count)))
   308  	fmt.Printf("\n")
   309  }
   310  
   311  func (a *MetricsLogModelStats) PrintWithRelativePercent(count uint, printStructure, printExample bool) {
   312  	a.Print(printStructure, printExample)
   313  	fmt.Printf("Count: %d of %d\n", a.Count, count)
   314  	fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count)))
   315  	fmt.Printf("\n")
   316  }
   317  
   318  func (a *UnknownLogModelStats) PrintWithRelativePercent(count uint, printExample bool) {
   319  	a.Print(printExample)
   320  	fmt.Printf("Count: %d of %d\n", a.Count, count)
   321  	fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count)))
   322  	fmt.Printf("\n")
   323  }
   324  
   325  // Log type stats
   326  // Aggregate log models by log type
   327  
   328  type LogTypeStats struct {
   329  	Count uint
   330  }
   331  
   332  type MessageLogStats struct {
   333  	LogTypeStats
   334  	modelStats map[MessageLogKey]*MessageLogModelStats
   335  }
   336  
   337  type MetricsLogStats struct {
   338  	LogTypeStats
   339  	modelStats map[MetricsLogEventName]*MetricsLogModelStats
   340  }
   341  
   342  type UnknownLogStats struct {
   343  	LogTypeStats
   344  	modelStats []UnknownLogModelStats
   345  }
   346  
   347  func (a *MessageLogStats) Print() {
   348  	for _, v := range a.Sort() {
   349  		v.PrintWithRelativePercent(a.Count, false, false)
   350  	}
   351  }
   352  
   353  func (a *MetricsLogStats) Print() {
   354  	for _, v := range a.Sort() {
   355  		v.PrintWithRelativePercent(a.Count, false, false)
   356  	}
   357  }
   358  
   359  func (a *UnknownLogStats) Print() {
   360  	for _, v := range a.Sort() {
   361  		v.PrintWithRelativePercent(a.Count, true)
   362  	}
   363  }
   364  
   365  func (a *MessageLogStats) Sort() []MessageLogModelStats {
   366  	var s []MessageLogModelStats
   367  	for _, v := range a.modelStats {
   368  		if v != nil {
   369  			s = append(s, *v)
   370  		}
   371  	}
   372  
   373  	sort.Slice(s, func(i, j int) bool {
   374  		return s[j].Count > s[i].Count
   375  	})
   376  
   377  	return s
   378  }
   379  
   380  func (a *MetricsLogStats) Sort() []MetricsLogModelStats {
   381  	var s []MetricsLogModelStats
   382  	for _, v := range a.modelStats {
   383  		if v != nil {
   384  			s = append(s, *v)
   385  		}
   386  	}
   387  
   388  	sort.Slice(s, func(i, j int) bool {
   389  		return s[j].Count > s[i].Count
   390  	})
   391  	return s
   392  }
   393  
   394  func (a *UnknownLogStats) Sort() []UnknownLogModelStats {
   395  	var s []UnknownLogModelStats
   396  	s = append(s, a.modelStats...)
   397  
   398  	sort.Slice(s, func(i, j int) bool {
   399  		return s[j].Count > s[i].Count
   400  	})
   401  	return s
   402  }
   403  
   404  // Log file stats
   405  
   406  type LogStats struct {
   407  	MessageLogModels MessageLogStats
   408  	MetricsLogModels MetricsLogStats
   409  	UnknownLogModels UnknownLogStats
   410  }
   411  
   412  // NewLogStats initializes a new LogStats structure.
   413  func NewLogStats() (l *LogStats) {
   414  	l = &LogStats{
   415  		MessageLogModels: MessageLogStats{
   416  			modelStats: make(map[MessageLogKey]*MessageLogModelStats),
   417  		},
   418  		MetricsLogModels: MetricsLogStats{
   419  			modelStats: make(map[MetricsLogEventName]*MetricsLogModelStats),
   420  		},
   421  		UnknownLogModels: UnknownLogStats{
   422  			modelStats: nil,
   423  		},
   424  	}
   425  
   426  	return l
   427  }
   428  
   429  func NewLogStatsFromFiles(files []string) (l *LogStats, err error) {
   430  	l = NewLogStats()
   431  
   432  	for _, file := range files {
   433  		err = l.ParseFile(file)
   434  		if err != nil {
   435  			return nil, err
   436  		}
   437  	}
   438  
   439  	return l, nil
   440  }
   441  
   442  // ParseFile takes a psiphond log file as input, parses the log lines into log
   443  // models and updates the LogStats structure.
   444  func (l *LogStats) ParseFile(filename string) error {
   445  	file, err := os.Open(filename)
   446  	if err != nil {
   447  		return err
   448  	}
   449  	defer file.Close()
   450  
   451  	scanner := bufio.NewScanner(file)
   452  	for scanner.Scan() {
   453  		err := l.ParseLogLine(scanner.Text())
   454  		if err != nil {
   455  			return err
   456  		}
   457  	}
   458  
   459  	if err := scanner.Err(); err != nil {
   460  		return err
   461  	}
   462  
   463  	return nil
   464  }
   465  
   466  // ParseLogLine attempts to parse a log line into a log model and then updates the
   467  // LogStats structure.
   468  func (l *LogStats) ParseLogLine(log string) error {
   469  	MessageLogModels := &l.MessageLogModels
   470  	MetricsLogModels := &l.MetricsLogModels
   471  
   472  	logModel, err := parseLogModel(log)
   473  	if err != nil {
   474  		return err
   475  	}
   476  
   477  	switch v := logModel.(type) {
   478  	case *MessageLogModel:
   479  		MessageLogModels.Count += 1
   480  
   481  		if m, ok := MessageLogModels.modelStats[v.key()]; ok {
   482  			m.Count += 1
   483  		} else {
   484  			MessageLogModels.modelStats[v.key()] = &MessageLogModelStats{LogModelStats{1}, *v}
   485  		}
   486  	case *MetricsLogModel:
   487  		l.MetricsLogModels.Count += 1
   488  		if m, ok := l.MetricsLogModels.modelStats[v.Event]; ok {
   489  			m.Count += 1
   490  		} else {
   491  			MetricsLogModels.modelStats[v.Event] = &MetricsLogModelStats{LogModelStats{1}, *v}
   492  		}
   493  	case *UnknownLogModel:
   494  		l.UnknownLogModels.Count += 1
   495  		found := false
   496  		for i := range l.UnknownLogModels.modelStats {
   497  			if l.UnknownLogModels.modelStats[i].UnknownLogModel.equal(*v) {
   498  				l.UnknownLogModels.modelStats[i].Count += 1
   499  				found = true
   500  				break
   501  			}
   502  		}
   503  		if !found {
   504  			l.UnknownLogModels.modelStats = append(l.UnknownLogModels.modelStats, UnknownLogModelStats{LogModelStats{1}, *v})
   505  		}
   506  	default:
   507  		return fmt.Errorf("unexpected model type of %v", reflect.TypeOf(v))
   508  	}
   509  
   510  	return nil
   511  }
   512  
   513  func redactIpAddressesAndPorts(a string) string {
   514  	ipAddressWithOptionalPort := regexp.MustCompile(`(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}(:(6553[0-5]|655[0-2][0-9]\d|65[0-4](\d){2}|6[0-4](\d){3}|[1-5](\d){4}|[1-9](\d){0,3}))?`)
   515  	return ipAddressWithOptionalPort.ReplaceAllString(a, "<redacted>")
   516  }
   517  
   518  // parseLogModel attempts to parse a string into a log model. It is expected
   519  // that the provided string is valid JSON.
   520  func parseLogModel(s string) (LogModel, error) {
   521  	var m LogFields
   522  	err := json.Unmarshal([]byte(s), &m)
   523  	if err != nil {
   524  		return nil, fmt.Errorf("failed to parse log line into JSON: %s", err)
   525  	}
   526  
   527  	var l LogModel
   528  	var b BaseLogModel
   529  	b.Example = s
   530  	b.Node = parseNode(&m)
   531  
   532  	if m["event_name"] != nil {
   533  		l = &MetricsLogModel{
   534  			BaseLogModel: b,
   535  			Event:        MetricsLogEventName(m["event_name"].(string)),
   536  		}
   537  	} else if m["msg"] != nil && m["level"] != nil {
   538  		var level LogLevel
   539  		switch m["level"].(string) {
   540  		case "debug":
   541  			level = LOG_LEVEL_DEBUG
   542  		case "info":
   543  			level = LOG_LEVEL_INFO
   544  		case "warning":
   545  			level = LOG_LEVEL_WARNING
   546  		case "error":
   547  			level = LOG_LEVEL_ERROR
   548  		default:
   549  			return nil, fmt.Errorf("unexpected log level: %s", m["level"].(string))
   550  		}
   551  
   552  		var context *MessageLogContext
   553  		var err *MessageLogError
   554  
   555  		if val, ok := m["context"]; ok {
   556  			c := MessageLogContext(val.(string))
   557  			context = &c
   558  		}
   559  
   560  		if val, ok := m["error"]; ok {
   561  			errorWithIpsRedacted := redactIpAddressesAndPorts(val.(string))
   562  			e := MessageLogError(errorWithIpsRedacted)
   563  			err = &e
   564  		}
   565  
   566  		l = &MessageLogModel{
   567  			BaseLogModel:      b,
   568  			Msg:               MessageLogName(m["msg"].(string)),
   569  			Level:             level,
   570  			MessageLogContext: context,
   571  			MessageLogError:   err,
   572  		}
   573  	} else {
   574  		l = &UnknownLogModel{
   575  			BaseLogModel: b,
   576  		}
   577  	}
   578  
   579  	return l, nil
   580  }
   581  
   582  // parseNode takes a map and transforms it into a graph which represents its
   583  // structure.
   584  func parseNode(m *LogFields) node {
   585  	n := make(node)
   586  	for k, v := range *m {
   587  		i := parseInterface(v)
   588  		n[k] = i
   589  	}
   590  	return n
   591  }
   592  
   593  // parseInterface takes an interface and transforms it into a graph of its
   594  // structure.
   595  func parseInterface(i interface{}) interface{} {
   596  	switch v := i.(type) {
   597  	default:
   598  		return nil
   599  	case map[string]interface{}:
   600  		l := LogFields(v)
   601  		return parseNode(&l)
   602  	case []interface{}:
   603  		n := make([]node, 1)
   604  		for i := range v {
   605  			switch p := parseInterface(v[i]).(type) {
   606  			case node:
   607  				n = append(n, p)
   608  			}
   609  		}
   610  		return n
   611  	}
   612  }
   613  
   614  // sortLogModelsDescending merges all log models of different types and then
   615  // sorts them in ascending order by the number times each occurs. Returns the
   616  // sorted list and the total number of logs represented by each log model in
   617  // the list.
   618  func (l *LogStats) SortLogModels(messages, metrics, unknown bool) (models []interface{}, numLogs uint) {
   619  	var messagesSort []MessageLogModelStats
   620  	var metricsSort []MetricsLogModelStats
   621  	var unknownSort []UnknownLogModelStats
   622  
   623  	if messages {
   624  		messagesSort = l.MessageLogModels.Sort()
   625  		messages := make([]interface{}, len(messagesSort))
   626  		for i, v := range messagesSort {
   627  			messages[i] = v
   628  		}
   629  		models = append(models, messages...)
   630  		numLogs += l.MessageLogModels.Count
   631  	}
   632  
   633  	if metrics {
   634  		metricsSort = l.MetricsLogModels.Sort()
   635  		metrics := make([]interface{}, len(metricsSort))
   636  		for i, v := range metricsSort {
   637  			metrics[i] = v
   638  		}
   639  		models = append(models, metrics...)
   640  		numLogs += l.MetricsLogModels.Count
   641  	}
   642  
   643  	if unknown {
   644  		unknownSort = l.UnknownLogModels.Sort()
   645  		unknown := make([]interface{}, len(unknownSort))
   646  		for i, v := range unknownSort {
   647  			unknown[i] = v
   648  		}
   649  		models = append(models, unknown...)
   650  		numLogs += l.UnknownLogModels.Count
   651  	}
   652  
   653  	sort.Slice(models, func(i, j int) bool {
   654  		a := models[i].(LogModelStatsMetrics)
   655  		b := models[j].(LogModelStatsMetrics)
   656  
   657  		return b.NumLogs() > a.NumLogs()
   658  	})
   659  
   660  	return models, numLogs
   661  }
   662  
   663  // NumDistinctLogs returns the number of unique log models contained within the
   664  // LogStats structure.
   665  func (l *LogStats) NumDistinctLogs() uint {
   666  	return uint(len(l.MessageLogModels.modelStats) + len(l.MetricsLogModels.modelStats) + len(l.UnknownLogModels.modelStats))
   667  }
   668  
   669  func (l *LogStats) Print(messages, metrics, unknown, printStructure, printExample bool) {
   670  	logs, numLogs := l.SortLogModels(messages, metrics, unknown)
   671  
   672  	for _, x := range logs {
   673  		switch v := x.(type) {
   674  		case MessageLogModelStats:
   675  			v.PrintWithRelativePercent(numLogs, printStructure, printExample)
   676  		case MetricsLogModelStats:
   677  			v.PrintWithRelativePercent(numLogs, printStructure, printExample)
   678  		case UnknownLogModelStats:
   679  			v.PrintWithRelativePercent(numLogs, printExample)
   680  		}
   681  	}
   682  }