github.com/GuanceCloud/cliutils@v1.1.21/pipeline/ptinput/funcs/handle.go (about)

     1  // Unless explicitly stated otherwise all files in this repository are licensed
     2  // under the MIT License.
     3  // This product includes software developed at Guance Cloud (https://www.guance.com/).
     4  // Copyright 2021-present Guance, Inc.
     5  
     6  package funcs
     7  
     8  import (
     9  	"fmt"
    10  	"net/url"
    11  	"reflect"
    12  	"regexp"
    13  	"time"
    14  
    15  	"github.com/GuanceCloud/cliutils/pipeline/ptinput/ipdb"
    16  	"github.com/GuanceCloud/platypus/pkg/ast"
    17  	"github.com/araddon/dateparse"
    18  	"github.com/mssola/user_agent"
    19  	conv "github.com/spf13/cast"
    20  	"github.com/tidwall/gjson"
    21  )
    22  
    23  var datePattern = func() []struct {
    24  	desc        string
    25  	pattern     *regexp.Regexp
    26  	goFmt       string
    27  	defaultYear bool
    28  } {
    29  	dataPatternSource := []struct {
    30  		desc        string
    31  		pattern     string
    32  		goFmt       string
    33  		defaultYear bool
    34  	}{
    35  		{
    36  			desc:    "nginx log datetime, 02/Jan/2006:15:04:05 -0700",
    37  			pattern: `\d{2}/\w+/\d{4}:\d{2}:\d{2}:\d{2} \+\d{4}`,
    38  			goFmt:   "02/Jan/2006:15:04:05 -0700",
    39  		},
    40  		{
    41  			desc:    "redis log datetime, 14 May 2019 19:11:40.164",
    42  			pattern: `\d{2} \w+ \d{4} \d{2}:\d{2}:\d{2}.\d{3}`,
    43  			goFmt:   "02 Jan 2006 15:04:05.000",
    44  		},
    45  		{
    46  			desc:        "redis log datetime, 14 May 19:11:40.164",
    47  			pattern:     `\d{2} \w+ \d{2}:\d{2}:\d{2}.\d{3}`,
    48  			goFmt:       "02 Jan 15:04:05.000 2006",
    49  			defaultYear: true,
    50  		},
    51  		{
    52  			desc:    "mysql, 171113 14:14:20",
    53  			pattern: `\d{6} \d{2}:\d{2}:\d{2}`,
    54  			goFmt:   "060102 15:04:05",
    55  		},
    56  
    57  		{
    58  			desc:    "gin, 2021/02/27 - 14:14:20",
    59  			pattern: `\d{4}/\d{2}/\d{2} - \d{2}:\d{2}:\d{2}`,
    60  			goFmt:   "2006/01/02 - 15:04:05",
    61  		},
    62  		{
    63  			desc:    "apache,  Tue May 18 06:25:05.176170 2021",
    64  			pattern: `\w+ \w+ \d{2} \d{2}:\d{2}:\d{2}.\d{6} \d{4}`,
    65  			goFmt:   "Mon Jan 2 15:04:05.000000 2006",
    66  		},
    67  		{
    68  			desc:    "postgresql, 2021-05-27 06:54:14.760 UTC",
    69  			pattern: `\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3} UTC`,
    70  			goFmt:   "2006-01-02 15:04:05.000 UTC",
    71  		},
    72  	}
    73  
    74  	dst := []struct {
    75  		desc        string
    76  		pattern     *regexp.Regexp
    77  		goFmt       string
    78  		defaultYear bool
    79  	}{}
    80  
    81  	for _, p := range dataPatternSource {
    82  		if c, err := regexp.Compile(p.pattern); err != nil {
    83  			l.Errorf("Compile `%s` failed!", p.goFmt)
    84  		} else {
    85  			dst = append(dst, struct {
    86  				desc        string
    87  				pattern     *regexp.Regexp
    88  				goFmt       string
    89  				defaultYear bool
    90  			}{
    91  				desc:        p.desc,
    92  				pattern:     c,
    93  				goFmt:       p.goFmt,
    94  				defaultYear: p.defaultYear,
    95  			})
    96  		}
    97  	}
    98  	return dst
    99  }()
   100  
   101  func UrldecodeHandle(path string) (string, error) {
   102  	params, err := url.QueryUnescape(path)
   103  	if err != nil {
   104  		return "", err
   105  	}
   106  
   107  	return params, nil
   108  }
   109  
   110  func UserAgentHandle(str string) (map[string]interface{}, map[string]ast.DType) {
   111  	res := make(map[string]interface{})
   112  	retType := make(map[string]ast.DType)
   113  
   114  	ua := user_agent.New(str)
   115  
   116  	res["isMobile"] = ua.Mobile()
   117  	retType["isMobile"] = ast.Bool
   118  
   119  	res["isBot"] = ua.Bot()
   120  	retType["isBot"] = ast.Bool
   121  
   122  	res["os"] = ua.OS()
   123  	retType["os"] = ast.String
   124  
   125  	name, version := ua.Browser()
   126  	res["browser"] = name
   127  	retType["browser"] = ast.String
   128  
   129  	res["browserVer"] = version
   130  	retType["browserVer"] = ast.String
   131  
   132  	en, v := ua.Engine()
   133  	res["engine"] = en
   134  	retType["engine"] = ast.String
   135  	res["engineVer"] = v
   136  	retType["engineVer"] = ast.String
   137  	res["ua"] = ua.Platform()
   138  	retType["ua"] = ast.String
   139  	return res, retType
   140  }
   141  
   142  func GeoIPHandle(db ipdb.IPdb, ip string) (map[string]string, error) {
   143  	if db == nil {
   144  		return nil, nil
   145  	}
   146  
   147  	record, err := db.Geo(ip)
   148  	if err != nil {
   149  		return nil, err
   150  	}
   151  
   152  	res := make(map[string]string)
   153  
   154  	res["city"] = record.City
   155  	res["province"] = record.Region
   156  	res["country"] = record.Country
   157  	res["isp"] = db.SearchIsp(ip)
   158  
   159  	return res, nil
   160  }
   161  
   162  func DateFormatHandle(t *time.Time, fmts string) (string, error) {
   163  	if timefmt, ok := dateFormatStr[fmts]; ok {
   164  		return t.Format(timefmt), nil
   165  	}
   166  	return "", fmt.Errorf("format pattern %v no support", fmts)
   167  }
   168  
   169  func GroupHandle(value interface{}, start, end float64) bool {
   170  	num := conv.ToFloat64(value)
   171  
   172  	if num >= start && num <= end {
   173  		return true
   174  	}
   175  
   176  	return false
   177  }
   178  
   179  func GroupInHandle(value interface{}, set []interface{}) bool {
   180  	for _, val := range set {
   181  		if reflect.DeepEqual(value, val) {
   182  			return true
   183  		}
   184  	}
   185  
   186  	return false
   187  }
   188  
   189  func parseDatePattern(value string, loc *time.Location) (int64, error) {
   190  	valueCpy := value
   191  	for _, p := range datePattern {
   192  		if p.defaultYear {
   193  			ty := time.Now()
   194  			year := ty.Year()
   195  			value = fmt.Sprintf("%s %d", value, year)
   196  		} else {
   197  			value = valueCpy
   198  		}
   199  
   200  		// 默认定义的规则能匹配,不匹配的则由 dataparse 处理
   201  		if tm, err := time.ParseInLocation(p.goFmt, value, loc); err != nil {
   202  			continue
   203  		} else {
   204  			unixTime := tm.UnixNano()
   205  			return unixTime, nil
   206  		}
   207  	}
   208  	return 0, fmt.Errorf("no match")
   209  }
   210  
   211  func TimestampHandle(value, tz string) (int64, error) {
   212  	var t time.Time
   213  	var err error
   214  	timezone := time.Local
   215  
   216  	if tz != "" {
   217  		// parse timezone as +x or -x
   218  		if tz[0] == '+' || tz[0] == '-' {
   219  			if _, has := timezoneList[tz]; !has {
   220  				return 0, fmt.Errorf("fail to parse timezone %s", tz)
   221  			}
   222  			tz = timezoneList[tz]
   223  		}
   224  		if timezone, err = time.LoadLocation(tz); err != nil {
   225  			return 0, err
   226  		}
   227  	}
   228  
   229  	// pattern match first
   230  	unixTime, err := parseDatePattern(value, timezone)
   231  
   232  	if unixTime > 0 && err == nil {
   233  		return unixTime, nil
   234  	}
   235  
   236  	if t, err = dateparse.ParseIn(value, timezone); err != nil {
   237  		return 0, err
   238  	}
   239  
   240  	// l.Debugf("parse `%s' -> %v(nano: %d)", value, t, t.UnixNano())
   241  
   242  	return t.UnixNano(), nil
   243  }
   244  
   245  var dateFormatStr = map[string]string{
   246  	"ANSIC":       time.ANSIC,
   247  	"UnixDate":    time.UnixDate,
   248  	"RubyDate":    time.RubyDate,
   249  	"RFC822":      time.RFC822,
   250  	"RFC822Z":     time.RFC822Z,
   251  	"RFC850":      time.RFC850,
   252  	"RFC1123":     time.RFC1123,
   253  	"RFC1123Z":    time.RFC1123Z,
   254  	"RFC3339":     time.RFC3339,
   255  	"RFC3339Nano": time.RFC3339Nano,
   256  	"Kitchen":     time.Kitchen,
   257  	"Stamp":       time.Stamp,
   258  	"StampMilli":  time.StampMilli,
   259  	"StampMicro":  time.StampMicro,
   260  	"StampNano":   time.StampNano,
   261  }
   262  
   263  func datetimeInnerFormat(fmt string) bool {
   264  	if _, ok := dateFormatStr[fmt]; ok {
   265  		return ok
   266  	}
   267  	return false
   268  }
   269  
   270  func JSONParse(jsonStr string) map[string]interface{} {
   271  	res := make(map[string]interface{})
   272  	jsonObj := gjson.Parse(jsonStr)
   273  
   274  	if isObject(jsonObj) {
   275  		parseJSON2Map(jsonObj, res, "")
   276  	} else if isArray(jsonObj) {
   277  		for idx, obj := range jsonObj.Array() {
   278  			key := fmt.Sprintf("[%d]", idx)
   279  			parseJSON2Map(obj, res, key)
   280  		}
   281  	}
   282  
   283  	return res
   284  }
   285  
   286  func parseJSON2Map(obj gjson.Result, res map[string]interface{}, prefix string) {
   287  	if isObject(obj) {
   288  		for key, value := range obj.Map() {
   289  			if prefix != "" {
   290  				key = prefix + "." + key
   291  			}
   292  
   293  			switch {
   294  			case isObject(value):
   295  				parseJSON2Map(value, res, key)
   296  			case isArray(value):
   297  				for idx, v := range value.Array() {
   298  					fullkey := key + "[" + fmt.Sprintf("%d", idx) + "]"
   299  					parseJSON2Map(v, res, fullkey)
   300  				}
   301  			default:
   302  				res[key] = value.Value()
   303  				continue
   304  			}
   305  		}
   306  	} else {
   307  		res[prefix] = obj.Value()
   308  	}
   309  }
   310  
   311  func isObject(obj gjson.Result) bool {
   312  	return obj.IsObject()
   313  }
   314  
   315  func isArray(obj gjson.Result) bool {
   316  	return obj.IsArray()
   317  }
   318  
   319  var monthMaps = map[string]time.Month{
   320  	"january":   time.January,
   321  	"february":  time.February,
   322  	"march":     time.March,
   323  	"april":     time.April,
   324  	"june":      time.June,
   325  	"july":      time.July,
   326  	"august":    time.August,
   327  	"september": time.September,
   328  	"october":   time.October,
   329  	"november":  time.November,
   330  	"december":  time.December,
   331  
   332  	"jan": time.January,
   333  	"feb": time.February,
   334  	"mar": time.March,
   335  	"apr": time.April,
   336  	"may": time.May,
   337  	"jun": time.June,
   338  	"jul": time.July,
   339  	"aug": time.August,
   340  	"sep": time.September,
   341  	"oct": time.October,
   342  	"nov": time.November,
   343  	"dec": time.December,
   344  }
   345  
   346  var timezoneList = map[string]string{
   347  	"-11":    "Pacific/Midway",
   348  	"-10":    "Pacific/Honolulu",
   349  	"-9:30":  "Pacific/Marquesas",
   350  	"-9":     "America/Anchorage",
   351  	"-8":     "America/Los_Angeles",
   352  	"-7":     "America/Phoenix",
   353  	"-6":     "America/Chicago",
   354  	"-5":     "America/New_York",
   355  	"-4":     "America/Santiago",
   356  	"-3:30":  "America/St_Johns",
   357  	"-3":     "America/Sao_Paulo",
   358  	"-2":     "America/Noronha",
   359  	"-1":     "America/Scoresbysund",
   360  	"+0":     "Europe/London",
   361  	"+1":     "Europe/Vatican",
   362  	"+2":     "Europe/Kiev",
   363  	"+3":     "Europe/Moscow",
   364  	"+3:30":  "Asia/Tehran",
   365  	"+4":     "Asia/Dubai",
   366  	"+4:30":  "Asia/Kabul",
   367  	"+5":     "Asia/Samarkand",
   368  	"+5:30":  "Asia/Kolkata",
   369  	"+5:45":  "Asia/Kathmandu",
   370  	"+6":     "Asia/Almaty",
   371  	"+6:30":  "Asia/Yangon",
   372  	"+7":     "Asia/Jakarta",
   373  	"+8":     "Asia/Shanghai",
   374  	"+8:45":  "Australia/Eucla",
   375  	"+9":     "Asia/Tokyo",
   376  	"+9:30":  "Australia/Darwin",
   377  	"+10":    "Australia/Sydney",
   378  	"+10:30": "Australia/Lord_Howe",
   379  	"+11":    "Pacific/Guadalcanal",
   380  	"+12":    "Pacific/Auckland",
   381  	"+12:45": "Pacific/Chatham",
   382  	"+13":    "Pacific/Apia",
   383  	"+14":    "Pacific/Kiritimati",
   384  
   385  	"CST": "Asia/Shanghai",
   386  	"UTC": "Europe/London",
   387  	// TODO: add more...
   388  }