github.com/bingoohuang/gg@v0.0.0-20240325092523-45da7dee9335/pkg/logline/parser_test.go (about) 1 package logline 2 3 import ( 4 "bufio" 5 "fmt" 6 "net/http" 7 "os" 8 "testing" 9 10 "github.com/stretchr/testify/assert" 11 ) 12 13 func TestParse(t *testing.T) { 14 // 1. 样本与模式对照书写,模式中的#对应的样本字符为锚定符 15 // 2. 需要捕获锚定符之间的值时,给定一个标识符(例如ip,time),如果不需要取值则使用空格略过 16 // 3. 值名称为time时表示日期时间,对应的样本中的时间值,要修改成golang的时间格式(layout),参见 https://golang.org/src/time/format.go 17 // 4. 竖线表示过滤器,目前仅支持path过滤器,就是从uri(带query)中取出path(不带query) 18 // 5. 捕获标识符对应的样本值为整数时会解析成int类型,为小数时会解析成float64类型 19 20 // pattern="%h %l %u %t %r %s %b %S %D %T %F %{Referer}i %{X-Forwarded-For}i %{User-Agent}i %{X-Real-IP}i" 21 const samplee = `127.0.0.1 - - [02/Jan/2006:15:04:05 -0700] GET /path?indent=true HTTP/1.1 200 41824 - 8 0.008 6 - - Nginx/1.1` 22 const pattern = `ip # # ##time ##method#uri|path # #code#bytesSent#-#millis#seconds#` 23 24 p, err := NewPattern(samplee, pattern) 25 assert.Nil(t, err) 26 27 line := `192.158.77.11 - - [26/May/2021:18:55:45 +0800] GET /solr/licenseIndex/select?indent=true&5-26T10rows=2500&sort=id+asc&start=0&wt=json HTTP/1.1 200 41824 - 8 0.008 6 - - Go-http-client/1.1` 28 29 m, ok := p.Parse(line) 30 assert.True(t, ok) 31 32 tt, _ := TimeValue(`02/Jan/2006:15:04:05 -0700`).Convert("26/May/2021:18:55:45 +0800") 33 assert.Equal(t, map[string]interface{}{ 34 "ip": "192.158.77.11", 35 "time": tt, 36 "method": http.MethodGet, 37 "uri": "/solr/licenseIndex/select", 38 "code": 200, 39 "bytesSent": 41824, 40 "millis": 8, 41 "seconds": 0.008, 42 }, m) 43 44 // pattern: '%h %l %u %t "%r" %s %b "%{Referer}i" "%{User-Agent}i" %D' 45 logSamplee := "10.1.6.1 - - [02/Jan/2006:15:04:05 -0700] !HEAD / HTTP/1.0! 200 94 !-! !-! 0" 46 logPattern := "ip # # #time # #method#path|path# ##code#bytesSent## # # ##millis" 47 p2, err2 := NewPattern(logSamplee, logPattern, WithReplace(`!`, `"`)) 48 assert.Nil(t, err2) 49 50 line = `10.16.26.21 - - [19/May/2021:00:00:13 +0800] "POST /upload1 HTTP/1.1" 200 94 "-" "Apache-HttpClient/4.5.1 (Java/1.8.0_74)" 42` 51 m2, ok2 := p2.Parse(line) 52 assert.True(t, ok2) 53 tt, _ = TimeValue(`02/Jan/2006:15:04:05 -0700`).Convert("19/May/2021:00:00:13 +0800") 54 assert.Equal(t, map[string]interface{}{ 55 "ip": "10.16.26.21", 56 "time": tt, 57 "method": "POST", 58 "path": "/upload1", 59 "code": 200, 60 "bytesSent": 94, 61 "millis": 42, 62 }, m2) 63 64 // https://qsli.github.io/2016/12/23/tomcat-access-log/ 65 // parseFile(`/Users/bingoobjca/Downloads/localhost_access_log2021-05-21.txt`, p) 66 // parseFile(`/Users/bingoobjca/Downloads/scaffold_access_log.2021-05-19.log`, p2) 67 } 68 69 func parseFile(file string, p *Pattern) { 70 f, _ := os.Open(file) 71 defer f.Close() 72 73 out, _ := os.OpenFile(file+".parsed", os.O_CREATE|os.O_WRONLY, os.ModePerm) 74 defer out.Close() 75 76 scanner := bufio.NewScanner(f) 77 78 lineNo := 0 79 for scanner.Scan() { 80 line := scanner.Bytes() 81 lineNo++ 82 m, ok := p.ParseBytes(line) 83 84 fmt.Fprintf(out, "%v, ok:%t\n", m, ok) 85 } 86 87 fmt.Printf("total lines: %d\n", lineNo) 88 }