github.com/bingoohuang/gg@v0.0.0-20240325092523-45da7dee9335/pkg/logline/parser_test.go (about)

     1  package logline
     2  
     3  import (
     4  	"bufio"
     5  	"fmt"
     6  	"net/http"
     7  	"os"
     8  	"testing"
     9  
    10  	"github.com/stretchr/testify/assert"
    11  )
    12  
    13  func TestParse(t *testing.T) {
    14  	// 1. 样本与模式对照书写,模式中的#对应的样本字符为锚定符
    15  	// 2. 需要捕获锚定符之间的值时,给定一个标识符(例如ip,time),如果不需要取值则使用空格略过
    16  	// 3. 值名称为time时表示日期时间,对应的样本中的时间值,要修改成golang的时间格式(layout),参见 https://golang.org/src/time/format.go
    17  	// 4. 竖线表示过滤器,目前仅支持path过滤器,就是从uri(带query)中取出path(不带query)
    18  	// 5. 捕获标识符对应的样本值为整数时会解析成int类型,为小数时会解析成float64类型
    19  
    20  	// pattern="%h %l %u %t %r %s %b %S %D %T %F %{Referer}i %{X-Forwarded-For}i %{User-Agent}i %{X-Real-IP}i"
    21  	const samplee = `127.0.0.1 - - [02/Jan/2006:15:04:05 -0700] GET    /path?indent=true HTTP/1.1 200  41824     - 8      0.008   6 - - Nginx/1.1`
    22  	const pattern = `ip       # # ##time                      ##method#uri|path         #        #code#bytesSent#-#millis#seconds#`
    23  
    24  	p, err := NewPattern(samplee, pattern)
    25  	assert.Nil(t, err)
    26  
    27  	line := `192.158.77.11 - - [26/May/2021:18:55:45 +0800] GET /solr/licenseIndex/select?indent=true&5-26T10rows=2500&sort=id+asc&start=0&wt=json HTTP/1.1 200 41824 - 8 0.008 6 - - Go-http-client/1.1`
    28  
    29  	m, ok := p.Parse(line)
    30  	assert.True(t, ok)
    31  
    32  	tt, _ := TimeValue(`02/Jan/2006:15:04:05 -0700`).Convert("26/May/2021:18:55:45 +0800")
    33  	assert.Equal(t, map[string]interface{}{
    34  		"ip":        "192.158.77.11",
    35  		"time":      tt,
    36  		"method":    http.MethodGet,
    37  		"uri":       "/solr/licenseIndex/select",
    38  		"code":      200,
    39  		"bytesSent": 41824,
    40  		"millis":    8,
    41  		"seconds":   0.008,
    42  	}, m)
    43  
    44  	// pattern: '%h %l %u %t "%r" %s %b "%{Referer}i" "%{User-Agent}i" %D'
    45  	logSamplee := "10.1.6.1 - - [02/Jan/2006:15:04:05 -0700] !HEAD   /         HTTP/1.0! 200  94        !-! !-! 0"
    46  	logPattern := "ip      # #  #time                      # #method#path|path#        ##code#bytesSent## # # ##millis"
    47  	p2, err2 := NewPattern(logSamplee, logPattern, WithReplace(`!`, `"`))
    48  	assert.Nil(t, err2)
    49  
    50  	line = `10.16.26.21 - - [19/May/2021:00:00:13 +0800] "POST /upload1 HTTP/1.1" 200 94 "-" "Apache-HttpClient/4.5.1 (Java/1.8.0_74)" 42`
    51  	m2, ok2 := p2.Parse(line)
    52  	assert.True(t, ok2)
    53  	tt, _ = TimeValue(`02/Jan/2006:15:04:05 -0700`).Convert("19/May/2021:00:00:13 +0800")
    54  	assert.Equal(t, map[string]interface{}{
    55  		"ip":        "10.16.26.21",
    56  		"time":      tt,
    57  		"method":    "POST",
    58  		"path":      "/upload1",
    59  		"code":      200,
    60  		"bytesSent": 94,
    61  		"millis":    42,
    62  	}, m2)
    63  
    64  	// https://qsli.github.io/2016/12/23/tomcat-access-log/
    65  	// parseFile(`/Users/bingoobjca/Downloads/localhost_access_log2021-05-21.txt`, p)
    66  	// parseFile(`/Users/bingoobjca/Downloads/scaffold_access_log.2021-05-19.log`, p2)
    67  }
    68  
    69  func parseFile(file string, p *Pattern) {
    70  	f, _ := os.Open(file)
    71  	defer f.Close()
    72  
    73  	out, _ := os.OpenFile(file+".parsed", os.O_CREATE|os.O_WRONLY, os.ModePerm)
    74  	defer out.Close()
    75  
    76  	scanner := bufio.NewScanner(f)
    77  
    78  	lineNo := 0
    79  	for scanner.Scan() {
    80  		line := scanner.Bytes()
    81  		lineNo++
    82  		m, ok := p.ParseBytes(line)
    83  
    84  		fmt.Fprintf(out, "%v, ok:%t\n", m, ok)
    85  	}
    86  
    87  	fmt.Printf("total lines: %d\n", lineNo)
    88  }