github.com/netdata/go.d.plugin@v0.58.1/modules/squidlog/logline.go (about)

     1  // SPDX-License-Identifier: GPL-3.0-or-later
     2  
     3  package squidlog
     4  
     5  import (
     6  	"errors"
     7  	"fmt"
     8  	"regexp"
     9  	"strconv"
    10  	"strings"
    11  )
    12  
    13  // https://wiki.squid-cache.org/Features/LogFormat
    14  // http://www.squid-cache.org/Doc/config/logformat/
    15  // https://wiki.squid-cache.org/SquidFaq/SquidLogs#Squid_result_codes
    16  // https://www.websense.com/content/support/library/web/v773/wcg_help/squid.aspx
    17  
    18  /*
    19  4.6.1:
    20  logformat squid      %ts.%03tu %6tr %>a %Ss/%03>Hs %<st %rm %ru %[un %Sh/%<a %mt
    21  logformat common     %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st %Ss:%Sh
    22  logformat combined   %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st "%{Referer}>h" "%{User-Agent}>h" %Ss:%Sh
    23  logformat referrer   %ts.%03tu %>a %{Referer}>h %ru
    24  logformat useragent  %>a [%tl] "%{User-Agent}>h"
    25  logformat icap_squid %ts.%03tu %6icap::tr %>A %icap::to/%03icap::Hs %icap::<st %icap::rm %icap::ru %un -/%icap::<A -
    26  */
    27  
    28  /*
    29  Valid Capture Name: [A-Za-z0-9_]+
    30  // TODO: namings
    31  
    32  | local                   | squid format code | description                                                            |
    33  |-------------------------|-------------------|------------------------------------------------------------------------|
    34  | resp_time               | %tr               | Response time (milliseconds).
    35  | client_address          | %>a               | Client source IP address.
    36  | client_address          | %>A               | Client FQDN.
    37  | cache_code              | %Ss               | Squid request status (TCP_MISS etc).
    38  | http_code               | %>Hs              | The HTTP response status code from Content Gateway to client.
    39  | resp_size               | %<st              | Total size of reply sent to client (after adaptation).
    40  | req_method              | %rm               | Request method (GET/POST etc).
    41  | hier_code               | %Sh               | Squid hierarchy status (DEFAULT_PARENT etc).
    42  | server_address          | %<a               | Server IP address of the last server or peer connection.
    43  | server_address          | %<A               | Server FQDN or peer name.
    44  | mime_type               | %mt               | MIME content type.
    45  
// The following are needed to make the default log format CSV-parsable.
    47  | result_code             | %Ss/%03>Hs        | cache code and http code.
    48  | hierarchy               | %Sh/%<a           | hierarchy code and server address.
    49  
    50  Notes:
    51  - %<a: older versions of Squid would put the origin server hostname here.
    52  */
    53  
// Sentinel errors. The assign* methods and verify wrap them with %w, so they
// can be matched with errors.Is.
var (
	// errEmptyLine is reported by verify when the line equals emptyLogLine.
	errEmptyLine = errors.New("empty line")

	// Errors for individual fields.
	errBadRespTime   = errors.New("bad response time")
	errBadClientAddr = errors.New("bad client address")
	errBadCacheCode  = errors.New("bad cache code")
	errBadHTTPCode   = errors.New("bad http code")
	errBadRespSize   = errors.New("bad response size")
	errBadReqMethod  = errors.New("bad request method")
	errBadHierCode   = errors.New("bad hier code")
	errBadServerAddr = errors.New("bad server address")
	errBadMimeType   = errors.New("bad mime type")

	// Errors for composite "a/b" fields that contain no usable '/' separator.
	errBadResultCode = errors.New("bad result code")
	errBadHierarchy  = errors.New("bad hierarchy")
)
    68  
    69  func newEmptyLogLine() *logLine {
    70  	var l logLine
    71  	l.reset()
    72  	return &l
    73  }
    74  
    75  type (
    76  	logLine struct {
    77  		clientAddr string
    78  		serverAddr string
    79  
    80  		respTime int
    81  		respSize int
    82  		httpCode int
    83  
    84  		reqMethod string
    85  		mimeType  string
    86  
    87  		cacheCode string
    88  		hierCode  string
    89  	}
    90  )
    91  
// Field names recognised by logLine.Assign.
const (
	// Fields carrying a single value.
	fieldRespTime   = "resp_time"
	fieldClientAddr = "client_address"
	fieldCacheCode  = "cache_code"
	fieldHTTPCode   = "http_code"
	fieldRespSize   = "resp_size"
	fieldReqMethod  = "req_method"
	fieldHierCode   = "hier_code"
	fieldServerAddr = "server_address"
	fieldMimeType   = "mime_type"

	// Composite fields: two values joined by '/', split by Assign.
	fieldResultCode = "result_code" // cache code / http code
	fieldHierarchy  = "hierarchy"   // hier code / server address
)
   105  
   106  func (l *logLine) Assign(field string, value string) (err error) {
   107  	if value == "" {
   108  		return
   109  	}
   110  
   111  	switch field {
   112  	case fieldRespTime:
   113  		err = l.assignRespTime(value)
   114  	case fieldClientAddr:
   115  		err = l.assignClientAddress(value)
   116  	case fieldCacheCode:
   117  		err = l.assignCacheCode(value)
   118  	case fieldHTTPCode:
   119  		err = l.assignHTTPCode(value)
   120  	case fieldRespSize:
   121  		err = l.assignRespSize(value)
   122  	case fieldReqMethod:
   123  		err = l.assignReqMethod(value)
   124  	case fieldHierCode:
   125  		err = l.assignHierCode(value)
   126  	case fieldMimeType:
   127  		err = l.assignMimeType(value)
   128  	case fieldServerAddr:
   129  		err = l.assignServerAddress(value)
   130  	case fieldResultCode:
   131  		err = l.assignResultCode(value)
   132  	case fieldHierarchy:
   133  		err = l.assignHierarchy(value)
   134  	}
   135  	return err
   136  }
   137  
// hyphen is the placeholder the assign* methods compare incoming values
// against; it is logged in place of a value that is not available
// (e.g. no hierarchy information, no content type).
const hyphen = "-"
   139  
   140  func (l *logLine) assignRespTime(time string) error {
   141  	if time == hyphen {
   142  		return fmt.Errorf("assign '%s': %w", time, errBadRespTime)
   143  	}
   144  	v, err := strconv.Atoi(time)
   145  	if err != nil || !isRespTimeValid(v) {
   146  		return fmt.Errorf("assign '%s': %w", time, errBadRespTime)
   147  	}
   148  	l.respTime = v
   149  	return nil
   150  }
   151  
   152  func (l *logLine) assignClientAddress(address string) error {
   153  	if address == hyphen {
   154  		return fmt.Errorf("assign '%s': %w", address, errBadClientAddr)
   155  	}
   156  	l.clientAddr = address
   157  	return nil
   158  }
   159  
   160  func (l *logLine) assignCacheCode(code string) error {
   161  	if code == hyphen || !isCacheCodeValid(code) {
   162  		return fmt.Errorf("assign '%s': %w", code, errBadCacheCode)
   163  	}
   164  	l.cacheCode = code
   165  	return nil
   166  }
   167  
   168  func (l *logLine) assignHTTPCode(code string) error {
   169  	if code == hyphen {
   170  		return fmt.Errorf("assign '%s': %w", code, errBadHTTPCode)
   171  	}
   172  	v, err := strconv.Atoi(code)
   173  	if err != nil || !isHTTPCodeValid(v) {
   174  		return fmt.Errorf("assign '%s': %w", code, errBadHTTPCode)
   175  	}
   176  	l.httpCode = v
   177  	return nil
   178  }
   179  
   180  func (l *logLine) assignResultCode(code string) error {
   181  	i := strings.IndexByte(code, '/')
   182  	if i <= 0 {
   183  		return fmt.Errorf("assign '%s': %w", code, errBadResultCode)
   184  	}
   185  	if err := l.assignCacheCode(code[:i]); err != nil {
   186  		return err
   187  	}
   188  	return l.assignHTTPCode(code[i+1:])
   189  }
   190  
   191  func (l *logLine) assignRespSize(size string) error {
   192  	if size == hyphen {
   193  		return fmt.Errorf("assign '%s': %w", size, errBadRespSize)
   194  	}
   195  	v, err := strconv.Atoi(size)
   196  	if err != nil || !isRespSizeValid(v) {
   197  		return fmt.Errorf("assign '%s': %w", size, errBadRespSize)
   198  	}
   199  	l.respSize = v
   200  	return nil
   201  }
   202  
   203  func (l *logLine) assignReqMethod(method string) error {
   204  	if method == hyphen || !isReqMethodValid(method) {
   205  		return fmt.Errorf("assign '%s': %w", method, errBadReqMethod)
   206  	}
   207  	l.reqMethod = method
   208  	return nil
   209  }
   210  
   211  func (l *logLine) assignHierCode(code string) error {
   212  	if code == hyphen || !isHierCodeValid(code) {
   213  		return fmt.Errorf("assign '%s': %w", code, errBadHierCode)
   214  	}
   215  	l.hierCode = code
   216  	return nil
   217  }
   218  
   219  func (l *logLine) assignServerAddress(address string) error {
   220  	// Logged as "-" if there is no hierarchy information.
   221  	// For TCP HIT, TCP failures, cachemgr requests and all UDP requests, there is no hierarchy information.
   222  	if address == hyphen {
   223  		return nil
   224  	}
   225  	l.serverAddr = address
   226  	return nil
   227  }
   228  
   229  func (l *logLine) assignHierarchy(hierarchy string) error {
   230  	i := strings.IndexByte(hierarchy, '/')
   231  	if i <= 0 {
   232  		return fmt.Errorf("assign '%s': %w", hierarchy, errBadHierarchy)
   233  	}
   234  	if err := l.assignHierCode(hierarchy[:i]); err != nil {
   235  		return err
   236  	}
   237  	return l.assignServerAddress(hierarchy[i+1:])
   238  }
   239  
   240  func (l *logLine) assignMimeType(mime string) error {
   241  	// ICP exchanges usually don't have any content type, and thus are logged "-".
   242  	//Also, some weird replies have content types ":" or even empty ones.
   243  	if mime == hyphen || mime == ":" {
   244  		return nil
   245  	}
   246  	// format: type/subtype, type/subtype;parameter=value
   247  	i := strings.IndexByte(mime, '/')
   248  	if i <= 0 || !isMimeTypeValid(mime[:i]) {
   249  		return fmt.Errorf("assign '%s': %w", mime, errBadMimeType)
   250  	}
   251  	l.mimeType = mime[:i] // drop subtype
   252  	return nil
   253  }
   254  
   255  func (l logLine) verify() error {
   256  	if l.empty() {
   257  		return fmt.Errorf("verify: %w", errEmptyLine)
   258  	}
   259  	if l.hasRespTime() && !l.isRespTimeValid() {
   260  		return fmt.Errorf("verify '%d': %w", l.respTime, errBadRespTime)
   261  	}
   262  	if l.hasClientAddress() && !l.isClientAddressValid() {
   263  		return fmt.Errorf("verify '%s': %w", l.clientAddr, errBadClientAddr)
   264  	}
   265  	if l.hasCacheCode() && !l.isCacheCodeValid() {
   266  		return fmt.Errorf("verify '%s': %w", l.cacheCode, errBadCacheCode)
   267  	}
   268  	if l.hasHTTPCode() && !l.isHTTPCodeValid() {
   269  		return fmt.Errorf("verify '%d': %w", l.httpCode, errBadHTTPCode)
   270  	}
   271  	if l.hasRespSize() && !l.isRespSizeValid() {
   272  		return fmt.Errorf("verify '%d': %w", l.respSize, errBadRespSize)
   273  	}
   274  	if l.hasReqMethod() && !l.isReqMethodValid() {
   275  		return fmt.Errorf("verify '%s': %w", l.reqMethod, errBadReqMethod)
   276  	}
   277  	if l.hasHierCode() && !l.isHierCodeValid() {
   278  		return fmt.Errorf("verify '%s': %w", l.hierCode, errBadHierCode)
   279  	}
   280  	if l.hasServerAddress() && !l.isServerAddressValid() {
   281  		return fmt.Errorf("verify '%s': %w", l.serverAddr, errBadServerAddr)
   282  	}
   283  	if l.hasMimeType() && !l.isMimeTypeValid() {
   284  		return fmt.Errorf("verify '%s': %w", l.mimeType, errBadMimeType)
   285  	}
   286  	return nil
   287  }
   288  
// One-line helpers used by verify.
//
//   - empty reports whether the line equals emptyLogLine.
//   - has* report whether a field holds something other than its "empty"
//     value (see isEmptyNumber and isEmptyString).
//   - is*Valid apply the package-level validator to the stored value; the two
//     address checks match the value against reAddress.
func (l logLine) empty() bool                { return l == emptyLogLine }
func (l logLine) hasRespTime() bool          { return !isEmptyNumber(l.respTime) }
func (l logLine) hasClientAddress() bool     { return !isEmptyString(l.clientAddr) }
func (l logLine) hasCacheCode() bool         { return !isEmptyString(l.cacheCode) }
func (l logLine) hasHTTPCode() bool          { return !isEmptyNumber(l.httpCode) }
func (l logLine) hasRespSize() bool          { return !isEmptyNumber(l.respSize) }
func (l logLine) hasReqMethod() bool         { return !isEmptyString(l.reqMethod) }
func (l logLine) hasHierCode() bool          { return !isEmptyString(l.hierCode) }
func (l logLine) hasServerAddress() bool     { return !isEmptyString(l.serverAddr) }
func (l logLine) hasMimeType() bool          { return !isEmptyString(l.mimeType) }
func (l logLine) isRespTimeValid() bool      { return isRespTimeValid(l.respTime) }
func (l logLine) isClientAddressValid() bool { return reAddress.MatchString(l.clientAddr) }
func (l logLine) isCacheCodeValid() bool     { return isCacheCodeValid(l.cacheCode) }
func (l logLine) isHTTPCodeValid() bool      { return isHTTPCodeValid(l.httpCode) }
func (l logLine) isRespSizeValid() bool      { return isRespSizeValid(l.respSize) }
func (l logLine) isReqMethodValid() bool     { return isReqMethodValid(l.reqMethod) }
func (l logLine) isHierCodeValid() bool      { return isHierCodeValid(l.hierCode) }
func (l logLine) isServerAddressValid() bool { return reAddress.MatchString(l.serverAddr) }
func (l logLine) isMimeTypeValid() bool      { return isMimeTypeValid(l.mimeType) }
   308  
   309  func (l *logLine) reset() {
   310  	l.respTime = emptyNumber
   311  	l.clientAddr = emptyString
   312  	l.cacheCode = emptyString
   313  	l.httpCode = emptyNumber
   314  	l.respSize = emptyNumber
   315  	l.reqMethod = emptyString
   316  	l.hierCode = emptyString
   317  	l.serverAddr = emptyString
   318  	l.mimeType = emptyString
   319  }
   320  
// emptyLogLine is a copy of a freshly reset logLine; logLine.empty compares
// against it.
var emptyLogLine = *newEmptyLogLine()
   322  
// Sentinel values that logLine.reset writes to mark a field as "not set".
const (
	emptyString = "__empty_string__" // for string fields
	emptyNumber = -9999              // for int fields
)
   327  
   328  var (
   329  	// IPv4, IPv6, FQDN.
   330  	reAddress = regexp.MustCompile(`^(?:(?:[0-9]{1,3}\.){3}[0-9]{1,3}|[a-f0-9:]{3,}|[a-zA-Z0-9-.]{3,})$`)
   331  )
   332  
// isEmptyString reports whether s is the emptyString sentinel or the empty
// string.
func isEmptyString(s string) bool {
	return s == emptyString || s == ""
}
   336  
// isEmptyNumber reports whether n is the emptyNumber sentinel.
func isEmptyNumber(n int) bool {
	return n == emptyNumber
}
   340  
// isRespTimeValid reports whether time is non-negative.
func isRespTimeValid(time int) bool {
	return time >= 0
}
   344  
   345  // isCacheCodeValid does not guarantee cache result code is valid, but it is very likely.
   346  func isCacheCodeValid(code string) bool {
   347  	// https://wiki.squid-cache.org/SquidFaq/SquidLogs#Squid_result_codes
   348  	if code == "NONE" {
   349  		return true
   350  	}
   351  	return len(code) > 5 && (code[:4] == "TCP_" || code[:4] == "UDP_")
   352  }
   353  
   354  func isHTTPCodeValid(code int) bool {
   355  	// https://wiki.squid-cache.org/SquidFaq/SquidLogs#HTTP_status_codes
   356  	return code == 0 || code >= 100 && code <= 603
   357  }
   358  
// isRespSizeValid reports whether size is non-negative.
func isRespSizeValid(size int) bool {
	return size >= 0
}
   362  
   363  func isReqMethodValid(method string) bool {
   364  	// https://wiki.squid-cache.org/SquidFaq/SquidLogs#Request_methods
   365  	switch method {
   366  	case "GET",
   367  		"HEAD",
   368  		"POST",
   369  		"PUT",
   370  		"PATCH",
   371  		"DELETE",
   372  		"CONNECT",
   373  		"OPTIONS",
   374  		"TRACE",
   375  		"ICP_QUERY",
   376  		"PURGE",
   377  		"PROPFIND",
   378  		"PROPATCH",
   379  		"MKCOL",
   380  		"COPY",
   381  		"MOVE",
   382  		"LOCK",
   383  		"UNLOCK",
   384  		"NONE":
   385  		return true
   386  	}
   387  	return false
   388  }
   389  
   390  // isHierCodeValid does not guarantee hierarchy code is valid, but it is very likely.
   391  func isHierCodeValid(code string) bool {
   392  	// https://wiki.squid-cache.org/SquidFaq/SquidLogs#Hierarchy_Codes
   393  	return len(code) > 6 && code[:5] == "HIER_"
   394  }
   395  
   396  // isMimeTypeValid expects only mime type part.
   397  func isMimeTypeValid(mimeType string) bool {
   398  	// https://www.iana.org/assignments/media-types/media-types.xhtml
   399  	if mimeType == "text" {
   400  		return true
   401  	}
   402  	switch mimeType {
   403  	case "application", "audio", "font", "image", "message", "model", "multipart", "video":
   404  		return true
   405  	}
   406  	return false
   407  }