// Source: github.com/netdata/go.d.plugin@v0.58.1/modules/weblog/logline.go
// SPDX-License-Identifier: GPL-3.0-or-later

package weblog

import (
	"errors"
	"fmt"
	"regexp"
	"strconv"
	"strings"
)

// TODO: it is not clear how to handle "-", current handling is not good
// In general it is:
//   - If a field is unused in a particular entry dash "-" marks the omitted field.
// In addition to that "-" is used as zero value in:
//   - apache: %b '-' when no bytes are sent.
//
// Log Format:
//  - CLF: https://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format
//  - ELF: https://www.w3.org/TR/WD-logfile.html
//  - Apache CLF: https://httpd.apache.org/docs/trunk/logs.html#common

// Variables:
//  - nginx: http://nginx.org/en/docs/varindex.html
//  - apache: http://httpd.apache.org/docs/current/mod/mod_log_config.html#logformat
//  - IIS: https://learn.microsoft.com/en-us/windows/win32/http/w3c-logging

/*
| nginx                   | apache    | description                                   |
|-------------------------|-----------|-----------------------------------------------|
| $host ($http_host)      | %v        | Name of the server which accepted a request.
| $server_port            | %p        | Port of the server which accepted a request.
| $scheme                 | -         | Request scheme. "http" or "https".
| $remote_addr            | %a (%h)   | Client address.
| $request                | %r        | Full original request line. The line is "$request_method $request_uri $server_protocol".
| $request_method         | %m        | Request method. Usually "GET" or "POST".
| $request_uri            | %U        | Full original request URI.
| $server_protocol        | %H        | Request protocol. Usually "HTTP/1.0", "HTTP/1.1", or "HTTP/2.0".
| $status                 | %s (%>s)  | Response status code.
| $request_length         | %I        | Bytes received from a client, including request and headers.
| $bytes_sent             | %O        | Bytes sent to a client, including request and headers.
| $body_bytes_sent        | %B (%b)   | Bytes sent to a client, not counting the response header.
/*
| $request_time           | %D        | Request processing time.
| $upstream_response_time | -         | Time spent on receiving the response from the upstream server.
| $ssl_protocol           | -         | Protocol of an established SSL connection.
| $ssl_cipher             | -         | String of ciphers used for an established SSL connection.
*/

// Sentinel errors. Assign and verify return them wrapped with context,
// so callers should match them with errors.Is.
var (
	errEmptyLine         = errors.New("empty line")
	errBadVhost          = errors.New("bad vhost")
	errBadVhostPort      = errors.New("bad vhost with port")
	errBadPort           = errors.New("bad port")
	errBadReqScheme      = errors.New("bad req scheme")
	errBadReqClient      = errors.New("bad req client")
	errBadRequest        = errors.New("bad request")
	errBadReqMethod      = errors.New("bad req method")
	errBadReqURL         = errors.New("bad req url")
	errBadReqProto       = errors.New("bad req protocol")
	errBadReqSize        = errors.New("bad req size")
	errBadRespCode       = errors.New("bad resp status code")
	errBadRespSize       = errors.New("bad resp size")
	errBadReqProcTime    = errors.New("bad req processing time")
	errBadUpsRespTime    = errors.New("bad upstream resp time")
	errBadSSLProto       = errors.New("bad ssl protocol")
	errBadSSLCipherSuite = errors.New("bad ssl cipher suite")
)

// newEmptyLogLine returns a logLine whose custom-field containers are
// allocated and whose web fields all hold their "empty" sentinel values.
func newEmptyLogLine() *logLine {
	var l logLine
	l.custom.fields = make(map[string]struct{})
	l.custom.values = make([]customValue, 0, 20)
	l.reset()
	return &l
}

type (
	// logLine is one parsed log entry: the well-known web fields plus any
	// custom fields collected by assignCustom.
	logLine struct {
		web
		custom custom
	}
	// web holds the well-known fields. A field that was not assigned keeps
	// the sentinel from emptyWebFields (emptyString / emptyNumber).
	web struct {
		vhost          string
		port           string
		reqScheme      string
		reqClient      string
		reqMethod      string
		reqURL         string
		reqProto       string
		reqSize        int
		reqProcTime    float64
		respCode       int
		respSize       int
		upsRespTime    float64
		sslProto       string
		sslCipherSuite string
	}
	// custom holds the set of accepted custom field names and the values
	// collected for the current line.
	custom struct {
		fields map[string]struct{}
		values []customValue
	}
	// customValue is a single custom field name/value pair.
	customValue struct {
		name  string
		value string
	}
)

// Assign stores value in the logLine field selected by field. The field
// names recognised are the nginx variable names and apache format letters
// listed in the switch below; any other name is passed to assignCustom.
//
// An empty value is ignored and nil is returned. On failure the returned
// error wraps one of the errBad* sentinels and is prefixed with the field name.
func (l *logLine) Assign(field string, value string) (err error) {
	if value == "" {
		return
	}

	switch field {
	case "host", "http_host", "v":
		err = l.assignVhost(value)
	case "server_port", "p":
		err = l.assignPort(value)
	case "host:$server_port", "v:%p":
		err = l.assignVhostWithPort(value)
	case "scheme":
		err = l.assignReqScheme(value)
	case "remote_addr", "a", "h":
		err = l.assignReqClient(value)
	case "request", "r":
		err = l.assignRequest(value)
	case "request_method", "m":
		err = l.assignReqMethod(value)
	case "request_uri", "U":
		err = l.assignReqURL(value)
	case "server_protocol", "H":
		err = l.assignReqProto(value)
	case "status", "s", ">s":
		err = l.assignRespCode(value)
	case "request_length", "I":
		err = l.assignReqSize(value)
	case "bytes_sent", "body_bytes_sent", "b", "O", "B":
		err = l.assignRespSize(value)
	case "request_time", "D":
		err = l.assignReqProcTime(value)
	case "upstream_response_time":
		err = l.assignUpsRespTime(value)
	case "ssl_protocol":
		err = l.assignSSLProto(value)
	case "ssl_cipher":
		err = l.assignSSLCipherSuite(value)
	default:
		err = l.assignCustom(field, value)
	}
	if err != nil {
		err = fmt.Errorf("assign '%s': %w", field, err)
	}
	return err
}

// hyphen is the value that marks an omitted field in a log entry.
const hyphen = "-"

// assignVhost stores the virtual host. A hyphen is ignored. A value that
// starts with '[' and contains a closing ']' is reduced to the text between
// the brackets; any other value is stored unchanged. It never returns an
// error: the vhost is only validated later, in verify.
func (l *logLine) assignVhost(vhost string) error {
	if vhost == hyphen {
		return nil
	}
	// nginx $host and $http_host returns ipv6 in [], apache not.
	// Only strip when the value really opens with '[': otherwise the first
	// byte of an unbracketed value would be silently dropped.
	if strings.HasPrefix(vhost, "[") {
		if idx := strings.IndexByte(vhost, ']'); idx > 0 {
			vhost = vhost[1:idx]
		}
	}
	l.vhost = vhost
	return nil
}

// assignPort stores the server port. A hyphen is ignored; a value rejected
// by isPortValid yields errBadPort.
func (l *logLine) assignPort(port string) error {
	if port == hyphen {
		return nil
	}
	if !isPortValid(port) {
		return fmt.Errorf("assign '%s' : %w", port, errBadPort)
	}
	l.port = port
	return nil
}

// assignVhostWithPort splits "vhost:port" at the last ':' and assigns both
// parts. A hyphen is ignored. A missing ':' or a failure in either part
// yields errBadVhostPort.
func (l *logLine) assignVhostWithPort(vhostPort string) error {
	if vhostPort == hyphen {
		return nil
	}
	// The last ':' is used so that an IPv6 vhost keeps its own colons.
	idx := strings.LastIndexByte(vhostPort, ':')
	if idx == -1 {
		return fmt.Errorf("assign '%s' : %w", vhostPort, errBadVhostPort)
	}
	if err := l.assignPort(vhostPort[idx+1:]); err != nil {
		return fmt.Errorf("assign '%s' : %w", vhostPort, errBadVhostPort)
	}
	if err := l.assignVhost(vhostPort[0:idx]); err != nil {
		return fmt.Errorf("assign '%s' : %w", vhostPort, errBadVhostPort)
	}
	return nil
}

// assignReqScheme stores the request scheme. A hyphen is ignored; anything
// other than "http" or "https" yields errBadReqScheme.
func (l *logLine) assignReqScheme(scheme string) error {
	if scheme == hyphen {
		return nil
	}
	if !isSchemeValid(scheme) {
		return fmt.Errorf("assign '%s' : %w", scheme, errBadReqScheme)
	}
	l.reqScheme = scheme
	return nil
}

// assignReqClient stores the client address unless it is a hyphen. The
// value is not checked here; verify matches it against reClient.
func (l *logLine) assignReqClient(client string) error {
	if client == hyphen {
		return nil
	}
	l.reqClient = client
	return nil
}

// assignRequest parses a full request line of the form "METHOD URL PROTO"
// and assigns the three parts. The method ends at the first space and the
// protocol starts after the last one, so the URL may itself contain spaces.
// A hyphen is ignored; fewer than two spaces yields errBadRequest.
func (l *logLine) assignRequest(request string) error {
	if request == hyphen {
		return nil
	}
	var first, last int
	if first = strings.IndexByte(request, ' '); first < 0 {
		return fmt.Errorf("assign '%s': %w", request, errBadRequest)
	}
	// first == last means the line holds exactly one space.
	if last = strings.LastIndexByte(request, ' '); first == last {
		return fmt.Errorf("assign '%s': %w", request, errBadRequest)
	}
	proto := request[last+1:]
	url := request[first+1 : last]
	method := request[0:first]
	if err := l.assignReqMethod(method); err != nil {
		return err
	}
	if err := l.assignReqURL(url); err != nil {
		return err
	}
	return l.assignReqProto(proto)
}

// assignReqMethod stores the request method. A hyphen is ignored; a method
// not accepted by isReqMethodValid yields errBadReqMethod.
func (l *logLine) assignReqMethod(method string) error {
	if method == hyphen {
		return nil
	}
	if !isReqMethodValid(method) {
		return fmt.Errorf("assign '%s' : %w", method, errBadReqMethod)
	}
	l.reqMethod = method
	return nil
}

// assignReqURL stores the request URL. A hyphen is ignored; an empty value
// (or the emptyString sentinel) yields errBadReqURL.
func (l *logLine) assignReqURL(url string) error {
	if url == hyphen {
		return nil
	}
	if isEmptyString(url) {
		return fmt.Errorf("assign '%s' : %w", url, errBadReqURL)
	}
	l.reqURL = url
	return nil
}

// assignReqProto validates a protocol such as "HTTP/1.1" and stores only the
// version part ("1.1"). A hyphen is ignored; an invalid value yields
// errBadReqProto.
func (l *logLine) assignReqProto(proto string) error {
	if proto == hyphen {
		return nil
	}
	if !isReqProtoValid(proto) {
		return fmt.Errorf("assign '%s': %w", proto, errBadReqProto)
	}
	// isReqProtoValid guarantees the "HTTP/" prefix, so skipping 5 bytes is safe.
	l.reqProto = proto[5:]
	return nil
}

// assignRespCode parses and stores the response status code. A hyphen is
// ignored; a non-integer or out-of-range value yields errBadRespCode.
func (l *logLine) assignRespCode(status string) error {
	if status == hyphen {
		return nil
	}
	v, err := strconv.Atoi(status)
	if err != nil || !isRespCodeValid(v) {
		return fmt.Errorf("assign '%s': %w", status, errBadRespCode)
	}
	l.respCode = v
	return nil
}

// assignReqSize parses and stores the request size. Unlike most fields, a
// hyphen is stored as 0. A non-integer or negative value yields errBadReqSize.
func (l *logLine) assignReqSize(size string) error {
	// apache: can be "-" according web_log py regexp.
	if size == hyphen {
		l.reqSize = 0
		return nil
	}
	v, err := strconv.Atoi(size)
	if err != nil || !isSizeValid(v) {
		return fmt.Errorf("assign '%s': %w", size, errBadReqSize)
	}
	l.reqSize = v
	return nil
}

// assignRespSize parses and stores the response size. Unlike most fields, a
// hyphen is stored as 0. A non-integer or negative value yields errBadRespSize.
func (l *logLine) assignRespSize(size string) error {
	// apache: %b. In CLF format, i.e. a '-' rather than a 0 when no bytes are sent.
	if size == hyphen {
		l.respSize = 0
		return nil
	}
	v, err := strconv.Atoi(size)
	if err != nil || !isSizeValid(v) {
		return fmt.Errorf("assign '%s': %w", size, errBadRespSize)
	}
	l.respSize = v
	return nil
}

// assignReqProcTime parses the request processing time and stores it scaled
// by timeMultiplier. A hyphen is ignored; an unparsable or negative value
// yields errBadReqProcTime.
func (l *logLine) assignReqProcTime(time string) error {
	if time == hyphen {
		return nil
	}
	// Shortcut for the literal "0.000": skips the float parse.
	if time == "0.000" {
		l.reqProcTime = 0
		return nil
	}
	v, err := strconv.ParseFloat(time, 64)
	if err != nil || !isTimeValid(v) {
		return fmt.Errorf("assign '%s': %w", time, errBadReqProcTime)
	}
	l.reqProcTime = v * timeMultiplier(time)
	return nil
}

// isUpstreamTimeSeparator reports whether r separates individual values in
// an upstream response time string.
func isUpstreamTimeSeparator(r rune) bool { return r == ',' || r == ':' }

// assignUpsRespTime parses the upstream response time, which may hold several
// values, and stores their sum scaled by timeMultiplier. A hyphen is ignored;
// an unparsable or negative value yields errBadUpsRespTime.
func (l *logLine) assignUpsRespTime(time string) error {
	if time == hyphen {
		return nil
	}

	// the upstream response time string can contain multiple values, separated
	// by commas (in case the request was handled by multiple servers), or colons
	// (in case the request passed between multiple server groups via an internal redirect)
	// the individual values should be summed up to obtain the correct amount of time
	// the request spent in upstream
	var sum float64
	for _, val := range strings.FieldsFunc(time, isUpstreamTimeSeparator) {
		val = strings.TrimSpace(val)
		v, err := strconv.ParseFloat(val, 64)
		if err != nil || !isTimeValid(v) {
			return fmt.Errorf("assign '%s': %w", time, errBadUpsRespTime)
		}

		sum += v
	}

	l.upsRespTime = sum * timeMultiplier(time)
	return nil
}

// assignSSLProto stores the SSL/TLS protocol name. A hyphen is ignored; a
// name not accepted by isSSLProtoValid yields errBadSSLProto.
func (l *logLine) assignSSLProto(proto string) error {
	if proto == hyphen {
		return nil
	}
	if !isSSLProtoValid(proto) {
		return fmt.Errorf("assign '%s': %w", proto, errBadSSLProto)
	}
	l.sslProto = proto
	return nil
}

// assignSSLCipherSuite stores the cipher suite name. A hyphen is ignored. The
// name must contain a '-' or '_' somewhere after its first byte, otherwise
// errBadSSLCipherSuite is returned; the full pattern is checked in verify.
func (l *logLine) assignSSLCipherSuite(cipher string) error {
	if cipher == hyphen {
		return nil
	}
	if strings.IndexByte(cipher, '-') <= 0 && strings.IndexByte(cipher, '_') <= 0 {
		return fmt.Errorf("assign '%s': %w", cipher, errBadSSLCipherSuite)
	}
	l.sslCipherSuite = cipher
	return nil
}

// assignCustom appends the pair to the collected custom values when field is
// one of the accepted custom field names. Hyphen values and unknown fields
// are silently ignored. It always returns nil.
func (l *logLine) assignCustom(field, value string) error {
	if len(l.custom.fields) == 0 || value == hyphen {
		return nil
	}
	if _, ok := l.custom.fields[field]; ok {
		l.custom.values = append(l.custom.values, customValue{name: field, value: value})
	}
	return nil
}

// verify checks every assigned field and returns the first problem found,
// wrapping the matching errBad* sentinel. A line with no web fields and no
// custom values yields errEmptyLine. Unassigned fields are skipped.
func (l *logLine) verify() error {
	if l.empty() {
		return fmt.Errorf("verify: %w", errEmptyLine)
	}
	if l.hasRespCode() && !l.isRespCodeValid() {
		return fmt.Errorf("verify '%d': %w", l.respCode, errBadRespCode)
	}
	if l.hasVhost() && !l.isVhostValid() {
		return fmt.Errorf("verify '%s': %w", l.vhost, errBadVhost)
	}
	if l.hasPort() && !l.isPortValid() {
		return fmt.Errorf("verify '%s': %w", l.port, errBadPort)
	}
	if l.hasReqScheme() && !l.isSchemeValid() {
		return fmt.Errorf("verify '%s': %w", l.reqScheme, errBadReqScheme)
	}
	if l.hasReqClient() && !l.isClientValid() {
		return fmt.Errorf("verify '%s': %w", l.reqClient, errBadReqClient)
	}
	if l.hasReqMethod() && !l.isMethodValid() {
		return fmt.Errorf("verify '%s': %w", l.reqMethod, errBadReqMethod)
	}
	if l.hasReqURL() && !l.isURLValid() {
		return fmt.Errorf("verify '%s': %w", l.reqURL, errBadReqURL)
	}
	if l.hasReqProto() && !l.isProtoValid() {
		return fmt.Errorf("verify '%s': %w", l.reqProto, errBadReqProto)
	}
	if l.hasReqSize() && !l.isReqSizeValid() {
		return fmt.Errorf("verify '%d': %w", l.reqSize, errBadReqSize)
	}
	if l.hasRespSize() && !l.isRespSizeValid() {
		return fmt.Errorf("verify '%d': %w", l.respSize, errBadRespSize)
	}
	if l.hasReqProcTime() && !l.isReqProcTimeValid() {
		return fmt.Errorf("verify '%f': %w", l.reqProcTime, errBadReqProcTime)
	}
	if l.hasUpsRespTime() && !l.isUpsRespTimeValid() {
		return fmt.Errorf("verify '%f': %w", l.upsRespTime, errBadUpsRespTime)
	}
	if l.hasSSLProto() && !l.isSSLProtoValid() {
		return fmt.Errorf("verify '%s': %w", l.sslProto, errBadSSLProto)
	}
	if l.hasSSLCipherSuite() && !l.isSSLCipherSuiteValid() {
		return fmt.Errorf("verify '%s': %w", l.sslCipherSuite, errBadSSLCipherSuite)
	}
	return nil
}

// Presence (has*) and validity (is*Valid) predicates used by verify. A field
// is "present" when it no longer holds its empty sentinel. The two float
// fields are truncated to int before being compared with emptyNumber.
func (l *logLine) empty() bool                 { return !l.hasWebFields() && !l.hasCustomFields() }
func (l *logLine) hasCustomFields() bool       { return len(l.custom.values) > 0 }
func (l *logLine) hasWebFields() bool          { return l.web != emptyWebFields }
func (l *logLine) hasVhost() bool              { return !isEmptyString(l.vhost) }
func (l *logLine) hasPort() bool               { return !isEmptyString(l.port) }
func (l *logLine) hasReqScheme() bool          { return !isEmptyString(l.reqScheme) }
func (l *logLine) hasReqClient() bool          { return !isEmptyString(l.reqClient) }
func (l *logLine) hasReqMethod() bool          { return !isEmptyString(l.reqMethod) }
func (l *logLine) hasReqURL() bool             { return !isEmptyString(l.reqURL) }
func (l *logLine) hasReqProto() bool           { return !isEmptyString(l.reqProto) }
func (l *logLine) hasRespCode() bool           { return !isEmptyNumber(l.respCode) }
func (l *logLine) hasReqSize() bool            { return !isEmptyNumber(l.reqSize) }
func (l *logLine) hasRespSize() bool           { return !isEmptyNumber(l.respSize) }
func (l *logLine) hasReqProcTime() bool        { return !isEmptyNumber(int(l.reqProcTime)) }
func (l *logLine) hasUpsRespTime() bool        { return !isEmptyNumber(int(l.upsRespTime)) }
func (l *logLine) hasSSLProto() bool           { return !isEmptyString(l.sslProto) }
func (l *logLine) hasSSLCipherSuite() bool     { return !isEmptyString(l.sslCipherSuite) }
func (l *logLine) isVhostValid() bool          { return reVhost.MatchString(l.vhost) }
func (l *logLine) isPortValid() bool           { return isPortValid(l.port) }
func (l *logLine) isSchemeValid() bool         { return isSchemeValid(l.reqScheme) }
func (l *logLine) isClientValid() bool         { return reClient.MatchString(l.reqClient) }
func (l *logLine) isMethodValid() bool         { return isReqMethodValid(l.reqMethod) }
func (l *logLine) isURLValid() bool            { return !isEmptyString(l.reqURL) }
func (l *logLine) isProtoValid() bool          { return isReqProtoVerValid(l.reqProto) }
func (l *logLine) isRespCodeValid() bool       { return isRespCodeValid(l.respCode) }
func (l *logLine) isReqSizeValid() bool        { return isSizeValid(l.reqSize) }
func (l *logLine) isRespSizeValid() bool       { return isSizeValid(l.respSize) }
func (l *logLine) isReqProcTimeValid() bool    { return isTimeValid(l.reqProcTime) }
func (l *logLine) isUpsRespTimeValid() bool    { return isTimeValid(l.upsRespTime) }
func (l *logLine) isSSLProtoValid() bool       { return isSSLProtoValid(l.sslProto) }
func (l *logLine) isSSLCipherSuiteValid() bool { return reCipherSuite.MatchString(l.sslCipherSuite) }

// reset restores all web fields to their empty sentinels and truncates the
// collected custom values, keeping the slice's backing storage for reuse.
// The set of accepted custom field names is left untouched.
func (l *logLine) reset() {
	l.web = emptyWebFields
	l.custom.values = l.custom.values[:0]
}

// Patterns used by verify.
var (
	// TODO: reClient doesn't work with %h when HostnameLookups is On.
	reVhost       = regexp.MustCompile(`^[a-zA-Z0-9-:.]+$`)
	reClient      = regexp.MustCompile(`^([\da-f:.]+|localhost)$`)
	reCipherSuite = regexp.MustCompile(`^[A-Z0-9-_]+$`) // openssl -v
)

// emptyWebFields is the "nothing assigned" state of web: every field holds
// its sentinel. reset copies it and hasWebFields compares against it.
var emptyWebFields = web{
	vhost:          emptyString,
	port:           emptyString,
	reqScheme:      emptyString,
	reqClient:      emptyString,
	reqMethod:      emptyString,
	reqURL:         emptyString,
	reqProto:       emptyString,
	reqSize:        emptyNumber,
	reqProcTime:    emptyNumber,
	respCode:       emptyNumber,
	respSize:       emptyNumber,
	upsRespTime:    emptyNumber,
	sslProto:       emptyString,
	sslCipherSuite: emptyString,
}

// Sentinels marking a field that has not been assigned.
const (
	emptyString = "__empty_string__"
	emptyNumber = -9999
)

// isEmptyString reports whether s is the emptyString sentinel or "".
func isEmptyString(s string) bool {
	return s == emptyString || s == ""
}

// isEmptyNumber reports whether n is the emptyNumber sentinel.
func isEmptyNumber(n int) bool {
	return n == emptyNumber
}

// isReqMethodValid reports whether method is one of the listed HTTP methods.
// The comparison is case-sensitive.
func isReqMethodValid(method string) bool {
	// https://www.iana.org/assignments/http-methods/http-methods.xhtml
	switch method {
	case "GET",
		"ACL",
		"BASELINE-CONTROL",
		"BIND",
		"CHECKIN",
		"CHECKOUT",
		"CONNECT",
		"COPY",
		"DELETE",
		"HEAD",
		"LABEL",
		"LINK",
		"LOCK",
		"MERGE",
		"MKACTIVITY",
		"MKCALENDAR",
		"MKCOL",
		"MKREDIRECTREF",
		"MKWORKSPACE",
		"MOVE",
		"OPTIONS",
		"ORDERPATCH",
		"PATCH",
		"POST",
		"PRI",
		"PROPFIND",
		"PROPPATCH",
		"PUT",
		"REBIND",
		"REPORT",
		"SEARCH",
		"TRACE",
		"UNBIND",
		"UNCHECKOUT",
		"UNLINK",
		"UNLOCK",
		"UPDATE",
		"UPDATEREDIRECTREF":
		return true
	}
	return false
}

// isReqProtoValid reports whether proto is "HTTP/" followed by a version
// accepted by isReqProtoVerValid.
func isReqProtoValid(proto string) bool {
	return len(proto) >= 6 && proto[:5] == "HTTP/" && isReqProtoVerValid(proto[5:])
}

// isReqProtoVerValid reports whether version is one of the accepted HTTP
// version strings.
func isReqProtoVerValid(version string) bool {
	switch version {
	case "1.1", "1", "1.0", "2", "2.0", "3", "3.0":
		return true
	}
	return false
}

// isPortValid reports whether port is a decimal integer in [80, 49151].
func isPortValid(port string) bool {
	v, err := strconv.Atoi(port)
	return err == nil && v >= 80 && v <= 49151
}

// isSchemeValid reports whether scheme is "http" or "https".
func isSchemeValid(scheme string) bool {
	return scheme == "http" || scheme == "https"
}

// isRespCodeValid reports whether code lies in one of the HTTP status code
// classes, i.e. in [100, 599].
func isRespCodeValid(code int) bool {
	// rfc7231
	// Informational responses (100–199),
	// Successful responses (200–299),
	// Redirects (300–399),
	// Client errors (400–499),
	// Server errors (500–599).
	return code >= 100 && code < 600
}

// isSizeValid reports whether size is non-negative.
func isSizeValid(size int) bool {
	return size >= 0
}

// isTimeValid reports whether time is non-negative.
func isTimeValid(time float64) bool {
	return time >= 0
}

// isSSLProtoValid reports whether proto is one of the accepted SSL/TLS
// protocol names.
func isSSLProtoValid(proto string) bool {
	// "TLSv1.2" is tested on its own before the general switch.
	if proto == "TLSv1.2" {
		return true
	}
	switch proto {
	case "TLSv1.3", "SSLv2", "SSLv3", "TLSv1", "TLSv1.1":
		return true
	}
	return false
}

// timeMultiplier returns the factor that converts the textual time value to
// microseconds: 1e6 when the text contains a '.' after its first byte
// (treated as seconds), 1 otherwise (treated as already in microseconds).
func timeMultiplier(time string) float64 {
	// TODO: Change code to detect and modify properly IIS time (in milliseconds)
	// Convert to microseconds:
	//  - nginx time is in seconds with a milliseconds' resolution.
	if strings.IndexByte(time, '.') > 0 {
		return 1e6
	}
	//  - apache time is in microseconds.
	return 1
}