github.com/netdata/go.d.plugin@v0.58.1/modules/squidlog/logline.go (about) 1 // SPDX-License-Identifier: GPL-3.0-or-later 2 3 package squidlog 4 5 import ( 6 "errors" 7 "fmt" 8 "regexp" 9 "strconv" 10 "strings" 11 ) 12 13 // https://wiki.squid-cache.org/Features/LogFormat 14 // http://www.squid-cache.org/Doc/config/logformat/ 15 // https://wiki.squid-cache.org/SquidFaq/SquidLogs#Squid_result_codes 16 // https://www.websense.com/content/support/library/web/v773/wcg_help/squid.aspx 17 18 /* 19 4.6.1: 20 logformat squid %ts.%03tu %6tr %>a %Ss/%03>Hs %<st %rm %ru %[un %Sh/%<a %mt 21 logformat common %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st %Ss:%Sh 22 logformat combined %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st "%{Referer}>h" "%{User-Agent}>h" %Ss:%Sh 23 logformat referrer %ts.%03tu %>a %{Referer}>h %ru 24 logformat useragent %>a [%tl] "%{User-Agent}>h" 25 logformat icap_squid %ts.%03tu %6icap::tr %>A %icap::to/%03icap::Hs %icap::<st %icap::rm %icap::ru %un -/%icap::<A - 26 */ 27 28 /* 29 Valid Capture Name: [A-Za-z0-9_]+ 30 // TODO: namings 31 32 | local | squid format code | description | 33 |-------------------------|-------------------|------------------------------------------------------------------------| 34 | resp_time | %tr | Response time (milliseconds). 35 | client_address | %>a | Client source IP address. 36 | client_address | %>A | Client FQDN. 37 | cache_code | %Ss | Squid request status (TCP_MISS etc). 38 | http_code | %>Hs | The HTTP response status code from Content Gateway to client. 39 | resp_size | %<st | Total size of reply sent to client (after adaptation). 40 | req_method | %rm | Request method (GET/POST etc). 41 | hier_code | %Sh | Squid hierarchy status (DEFAULT_PARENT etc). 42 | server_address | %<a | Server IP address of the last server or peer connection. 43 | server_address | %<A | Server FQDN or peer name. 44 | mime_type | %mt | MIME content type. 
45 46 // Following needed to make default log format csv parsable 47 | result_code | %Ss/%03>Hs | cache code and http code. 48 | hierarchy | %Sh/%<a | hierarchy code and server address. 49 50 Notes: 51 - %<a: older versions of Squid would put the origin server hostname here. 52 */ 53 54 var ( 55 errEmptyLine = errors.New("empty line") 56 errBadRespTime = errors.New("bad response time") 57 errBadClientAddr = errors.New("bad client address") 58 errBadCacheCode = errors.New("bad cache code") 59 errBadHTTPCode = errors.New("bad http code") 60 errBadRespSize = errors.New("bad response size") 61 errBadReqMethod = errors.New("bad request method") 62 errBadHierCode = errors.New("bad hier code") 63 errBadServerAddr = errors.New("bad server address") 64 errBadMimeType = errors.New("bad mime type") 65 errBadResultCode = errors.New("bad result code") 66 errBadHierarchy = errors.New("bad hierarchy") 67 ) 68 69 func newEmptyLogLine() *logLine { 70 var l logLine 71 l.reset() 72 return &l 73 } 74 75 type ( 76 logLine struct { 77 clientAddr string 78 serverAddr string 79 80 respTime int 81 respSize int 82 httpCode int 83 84 reqMethod string 85 mimeType string 86 87 cacheCode string 88 hierCode string 89 } 90 ) 91 92 const ( 93 fieldRespTime = "resp_time" 94 fieldClientAddr = "client_address" 95 fieldCacheCode = "cache_code" 96 fieldHTTPCode = "http_code" 97 fieldRespSize = "resp_size" 98 fieldReqMethod = "req_method" 99 fieldHierCode = "hier_code" 100 fieldServerAddr = "server_address" 101 fieldMimeType = "mime_type" 102 fieldResultCode = "result_code" 103 fieldHierarchy = "hierarchy" 104 ) 105 106 func (l *logLine) Assign(field string, value string) (err error) { 107 if value == "" { 108 return 109 } 110 111 switch field { 112 case fieldRespTime: 113 err = l.assignRespTime(value) 114 case fieldClientAddr: 115 err = l.assignClientAddress(value) 116 case fieldCacheCode: 117 err = l.assignCacheCode(value) 118 case fieldHTTPCode: 119 err = l.assignHTTPCode(value) 120 case 
fieldRespSize: 121 err = l.assignRespSize(value) 122 case fieldReqMethod: 123 err = l.assignReqMethod(value) 124 case fieldHierCode: 125 err = l.assignHierCode(value) 126 case fieldMimeType: 127 err = l.assignMimeType(value) 128 case fieldServerAddr: 129 err = l.assignServerAddress(value) 130 case fieldResultCode: 131 err = l.assignResultCode(value) 132 case fieldHierarchy: 133 err = l.assignHierarchy(value) 134 } 135 return err 136 } 137 138 const hyphen = "-" 139 140 func (l *logLine) assignRespTime(time string) error { 141 if time == hyphen { 142 return fmt.Errorf("assign '%s': %w", time, errBadRespTime) 143 } 144 v, err := strconv.Atoi(time) 145 if err != nil || !isRespTimeValid(v) { 146 return fmt.Errorf("assign '%s': %w", time, errBadRespTime) 147 } 148 l.respTime = v 149 return nil 150 } 151 152 func (l *logLine) assignClientAddress(address string) error { 153 if address == hyphen { 154 return fmt.Errorf("assign '%s': %w", address, errBadClientAddr) 155 } 156 l.clientAddr = address 157 return nil 158 } 159 160 func (l *logLine) assignCacheCode(code string) error { 161 if code == hyphen || !isCacheCodeValid(code) { 162 return fmt.Errorf("assign '%s': %w", code, errBadCacheCode) 163 } 164 l.cacheCode = code 165 return nil 166 } 167 168 func (l *logLine) assignHTTPCode(code string) error { 169 if code == hyphen { 170 return fmt.Errorf("assign '%s': %w", code, errBadHTTPCode) 171 } 172 v, err := strconv.Atoi(code) 173 if err != nil || !isHTTPCodeValid(v) { 174 return fmt.Errorf("assign '%s': %w", code, errBadHTTPCode) 175 } 176 l.httpCode = v 177 return nil 178 } 179 180 func (l *logLine) assignResultCode(code string) error { 181 i := strings.IndexByte(code, '/') 182 if i <= 0 { 183 return fmt.Errorf("assign '%s': %w", code, errBadResultCode) 184 } 185 if err := l.assignCacheCode(code[:i]); err != nil { 186 return err 187 } 188 return l.assignHTTPCode(code[i+1:]) 189 } 190 191 func (l *logLine) assignRespSize(size string) error { 192 if size == hyphen { 193 
return fmt.Errorf("assign '%s': %w", size, errBadRespSize) 194 } 195 v, err := strconv.Atoi(size) 196 if err != nil || !isRespSizeValid(v) { 197 return fmt.Errorf("assign '%s': %w", size, errBadRespSize) 198 } 199 l.respSize = v 200 return nil 201 } 202 203 func (l *logLine) assignReqMethod(method string) error { 204 if method == hyphen || !isReqMethodValid(method) { 205 return fmt.Errorf("assign '%s': %w", method, errBadReqMethod) 206 } 207 l.reqMethod = method 208 return nil 209 } 210 211 func (l *logLine) assignHierCode(code string) error { 212 if code == hyphen || !isHierCodeValid(code) { 213 return fmt.Errorf("assign '%s': %w", code, errBadHierCode) 214 } 215 l.hierCode = code 216 return nil 217 } 218 219 func (l *logLine) assignServerAddress(address string) error { 220 // Logged as "-" if there is no hierarchy information. 221 // For TCP HIT, TCP failures, cachemgr requests and all UDP requests, there is no hierarchy information. 222 if address == hyphen { 223 return nil 224 } 225 l.serverAddr = address 226 return nil 227 } 228 229 func (l *logLine) assignHierarchy(hierarchy string) error { 230 i := strings.IndexByte(hierarchy, '/') 231 if i <= 0 { 232 return fmt.Errorf("assign '%s': %w", hierarchy, errBadHierarchy) 233 } 234 if err := l.assignHierCode(hierarchy[:i]); err != nil { 235 return err 236 } 237 return l.assignServerAddress(hierarchy[i+1:]) 238 } 239 240 func (l *logLine) assignMimeType(mime string) error { 241 // ICP exchanges usually don't have any content type, and thus are logged "-". 242 //Also, some weird replies have content types ":" or even empty ones. 
243 if mime == hyphen || mime == ":" { 244 return nil 245 } 246 // format: type/subtype, type/subtype;parameter=value 247 i := strings.IndexByte(mime, '/') 248 if i <= 0 || !isMimeTypeValid(mime[:i]) { 249 return fmt.Errorf("assign '%s': %w", mime, errBadMimeType) 250 } 251 l.mimeType = mime[:i] // drop subtype 252 return nil 253 } 254 255 func (l logLine) verify() error { 256 if l.empty() { 257 return fmt.Errorf("verify: %w", errEmptyLine) 258 } 259 if l.hasRespTime() && !l.isRespTimeValid() { 260 return fmt.Errorf("verify '%d': %w", l.respTime, errBadRespTime) 261 } 262 if l.hasClientAddress() && !l.isClientAddressValid() { 263 return fmt.Errorf("verify '%s': %w", l.clientAddr, errBadClientAddr) 264 } 265 if l.hasCacheCode() && !l.isCacheCodeValid() { 266 return fmt.Errorf("verify '%s': %w", l.cacheCode, errBadCacheCode) 267 } 268 if l.hasHTTPCode() && !l.isHTTPCodeValid() { 269 return fmt.Errorf("verify '%d': %w", l.httpCode, errBadHTTPCode) 270 } 271 if l.hasRespSize() && !l.isRespSizeValid() { 272 return fmt.Errorf("verify '%d': %w", l.respSize, errBadRespSize) 273 } 274 if l.hasReqMethod() && !l.isReqMethodValid() { 275 return fmt.Errorf("verify '%s': %w", l.reqMethod, errBadReqMethod) 276 } 277 if l.hasHierCode() && !l.isHierCodeValid() { 278 return fmt.Errorf("verify '%s': %w", l.hierCode, errBadHierCode) 279 } 280 if l.hasServerAddress() && !l.isServerAddressValid() { 281 return fmt.Errorf("verify '%s': %w", l.serverAddr, errBadServerAddr) 282 } 283 if l.hasMimeType() && !l.isMimeTypeValid() { 284 return fmt.Errorf("verify '%s': %w", l.mimeType, errBadMimeType) 285 } 286 return nil 287 } 288 289 func (l logLine) empty() bool { return l == emptyLogLine } 290 func (l logLine) hasRespTime() bool { return !isEmptyNumber(l.respTime) } 291 func (l logLine) hasClientAddress() bool { return !isEmptyString(l.clientAddr) } 292 func (l logLine) hasCacheCode() bool { return !isEmptyString(l.cacheCode) } 293 func (l logLine) hasHTTPCode() bool { return 
!isEmptyNumber(l.httpCode) } 294 func (l logLine) hasRespSize() bool { return !isEmptyNumber(l.respSize) } 295 func (l logLine) hasReqMethod() bool { return !isEmptyString(l.reqMethod) } 296 func (l logLine) hasHierCode() bool { return !isEmptyString(l.hierCode) } 297 func (l logLine) hasServerAddress() bool { return !isEmptyString(l.serverAddr) } 298 func (l logLine) hasMimeType() bool { return !isEmptyString(l.mimeType) } 299 func (l logLine) isRespTimeValid() bool { return isRespTimeValid(l.respTime) } 300 func (l logLine) isClientAddressValid() bool { return reAddress.MatchString(l.clientAddr) } 301 func (l logLine) isCacheCodeValid() bool { return isCacheCodeValid(l.cacheCode) } 302 func (l logLine) isHTTPCodeValid() bool { return isHTTPCodeValid(l.httpCode) } 303 func (l logLine) isRespSizeValid() bool { return isRespSizeValid(l.respSize) } 304 func (l logLine) isReqMethodValid() bool { return isReqMethodValid(l.reqMethod) } 305 func (l logLine) isHierCodeValid() bool { return isHierCodeValid(l.hierCode) } 306 func (l logLine) isServerAddressValid() bool { return reAddress.MatchString(l.serverAddr) } 307 func (l logLine) isMimeTypeValid() bool { return isMimeTypeValid(l.mimeType) } 308 309 func (l *logLine) reset() { 310 l.respTime = emptyNumber 311 l.clientAddr = emptyString 312 l.cacheCode = emptyString 313 l.httpCode = emptyNumber 314 l.respSize = emptyNumber 315 l.reqMethod = emptyString 316 l.hierCode = emptyString 317 l.serverAddr = emptyString 318 l.mimeType = emptyString 319 } 320 321 var emptyLogLine = *newEmptyLogLine() 322 323 const ( 324 emptyString = "__empty_string__" 325 emptyNumber = -9999 326 ) 327 328 var ( 329 // IPv4, IPv6, FQDN. 
330 reAddress = regexp.MustCompile(`^(?:(?:[0-9]{1,3}\.){3}[0-9]{1,3}|[a-f0-9:]{3,}|[a-zA-Z0-9-.]{3,})$`) 331 ) 332 333 func isEmptyString(s string) bool { 334 return s == emptyString || s == "" 335 } 336 337 func isEmptyNumber(n int) bool { 338 return n == emptyNumber 339 } 340 341 func isRespTimeValid(time int) bool { 342 return time >= 0 343 } 344 345 // isCacheCodeValid does not guarantee cache result code is valid, but it is very likely. 346 func isCacheCodeValid(code string) bool { 347 // https://wiki.squid-cache.org/SquidFaq/SquidLogs#Squid_result_codes 348 if code == "NONE" { 349 return true 350 } 351 return len(code) > 5 && (code[:4] == "TCP_" || code[:4] == "UDP_") 352 } 353 354 func isHTTPCodeValid(code int) bool { 355 // https://wiki.squid-cache.org/SquidFaq/SquidLogs#HTTP_status_codes 356 return code == 0 || code >= 100 && code <= 603 357 } 358 359 func isRespSizeValid(size int) bool { 360 return size >= 0 361 } 362 363 func isReqMethodValid(method string) bool { 364 // https://wiki.squid-cache.org/SquidFaq/SquidLogs#Request_methods 365 switch method { 366 case "GET", 367 "HEAD", 368 "POST", 369 "PUT", 370 "PATCH", 371 "DELETE", 372 "CONNECT", 373 "OPTIONS", 374 "TRACE", 375 "ICP_QUERY", 376 "PURGE", 377 "PROPFIND", 378 "PROPATCH", 379 "MKCOL", 380 "COPY", 381 "MOVE", 382 "LOCK", 383 "UNLOCK", 384 "NONE": 385 return true 386 } 387 return false 388 } 389 390 // isHierCodeValid does not guarantee hierarchy code is valid, but it is very likely. 391 func isHierCodeValid(code string) bool { 392 // https://wiki.squid-cache.org/SquidFaq/SquidLogs#Hierarchy_Codes 393 return len(code) > 6 && code[:5] == "HIER_" 394 } 395 396 // isMimeTypeValid expects only mime type part. 
func isMimeTypeValid(mimeType string) bool {
	// Only the top-level media type is matched here (the part before '/').
	// https://www.iana.org/assignments/media-types/media-types.xhtml
	switch mimeType {
	case "text",
		"application",
		"audio",
		"font",
		"image",
		"message",
		"model",
		"multipart",
		"video":
		return true
	default:
		return false
	}
}