github.com/honeycombio/honeytail@v1.9.0/parsers/postgresql/postgresql.go (about) 1 // Package postgresql contains code for parsing PostgreSQL slow query logs. 2 // 3 // The Postgres slow query format 4 // ------------------------------ 5 // 6 // In general, Postgres logs consist of a prefix, a level, and a message: 7 // 8 // 2017-11-06 19:20:32 UTC [11534-2] LOG: autovacuum launcher shutting down 9 // |<-------------prefix----------->|level|<----------message-------------->| 10 // 11 // 2017-11-07 01:43:39 UTC [3542-7] postgres@test LOG: duration: 15.577 ms statement: SELECT * FROM test; 12 // |<-------------prefix------------------------>|level|<-------------------message---------------------->| 13 // 14 // The format of the configuration prefix is configurable as `log_line_prefix` in postgresql.conf 15 // using the following format specifiers: 16 // 17 // %a = application name 18 // %u = user name 19 // %d = database name 20 // %r = remote host and port 21 // %h = remote host 22 // %p = process ID 23 // %t = timestamp without milliseconds 24 // %m = timestamp with milliseconds 25 // %i = command tag 26 // %e = SQL state 27 // %c = session ID 28 // %l = session line number 29 // %s = session start timestamp 30 // %v = virtual transaction ID 31 // %x = transaction ID (0 if none) 32 // %q = stop here in non-session 33 // processes 34 // %% = '%' 35 // 36 // For example, the prefix format for the lines above is: 37 // %t [%p-%l] %q%u@%d 38 // We currently require users to pass the prefix format as a parser option. 39 // 40 // Slow query logs specifically have the following format: 41 // 2017-11-07 01:43:39 UTC [3542-7] postgres@test LOG: duration: 15.577 ms statement: SELECT * FROM test; 42 // |<-------------prefix------------------------>|<----------header-------------------->|<-----query----->| 43 // 44 // For convenience, we call everything after the prefix but before the actual query string the "header". 45 // 46 // The query may span multiple lines; continuations are indented. For example: 47 // 48 // 2017-11-07 01:43:39 UTC [3542-7] postgres@test LOG: duration: 15.577 ms statement: SELECT * FROM test 49 // WHERE id=1; 50 package postgresql 51 52 import ( 53 "fmt" 54 "regexp" 55 "strconv" 56 "strings" 57 "sync" 58 "time" 59 60 "github.com/honeycombio/honeytail/event" 61 "github.com/honeycombio/honeytail/parsers" 62 "github.com/honeycombio/mysqltools/query/normalizer" 63 "github.com/sirupsen/logrus" 64 ) 65 66 const ( 67 // Regex string that matches timestamps in log 68 timestampRe = `\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}[.0-9]* [A-Z]+` 69 defaultPrefix = "%t [%p-%l] %u@%d" 70 // Regex string that matches the header of a slow query log line 71 slowQueryHeader = `\s*(?P<level>[A-Z0-9]+):\s+duration: (?P<duration>[0-9\.]+) ms\s+(?:(statement)|(execute \S+)): ` 72 traceFromTrace1 = `\/\*.*trace_id=['"](?P<trace_id>[^'"]+)['"].*parent_id=['"](?P<parent_id>[^'"]+)['"].*\*\/` 73 traceFromTrace2 = `\/\*.*parent_id=['"](?P<parent_id>[^'"]+)['"].*trace_id=['"](?P<trace_id>[^'"]+)['"].*\*\/` 74 traceFromTraceParent = `\/\*.*traceparent=['"]\d{2}-(?P<trace_id>[^-]+)-(?P<parent_id>[^-]+)-\d{2}['"].*\*\/` 75 ) 76 77 var slowQueryHeaderRegex = &parsers.ExtRegexp{Regexp: regexp.MustCompile(slowQueryHeader)} 78 var traceFromTraceRegex1 = &parsers.ExtRegexp{Regexp: regexp.MustCompile(traceFromTrace1)} 79 var traceFromTraceRegex2 = &parsers.ExtRegexp{Regexp: regexp.MustCompile(traceFromTrace2)} 80 var traceFromTraceParentRegex = &parsers.ExtRegexp{Regexp: regexp.MustCompile(traceFromTraceParent)} 81 82 // prefixField represents a specific format specifier in the log_line_prefix string 83 // (see module comment for details). 84 type prefixField struct { 85 Name string 86 Pattern string 87 } 88 89 func (p *prefixField) ReString() string { 90 return fmt.Sprintf("(?P<%s>%s)", p.Name, p.Pattern) 91 } 92 93 var prefixValues = map[string]prefixField{ 94 "%a": {Name: "application", Pattern: "\\S+"}, 95 "%u": {Name: "user", Pattern: "\\S+"}, 96 "%d": {Name: "database", Pattern: "\\S+"}, 97 "%r": {Name: "host_port", Pattern: "\\S+"}, 98 "%h": {Name: "host", Pattern: "\\S+"}, 99 "%p": {Name: "pid", Pattern: "\\d+"}, 100 "%t": {Name: "timestamp", Pattern: timestampRe}, 101 "%m": {Name: "timestamp_millis", Pattern: timestampRe}, 102 "%n": {Name: "timestamp_unix", Pattern: "\\d+"}, 103 "%i": {Name: "command_tag", Pattern: "\\S+"}, 104 "%e": {Name: "sql_state", Pattern: "\\S+"}, 105 "%c": {Name: "session_id", Pattern: "\\d+"}, 106 "%l": {Name: "session_line_number", Pattern: "\\d+"}, 107 "%s": {Name: "session_start", Pattern: timestampRe}, 108 "%v": {Name: "virtual_transaction_id", Pattern: "\\S+"}, 109 "%x": {Name: "transaction_id", Pattern: "\\S+"}, 110 } 111 112 type Options struct { 113 LogLinePrefix string `long:"log_line_prefix" description:"Format string for PostgreSQL log line prefix"` 114 } 115 116 type Parser struct { 117 // regex to match the log_line_prefix format specified by the user 118 pgPrefixRegex *parsers.ExtRegexp 119 } 120 121 func (p *Parser) Init(options interface{}) (err error) { 122 var logLinePrefixFormat string 123 conf, ok := options.(*Options) 124 if !ok || conf.LogLinePrefix == "" { 125 logLinePrefixFormat = defaultPrefix 126 } else { 127 logLinePrefixFormat = conf.LogLinePrefix 128 } 129 p.pgPrefixRegex, err = buildPrefixRegexp(logLinePrefixFormat) 130 return err 131 } 132 133 func (p *Parser) ProcessLines(lines <-chan string, send chan<- event.Event, prefixRegex *parsers.ExtRegexp) { 134 rawEvents := make(chan []string) 135 wg := &sync.WaitGroup{} 136 wg.Add(1) 137 go p.handleEvents(rawEvents, send, wg) 138 var groupedLines []string 139 for line := range lines { 140 if prefixRegex != nil { 141 // This is the "global" prefix regex as specified by the 142 // --log_prefix option, for stripping prefixes added by syslog or 143 // the like. It's unlikely that it'll actually be set by consumers 144 // of database logs. Don't confuse this with p.pgPrefixRegex, which 145 // is a compiled regex for parsing the postgres-specific line 146 // prefix. 147 var prefix = prefixRegex.FindString(line) 148 line = strings.TrimPrefix(line, prefix) 149 } 150 if !isContinuationLine(line) && len(groupedLines) > 0 { 151 // If the line we just parsed is the start of a new log statement, 152 // send off the previously accumulated group. 153 rawEvents <- groupedLines 154 groupedLines = make([]string, 0, 1) 155 } 156 groupedLines = append(groupedLines, line) 157 } 158 159 rawEvents <- groupedLines 160 close(rawEvents) 161 wg.Wait() 162 } 163 164 // handleEvents receives sets of grouped log lines, each representing a single 165 // log statement. It attempts to parse them, and sends the events it constructs 166 // down the send channel. 167 func (p *Parser) handleEvents(rawEvents <-chan []string, send chan<- event.Event, wg *sync.WaitGroup) { 168 defer wg.Done() 169 // TODO: spin up a group of goroutines to do this 170 for rawEvent := range rawEvents { 171 ev := p.handleEvent(rawEvent) 172 if ev != nil { 173 send <- *ev 174 } 175 } 176 } 177 178 // handleEvent takes a single grouped log statement (an array of lines) and attempts to parse it. 179 // It returns a pointer to an Event if successful, and nil if not. 180 func (p *Parser) handleEvent(rawEvent []string) *event.Event { 181 normalizer := normalizer.Parser{} 182 if len(rawEvent) == 0 { 183 return nil 184 } 185 firstLine := rawEvent[0] 186 187 // First, try to parse the prefix 188 match, suffix, generalMeta := parsePrefix(p.pgPrefixRegex, firstLine) 189 if !match { 190 // Note: this may be noisy when debug logging is turned on, since the 191 // postgres general log contains lots of other statements as well. 192 logrus.WithField("line", firstLine).Debug("Log line prefix didn't match expected format") 193 return nil 194 } 195 196 ev := &event.Event{ 197 Data: make(map[string]interface{}, 0), 198 } 199 200 addFieldsToEvent(generalMeta, ev) 201 202 // Now, parse the slow query header 203 match, query, slowQueryMeta := parsePrefix(slowQueryHeaderRegex, suffix) 204 205 if !match { 206 logrus.WithField("line", firstLine).Debug("didn't find slow query header, skipping line") 207 return nil 208 } 209 210 if rawDuration, ok := slowQueryMeta["duration"]; ok { 211 duration, _ := strconv.ParseFloat(rawDuration, 64) 212 ev.Data["duration"] = duration // Backwards compatibility 213 ev.Data["duration_ms"] = duration 214 } else { 215 logrus.WithField("query", query).Debug("Failed to find query duration in log line") 216 } 217 218 // Finally, concatenate the remaining text to form the query, and attempt to 219 // normalize it. 220 for _, line := range rawEvent[1:] { 221 query += " " + strings.TrimLeft(line, " \t") 222 } 223 224 // Also try to parse the trace data from a SQL comment 225 match, traceData := parseTraceData(query) 226 227 if match { 228 if traceId, ok := traceData["trace_id"]; ok { 229 ev.Data["trace.trace_id"] = traceId 230 } 231 232 if parentId, ok := traceData["parent_id"]; ok { 233 ev.Data["trace.parent_id"] = parentId 234 } 235 } 236 237 normalizedQuery := normalizer.NormalizeQuery(query) 238 239 ev.Data["query"] = query 240 ev.Data["normalized_query"] = normalizedQuery 241 242 if len(normalizer.LastTables) > 0 { 243 ev.Data["tables"] = strings.Join(normalizer.LastTables, " ") 244 } 245 246 if len(normalizer.LastComments) > 0 { 247 ev.Data["comments"] = "/* " + strings.Join(normalizer.LastComments, " */ /* ") + " */" 248 } 249 250 return ev 251 } 252 253 func parseTraceData(query string) (matched bool, fields map[string]string) { 254 match, _, traceData := parsePrefix(traceFromTraceRegex1, query) 255 256 if match { 257 return match, traceData 258 } 259 260 match, _, traceData = parsePrefix(traceFromTraceRegex2, query) 261 262 if match { 263 return match, traceData 264 } 265 266 match, _, traceData = parsePrefix(traceFromTraceParentRegex, query) 267 268 return match, traceData 269 } 270 271 func isContinuationLine(line string) bool { 272 return strings.HasPrefix(line, "\t") 273 } 274 275 // addFieldsToEvent takes a map of key-value metadata extracted from a log 276 // line, and adds them to the given event. It'll convert values to integer 277 // types where possible, and try to populate the event's timestamp. 278 func addFieldsToEvent(fields map[string]string, ev *event.Event) { 279 for k, v := range fields { 280 // Try to convert values to integer types where sensible, and extract 281 // timestamp for event 282 switch k { 283 case "session_id", "pid", "session_line_number": 284 if typed, err := strconv.Atoi(v); err == nil { 285 ev.Data[k] = typed 286 } else { 287 ev.Data[k] = v 288 } 289 case "timestamp", "timestamp_millis": 290 if timestamp, err := time.Parse("2006-01-02 15:04:05.999 MST", v); err == nil { 291 ev.Timestamp = timestamp 292 } else { 293 logrus.WithField("timestamp", v).WithError(err).Debug("Error parsing query timestamp") 294 } 295 case "timestamp_unix": 296 if typed, err := strconv.Atoi(v); err == nil { 297 // Convert millisecond-resolution Unix timestamp to time.Time 298 // object 299 timestamp := time.Unix(int64(typed/1000), int64((1000*1000)*(typed%1000))).UTC() 300 ev.Timestamp = timestamp 301 } else { 302 logrus.WithField("timestamp", v).WithError(err).Debug("Error parsing query timestamp") 303 } 304 default: 305 ev.Data[k] = v 306 } 307 } 308 } 309 310 func parsePrefix(re *parsers.ExtRegexp, line string) (matched bool, suffix string, fields map[string]string) { 311 prefix, fields := re.FindStringSubmatchMap(line) 312 if prefix == "" { 313 return false, "", nil 314 } 315 return true, line[len(prefix):], fields 316 } 317 318 func buildPrefixRegexp(prefixFormat string) (*parsers.ExtRegexp, error) { 319 prefixFormat = strings.Replace(prefixFormat, "%%", "%", -1) 320 // The %q format specifier means "if this log line isn't part of a session, 321 // stop here." The slow query logs that we care about always come from 322 // sessions, so ignore this. 323 prefixFormat = strings.Replace(prefixFormat, "%q", "", -1) 324 prefixFormat = regexp.QuoteMeta(prefixFormat) 325 for k, v := range prefixValues { 326 prefixFormat = strings.Replace(prefixFormat, k, v.ReString(), -1) 327 } 328 prefixFormat = "^" + prefixFormat 329 330 re, err := regexp.Compile(prefixFormat) 331 if err != nil { 332 return nil, err 333 } 334 return &parsers.ExtRegexp{Regexp: re}, nil 335 }