github.com/honeycombio/honeytail@v1.9.0/parsers/mysql/mysql.go (about)

     1  // Package mysql parses the mysql slow query log
     2  package mysql
     3  
     4  import (
     5  	"database/sql"
     6  	"fmt"
     7  	"math/rand"
     8  	"regexp"
     9  	"strconv"
    10  	"strings"
    11  	"sync"
    12  	"time"
    13  
    14  	_ "github.com/go-sql-driver/mysql"
    15  	"github.com/honeycombio/mysqltools/query/normalizer"
    16  	"github.com/sirupsen/logrus"
    17  
    18  	"github.com/honeycombio/honeytail/event"
    19  	"github.com/honeycombio/honeytail/httime"
    20  	"github.com/honeycombio/honeytail/parsers"
    21  )
    22  
    23  // See mysql_test for example log entries
    24  // 3 sample log entries
    25  //
    26  // # Time: 2016-04-01T00:31:09.817887Z
    27  // # User@Host: root[root] @ localhost []  Id:   233
    28  // # Query_time: 0.008393  Lock_time: 0.000154 Rows_sent: 1  Rows_examined: 357
    29  // SET timestamp=1459470669;
    30  // show status like 'Uptime';
    31  //
    32  // # Time: 2016-04-01T00:31:09.853523Z
    33  // # User@Host: root[root] @ localhost []  Id:   233
    34  // # Query_time: 0.020424  Lock_time: 0.000147 Rows_sent: 494  Rows_examined: 494
    35  // SET timestamp=1459470669;
    36  // SHOW /*innotop*/ GLOBAL VARIABLES;
    37  //
    38  // # Time: 2016-04-01T00:31:09.856726Z
    39  // # User@Host: root[root] @ localhost []  Id:   233
    40  // # Query_time: 0.000021  Lock_time: 0.000000 Rows_sent: 494  Rows_examined: 494
    41  // SET timestamp=1459470669;
    42  // # administrator command: Ping;
    43  //
    44  // We should ignore the administrator command entry; the stats it presents (eg rows_sent)
    45  // are actually for the previous command
    46  //
    47  // Sample line from RDS
    48  // # administrator command: Prepare;
    49  // # User@Host: root[root] @  [10.0.1.76]  Id: 325920
    50  // # Query_time: 0.000097  Lock_time: 0.000023 Rows_sent: 1  Rows_examined: 1
    51  // SET timestamp=1476127288;
    52  // SELECT * FROM foo WHERE bar=2 AND (baz=104 OR baz=0) ORDER BY baz;
    53  
    54  const (
    55  	rdsStr  = "rds"
    56  	ec2Str  = "ec2"
    57  	selfStr = "self"
    58  
    59  	// Event attributes
    60  	userKey            = "user"
    61  	clientKey          = "client"
    62  	queryTimeKey       = "query_time"
    63  	lockTimeKey        = "lock_time"
    64  	rowsSentKey        = "rows_sent"
    65  	rowsExaminedKey    = "rows_examined"
    66  	rowsAffectedKey    = "rows_affected"
    67  	databaseKey        = "database"
    68  	queryKey           = "query"
    69  	normalizedQueryKey = "normalized_query"
    70  	statementKey       = "statement"
    71  	tablesKey          = "tables"
    72  	commentsKey        = "comments"
    73  	// InnoDB keys (it seems)
    74  	bytesSentKey      = "bytes_sent"
    75  	tmpTablesKey      = "tmp_tables"
    76  	tmpDiskTablesKey  = "tmp_disk_tables"
    77  	tmpTableSizesKey  = "tmp_table_sizes"
    78  	transactionIDKey  = "transaction_id"
    79  	queryCacheHitKey  = "query_cache_hit"
    80  	fullScanKey       = "full_scan"
    81  	fullJoinKey       = "full_join"
    82  	tmpTableKey       = "tmp_table"
    83  	tmpTableOnDiskKey = "tmp_table_on_disk"
    84  	fileSortKey       = "filesort"
    85  	fileSortOnDiskKey = "filesort_on_disk"
    86  	mergePassesKey    = "merge_passes"
    87  	ioROpsKey         = "IO_r_ops"
    88  	ioRBytesKey       = "IO_r_bytes"
    89  	ioRWaitKey        = "IO_r_wait_sec"
    90  	recLockWaitKey    = "rec_lock_wait_sec"
    91  	queueWaitKey      = "queue_wait_sec"
    92  	pagesDistinctKey  = "pages_distinct"
    93  	// Event attributes that apply to the host as a whole
    94  	hostedOnKey   = "hosted_on"
    95  	readOnlyKey   = "read_only"
    96  	replicaLagKey = "replica_lag"
    97  	roleKey       = "role"
    98  )
    99  
   100  var (
   101  	reTime = parsers.ExtRegexp{regexp.MustCompile("^# Time: (?P<time>[^ ]+)Z *$")}
   102  	// older versions of the mysql slow query log use this format for the timestamp
   103  	reOldTime    = parsers.ExtRegexp{regexp.MustCompile("^# Time: (?P<datetime>[0-9]+ [0-9:.]+)")}
   104  	reAdminPing  = parsers.ExtRegexp{regexp.MustCompile("^# administrator command: Ping; *$")}
   105  	reUser       = parsers.ExtRegexp{regexp.MustCompile("^# User@Host: (?P<user>[^#]+) @ (?P<host>[^#]+?)( Id:.+)?$")}
   106  	reQueryStats = parsers.ExtRegexp{regexp.MustCompile("^# Query_time: (?P<queryTime>[0-9.]+) *Lock_time: (?P<lockTime>[0-9.]+) *Rows_sent: (?P<rowsSent>[0-9]+) *Rows_examined: (?P<rowsExamined>[0-9]+)( *Rows_affected: (?P<rowsAffected>[0-9]+))?.*$")}
   107  	// when capturing the log from the wire, you don't get lock time etc., only query time
   108  	reTCPQueryStats    = parsers.ExtRegexp{regexp.MustCompile("^# Query_time: (?P<queryTime>[0-9.]+).*$")}
   109  	reServStats        = parsers.ExtRegexp{regexp.MustCompile("^# Bytes_sent: (?P<bytesSent>[0-9.]+) *Tmp_tables: (?P<tmpTables>[0-9.]+) *Tmp_disk_tables: (?P<tmpDiskTables>[0-9]+) *Tmp_table_sizes: (?P<tmpTableSizes>[0-9]+).*$")}
   110  	reInnodbTrx        = parsers.ExtRegexp{regexp.MustCompile("^# InnoDB_trx_id: (?P<trxId>[A-F0-9]+) *$")}
   111  	reInnodbQueryPlan1 = parsers.ExtRegexp{regexp.MustCompile("^# QC_Hit: (?P<query_cache_hit>[[:alpha:]]+)  Full_scan: (?P<full_scan>[[:alpha:]]+)  Full_join: (?P<full_join>[[:alpha:]]+)  Tmp_table: (?P<tmp_table>[[:alpha:]]+)  Tmp_table_on_disk: (?P<tmp_table_on_disk>[[:alpha:]]+).*$")}
   112  	reInnodbQueryPlan2 = parsers.ExtRegexp{regexp.MustCompile("^# Filesort: (?P<filesort>[[:alpha:]]+)  Filesort_on_disk: (?P<filesort_on_disk>[[:alpha:]]+)  Merge_passes: (?P<merge_passes>[0-9]+).*$")}
   113  	reInnodbUsage1     = parsers.ExtRegexp{regexp.MustCompile("^# +InnoDB_IO_r_ops: (?P<io_r_ops>[0-9]+)  InnoDB_IO_r_bytes: (?P<io_r_bytes>[0-9]+)  InnoDB_IO_r_wait: (?P<io_r_wait>[0-9.]+).*$")}
   114  	reInnodbUsage2     = parsers.ExtRegexp{regexp.MustCompile("^# +InnoDB_rec_lock_wait: (?P<rec_lock_wait>[0-9.]+)  InnoDB_queue_wait: (?P<queue_wait>[0-9.]+).*$")}
   115  	reInnodbUsage3     = parsers.ExtRegexp{regexp.MustCompile("^# +InnoDB_pages_distinct: (?P<pages_distinct>[0-9]+).*")}
   116  	reSetTime          = parsers.ExtRegexp{regexp.MustCompile("^SET timestamp=(?P<unixTime>[0-9]+);$")}
   117  	reUse              = parsers.ExtRegexp{regexp.MustCompile("^(?i)use ")}
   118  
   119  	// if 'flush logs' is run at the mysql prompt (which rds commonly does, apparently) the following shows up in slow query log:
   120  	//   /usr/local/Cellar/mysql/5.7.12/bin/mysqld, Version: 5.7.12 (Homebrew). started with:
   121  	//   Tcp port: 3306  Unix socket: /tmp/mysql.sock
   122  	//   Time                 Id Command    Argument
   123  	reMySQLVersion       = parsers.ExtRegexp{regexp.MustCompile("/.*, Version: .* .*MySQL Community Server.*")}
   124  	reMySQLPortSock      = parsers.ExtRegexp{regexp.MustCompile("Tcp port:.* Unix socket:.*")}
   125  	reMySQLColumnHeaders = parsers.ExtRegexp{regexp.MustCompile("Time.*Id.*Command.*Argument.*")}
   126  )
   127  
   128  const timeFormat = "2006-01-02T15:04:05.000000"
   129  const oldTimeFormat = "010206 15:04:05.999999"
   130  
   131  type Options struct {
   132  	Host          string `long:"host" description:"MySQL host in the format (address:port)"`
   133  	User          string `long:"user" description:"MySQL username"`
   134  	Pass          string `long:"pass" description:"MySQL password"`
   135  	QueryInterval uint   `long:"interval" description:"interval for querying the MySQL DB in seconds"`
   136  
   137  	NumParsers int `hidden:"true" description:"number of MySQL parsers to spin up" yaml:"-"`
   138  }
   139  
   140  type Parser struct {
   141  	// set SampleRate to cause the MySQL parser to drop events after before
   142  	// they're parsed to save CPU
   143  	SampleRate int
   144  
   145  	conf       Options
   146  	wg         sync.WaitGroup
   147  	hostedOn   string
   148  	readOnly   *bool
   149  	replicaLag *int64
   150  	role       *string
   151  }
   152  
   153  // the normalizer can't be shared by all threads.
   154  type perThreadParser struct {
   155  	normalizer *normalizer.Parser
   156  }
   157  
   158  func (p *Parser) Init(options interface{}) error {
   159  	p.conf = *options.(*Options)
   160  	if p.conf.Host != "" {
   161  		url := fmt.Sprintf("%s:%s@tcp(%s)/", p.conf.User, p.conf.Pass, p.conf.Host)
   162  		db, err := sql.Open("mysql", url)
   163  		if err != nil {
   164  			logrus.Fatal(err)
   165  		}
   166  
   167  		// run one time queries
   168  		hostedOn, err := getHostedOn(db)
   169  		if err != nil {
   170  			logrus.WithError(err).Warn("failed to get host env")
   171  		}
   172  		p.hostedOn = hostedOn
   173  
   174  		role, err := getRole(db)
   175  		if err != nil {
   176  			logrus.WithError(err).Warn("failed to get role")
   177  		}
   178  		p.role = role
   179  
   180  		// update hostedOn and readOnly every <n> seconds
   181  		queryInterval := p.conf.QueryInterval
   182  		if queryInterval == 0 {
   183  			queryInterval = 30
   184  		}
   185  		go func() {
   186  			defer db.Close()
   187  			ticker := time.NewTicker(time.Second * time.Duration(queryInterval))
   188  			for _ = range ticker.C {
   189  				readOnly, err := getReadOnly(db)
   190  				if err != nil {
   191  					logrus.WithError(err).Warn("failed to get read-only state")
   192  				}
   193  				p.readOnly = readOnly
   194  
   195  				replicaLag, err := getReplicaLag(db)
   196  				if err != nil {
   197  					logrus.WithError(err).Warn("failed to get replica lag")
   198  				}
   199  				p.replicaLag = replicaLag
   200  			}
   201  		}()
   202  	}
   203  	return nil
   204  }
   205  
   206  func getReadOnly(db *sql.DB) (*bool, error) {
   207  	rows, err := db.Query("SELECT @@global.read_only;")
   208  	if err != nil {
   209  		return nil, err
   210  	}
   211  	defer rows.Close()
   212  
   213  	var value bool
   214  
   215  	rows.Next()
   216  	if err := rows.Scan(&value); err != nil {
   217  		logrus.WithError(err).Warn("failed to get read-only state")
   218  		return nil, err
   219  	}
   220  	if err := rows.Err(); err != nil {
   221  		logrus.WithError(err).Warn("failed to get read-only state")
   222  		return nil, err
   223  	}
   224  	return &value, nil
   225  }
   226  
   227  func getHostedOn(db *sql.DB) (string, error) {
   228  	rows, err := db.Query("SHOW GLOBAL VARIABLES WHERE Variable_name = 'basedir';")
   229  	if err != nil {
   230  		return "", err
   231  	}
   232  	defer rows.Close()
   233  
   234  	var varName, value string
   235  
   236  	for rows.Next() {
   237  		if err := rows.Scan(&varName, &value); err != nil {
   238  			return "", err
   239  		}
   240  		if strings.HasPrefix(value, "/rdsdbbin/") {
   241  			return rdsStr, nil
   242  		}
   243  	}
   244  
   245  	// TODO: implement ec2 detection
   246  
   247  	if err := rows.Err(); err != nil {
   248  		return "", err
   249  	}
   250  	return "", nil
   251  }
   252  
   253  func getReplicaLag(db *sql.DB) (*int64, error) {
   254  	rows, err := db.Query("SHOW SLAVE STATUS")
   255  	if err != nil {
   256  		return nil, err
   257  	}
   258  	defer rows.Close()
   259  	if !rows.Next() {
   260  		return nil, err
   261  	}
   262  
   263  	columns, _ := rows.Columns()
   264  	values := make([]interface{}, len(columns))
   265  	for i := range values {
   266  		var v sql.RawBytes
   267  		values[i] = &v
   268  	}
   269  
   270  	err = rows.Scan(values...)
   271  	if err != nil {
   272  		return nil, err
   273  	}
   274  
   275  	for i, name := range columns {
   276  		bp := values[i].(*sql.RawBytes)
   277  		vs := string(*bp)
   278  		if name == "Seconds_Behind_Master" {
   279  			vi, err := strconv.ParseInt(vs, 10, 64)
   280  			if err != nil {
   281  				return nil, err
   282  			}
   283  			return &vi, nil
   284  		}
   285  	}
   286  	return nil, nil
   287  }
   288  
   289  func getRole(db *sql.DB) (*string, error) {
   290  	var res string
   291  	rows, err := db.Query("SHOW MASTER STATUS")
   292  	if err != nil {
   293  		return nil, err
   294  	}
   295  	defer rows.Close()
   296  	if !rows.Next() {
   297  		res = "secondary"
   298  		return nil, err
   299  	}
   300  	res = "primary"
   301  	return &res, nil
   302  }
   303  
   304  func isMySQLHeaderLine(line string) bool {
   305  	if len(line) == 0 {
   306  		return false
   307  	}
   308  	first := line[0]
   309  	return (first == '/' && reMySQLVersion.MatchString(line)) ||
   310  		(first == 'T' && reMySQLPortSock.MatchString(line)) ||
   311  		(first == 'T' && reMySQLColumnHeaders.MatchString(line))
   312  }
   313  
   314  func (p *Parser) ProcessLines(lines <-chan string, send chan<- event.Event, prefixRegex *parsers.ExtRegexp) {
   315  	// start up a goroutine to handle grouped sets of lines
   316  	rawEvents := make(chan []string)
   317  	defer p.wg.Wait()
   318  	p.wg.Add(1)
   319  	go p.handleEvents(rawEvents, send)
   320  
   321  	// flag to indicate when we've got a complete event to send
   322  	var foundStatement bool
   323  	groupedLines := make([]string, 0, 5)
   324  	for line := range lines {
   325  		line = strings.TrimSpace(line)
   326  		// mysql parser does not support capturing fields in the line prefix - just
   327  		// strip it.
   328  		if prefixRegex != nil {
   329  			var prefix string
   330  			prefix = prefixRegex.FindString(line)
   331  			line = strings.TrimPrefix(line, prefix)
   332  		}
   333  
   334  		lineIsComment := strings.HasPrefix(line, "# ")
   335  		if !lineIsComment && !isMySQLHeaderLine(line) {
   336  			// we've finished the comments before the statement and now should slurp
   337  			// lines until the next comment
   338  			foundStatement = true
   339  		} else {
   340  			if foundStatement {
   341  				// we've started a new event. Send the previous one.
   342  				foundStatement = false
   343  				// if sampling is disabled or sampler says keep, pass along this group.
   344  				if p.SampleRate <= 1 || rand.Intn(p.SampleRate) == 0 {
   345  					rawEvents <- groupedLines
   346  				}
   347  				groupedLines = make([]string, 0, 5)
   348  			}
   349  		}
   350  		groupedLines = append(groupedLines, line)
   351  	}
   352  	// send the last event, if there was one collected
   353  	if foundStatement {
   354  		// if sampling is disabled or sampler says keep, pass along this group.
   355  		if p.SampleRate <= 1 || rand.Intn(p.SampleRate) == 0 {
   356  			rawEvents <- groupedLines
   357  		}
   358  	}
   359  	logrus.Debug("lines channel is closed, ending mysql processor")
   360  	close(rawEvents)
   361  }
   362  
   363  func (p *Parser) handleEvents(rawEvents <-chan []string, send chan<- event.Event) {
   364  	defer p.wg.Done()
   365  	wg := sync.WaitGroup{}
   366  	numParsers := 1
   367  	if p.conf.NumParsers > 0 {
   368  		numParsers = p.conf.NumParsers
   369  	}
   370  	for i := 0; i < numParsers; i++ {
   371  		ptp := perThreadParser{
   372  			normalizer: &normalizer.Parser{},
   373  		}
   374  		wg.Add(1)
   375  		go func() {
   376  			for rawE := range rawEvents {
   377  				sq, timestamp := p.handleEvent(&ptp, rawE)
   378  				if len(sq) == 0 {
   379  					continue
   380  				}
   381  				if q, ok := sq["query"]; !ok || q == "" {
   382  					// skip events with no query field
   383  					continue
   384  				}
   385  				if p.hostedOn != "" {
   386  					sq[hostedOnKey] = p.hostedOn
   387  				}
   388  				if p.readOnly != nil {
   389  					sq[readOnlyKey] = *p.readOnly
   390  				}
   391  				if p.replicaLag != nil {
   392  					sq[replicaLagKey] = *p.replicaLag
   393  				}
   394  				if p.role != nil {
   395  					sq[roleKey] = *p.role
   396  				}
   397  				send <- event.Event{
   398  					Timestamp:  timestamp,
   399  					SampleRate: p.SampleRate,
   400  					Data:       sq,
   401  				}
   402  			}
   403  			wg.Done()
   404  		}()
   405  	}
   406  	wg.Wait()
   407  	logrus.Debug("done with mysql handleEvents")
   408  }
   409  
   410  // Parse a set of MySQL log lines that seem to represent a single event and
   411  // return a struct of extracted data as well as the highest-resolution timestamp
   412  // available.
   413  func (p *Parser) handleEvent(ptp *perThreadParser, rawE []string) (
   414  	map[string]interface{}, time.Time) {
   415  	sq := map[string]interface{}{}
   416  	if len(rawE) == 0 {
   417  		return sq, time.Time{}
   418  	}
   419  	var (
   420  		timeFromComment time.Time
   421  		timeFromSet     int64
   422  		query           = ""
   423  	)
   424  	for _, line := range rawE {
   425  		// parse each line and populate the map of attributes
   426  		if _, mg := reTime.FindStringSubmatchMap(line); mg != nil {
   427  			timeFromComment, _ = httime.Parse(timeFormat, mg["time"])
   428  		} else if _, mg := reOldTime.FindStringSubmatchMap(line); mg != nil {
   429  			timeFromComment, _ = httime.Parse(oldTimeFormat, mg["datetime"])
   430  		} else if reAdminPing.MatchString(line) {
   431  			// this event is an administrative ping and we should
   432  			// ignore the entire event
   433  			logrus.WithFields(logrus.Fields{
   434  				"line":  line,
   435  				"event": rawE,
   436  			}).Debug("readmin ping detected; skipping this event")
   437  			return nil, time.Time{}
   438  		} else if _, mg := reUser.FindStringSubmatchMap(line); mg != nil {
   439  			query = ""
   440  			sq[userKey] = strings.Split(mg["user"], "[")[0]
   441  			sq[clientKey] = strings.TrimSpace(mg["host"])
   442  		} else if _, mg := reQueryStats.FindStringSubmatchMap(line); mg != nil {
   443  			query = ""
   444  			if queryTime, err := strconv.ParseFloat(mg["queryTime"], 64); err == nil {
   445  				sq[queryTimeKey] = queryTime
   446  			}
   447  			if lockTime, err := strconv.ParseFloat(mg["lockTime"], 64); err == nil {
   448  				sq[lockTimeKey] = lockTime
   449  			}
   450  			if rowsSent, err := strconv.Atoi(mg["rowsSent"]); err == nil {
   451  				sq[rowsSentKey] = rowsSent
   452  			}
   453  			if rowsExamined, err := strconv.Atoi(mg["rowsExamined"]); err == nil {
   454  				sq[rowsExaminedKey] = rowsExamined
   455  			}
   456  			if rowsAffected, err := strconv.Atoi(mg["rowsAffected"]); err == nil {
   457  				sq[rowsAffectedKey] = rowsAffected
   458  			}
   459  		} else if _, mg := reTCPQueryStats.FindStringSubmatchMap(line); mg != nil {
   460  			query = ""
   461  			if queryTime, err := strconv.ParseFloat(mg["queryTime"], 64); err == nil {
   462  				sq[queryTimeKey] = queryTime
   463  			}
   464  		} else if _, mg := reServStats.FindStringSubmatchMap(line); mg != nil {
   465  			query = ""
   466  			if bytesSent, err := strconv.Atoi(mg["bytesSent"]); err == nil {
   467  				sq[bytesSentKey] = bytesSent
   468  			}
   469  			if tmpTables, err := strconv.Atoi(mg["tmpTables"]); err == nil {
   470  				sq[tmpTablesKey] = tmpTables
   471  			}
   472  			if tmpDiskTables, err := strconv.Atoi(mg["tmpDiskTables"]); err == nil {
   473  				sq[tmpDiskTablesKey] = tmpDiskTables
   474  			}
   475  			if tmpTableSizes, err := strconv.Atoi(mg["tmpTableSizes"]); err == nil {
   476  				sq[tmpTableSizesKey] = tmpTableSizes
   477  			}
   478  		} else if _, mg := reInnodbQueryPlan1.FindStringSubmatchMap(line); mg != nil {
   479  			sq[queryCacheHitKey] = mg["query_cache_hit"] == "Yes"
   480  			sq[fullScanKey] = mg["full_scan"] == "Yes"
   481  			sq[fullJoinKey] = mg["full_join"] == "Yes"
   482  			sq[tmpTableKey] = mg["tmp_table"] == "Yes"
   483  			sq[tmpTableOnDiskKey] = mg["tmp_table_on_disk"] == "Yes"
   484  		} else if _, mg := reInnodbQueryPlan2.FindStringSubmatchMap(line); mg != nil {
   485  			sq[fileSortKey] = mg["filesort"] == "Yes"
   486  			sq[fileSortOnDiskKey] = mg["filesort_on_disk"] == "Yes"
   487  			if mergePasses, err := strconv.Atoi(mg["merge_passes"]); err == nil {
   488  				sq[mergePassesKey] = mergePasses
   489  			}
   490  		} else if _, mg := reInnodbUsage1.FindStringSubmatchMap(line); mg != nil {
   491  			if ioROps, err := strconv.Atoi(mg["io_r_ops"]); err == nil {
   492  				sq[ioROpsKey] = ioROps
   493  			}
   494  			if ioRBytes, err := strconv.Atoi(mg["io_r_bytes"]); err == nil {
   495  				sq[ioRBytesKey] = ioRBytes
   496  			}
   497  			if ioRWait, err := strconv.ParseFloat(mg["io_r_wait"], 64); err == nil {
   498  				sq[ioRWaitKey] = ioRWait
   499  			}
   500  		} else if _, mg := reInnodbUsage2.FindStringSubmatchMap(line); mg != nil {
   501  			if recLockWait, err := strconv.ParseFloat(mg["rec_lock_wait"], 64); err == nil {
   502  				sq[recLockWaitKey] = recLockWait
   503  			}
   504  			if queueWait, err := strconv.ParseFloat(mg["queue_wait"], 64); err == nil {
   505  				sq[queueWaitKey] = queueWait
   506  			}
   507  		} else if _, mg := reInnodbUsage3.FindStringSubmatchMap(line); mg != nil {
   508  			if pagesDistinct, err := strconv.Atoi(mg["pages_distinct"]); err == nil {
   509  				sq[pagesDistinctKey] = pagesDistinct
   510  			}
   511  		} else if _, mg := reInnodbTrx.FindStringSubmatchMap(line); mg != nil {
   512  			sq[transactionIDKey] = mg["trxId"]
   513  		} else if match := reUse.FindString(line); match != "" {
   514  			query = ""
   515  			db := strings.TrimPrefix(line, match)
   516  			db = strings.TrimRight(db, ";")
   517  			db = strings.Trim(db, "`")
   518  			sq[databaseKey] = db
   519  			// Use this line as the query/normalized_query unless, if a real query follows it will be replaced.
   520  			sq[queryKey] = strings.TrimRight(line, ";")
   521  			sq[normalizedQueryKey] = sq[queryKey]
   522  		} else if _, mg := reSetTime.FindStringSubmatchMap(line); mg != nil {
   523  			query = ""
   524  			timeFromSet, _ = strconv.ParseInt(mg["unixTime"], 10, 64)
   525  		} else if isMySQLHeaderLine(line) {
   526  			// ignore and skip the header lines
   527  		} else if !strings.HasPrefix(line, "# ") {
   528  			// treat any other line that doesn't start with '# ' as part of the
   529  			// query
   530  			query = query + " " + line
   531  			if strings.HasSuffix(query, ";") {
   532  				q := strings.TrimSpace(strings.TrimSuffix(query, ";"))
   533  				sq[queryKey] = q
   534  				sq[normalizedQueryKey] = ptp.normalizer.NormalizeQuery(q)
   535  				if len(ptp.normalizer.LastTables) > 0 {
   536  					sq[tablesKey] = strings.Join(ptp.normalizer.LastTables, " ")
   537  				}
   538  				if len(ptp.normalizer.LastComments) > 0 {
   539  					sq[commentsKey] = "/* " + strings.Join(ptp.normalizer.LastComments, " */ /* ") + " */"
   540  				}
   541  				sq[statementKey] = ptp.normalizer.LastStatement
   542  				query = ""
   543  			}
   544  		} else {
   545  			// unknown row; log and skip
   546  			logrus.WithFields(logrus.Fields{
   547  				"line": line,
   548  			}).Debug("No regex match for line in the middle of a query. skipping")
   549  		}
   550  	}
   551  
   552  	// We always need a timestamp.
   553  	//
   554  	// timeFromComment may include millisecond resolution but doesn't include
   555  	//   time zone.
   556  	// timeFromSet is a UNIX timestamp and thus more reliable, but also (thus)
   557  	//   doesn't contain millisecond resolution.
   558  	//
   559  	// In the best case (we have both), we combine the two; in the worst case (we
   560  	//   have neither) we fall back to "now."
   561  	combinedTime := httime.Now()
   562  	if !timeFromComment.IsZero() && timeFromSet > 0 {
   563  		nanos := time.Duration(timeFromComment.Nanosecond())
   564  		combinedTime = time.Unix(timeFromSet, 0).Add(nanos)
   565  	} else if !timeFromComment.IsZero() {
   566  		combinedTime = timeFromComment // cross our fingers that UTC is ok
   567  	} else if timeFromSet > 0 {
   568  		combinedTime = time.Unix(timeFromSet, 0)
   569  	}
   570  
   571  	return sq, combinedTime
   572  }
   573  
   574  // custom error to indicate empty query
   575  type emptyQueryError struct {
   576  	err string
   577  }
   578  
   579  func (e *emptyQueryError) Error() string {
   580  	e.err = "skipped slow query"
   581  	return e.err
   582  }