github.com/honeycombio/honeytail@v1.9.0/parsers/mysql/mysql_test.go (about)

     1  package mysql
     2  
     3  import (
     4  	"fmt"
     5  	"log"
     6  	"math/rand"
     7  	"reflect"
     8  	"strings"
     9  	"testing"
    10  	"time"
    11  
    12  	"github.com/honeycombio/honeytail/event"
    13  	"github.com/honeycombio/honeytail/httime"
    14  	"github.com/honeycombio/honeytail/httime/httimetest"
    15  	"github.com/honeycombio/mysqltools/query/normalizer"
    16  )
    17  
    18  var (
    19  	t1, t2, t3, tUnparseable time.Time
    20  	sqds                     []slowQueryData
    21  )
    22  
    23  type slowQueryData struct {
    24  	rawE      []string
    25  	sq        map[string]interface{}
    26  	timestamp time.Time
    27  }
    28  
    29  const (
    30  	hostLine  = "# User@Host: root[root] @ localhost [127.0.0.1]  Id:   136"
    31  	timerLine = "# Query_time: 0.000171  Lock_time: 0.000000 Rows_sent: 1  Rows_examined: 0"
    32  	useLine   = "use honeycomb;"
    33  )
    34  
    35  func init() {
    36  	var err error
    37  	fakeNow, err := time.Parse("02/Jan/2006:15:04:05.000000 -0700", "02/Aug/2010:13:24:56.000000 -0000")
    38  	if err != nil {
    39  		log.Fatal(err)
    40  	}
    41  	t1, err = time.Parse("02/Jan/2006:15:04:05.000000", "01/Apr/2016:00:31:09.817887")
    42  	if err != nil {
    43  		log.Fatal(err)
    44  	}
    45  	t2, err = time.Parse("02/Jan/2006:15:04:05.000000", "01/Apr/2016:00:31:10.817887")
    46  	if err != nil {
    47  		log.Fatal(err)
    48  	}
    49  	t3, err = time.Parse("02/Jan/2006:15:04:05", "01/Apr/2016:00:31:09")
    50  	if err != nil {
    51  		log.Fatal(err)
    52  	}
    53  	httime.DefaultNower = &httimetest.FakeNower{fakeNow}
    54  	tUnparseable = httime.Now()
    55  
    56  	// `Time` field has ms resolution but no time zone; `SET timestamp=` is UNIX timestamp but no ms resolution
    57  	// e: mysql… i guess we could/should just combine the unix timestamp second and the parsed timestamp ms
    58  	// justify parsing both
    59  	// could be terrible
    60  
    61  	sqds = []slowQueryData{
    62  		{ /* 0 */
    63  			rawE: []string{
    64  				"# Time: 2016-04-01T00:31:09.817887Z",
    65  				"# Query_time: 0.008393  Lock_time: 0.000154 Rows_sent: 1  Rows_examined: 357",
    66  			},
    67  			sq: map[string]interface{}{
    68  				queryTimeKey:    0.008393,
    69  				lockTimeKey:     0.000154,
    70  				rowsSentKey:     1,
    71  				rowsExaminedKey: 357,
    72  			},
    73  			timestamp: t1,
    74  		},
    75  		{ /* 1 */
    76  			rawE: []string{
    77  				"# Time: not-a-parsable-time-stampZ",
    78  				"# User@Host: someuser @ hostfoo [192.168.2.1]  Id:   666",
    79  			},
    80  			sq: map[string]interface{}{
    81  				userKey:   "someuser",
    82  				clientKey: "hostfoo [192.168.2.1]",
    83  			},
    84  			timestamp: tUnparseable,
    85  		},
    86  		{ /* 2 */
    87  			rawE: []string{
    88  				"# Time: not-a-parsable-time-stampZ",
    89  				"# User@Host: root @ localhost []  Id:   233",
    90  			},
    91  			sq: map[string]interface{}{
    92  				userKey:   "root",
    93  				clientKey: "localhost []",
    94  			},
    95  			timestamp: tUnparseable,
    96  		},
    97  		{ /* 3 */
    98  			rawE: []string{
    99  				"# Time: not-a-parsable-time-stampZ",
   100  				"# User@Host: root @ []  Id:   233",
   101  			},
   102  			sq: map[string]interface{}{
   103  				userKey:   "root",
   104  				clientKey: "[]",
   105  			},
   106  			timestamp: tUnparseable,
   107  		},
   108  		{ /* 4 */
   109  			// RDS style user host line
   110  			rawE: []string{
   111  				"# User@Host: root[root] @  [10.0.1.76]  Id: 325920",
   112  			},
   113  			sq: map[string]interface{}{
   114  				userKey:   "root",
   115  				clientKey: "[10.0.1.76]",
   116  			},
   117  			timestamp: tUnparseable,
   118  		},
   119  		{ /* 5 */
   120  			// RDS style user host line with hostname
   121  			rawE: []string{
   122  				"# User@Host: root[root] @ foobar [10.0.1.76]  Id: 325920",
   123  			},
   124  			sq: map[string]interface{}{
   125  				userKey:   "root",
   126  				clientKey: "foobar [10.0.1.76]",
   127  			},
   128  			timestamp: tUnparseable,
   129  		},
   130  		{ /* 6 */
   131  			rawE: []string{
   132  				"# Time: not-a-recognizable time stamp",
   133  				"# administrator command: Ping;",
   134  			},
   135  			sq:        nil,
   136  			timestamp: time.Time{},
   137  		},
   138  		{ /* 7 */
   139  			rawE: []string{
   140  				"# Time: not-a-parsable-time-stampZ",
   141  				"SET timestamp=1459470669;",
   142  				"show status like 'Uptime';",
   143  			},
   144  			sq: map[string]interface{}{
   145  				queryKey:           "show status like 'Uptime'",
   146  				normalizedQueryKey: "show status like ?",
   147  				statementKey:       "",
   148  			},
   149  			timestamp: t1.Truncate(time.Second),
   150  		},
   151  		{ /* 8 */
   152  			// fails to parse "Time" but we capture unix time and we fall back to the scan normalizer
   153  			rawE: []string{
   154  				"# Time: not-a-parsable-time-stampZ",
   155  				"SET timestamp=1459470669;",
   156  				"SELECT * FROM (SELECT  T1.orderNumber,  STATUS,  SUM(quantityOrdered * priceEach) AS  total FROM orders WHERE total > 1000 AS T1 INNER JOIN orderdetails AS T2 ON T1.orderNumber = T2.orderNumber GROUP BY  orderNumber) T WHERE total > 100;",
   157  			},
   158  			sq: map[string]interface{}{
   159  				queryKey:           "SELECT * FROM (SELECT  T1.orderNumber,  STATUS,  SUM(quantityOrdered * priceEach) AS  total FROM orders WHERE total > 1000 AS T1 INNER JOIN orderdetails AS T2 ON T1.orderNumber = T2.orderNumber GROUP BY  orderNumber) T WHERE total > 100",
   160  				normalizedQueryKey: "select * from (select t1.ordernumber, status, sum(quantityordered * priceeach) as total from orders where total > ? as t1 inner join orderdetails as t2 on t1.ordernumber = t2.ordernumber group by ordernumber) t where total > ?",
   161  				statementKey:       "",
   162  			},
   163  			timestamp: t1.Truncate(time.Second),
   164  		},
   165  		{ /* 9 */
   166  			rawE: []string{
   167  				"# Time: not-a-parsable-time-stampZ",
   168  				"SET timestamp=1459470669;",
   169  				"SELECT * FROM orders WHERE total > 1000;",
   170  			},
   171  			sq: map[string]interface{}{
   172  				queryKey:           "SELECT * FROM orders WHERE total > 1000",
   173  				normalizedQueryKey: "select * from orders where total > ?",
   174  				tablesKey:          "orders",
   175  				statementKey:       "select",
   176  			},
   177  			timestamp: t1.Truncate(time.Second),
   178  		},
   179  		{ /* 10 */
   180  			// query spans multiple lines
   181  			rawE: []string{
   182  				"# Time: not-a-parsable-time-stampZ",
   183  				"SET timestamp=1459470669;",
   184  				"SELECT *",
   185  				"FROM orders WHERE",
   186  				"total > 1000;",
   187  			},
   188  			sq: map[string]interface{}{
   189  				queryKey:           "SELECT * FROM orders WHERE total > 1000",
   190  				normalizedQueryKey: "select * from orders where total > ?",
   191  				tablesKey:          "orders",
   192  				statementKey:       "select",
   193  			},
   194  			timestamp: t1.Truncate(time.Second),
   195  		},
   196  		{ /* 11 */
   197  			rawE: []string{
   198  				"# Time: not-a-parsable-time-stampZ",
   199  				"SET timestamp=1459470669;",
   200  				"use someDB;",
   201  			},
   202  			sq: map[string]interface{}{
   203  				databaseKey:        "someDB",
   204  				queryKey:           "use someDB",
   205  				normalizedQueryKey: "use someDB",
   206  			},
   207  			timestamp: t1.Truncate(time.Second),
   208  		},
   209  		{ /* 12 */
   210  			// a use as well as query
   211  			rawE: []string{
   212  				"# Time: not-a-parsable-time-stampZ",
   213  				"SET timestamp=1459470669;",
   214  				"use someDB;",
   215  				"SELECT *",
   216  				"FROM orders WHERE",
   217  				"total > 1000;",
   218  			},
   219  			sq: map[string]interface{}{
   220  				databaseKey:        "someDB",
   221  				queryKey:           "SELECT * FROM orders WHERE total > 1000",
   222  				normalizedQueryKey: "select * from orders where total > ?",
   223  				tablesKey:          "orders",
   224  				statementKey:       "select",
   225  			},
   226  			timestamp: t1.Truncate(time.Second),
   227  		},
   228  		// some tests for corrupted logs
   229  		{ /* 13 */
   230  			// invalid query + use + query, ignore the invalid query
   231  			rawE: []string{
   232  				"# Time: not-a-parsable-time-stampZ",
   233  				"SET timestamp=1459470669;",
   234  				"SELECT *",
   235  				"use someDB;",
   236  				"SELECT *",
   237  				"FROM orders WHERE",
   238  				"total > 1000;",
   239  			},
   240  			sq: map[string]interface{}{
   241  				databaseKey:        "someDB",
   242  				queryKey:           "SELECT * FROM orders WHERE total > 1000",
   243  				normalizedQueryKey: "select * from orders where total > ?",
   244  				tablesKey:          "orders",
   245  				statementKey:       "select",
   246  			},
   247  			timestamp: t1.Truncate(time.Second),
   248  		},
   249  		{ /* 14 */
   250  			// invalid query + set time + query, ignore the invalid query
   251  			rawE: []string{
   252  				"# Time: not-a-parsable-time-stampZ",
   253  				"SET timestamp=1459470669;",
   254  				"SELECT * FROM orders WHERE",
   255  				"SET timestamp=1459470670;",
   256  				"SELECT * FROM orders WHERE total > 1000;",
   257  			},
   258  			sq: map[string]interface{}{
   259  				queryKey:           "SELECT * FROM orders WHERE total > 1000",
   260  				normalizedQueryKey: "select * from orders where total > ?",
   261  				tablesKey:          "orders",
   262  				statementKey:       "select",
   263  			},
   264  			timestamp: t2.Truncate(time.Second),
   265  		},
   266  		{ /* 15 */
   267  			// query + query_time comment + query, ignore the first query
   268  			rawE: []string{
   269  				"# Time: 2016-04-01T00:31:09.817887Z",
   270  				"SELECT * FROM orders WHERE total < 1000;",
   271  				"# Query_time: 0.008393  Lock_time: 0.000154 Rows_sent: 1  Rows_examined: 357",
   272  				"SELECT * FROM orders WHERE total > 1000;",
   273  			},
   274  			sq: map[string]interface{}{
   275  				queryTimeKey:       0.008393,
   276  				lockTimeKey:        0.000154,
   277  				rowsSentKey:        1,
   278  				rowsExaminedKey:    357,
   279  				queryKey:           "SELECT * FROM orders WHERE total > 1000",
   280  				normalizedQueryKey: "select * from orders where total > ?",
   281  				tablesKey:          "orders",
   282  				statementKey:       "select",
   283  			},
   284  			timestamp: t1,
   285  		},
   286  		{ /* 16 */
   287  			// invalid query + user@host comment + query, ignore the invalid query
   288  			rawE: []string{
   289  				"# Time: 2016-04-01T00:31:09.817887Z",
   290  				"SELECT * FROM orders WHERE",
   291  				"# User@Host: someuser @ hostfoo [192.168.2.1]  Id:   666",
   292  				"SELECT * FROM orders WHERE total > 1000;",
   293  			},
   294  			sq: map[string]interface{}{
   295  				userKey:            "someuser",
   296  				clientKey:          "hostfoo [192.168.2.1]",
   297  				queryKey:           "SELECT * FROM orders WHERE total > 1000",
   298  				normalizedQueryKey: "select * from orders where total > ?",
   299  				tablesKey:          "orders",
   300  				statementKey:       "select",
   301  			},
   302  			timestamp: t1,
   303  		},
   304  		{ /* 17 */
   305  			// query with a comment
   306  			rawE: []string{
   307  				"# Time: 2016-04-01T00:31:09.817887Z",
   308  				"# User@Host: someuser @ hostfoo [192.168.2.1]  Id:   666",
   309  				"SELECT /* from mysql.go:245 */ /* another comment */ * FROM orders WHERE total > 1000;",
   310  			},
   311  			sq: map[string]interface{}{
   312  				userKey:            "someuser",
   313  				clientKey:          "hostfoo [192.168.2.1]",
   314  				queryKey:           "SELECT /* from mysql.go:245 */ /* another comment */ * FROM orders WHERE total > 1000",
   315  				normalizedQueryKey: "select * from orders where total > ?",
   316  				tablesKey:          "orders",
   317  				statementKey:       "select",
   318  				commentsKey:        "/* from mysql.go:245 */ /* another comment */",
   319  			},
   320  			timestamp: t1,
   321  		},
   322  		{ /* 18 */
   323  			// query without its last line
   324  			rawE: []string{
   325  				"# Time: 2016-04-01T00:31:09.817887Z",
   326  				"SELECT * FROM orders",
   327  			},
   328  			sq:        map[string]interface{}{},
   329  			timestamp: t1,
   330  		},
   331  		{ /* 19 */
   332  			rawE: []string{},
   333  			sq:   map[string]interface{}{},
   334  		},
   335  		{ /* 20 */
   336  			rawE: []string{
   337  				"# Time: 160907  3:10:22",
   338  				"# User@Host: rw[rw] @  [10.96.81.110]  Id: 1394495950",
   339  				"# Schema: our_index  Last_errno: 0  Killed: 0",
   340  				"# Query_time: 1.294391  Lock_time: 0.000119  Rows_sent: 4049  Rows_examined: 4049  Rows_affected: 1",
   341  				"# Bytes_sent: 153824  Tmp_tables: 1  Tmp_disk_tables: 2  Tmp_table_sizes: 3",
   342  				"# InnoDB_trx_id: A569C193C7",
   343  				"# QC_Hit: No  Full_scan: No  Full_join: No  Tmp_table: No  Tmp_table_on_disk: No",
   344  				"# Filesort: No  Filesort_on_disk: No  Merge_passes: 0",
   345  				"#   InnoDB_IO_r_ops: 0  InnoDB_IO_r_bytes: 0  InnoDB_IO_r_wait: 0.000000",
   346  				"#   InnoDB_rec_lock_wait: 0.000000  InnoDB_queue_wait: 0.000000",
   347  				"#   InnoDB_pages_distinct: 6756",
   348  				"SET timestamp=1473217822;",
   349  				"/* [vreegU1vU6FPXHBnW6OU_DalWUR8] [our_index_A] [/dynamic_sitemaps.php] */ SELECT * FROM `cats_index` as `Cat_Cat` WHERE `Cat_Cat`.`cat_id` BETWEEN 9670064 AND 9680063 ORDER BY `Cat_Cat`.`cat_id`;",
   350  			},
   351  			sq: map[string]interface{}{
   352  				userKey:            "rw",
   353  				clientKey:          "[10.96.81.110]",
   354  				queryTimeKey:       1.294391,
   355  				lockTimeKey:        0.000119,
   356  				rowsSentKey:        4049,
   357  				rowsExaminedKey:    4049,
   358  				rowsAffectedKey:    1,
   359  				queryKey:           "/* [vreegU1vU6FPXHBnW6OU_DalWUR8] [our_index_A] [/dynamic_sitemaps.php] */ SELECT * FROM `cats_index` as `Cat_Cat` WHERE `Cat_Cat`.`cat_id` BETWEEN 9670064 AND 9680063 ORDER BY `Cat_Cat`.`cat_id`",
   360  				normalizedQueryKey: "select * from `cats_index` as `cat_cat` where `cat_cat`.`cat_id` between ? and ? order by `cat_cat`.`cat_id`",
   361  				statementKey:       "select",
   362  				tablesKey:          "cat_cat cats_index",
   363  				bytesSentKey:       153824,
   364  				tmpTablesKey:       1,
   365  				tmpDiskTablesKey:   2,
   366  				tmpTableSizesKey:   3,
   367  				transactionIDKey:   "A569C193C7",
   368  				queryCacheHitKey:   false,
   369  				fullScanKey:        false,
   370  				fullJoinKey:        false,
   371  				tmpTableKey:        false,
   372  				tmpTableOnDiskKey:  false,
   373  				fileSortKey:        false,
   374  				fileSortOnDiskKey:  false,
   375  				mergePassesKey:     0,
   376  				ioROpsKey:          0,
   377  				ioRBytesKey:        0,
   378  				ioRWaitKey:         0.0,
   379  				recLockWaitKey:     0.0,
   380  				queueWaitKey:       0.0,
   381  				pagesDistinctKey:   6756,
   382  			},
   383  			timestamp: time.Unix(1473217822, 0),
   384  		},
   385  		{ /* 21 */
   386  			rawE: []string{
   387  				"# Time: 160907  3:10:22",
   388  				"# User@Host: rw[rw] @  [10.96.81.110]  Id: 1394495950",
   389  				"# Schema: our_index  Last_errno: 0  Killed: 0",
   390  				"# Query_time: 1.294391  Lock_time: 0.000119  Rows_sent: 4049  Rows_examined: 4049  Rows_affected: 1",
   391  				"# Bytes_sent: 153824  Tmp_tables: 1  Tmp_disk_tables: 2  Tmp_table_sizes: 3",
   392  				"# InnoDB_trx_id: A569C193C7",
   393  				"# QC_Hit: Yes  Full_scan: Yes  Full_join: Yes  Tmp_table: Yes  Tmp_table_on_disk: Yes",
   394  				"# Filesort: Yes  Filesort_on_disk: Yes  Merge_passes: 42",
   395  				"#   InnoDB_IO_r_ops: 1  InnoDB_IO_r_bytes: 2  InnoDB_IO_r_wait: 3.000000",
   396  				"#   InnoDB_rec_lock_wait: 4.000000  InnoDB_queue_wait: 5.000000",
   397  				"#   InnoDB_pages_distinct: 6756",
   398  				"SET timestamp=1473217822;",
   399  				"/* [vreegU1vU6FPXHBnW6OU_DalWUR8] [our_index_A] [/dynamic_sitemaps.php] */ SELECT * FROM `cats_index` as `Cat_Cat` WHERE `Cat_Cat`.`cat_id` BETWEEN 9670064 AND 9680063 ORDER BY `Cat_Cat`.`cat_id`;",
   400  			},
   401  			sq: map[string]interface{}{
   402  				userKey:            "rw",
   403  				clientKey:          "[10.96.81.110]",
   404  				queryTimeKey:       1.294391,
   405  				lockTimeKey:        0.000119,
   406  				rowsSentKey:        4049,
   407  				rowsExaminedKey:    4049,
   408  				rowsAffectedKey:    1,
   409  				queryKey:           "/* [vreegU1vU6FPXHBnW6OU_DalWUR8] [our_index_A] [/dynamic_sitemaps.php] */ SELECT * FROM `cats_index` as `Cat_Cat` WHERE `Cat_Cat`.`cat_id` BETWEEN 9670064 AND 9680063 ORDER BY `Cat_Cat`.`cat_id`",
   410  				normalizedQueryKey: "select * from `cats_index` as `cat_cat` where `cat_cat`.`cat_id` between ? and ? order by `cat_cat`.`cat_id`",
   411  				statementKey:       "select",
   412  				tablesKey:          "cat_cat cats_index",
   413  				bytesSentKey:       153824,
   414  				tmpTablesKey:       1,
   415  				tmpDiskTablesKey:   2,
   416  				tmpTableSizesKey:   3,
   417  				transactionIDKey:   "A569C193C7",
   418  				queryCacheHitKey:   true,
   419  				fullScanKey:        true,
   420  				fullJoinKey:        true,
   421  				tmpTableKey:        true,
   422  				tmpTableOnDiskKey:  true,
   423  				fileSortKey:        true,
   424  				fileSortOnDiskKey:  true,
   425  				mergePassesKey:     42,
   426  				ioROpsKey:          1,
   427  				ioRBytesKey:        2,
   428  				ioRWaitKey:         3.0,
   429  				recLockWaitKey:     4.0,
   430  				queueWaitKey:       5.0,
   431  				pagesDistinctKey:   6756,
   432  			},
   433  			timestamp: time.Unix(1473217822, 0),
   434  		},
   435  		{ /* 22 */
   436  			rawE: []string{
   437  				"# User@Host: weaverw[weaverw] @ [10.14.214.13]",
   438  				"# Thread_id: 78959 Schema: weave3 Last_errno: 0 Killed: 0",
   439  				"# Query_time: 10.749944 Lock_time: 0.017599 Rows_sent: 0 Rows_examined: 0 Rows_affected: 10 Rows_read: 12",
   440  				"# Bytes_sent: 51 Tmp_tables: 0 Tmp_disk_tables: 0 Tmp_table_sizes: 0",
   441  				"# InnoDB_trx_id: 98CF",
   442  				"use weave3;",
   443  				"SET timestamp=1364506803;",
   444  				"SELECT COUNT(*) FROM foo;",
   445  			},
   446  			sq: map[string]interface{}{
   447  				userKey:            "weaverw",
   448  				clientKey:          "[10.14.214.13]",
   449  				queryTimeKey:       10.749944,
   450  				lockTimeKey:        0.017599,
   451  				rowsSentKey:        0,
   452  				rowsExaminedKey:    0,
   453  				rowsAffectedKey:    10,
   454  				databaseKey:        "weave3",
   455  				queryKey:           "SELECT COUNT(*) FROM foo",
   456  				normalizedQueryKey: "select count(*) from foo",
   457  				statementKey:       "select",
   458  				tablesKey:          "foo",
   459  				bytesSentKey:       51,
   460  				tmpTablesKey:       0,
   461  				tmpDiskTablesKey:   0,
   462  				tmpTableSizesKey:   0,
   463  				transactionIDKey:   "98CF",
   464  			},
   465  			timestamp: time.Unix(1364506803, 0),
   466  		},
   467  		{ /* 23 */
   468  			rawE: []string{
   469  				"# User@Host: rdsadmin[rdsadmin] @ localhost [127.0.0.1]  Id:     1",
   470  				"# Query_time: 0.000439  Lock_time: 0.000000 Rows_sent: 1  Rows_examined: 0",
   471  				"SET timestamp=1476901800;",
   472  				"select @@session.tx_read_only;",
   473  				"/rdsdbbin/mysql/bin/mysqld, Version: 5.6.22-log (MySQL Community Server (GPL)). started with:",
   474  				"Tcp port: 3306  Unix socket: /tmp/mysql.sock",
   475  				"Time                 Id Command    Argument",
   476  			},
   477  			sq: map[string]interface{}{
   478  				userKey:            "rdsadmin",
   479  				clientKey:          "localhost [127.0.0.1]",
   480  				queryTimeKey:       0.000439,
   481  				lockTimeKey:        0.0,
   482  				rowsSentKey:        1,
   483  				rowsExaminedKey:    0,
   484  				queryKey:           "select @@session.tx_read_only",
   485  				normalizedQueryKey: "select @@session.tx_read_only",
   486  				statementKey:       "select",
   487  				tablesKey:          "@@session",
   488  			},
   489  			timestamp: time.Unix(1476901800, 0),
   490  		},
   491  		{ /* 24 */
   492  			rawE: []string{
   493  				"# User@Host: rdsadmin[rdsadmin] @ localhost [127.0.0.1]  Id:     1",
   494  				"# Query_time: 0.000439  Lock_time: 0.000000 Rows_sent: 1  Rows_examined: 0",
   495  				"SET timestamp=1476901800;",
   496  				"SELECT * FROM users WHERE id='#';",
   497  			},
   498  			sq: map[string]interface{}{
   499  				userKey:            "rdsadmin",
   500  				clientKey:          "localhost [127.0.0.1]",
   501  				queryTimeKey:       0.000439,
   502  				lockTimeKey:        0.0,
   503  				rowsSentKey:        1,
   504  				rowsExaminedKey:    0,
   505  				queryKey:           "SELECT * FROM users WHERE id='#'",
   506  				normalizedQueryKey: "select * from users where id = ?",
   507  				statementKey:       "select",
   508  				tablesKey:          "users",
   509  			},
   510  			timestamp: time.Unix(1476901800, 0),
   511  		},
   512  		{ /* 25 */
   513  			rawE: []string{
   514  				"# User@Host: rdsadmin[rdsadmin] @ localhost [127.0.0.1]  Id:     1",
   515  				"# Query_time: 0.000439  Lock_time: 0.000000 Rows_sent: 1  Rows_examined: 0",
   516  				"SET timestamp=1476901800;",
   517  				"SELECT 1 /* this is a comment with a # in it */;",
   518  			},
   519  			sq: map[string]interface{}{
   520  				userKey:            "rdsadmin",
   521  				clientKey:          "localhost [127.0.0.1]",
   522  				queryTimeKey:       0.000439,
   523  				lockTimeKey:        0.0,
   524  				rowsSentKey:        1,
   525  				rowsExaminedKey:    0,
   526  				queryKey:           "SELECT 1 /* this is a comment with a # in it */",
   527  				normalizedQueryKey: "select ?",
   528  				statementKey:       "select",
   529  			},
   530  			timestamp: time.Unix(1476901800, 0),
   531  		},
   532  		{ /* 26 */
   533  			// timestamp is old style
   534  			rawE: []string{
   535  				"# Time: 040116 00:31:09",
   536  				"# Query_time: 0.008393",
   537  			},
   538  			sq: map[string]interface{}{
   539  				queryTimeKey: 0.008393,
   540  			},
   541  			timestamp: t3,
   542  		},
   543  		{ /* 27 */
   544  			// timestamp is old style and has millisec precision - comes from percona
   545  			rawE: []string{
   546  				"# Time: 040116 00:31:09.817887",
   547  				"# Query_time: 0.008393",
   548  			},
   549  			sq: map[string]interface{}{
   550  				queryTimeKey: 0.008393,
   551  			},
   552  			timestamp: t1,
   553  		},
   554  		{ /* 28 */
   555  			// query spans multiple lines, initial line blank
   556  			rawE: []string{
   557  				"# Time: not-a-parsable-time-stampZ",
   558  				"SET timestamp=1459470669;",
   559  				"",
   560  				"SELECT *",
   561  				"FROM orders WHERE",
   562  				"total > 1000;",
   563  			},
   564  			sq: map[string]interface{}{
   565  				queryKey:           "SELECT * FROM orders WHERE total > 1000",
   566  				normalizedQueryKey: "select * from orders where total > ?",
   567  				tablesKey:          "orders",
   568  				statementKey:       "select",
   569  			},
   570  			timestamp: t1.Truncate(time.Second),
   571  		},
   572  		{ /* 29 */
   573  			// query spans multiple lines, initial line blank, tabs and spaces
   574  			rawE: []string{
   575  				"# Time: not-a-parsable-time-stampZ",
   576  				"SET timestamp=1459470669;",
   577  				"",
   578  				"    SELECT *	",
   579  				"    	FROM orders WHERE   ",
   580  				"	total > 1000;",
   581  				"",
   582  			},
   583  			sq: map[string]interface{}{
   584  				queryKey: "SELECT *	     	FROM orders WHERE    	total > 1000",
   585  				normalizedQueryKey: "select * from orders where total > ?",
   586  				tablesKey:          "orders",
   587  				statementKey:       "select",
   588  			},
   589  			timestamp: t1.Truncate(time.Second),
   590  		},
   591  	}
   592  }
   593  
   594  func TestHandleEvent(t *testing.T) {
   595  	p := &Parser{}
   596  	ptp := &perThreadParser{
   597  		normalizer: &normalizer.Parser{},
   598  	}
   599  	for i, sqd := range sqds {
   600  		res, timestamp := p.handleEvent(ptp, sqd.rawE)
   601  		if len(res) != len(sqd.sq) {
   602  			t.Errorf("case num %d: expected to parse %d fields, got %d", i, len(sqd.sq), len(res))
   603  			fmt.Printf("res is %+v\n", res)
   604  		}
   605  		for k, v := range sqd.sq {
   606  			if !reflect.DeepEqual(res[k], v) {
   607  				t.Errorf("case num %d, key %s:\n\texpected:\t%q\n\tgot:\t\t%q", i, k, v, res[k])
   608  			}
   609  		}
   610  		if timestamp.UnixNano() != sqd.timestamp.UnixNano() {
   611  			t.Errorf("case num %d: time parsed incorrectly:\n\tExpected: %+v, Actual: %+v",
   612  				i, sqd.timestamp, timestamp)
   613  		}
   614  	}
   615  }
   616  
   617  func TestTimeProcessing(t *testing.T) {
   618  	p := &Parser{}
   619  	ptp := &perThreadParser{
   620  		normalizer: &normalizer.Parser{},
   621  	}
   622  	tsts := []struct {
   623  		lines    []string
   624  		expected time.Time
   625  	}{
   626  		{[]string{
   627  			"# Time: 2016-09-15T10:16:17.898325Z", hostLine, timerLine, useLine,
   628  			"SET timestamp=1473934577;",
   629  		}, time.Date(2016, time.September, 15, 10, 16, 17, 898325000, time.UTC)},
   630  		{[]string{
   631  			"# Time: not-a-parsable-time-stampZ", hostLine, timerLine, useLine,
   632  			"SET timestamp=1459470669;",
   633  		}, time.Date(2016, time.April, 1, 0, 31, 9, 0, time.UTC)},
   634  		{[]string{
   635  			"# Time: 2016-09-16T19:37:39.006083Z", hostLine, timerLine, useLine,
   636  		}, time.Date(2016, time.September, 16, 19, 37, 39, 6083000, time.UTC)},
   637  		{[]string{hostLine, timerLine, useLine}, httime.Now()},
   638  	}
   639  
   640  	for _, tt := range tsts {
   641  		_, timestamp := p.handleEvent(ptp, tt.lines)
   642  		if timestamp.Unix() != tt.expected.Unix() {
   643  			t.Errorf("Didn't capture unix ts from lines:\n%+v\n\tExpected: %d, Actual: %d",
   644  				strings.Join(tt.lines, "\n"), tt.expected.Unix(), timestamp.Unix())
   645  		}
   646  		if timestamp.Nanosecond() != tt.expected.Nanosecond() {
   647  			t.Errorf("Didn't capture time with MS resolution from lines:\n%+v\n\tExpected: %d, Actual: %d",
   648  				strings.Join(tt.lines, "\n"), tt.expected.Nanosecond(), timestamp.Nanosecond())
   649  		}
   650  	}
   651  }
   652  
   653  // test that ProcessLines correctly splits the mysql slow query log stream into
   654  // individual events. It should read in alternating sets of commented then
   655  // uncommented lines and split them at the first comment after an uncommented
   656  // line.
   657  func TestProcessLines(t *testing.T) {
   658  	ts1, _ := time.Parse(time.RFC3339Nano, "2016-04-01T00:31:09.817887Z")
   659  
   660  	tsts := []struct {
   661  		in       []string
   662  		expected []event.Event
   663  	}{
   664  		{
   665  			[]string{
   666  				"# administrator command: Prepare;",
   667  				"# Time: 2016-04-01T00:31:09.817887Z",
   668  				"# User@Host: someuser @ hostfoo [192.168.2.1]  Id:   666",
   669  				"# Query_time: 0.000073  Lock_time: 0.000000 Rows_sent: 0  Rows_examined: 0",
   670  				"SELECT * FROM orders WHERE total > 1000;",
   671  				"# administrator command: Prepare;",
   672  				"# Time: 2016-04-01T00:31:09.817887Z",
   673  				"# User@Host: otheruser @ hostbar [192.168.2.1]  Id:   666",
   674  				"# Query_time: 0.00457  Lock_time: 0.1 Rows_sent: 5  Rows_examined: 35",
   675  				"SELECT * FROM",
   676  				"customers;",
   677  			},
   678  			[]event.Event{
   679  				{
   680  					Timestamp: ts1,
   681  					Data: map[string]interface{}{
   682  						"client":           "hostfoo [192.168.2.1]",
   683  						"user":             "someuser",
   684  						"query_time":       0.000073,
   685  						"lock_time":        0.0,
   686  						"rows_sent":        0,
   687  						"rows_examined":    0,
   688  						"query":            "SELECT * FROM orders WHERE total > 1000",
   689  						"normalized_query": "select * from orders where total > ?",
   690  						"tables":           "orders",
   691  						"statement":        "select",
   692  					},
   693  				},
   694  				{
   695  					Timestamp: ts1,
   696  					Data: map[string]interface{}{
   697  						"client":           "hostbar [192.168.2.1]",
   698  						"user":             "otheruser",
   699  						"query_time":       0.00457,
   700  						"lock_time":        0.1,
   701  						"rows_sent":        5,
   702  						"rows_examined":    35,
   703  						"query":            "SELECT * FROM customers",
   704  						"normalized_query": "select * from customers",
   705  						"tables":           "customers",
   706  						"statement":        "select",
   707  					},
   708  				},
   709  			},
   710  		},
   711  		{ // missing a # Time: line on the second event
   712  			[]string{
   713  				"# Time: 151008  0:31:04",
   714  				"# User@Host: rails[rails] @  [10.252.9.33]",
   715  				"# Query_time: 0.030974  Lock_time: 0.000019 Rows_sent: 0  Rows_examined: 30259",
   716  				"SET timestamp=1444264264;",
   717  				"SELECT `metadata`.* FROM `metadata` WHERE (`metadata`.app_id = 993089);",
   718  				"# User@Host: rails[rails] @  [10.252.9.33]",
   719  				"# Query_time: 0.002280  Lock_time: 0.000023 Rows_sent: 0  Rows_examined: 921",
   720  				"SET timestamp=1444264264;",
   721  				"SELECT `certs`.* FROM `certs` WHERE (`certs`.app_id = 993089) LIMIT 1;",
   722  			},
   723  			[]event.Event{
   724  				{
   725  					Timestamp: time.Unix(1444264264, 0),
   726  					Data: map[string]interface{}{
   727  						"client":           "[10.252.9.33]",
   728  						"user":             "rails",
   729  						"query_time":       0.030974,
   730  						"lock_time":        0.000019,
   731  						"rows_sent":        0,
   732  						"rows_examined":    30259,
   733  						"query":            "SELECT `metadata`.* FROM `metadata` WHERE (`metadata`.app_id = 993089)",
   734  						"normalized_query": "select `metadata`.* from `metadata` where (`metadata`.app_id = ?)",
   735  						"tables":           "metadata",
   736  						"statement":        "select",
   737  					},
   738  				},
   739  				{
   740  					Timestamp: time.Unix(1444264264, 0), // should pick up the SET timestamp=... cmd
   741  					Data: map[string]interface{}{
   742  						"client":           "[10.252.9.33]",
   743  						"user":             "rails",
   744  						"query_time":       0.002280,
   745  						"lock_time":        0.000023,
   746  						"rows_sent":        0,
   747  						"rows_examined":    921,
   748  						"query":            "SELECT `certs`.* FROM `certs` WHERE (`certs`.app_id = 993089) LIMIT 1",
   749  						"normalized_query": "select `certs`.* from `certs` where (`certs`.app_id = ?) limit ?",
   750  						"tables":           "certs",
   751  						"statement":        "select",
   752  					},
   753  				},
   754  			},
   755  		},
   756  		{ // statement blocks with no query should be skipped
   757  			[]string{
   758  				"# Time: 151008  0:31:04",
   759  				"# User@Host: rails[rails] @  [10.252.9.33]",
   760  				"# Query_time: 0.030974  Lock_time: 0.000019 Rows_sent: 0  Rows_examined: 30259",
   761  				"SET timestamp=1444264264;",
   762  				"# User@Host: rails[rails] @  [10.252.9.33]",
   763  				"# Query_time: 0.002280  Lock_time: 0.000023 Rows_sent: 0  Rows_examined: 921",
   764  				"SET timestamp=1444264264;",
   765  				"SELECT `certs`.* FROM `certs` WHERE (`certs`.app_id = 993089) LIMIT 1;",
   766  				"# User@Host: rails[rails] @  [10.252.9.33]",
   767  				"# Query_time: 0.002280  Lock_time: 0.000023 Rows_sent: 0  Rows_examined: 921",
   768  				"SET timestamp=1444264264;",
   769  				"# User@Host: rails[rails] @  [10.252.9.33]",
   770  				"# Query_time: 0.002280  Lock_time: 0.000023 Rows_sent: 0  Rows_examined: 921",
   771  				"SET timestamp=1444264264;",
   772  				"SELECT `certs`.* FROM `certs` WHERE (`certs`.app_id = 993089) LIMIT 1;",
   773  			},
   774  			[]event.Event{
   775  				{
   776  					Timestamp: time.Unix(1444264264, 0), // should pick up the SET timestamp=... cmd
   777  					Data: map[string]interface{}{
   778  						"client":           "[10.252.9.33]",
   779  						"user":             "rails",
   780  						"query_time":       0.002280,
   781  						"lock_time":        0.000023,
   782  						"rows_sent":        0,
   783  						"rows_examined":    921,
   784  						"query":            "SELECT `certs`.* FROM `certs` WHERE (`certs`.app_id = 993089) LIMIT 1",
   785  						"normalized_query": "select `certs`.* from `certs` where (`certs`.app_id = ?) limit ?",
   786  						"tables":           "certs",
   787  						"statement":        "select",
   788  					},
   789  				},
   790  				{
   791  					Timestamp: time.Unix(1444264264, 0), // should pick up the SET timestamp=... cmd
   792  					Data: map[string]interface{}{
   793  						"client":           "[10.252.9.33]",
   794  						"user":             "rails",
   795  						"query_time":       0.002280,
   796  						"lock_time":        0.000023,
   797  						"rows_sent":        0,
   798  						"rows_examined":    921,
   799  						"query":            "SELECT `certs`.* FROM `certs` WHERE (`certs`.app_id = 993089) LIMIT 1",
   800  						"normalized_query": "select `certs`.* from `certs` where (`certs`.app_id = ?) limit ?",
   801  						"tables":           "certs",
   802  						"statement":        "select",
   803  					},
   804  				},
   805  			},
   806  		},
   807  		{ // fewer queries than expected - only one query is here but two are
   808  			// expected. put empty event there to match
   809  			[]string{
   810  				"# Time: 151008  0:31:04",
   811  				"# User@Host: rails[rails] @  [10.252.9.33]",
   812  				"# Query_time: 0.030974  Lock_time: 0.000019 Rows_sent: 0  Rows_examined: 30259",
   813  				"SET timestamp=1444264264;",
   814  				"# User@Host: rails[rails] @  [10.252.9.33]",
   815  				"# Query_time: 0.002280  Lock_time: 0.000023 Rows_sent: 0  Rows_examined: 921",
   816  				"SET timestamp=1444264264;",
   817  				"SELECT `certs`.* FROM `certs` WHERE (`certs`.app_id = 993089) LIMIT 1;",
   818  				"# User@Host: rails[rails] @  [10.252.9.33]",
   819  				"# Query_time: 0.002280  Lock_time: 0.000023 Rows_sent: 0  Rows_examined: 921",
   820  				"SET timestamp=1444264264;",
   821  			},
   822  			[]event.Event{
   823  				{
   824  					Timestamp: time.Unix(1444264264, 0), // should pick up the SET timestamp=... cmd
   825  					Data: map[string]interface{}{
   826  						"client":           "[10.252.9.33]",
   827  						"user":             "rails",
   828  						"query_time":       0.002280,
   829  						"lock_time":        0.000023,
   830  						"rows_sent":        0,
   831  						"rows_examined":    921,
   832  						"query":            "SELECT `certs`.* FROM `certs` WHERE (`certs`.app_id = 993089) LIMIT 1",
   833  						"normalized_query": "select `certs`.* from `certs` where (`certs`.app_id = ?) limit ?",
   834  						"tables":           "certs",
   835  						"statement":        "select",
   836  					},
   837  				},
   838  				{}, // to match already closed channel
   839  			},
   840  		},
   841  	}
   842  
   843  	for _, tt := range tsts {
   844  		p := &Parser{
   845  			conf: Options{
   846  				NumParsers: 5,
   847  			},
   848  			// normalizer: &normalizer.Parser{},
   849  		}
   850  		lines := make(chan string, 10)
   851  		send := make(chan event.Event, 5)
   852  		go func() {
   853  			p.ProcessLines(lines, send, nil)
   854  			close(send)
   855  		}()
   856  		for _, line := range tt.in {
   857  			lines <- line
   858  		}
   859  		close(lines)
   860  
   861  		for range tt.expected {
   862  			ev := <-send
   863  			var found bool
   864  			// returned events may come out of order so just look for the event rather
   865  			// than expecting it to come in order
   866  			for _, exp := range tt.expected {
   867  				if ev.Timestamp.Equal(exp.Timestamp) && reflect.DeepEqual(ev.Data, exp.Data) {
   868  					found = true
   869  				}
   870  			}
   871  			if !found {
   872  				t.Errorf("ev\n%+v\nnot found in expected list:\n%+v\n", ev, tt.expected)
   873  			}
   874  		}
   875  		if len(send) > 0 {
   876  			t.Errorf("unexpected: %d additional events were extracted", len(send))
   877  		}
   878  	}
   879  	// test sampling
   880  	rand.Seed(3)
   881  	var numEvents int
   882  	for _, tt := range tsts {
   883  		p := &Parser{
   884  			conf: Options{
   885  				NumParsers: 5,
   886  			},
   887  			SampleRate: 3,
   888  			// normalizer: &normalizer.Parser{},
   889  		}
   890  		lines := make(chan string, 10)
   891  		send := make(chan event.Event, 5)
   892  		go func() {
   893  			p.ProcessLines(lines, send, nil)
   894  			close(send)
   895  		}()
   896  		for _, line := range tt.in {
   897  			lines <- line
   898  		}
   899  		close(lines)
   900  		for range send {
   901  			// just count the number of events we're getting on the downstream side of
   902  			// sampling, verify it's fewer than we sent in.
   903  			numEvents++
   904  		}
   905  	}
   906  	if numEvents != 5 {
   907  		t.Errorf("With sampling enabled, only expected 5 events, got %d", numEvents)
   908  	}
   909  }