github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/drop.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package main
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"strings"
    17  	"time"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
    20  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    21  	_ "github.com/lib/pq"
    22  )
    23  
    24  func registerDrop(r *testRegistry) {
    25  	// TODO(tschottdorf): rearrange all tests so that their synopses are available
    26  	// via godoc and (some variation on) `roachtest run <testname> --help`.
    27  
    28  	// This test imports a TPCC dataset and then issues a manual deletion followed
    29  	// by a truncation for the `stock` table (which contains warehouses*100k
    30  	// rows). Next, it issues a `DROP` for the whole database, and sets the GC TTL
    31  	// to one second.
    32  	runDrop := func(ctx context.Context, t *test, c *cluster, warehouses, nodes int, initDiskSpace int) {
    33  		c.Put(ctx, cockroach, "./cockroach", c.Range(1, nodes))
    34  		c.Put(ctx, workload, "./workload", c.Range(1, nodes))
    35  		c.Start(ctx, t, c.Range(1, nodes), startArgs("-e", "COCKROACH_MEMPROF_INTERVAL=15s"))
    36  
    37  		m := newMonitor(ctx, c, c.Range(1, nodes))
    38  		m.Go(func(ctx context.Context) error {
    39  			t.WorkerStatus("importing TPCC fixture")
    40  			c.Run(ctx, c.Node(1), fmt.Sprintf(
    41  				"./workload fixtures load tpcc --warehouses=%d --db tpcc {pgurl:1}", warehouses))
    42  
    43  			// Don't open the DB connection until after the data has been imported.
    44  			// Otherwise the ALTER TABLE query below might fail to find the
    45  			// tpcc.order_line table that we just imported (!) due to what seems to
    46  			// be a problem with table descriptor leases (#24374).
    47  			db := c.Conn(ctx, 1)
    48  			defer db.Close()
    49  
    50  			run := func(maybeExperimental bool, stmtStr string, args ...interface{}) {
    51  				stmt := stmtStr
    52  				// We are removing the EXPERIMENTAL keyword in 2.1. For compatibility
    53  				// with 2.0 clusters we still need to try with it if the
    54  				// syntax without EXPERIMENTAL fails.
    55  				// TODO(knz): Remove this in 2.2.
    56  				if maybeExperimental {
    57  					stmt = fmt.Sprintf(stmtStr, "", "=")
    58  				}
    59  				t.WorkerStatus(stmt)
    60  				_, err := db.ExecContext(ctx, stmt, args...)
    61  				if err != nil && maybeExperimental && strings.Contains(err.Error(), "syntax error") {
    62  					stmt = fmt.Sprintf(stmtStr, "EXPERIMENTAL", "")
    63  					t.WorkerStatus(stmt)
    64  					_, err = db.ExecContext(ctx, stmt, args...)
    65  				}
    66  				if err != nil {
    67  					t.Fatal(err)
    68  				}
    69  			}
    70  
    71  			run(false, `SET CLUSTER SETTING trace.debug.enable = true`)
    72  
    73  			// Drop a constraint that would get in the way of deleting from tpcc.stock.
    74  			const stmtDropConstraint = "ALTER TABLE tpcc.order_line DROP CONSTRAINT fk_ol_supply_w_id_ref_stock"
    75  			run(false, stmtDropConstraint)
    76  
    77  			var rows, minWarehouse, maxWarehouse int
    78  			if err := db.QueryRow("select count(*), min(s_w_id), max(s_w_id) from tpcc.stock").Scan(&rows,
    79  				&minWarehouse, &maxWarehouse); err != nil {
    80  				t.Fatalf("failed to get range count: %v", err)
    81  			}
    82  
    83  			for j := 1; j <= nodes; j++ {
    84  				size, err := getDiskUsageInBytes(ctx, c, t.l, j)
    85  				if err != nil {
    86  					return err
    87  				}
    88  
    89  				t.l.Printf("Node %d space used: %s\n", j, humanizeutil.IBytes(int64(size)))
    90  
    91  				// Return if the size of the directory is less than 100mb
    92  				if size < initDiskSpace {
    93  					t.Fatalf("Node %d space used: %s less than %s", j, humanizeutil.IBytes(int64(size)),
    94  						humanizeutil.IBytes(int64(initDiskSpace)))
    95  				}
    96  			}
    97  
    98  			for i := minWarehouse; i <= maxWarehouse; i++ {
    99  				t.Progress(float64(i) / float64(maxWarehouse))
   100  				tBegin := timeutil.Now()
   101  				run(false, "DELETE FROM tpcc.stock WHERE s_w_id = $1", i)
   102  				elapsed := timeutil.Since(tBegin)
   103  				// TODO(tschottdorf): check what's reasonable here and make sure we don't drop below it.
   104  				c.l.Printf("deleted from tpcc.stock for warehouse %d (100k rows) in %s (%.2f rows/sec)\n", i, elapsed, 100000.0/elapsed.Seconds())
   105  			}
   106  
   107  			const stmtTruncate = "TRUNCATE TABLE tpcc.stock"
   108  			run(false, stmtTruncate)
   109  
   110  			const stmtDrop = "DROP DATABASE tpcc"
   111  			run(false, stmtDrop)
   112  			// The data has already been deleted, but changing the default zone config
   113  			// should take effect retroactively.
   114  			run(true, "ALTER RANGE default %[1]s CONFIGURE ZONE %[2]s '\ngc:\n  ttlseconds: 1\n'")
   115  
   116  			var allNodesSpaceCleared bool
   117  			var sizeReport string
   118  			maxSizeBytes := 100 * 1024 * 1024
   119  			if true {
   120  				// TODO(tschottdorf): This test should pass without this large fudge factor. This requires manual reproduction
   121  				// and an investigation of the compactor logs as well as the data directory.
   122  				maxSizeBytes *= 100
   123  			}
   124  			// We're waiting a maximum of 10 minutes to makes sure that the drop operations clear the disk.
   125  			for i := 0; i < 10; i++ {
   126  				sizeReport = ""
   127  				allNodesSpaceCleared = true
   128  				for j := 1; j <= nodes; j++ {
   129  					size, err := getDiskUsageInBytes(ctx, c, t.l, j)
   130  					if err != nil {
   131  						return err
   132  					}
   133  
   134  					nodeSpaceUsed := fmt.Sprintf("Node %d space after deletion used: %s\n", j, humanizeutil.IBytes(int64(size)))
   135  					t.l.Printf(nodeSpaceUsed)
   136  
   137  					// Return if the size of the directory is less than 100mb
   138  					if size > maxSizeBytes {
   139  						allNodesSpaceCleared = false
   140  						sizeReport += nodeSpaceUsed
   141  					}
   142  				}
   143  
   144  				if allNodesSpaceCleared {
   145  					break
   146  				}
   147  				time.Sleep(time.Minute)
   148  			}
   149  
   150  			if !allNodesSpaceCleared {
   151  				sizeReport += fmt.Sprintf("disk space usage has not dropped below %s on all nodes.",
   152  					humanizeutil.IBytes(int64(maxSizeBytes)))
   153  				t.Fatalf(sizeReport)
   154  			}
   155  
   156  			return nil
   157  		})
   158  		m.Wait()
   159  	}
   160  
   161  	warehouses := 100
   162  	numNodes := 9
   163  
   164  	// 1GB
   165  	initDiskSpace := int(1e9)
   166  
   167  	r.Add(testSpec{
   168  		Name:       fmt.Sprintf("drop/tpcc/w=%d,nodes=%d", warehouses, numNodes),
   169  		Owner:      OwnerKV,
   170  		MinVersion: `v2.1.0`,
   171  		Cluster:    makeClusterSpec(numNodes),
   172  		Run: func(ctx context.Context, t *test, c *cluster) {
   173  			// NB: this is likely not going to work out in `-local` mode. Edit the
   174  			// numbers during iteration.
   175  			if local {
   176  				numNodes = 4
   177  				warehouses = 1
   178  
   179  				// 100 MB
   180  				initDiskSpace = 1e8
   181  				fmt.Printf("running with w=%d,nodes=%d in local mode\n", warehouses, numNodes)
   182  			}
   183  			runDrop(ctx, t, c, warehouses, numNodes, initDiskSpace)
   184  		},
   185  	})
   186  }