github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/import.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package main 12 13 import ( 14 "context" 15 "fmt" 16 "strings" 17 "time" 18 19 "github.com/cockroachdb/cockroach/pkg/util/retry" 20 "github.com/cockroachdb/errors" 21 ) 22 23 func registerImportTPCC(r *testRegistry) { 24 runImportTPCC := func(ctx context.Context, t *test, c *cluster, warehouses int) { 25 c.Put(ctx, cockroach, "./cockroach") 26 c.Put(ctx, workload, "./workload") 27 t.Status("starting csv servers") 28 c.Start(ctx, t) 29 c.Run(ctx, c.All(), `./workload csv-server --port=8081 &> logs/workload-csv-server.log < /dev/null &`) 30 31 t.Status("running workload") 32 m := newMonitor(ctx, c) 33 dul := NewDiskUsageLogger(c) 34 m.Go(dul.Runner) 35 hc := NewHealthChecker(c, c.All()) 36 m.Go(hc.Runner) 37 38 m.Go(func(ctx context.Context) error { 39 defer dul.Done() 40 defer hc.Done() 41 cmd := fmt.Sprintf( 42 `./workload fixtures import tpcc --warehouses=%d --csv-server='http://localhost:8081'`, 43 warehouses) 44 c.Run(ctx, c.Node(1), cmd) 45 return nil 46 }) 47 m.Wait() 48 } 49 50 const warehouses = 1000 51 for _, numNodes := range []int{4, 32} { 52 r.Add(testSpec{ 53 Name: fmt.Sprintf("import/tpcc/warehouses=%d/nodes=%d", warehouses, numNodes), 54 Owner: OwnerBulkIO, 55 Cluster: makeClusterSpec(numNodes), 56 Timeout: 5 * time.Hour, 57 Run: func(ctx context.Context, t *test, c *cluster) { 58 runImportTPCC(ctx, t, c, warehouses) 59 }, 60 }) 61 } 62 const geoWarehouses = 4000 63 const geoZones = "europe-west2-b,europe-west4-b,asia-northeast1-b,us-west1-b" 64 r.Add(testSpec{ 65 Skip: "#37349 - OOMing", 66 Name: fmt.Sprintf("import/tpcc/warehouses=%d/geo", geoWarehouses), 67 Owner: OwnerBulkIO, 68 Cluster: makeClusterSpec(8, cpu(16), geo(), zones(geoZones)), 69 Timeout: 5 * time.Hour, 70 Run: func(ctx context.Context, t *test, c *cluster) { 71 runImportTPCC(ctx, t, c, geoWarehouses) 72 }, 73 }) 74 } 75 76 func registerImportTPCH(r *testRegistry) { 77 for _, item := range []struct { 78 nodes int 79 timeout time.Duration 80 }{ 81 // TODO(dt): this test seems to have become slower as of 19.2. It previously 82 // had 4, 8 and 32 node configurations with comments claiming they ran in in 83 // 4-5h for 4 node and 3h for 8 node. As of 19.2, it seems to be timing out 84 // -- potentially because 8 secondary indexes is worst-case for direct 85 // ingestion and seems to cause a lot of compaction, but further profiling 86 // is required to confirm this. Until then, the 4 and 32 node configurations 87 // are removed (4 is too slow and 32 is pretty expensive) while 8-node is 88 // given a 50% longer timeout (which running by hand suggests should be OK). 89 // (10/30/19) The timeout was increased again to 8 hours. 90 {8, 8 * time.Hour}, 91 } { 92 item := item 93 r.Add(testSpec{ 94 Name: fmt.Sprintf(`import/tpch/nodes=%d`, item.nodes), 95 Owner: OwnerBulkIO, 96 Cluster: makeClusterSpec(item.nodes), 97 Timeout: item.timeout, 98 Run: func(ctx context.Context, t *test, c *cluster) { 99 c.Put(ctx, cockroach, "./cockroach") 100 c.Start(ctx, t) 101 conn := c.Conn(ctx, 1) 102 if _, err := conn.Exec(` 103 CREATE DATABASE csv; 104 SET CLUSTER SETTING jobs.registry.leniency = '5m'; 105 `); err != nil { 106 t.Fatal(err) 107 } 108 if _, err := conn.Exec( 109 `SET CLUSTER SETTING kv.bulk_ingest.max_index_buffer_size = '2gb'`, 110 ); err != nil && !strings.Contains(err.Error(), "unknown cluster setting") { 111 t.Fatal(err) 112 } 113 // Wait for all nodes to be ready. 114 if err := retry.ForDuration(time.Second*30, func() error { 115 var nodes int 116 if err := conn. 117 QueryRowContext(ctx, `select count(*) from crdb_internal.gossip_liveness where updated_at > now() - interval '8s'`). 118 Scan(&nodes); err != nil { 119 t.Fatal(err) 120 } else if nodes != item.nodes { 121 return errors.Errorf("expected %d nodes, got %d", item.nodes, nodes) 122 } 123 return nil 124 }); err != nil { 125 t.Fatal(err) 126 } 127 m := newMonitor(ctx, c) 128 dul := NewDiskUsageLogger(c) 129 m.Go(dul.Runner) 130 hc := NewHealthChecker(c, c.All()) 131 m.Go(hc.Runner) 132 133 // TODO(peter): This currently causes the test to fail because we see a 134 // flurry of valid merges when the import finishes. 135 // 136 // m.Go(func(ctx context.Context) error { 137 // // Make sure the merge queue doesn't muck with our import. 138 // return verifyMetrics(ctx, c, map[string]float64{ 139 // "cr.store.queue.merge.process.success": 10, 140 // "cr.store.queue.merge.process.failure": 10, 141 // }) 142 // }) 143 144 m.Go(func(ctx context.Context) error { 145 defer dul.Done() 146 defer hc.Done() 147 t.WorkerStatus(`running import`) 148 defer t.WorkerStatus() 149 _, err := conn.Exec(` 150 IMPORT TABLE csv.lineitem 151 CREATE USING 'gs://cockroach-fixtures/tpch-csv/schema/lineitem.sql' 152 CSV DATA ( 153 'gs://cockroach-fixtures/tpch-csv/sf-100/lineitem.tbl.1', 154 'gs://cockroach-fixtures/tpch-csv/sf-100/lineitem.tbl.2', 155 'gs://cockroach-fixtures/tpch-csv/sf-100/lineitem.tbl.3', 156 'gs://cockroach-fixtures/tpch-csv/sf-100/lineitem.tbl.4', 157 'gs://cockroach-fixtures/tpch-csv/sf-100/lineitem.tbl.5', 158 'gs://cockroach-fixtures/tpch-csv/sf-100/lineitem.tbl.6', 159 'gs://cockroach-fixtures/tpch-csv/sf-100/lineitem.tbl.7', 160 'gs://cockroach-fixtures/tpch-csv/sf-100/lineitem.tbl.8' 161 ) WITH delimiter='|' 162 `) 163 return errors.Wrap(err, "import failed") 164 }) 165 166 t.Status("waiting") 167 m.Wait() 168 }, 169 }) 170 } 171 }