github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/backup.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package main

import (
	"context"
	"fmt"
	"strings"
	"time"

	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/cockroach/pkg/util/version"
	"github.com/cockroachdb/errors"
)

func registerBackup(r *testRegistry) {
	backup2TBSpec := makeClusterSpec(10)
	r.Add(testSpec{
		Name:       fmt.Sprintf("backup2TB/%s", backup2TBSpec),
		Owner:      OwnerBulkIO,
		Cluster:    backup2TBSpec,
		MinVersion: "v2.1.0",
		Run: func(ctx context.Context, t *test, c *cluster) {
			rows := 65104166
			dest := c.name

			if local {
				rows = 100
				dest += fmt.Sprintf("%d", timeutil.Now().UnixNano())
			}

			c.Put(ctx, workload, "./workload")
			c.Put(ctx, cockroach, "./cockroach")

			// NB: starting the cluster creates the logs dir as a side effect,
			// needed below.
			c.Start(ctx, t)
			c.Run(ctx, c.All(), `./workload csv-server --port=8081 &> logs/workload-csv-server.log < /dev/null &`)
			time.Sleep(time.Second) // wait for csv server to open listener

			c.Run(ctx, c.Node(1), "./workload", "fixtures", "import", "bank",
				"--db=bank", "--payload-bytes=10240", "--ranges=0", "--csv-server", "http://localhost:8081",
				fmt.Sprintf("--rows=%d", rows), "--seed=1", "{pgurl:1}")

			m := newMonitor(ctx, c)
			m.Go(func(ctx context.Context) error {
				t.Status(`running backup`)
				c.Run(ctx, c.Node(1), `./cockroach sql --insecure -e "
				BACKUP bank.bank TO 'gs://cockroachdb-backup-testing/`+dest+`'"`)
				return nil
			})
			m.Wait()
		},
	})

	// backupTPCC continuously runs TPCC, takes a full backup after some time,
	// and an incremental backup after more time. It then restores the two
	// backups and verifies them with a fingerprint.
	r.Add(testSpec{
		Name:    `backupTPCC`,
		Owner:   OwnerBulkIO,
		Cluster: makeClusterSpec(3),
		Timeout: 1 * time.Hour,
		Run: func(ctx context.Context, t *test, c *cluster) {
			c.Put(ctx, cockroach, "./cockroach")
			c.Put(ctx, workload, "./workload")
			c.Start(ctx, t)
			conn := c.Conn(ctx, 1)

			duration := 5 * time.Minute
			if local {
				duration = 5 * time.Second
			}
			warehouses := 10

			backupDir := "gs://cockroachdb-backup-testing/" + c.name
			// Use inter-node file sharing on 20.1+.
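			// Descriptive note (assumption based on the comment above): the nodelocal
			// URI pins the backup files to node 1; with inter-node file sharing, the
			// other nodes can read and write those files through node 1, so this path
			// avoids the external GCS bucket entirely.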
			if r.buildVersion.AtLeast(version.MustParse(`v20.1.0-0`)) {
				backupDir = "nodelocal://1/" + c.name
			}
			fullDir := backupDir + "/full"
			incDir := backupDir + "/inc"

			t.Status(`workload initialization`)
			cmd := fmt.Sprintf(
				"./workload init tpcc --warehouses=%d {pgurl:1-%d}",
				warehouses, c.spec.NodeCount,
			)
			c.Run(ctx, c.Node(1), cmd)

			m := newMonitor(ctx, c)
			m.Go(func(ctx context.Context) error {
				_, err := conn.ExecContext(ctx, `
					CREATE DATABASE restore_full;
					CREATE DATABASE restore_inc;
				`)
				return err
			})
			m.Wait()

			t.Status(`run tpcc`)
			ctx, cancel := context.WithCancel(ctx)
			defer cancel()

			cmdDone := make(chan error)
			go func() {
				cmd := fmt.Sprintf(
					"./workload run tpcc --warehouses=%d {pgurl:1-%d}",
					warehouses, c.spec.NodeCount,
				)

				cmdDone <- c.RunE(ctx, c.Node(1), cmd)
			}()

			select {
			case <-time.After(duration):
			case <-ctx.Done():
				return
			}

			t.Status(`full backup`)
			// Use a time slightly in the past to avoid "cannot specify
			// timestamp in the future" errors.
			tFull := fmt.Sprint(timeutil.Now().Add(time.Second * -2).UnixNano())
			m = newMonitor(ctx, c)
			m.Go(func(ctx context.Context) error {
				_, err := conn.ExecContext(ctx,
					`BACKUP tpcc.* TO $1 AS OF SYSTEM TIME `+tFull,
					fullDir,
				)
				return err
			})
			m.Wait()

			t.Status(`continue tpcc`)
			select {
			case <-time.After(duration):
			case <-ctx.Done():
				return
			}

			t.Status(`incremental backup`)
			tInc := fmt.Sprint(timeutil.Now().Add(time.Second * -2).UnixNano())
			m = newMonitor(ctx, c)
			m.Go(func(ctx context.Context) error {
				_, err := conn.ExecContext(ctx,
					`BACKUP tpcc.* TO $1 AS OF SYSTEM TIME `+tInc+` INCREMENTAL FROM $2`,
					incDir,
					fullDir,
				)
				if err != nil {
					return err
				}

				// Backups are done, make sure workload is still running.
				select {
				case err := <-cmdDone:
					// Workload exited before it should have.
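					// Propagate whatever it returned; the monitor fails the
					// test on a non-nil error.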
					return err
				default:
					return nil
				}
			})
			m.Wait()

			m = newMonitor(ctx, c)
			m.Go(func(ctx context.Context) error {
				t.Status(`restore full`)
				if _, err := conn.ExecContext(ctx,
					`RESTORE tpcc.* FROM $1 WITH into_db='restore_full'`,
					fullDir,
				); err != nil {
					return err
				}

				t.Status(`restore incremental`)
				if _, err := conn.ExecContext(ctx,
					`RESTORE tpcc.* FROM $1, $2 WITH into_db='restore_inc'`,
					fullDir,
					incDir,
				); err != nil {
					return err
				}

				t.Status(`fingerprint`)
				fingerprint := func(db string, asof string) (string, error) {
					var b strings.Builder

					var tables []string
					rows, err := conn.QueryContext(
						ctx,
						fmt.Sprintf("SELECT table_name FROM [SHOW TABLES FROM %s] ORDER BY table_name", db),
					)
					if err != nil {
						return "", err
					}
					defer rows.Close()
					for rows.Next() {
						var name string
						if err := rows.Scan(&name); err != nil {
							return "", err
						}
						tables = append(tables, name)
					}

					for _, table := range tables {
						fmt.Fprintf(&b, "table %s\n", table)
						query := fmt.Sprintf("SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE %s.%s", db, table)
						if asof != "" {
							query = fmt.Sprintf("SELECT * FROM [%s] AS OF SYSTEM TIME %s", query, asof)
						}
						rows, err = conn.QueryContext(ctx, query)
						if err != nil {
							return "", err
						}
						defer rows.Close()
						for rows.Next() {
							var name, fp string
							if err := rows.Scan(&name, &fp); err != nil {
								return "", err
							}
							fmt.Fprintf(&b, "%s: %s\n", name, fp)
						}
					}

					return b.String(), rows.Err()
				}

				tpccFull, err := fingerprint("tpcc", tFull)
				if err != nil {
					return err
				}
				tpccInc, err := fingerprint("tpcc", tInc)
				if err != nil {
					return err
				}
				restoreFull, err := fingerprint("restore_full", "")
				if err != nil {
					return err
				}
				restoreInc, err := fingerprint("restore_inc", "")
				if err != nil {
					return err
				}

				if tpccFull != restoreFull {
					return errors.Errorf("got %s, expected %s", restoreFull, tpccFull)
				}
				if tpccInc != restoreInc {
					return errors.Errorf("got %s, expected %s", restoreInc, tpccInc)
				}

				return nil
			})
			m.Wait()
		},
	})
}
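// Both specs above are picked up by the roachtest registry. Assuming the
// standard roachtest CLI, they can be selected by name with something like
//
//	roachtest run backup2TB
//	roachtest run backupTPCC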