github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/tests/inverted_index_test.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package tests_test 12 13 import ( 14 "context" 15 "math/rand" 16 "testing" 17 18 "github.com/cockroachdb/cockroach/pkg/base" 19 "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" 20 "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" 21 "github.com/cockroachdb/cockroach/pkg/util/json" 22 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 23 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 24 ) 25 26 const numRandomJSONs = 1000 27 const numProbes = 10 28 const docsToUpdate = 100 29 const docsToDelete = 100 30 const jsonComplexity = 25 31 32 func TestInvertedIndex(t *testing.T) { 33 defer leaktest.AfterTest(t)() 34 35 tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{}) 36 defer tc.Stopper().Stop(context.Background()) 37 38 db := sqlutils.MakeSQLRunner(tc.Conns[0]) 39 40 db.Exec(t, "CREATE DATABASE IF NOT EXISTS test") 41 db.Exec(t, "CREATE TABLE test.jsons (i INT PRIMARY KEY, j JSONB)") 42 43 r := rand.New(rand.NewSource(timeutil.Now().UnixNano())) 44 45 // Grab a bunch of random JSONs. We insert half before we add the inverted 46 // index and half after. 47 jsons := make([]json.JSON, numRandomJSONs) 48 for i := 0; i < numRandomJSONs; i++ { 49 var err error 50 jsons[i], err = json.Random(jsonComplexity, r) 51 if err != nil { 52 t.Fatal(err) 53 } 54 } 55 56 for i := 0; i < numRandomJSONs/2; i++ { 57 db.Exec(t, `INSERT INTO test.jsons VALUES ($1, $2)`, i, jsons[i].String()) 58 } 59 db.Exec(t, `CREATE INVERTED INDEX ON test.jsons (j)`) 60 for i := numRandomJSONs / 2; i < numRandomJSONs; i++ { 61 db.Exec(t, `INSERT INTO test.jsons VALUES ($1, $2)`, i, jsons[i].String()) 62 } 63 64 t.Run("ensure we're using the inverted index", func(t *testing.T) { 65 // Just to make sure we're using the inverted index. 66 explain := db.Query(t, `SELECT count(*) FROM [EXPLAIN SELECT * FROM test.jsons WHERE j @> '{"a": 1}'] WHERE description = 'jsons@jsons_j_idx'`) 67 explain.Next() 68 var c int 69 if err := explain.Scan(&c); err != nil { 70 t.Fatal(err) 71 } 72 explain.Close() 73 74 if c != 1 { 75 t.Fatalf("Query not using inverted index as expected") 76 } 77 }) 78 79 t.Run("probe database after inserts", func(t *testing.T) { 80 probeInvertedIndex(t, db, jsons) 81 }) 82 83 // Now let's do some updates: we're going to pick a handful of the JSON 84 // documents we inserted and change them to something else. 85 perm := rand.Perm(len(jsons)) 86 for i := 0; i < docsToUpdate; i++ { 87 var err error 88 jsons[perm[i]], err = json.Random(jsonComplexity, r) 89 if err != nil { 90 t.Fatal(err) 91 } 92 93 db.Exec(t, `UPDATE test.jsons SET j = $1 WHERE i = $2`, jsons[perm[i]].String(), perm[i]) 94 } 95 96 t.Run("probe database after updates", func(t *testing.T) { 97 probeInvertedIndex(t, db, jsons) 98 }) 99 100 // Now do some updates of the primary keys to prompt some deletions and 101 // re-insertions. Slightly biased because we always add the number of keys 102 // just as a simple way to avoid a conflict. 103 perm = rand.Perm(len(jsons)) 104 for i := 0; i < docsToUpdate; i++ { 105 db.Exec(t, `UPDATE test.jsons SET i = $1 WHERE i = $2`, perm[i], perm[i]+numRandomJSONs) 106 } 107 108 t.Run("probe database after pk updates", func(t *testing.T) { 109 probeInvertedIndex(t, db, jsons) 110 }) 111 112 // Now do some deletions. 113 perm = rand.Perm(len(jsons)) 114 for i := 0; i < docsToDelete; i++ { 115 db.Exec(t, `DELETE FROM test.jsons WHERE i = $1`, perm[i]) 116 jsons[perm[i]] = nil 117 } 118 119 // Collect the new set of json values by iterating over the rest of perm. 120 newJSONS := make([]json.JSON, len(jsons)-docsToDelete) 121 for i := 0; i < len(jsons)-docsToDelete; i++ { 122 newJSONS[i] = jsons[perm[i+docsToDelete]] 123 } 124 jsons = newJSONS 125 126 t.Run("probe database after deletes", func(t *testing.T) { 127 probeInvertedIndex(t, db, jsons) 128 }) 129 } 130 131 func probeInvertedIndex(t *testing.T, db *sqlutils.SQLRunner, jsons []json.JSON) { 132 perm := rand.Perm(len(jsons)) 133 134 // Now probe it to make sure the data makes sense. 135 for i := 0; i < numProbes; i++ { 136 j := jsons[perm[i]] 137 paths, err := json.AllPaths(j) 138 if err != nil { 139 t.Fatal(err) 140 } 141 142 for _, p := range paths { 143 seenOriginal := true 144 numResults := 0 145 rows := db.Query(t, "SELECT j FROM test.jsons WHERE j @> $1", p.String()) 146 for rows.Next() { 147 numResults++ 148 var s string 149 if err := rows.Scan(&s); err != nil { 150 t.Fatal(err) 151 } 152 returnedJSON, err := json.ParseJSON(s) 153 if err != nil { 154 t.Fatal(err) 155 } 156 157 cmp, err := j.Compare(returnedJSON) 158 if err != nil { 159 t.Fatal(err) 160 } 161 if cmp == 0 { 162 seenOriginal = true 163 } 164 165 c, err := json.Contains(returnedJSON, p) 166 if err != nil { 167 t.Fatal(err) 168 } 169 if !c { 170 t.Fatalf( 171 "json %s was returned from inverted index query but does not contain %s", 172 returnedJSON, 173 p, 174 ) 175 } 176 } 177 178 if !seenOriginal { 179 t.Fatalf("%s was not returned by querying path %s", j, p) 180 } 181 182 // Now let's verify the results ourselves... 183 countedResults := 0 184 for _, j := range jsons { 185 c, err := json.Contains(j, p) 186 if err != nil { 187 t.Fatal(err) 188 } 189 if c { 190 countedResults++ 191 } 192 } 193 194 if countedResults != numResults { 195 t.Fatalf("query returned %d results but there were actually %d results", numResults, countedResults) 196 } 197 198 if err := rows.Close(); err != nil { 199 t.Fatal(err) 200 } 201 } 202 } 203 }