github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/tests/inverted_index_test.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package tests_test
    12  
    13  import (
    14  	"context"
    15  	"math/rand"
    16  	"testing"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/base"
    19  	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
    20  	"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
    21  	"github.com/cockroachdb/cockroach/pkg/util/json"
    22  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    23  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    24  )
    25  
    26  const numRandomJSONs = 1000
    27  const numProbes = 10
    28  const docsToUpdate = 100
    29  const docsToDelete = 100
    30  const jsonComplexity = 25
    31  
    32  func TestInvertedIndex(t *testing.T) {
    33  	defer leaktest.AfterTest(t)()
    34  
    35  	tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{})
    36  	defer tc.Stopper().Stop(context.Background())
    37  
    38  	db := sqlutils.MakeSQLRunner(tc.Conns[0])
    39  
    40  	db.Exec(t, "CREATE DATABASE IF NOT EXISTS test")
    41  	db.Exec(t, "CREATE TABLE test.jsons (i INT PRIMARY KEY, j JSONB)")
    42  
    43  	r := rand.New(rand.NewSource(timeutil.Now().UnixNano()))
    44  
    45  	// Grab a bunch of random JSONs. We insert half before we add the inverted
    46  	// index and half after.
    47  	jsons := make([]json.JSON, numRandomJSONs)
    48  	for i := 0; i < numRandomJSONs; i++ {
    49  		var err error
    50  		jsons[i], err = json.Random(jsonComplexity, r)
    51  		if err != nil {
    52  			t.Fatal(err)
    53  		}
    54  	}
    55  
    56  	for i := 0; i < numRandomJSONs/2; i++ {
    57  		db.Exec(t, `INSERT INTO test.jsons VALUES ($1, $2)`, i, jsons[i].String())
    58  	}
    59  	db.Exec(t, `CREATE INVERTED INDEX ON test.jsons (j)`)
    60  	for i := numRandomJSONs / 2; i < numRandomJSONs; i++ {
    61  		db.Exec(t, `INSERT INTO test.jsons VALUES ($1, $2)`, i, jsons[i].String())
    62  	}
    63  
    64  	t.Run("ensure we're using the inverted index", func(t *testing.T) {
    65  		// Just to make sure we're using the inverted index.
    66  		explain := db.Query(t, `SELECT count(*) FROM [EXPLAIN SELECT * FROM test.jsons WHERE j @> '{"a": 1}'] WHERE description = 'jsons@jsons_j_idx'`)
    67  		explain.Next()
    68  		var c int
    69  		if err := explain.Scan(&c); err != nil {
    70  			t.Fatal(err)
    71  		}
    72  		explain.Close()
    73  
    74  		if c != 1 {
    75  			t.Fatalf("Query not using inverted index as expected")
    76  		}
    77  	})
    78  
    79  	t.Run("probe database after inserts", func(t *testing.T) {
    80  		probeInvertedIndex(t, db, jsons)
    81  	})
    82  
    83  	// Now let's do some updates: we're going to pick a handful of the JSON
    84  	// documents we inserted and change them to something else.
    85  	perm := rand.Perm(len(jsons))
    86  	for i := 0; i < docsToUpdate; i++ {
    87  		var err error
    88  		jsons[perm[i]], err = json.Random(jsonComplexity, r)
    89  		if err != nil {
    90  			t.Fatal(err)
    91  		}
    92  
    93  		db.Exec(t, `UPDATE test.jsons SET j = $1 WHERE i = $2`, jsons[perm[i]].String(), perm[i])
    94  	}
    95  
    96  	t.Run("probe database after updates", func(t *testing.T) {
    97  		probeInvertedIndex(t, db, jsons)
    98  	})
    99  
   100  	// Now do some updates of the primary keys to prompt some deletions and
   101  	// re-insertions.  Slightly biased because we always add the number of keys
   102  	// just as a simple way to avoid a conflict.
   103  	perm = rand.Perm(len(jsons))
   104  	for i := 0; i < docsToUpdate; i++ {
   105  		db.Exec(t, `UPDATE test.jsons SET i = $1 WHERE i = $2`, perm[i], perm[i]+numRandomJSONs)
   106  	}
   107  
   108  	t.Run("probe database after pk updates", func(t *testing.T) {
   109  		probeInvertedIndex(t, db, jsons)
   110  	})
   111  
   112  	// Now do some deletions.
   113  	perm = rand.Perm(len(jsons))
   114  	for i := 0; i < docsToDelete; i++ {
   115  		db.Exec(t, `DELETE FROM test.jsons WHERE i = $1`, perm[i])
   116  		jsons[perm[i]] = nil
   117  	}
   118  
   119  	// Collect the new set of json values by iterating over the rest of perm.
   120  	newJSONS := make([]json.JSON, len(jsons)-docsToDelete)
   121  	for i := 0; i < len(jsons)-docsToDelete; i++ {
   122  		newJSONS[i] = jsons[perm[i+docsToDelete]]
   123  	}
   124  	jsons = newJSONS
   125  
   126  	t.Run("probe database after deletes", func(t *testing.T) {
   127  		probeInvertedIndex(t, db, jsons)
   128  	})
   129  }
   130  
   131  func probeInvertedIndex(t *testing.T, db *sqlutils.SQLRunner, jsons []json.JSON) {
   132  	perm := rand.Perm(len(jsons))
   133  
   134  	// Now probe it to make sure the data makes sense.
   135  	for i := 0; i < numProbes; i++ {
   136  		j := jsons[perm[i]]
   137  		paths, err := json.AllPaths(j)
   138  		if err != nil {
   139  			t.Fatal(err)
   140  		}
   141  
   142  		for _, p := range paths {
   143  			seenOriginal := true
   144  			numResults := 0
   145  			rows := db.Query(t, "SELECT j FROM test.jsons WHERE j @> $1", p.String())
   146  			for rows.Next() {
   147  				numResults++
   148  				var s string
   149  				if err := rows.Scan(&s); err != nil {
   150  					t.Fatal(err)
   151  				}
   152  				returnedJSON, err := json.ParseJSON(s)
   153  				if err != nil {
   154  					t.Fatal(err)
   155  				}
   156  
   157  				cmp, err := j.Compare(returnedJSON)
   158  				if err != nil {
   159  					t.Fatal(err)
   160  				}
   161  				if cmp == 0 {
   162  					seenOriginal = true
   163  				}
   164  
   165  				c, err := json.Contains(returnedJSON, p)
   166  				if err != nil {
   167  					t.Fatal(err)
   168  				}
   169  				if !c {
   170  					t.Fatalf(
   171  						"json %s was returned from inverted index query but does not contain %s",
   172  						returnedJSON,
   173  						p,
   174  					)
   175  				}
   176  			}
   177  
   178  			if !seenOriginal {
   179  				t.Fatalf("%s was not returned by querying path %s", j, p)
   180  			}
   181  
   182  			// Now let's verify the results ourselves...
   183  			countedResults := 0
   184  			for _, j := range jsons {
   185  				c, err := json.Contains(j, p)
   186  				if err != nil {
   187  					t.Fatal(err)
   188  				}
   189  				if c {
   190  					countedResults++
   191  				}
   192  			}
   193  
   194  			if countedResults != numResults {
   195  				t.Fatalf("query returned %d results but there were actually %d results", numResults, countedResults)
   196  			}
   197  
   198  			if err := rows.Close(); err != nil {
   199  				t.Fatal(err)
   200  			}
   201  		}
   202  	}
   203  }