github.com/matrixorigin/matrixone@v1.2.0/test/distributed/cases/array/array_index_knn.sql (about) 1 -- pre 2 drop database if exists vecdb3; 3 create database vecdb3; 4 use vecdb3; 5 SET GLOBAL experimental_ivf_index = 1; 6 7 8 -- 0.a Verify Experimental Flag 9 drop table if exists t6; 10 create table t6(a int primary key,b vecf32(4), c varchar(3) ); 11 insert into t6 values(1, "[1,0,0,0]" , "1"); 12 insert into t6 values(2, "[2,0,0,0]", "2"); 13 insert into t6 values(3, "[3,0,0,0]", "3"); 14 insert into t6 values(4, "[1,1,0,0]", "4"); 15 insert into t6 values(5, "[2,2,0,0]", "5"); 16 insert into t6 values(6, "[3,3,0,0]", "6"); 17 SET GLOBAL experimental_ivf_index = 0; 18 create index idx6 using ivfflat on t6(b) lists=2 op_type "vector_l2_ops"; 19 SET GLOBAL experimental_ivf_index = 1; 20 create index idx6 using ivfflat on t6(b) lists=2 op_type "vector_l2_ops"; 21 select a, b from t6 order by l2_distance(b, "[1,0,0,0]") limit 4; 22 23 -- 1.a KNN with pk and embedding alone 24 drop table if exists t1; 25 create table t1(a int primary key,b vecf32(4), c varchar(3) ); 26 insert into t1 values(1, "[1,0,0,0]" , "1"); 27 insert into t1 values(2, "[2,0,0,0]", "2"); 28 insert into t1 values(3, "[3,0,0,0]", "3"); 29 insert into t1 values(4, "[1,1,0,0]", "4"); 30 insert into t1 values(5, "[2,2,0,0]", "5"); 31 insert into t1 values(6, "[3,3,0,0]", "6"); 32 insert into t1 values(7, "[1,1,1,0]", "7"); 33 insert into t1 values(8, "[2,2,2,0]", "8"); 34 insert into t1 values(9, "[3,3,3,0]", "9"); 35 select a, b from t1 order by l2_distance(normalize_l2(b), normalize_l2("[1,1,1,0]")) limit 3; 36 37 create index idx1 using ivfflat on t1(b) lists=3 op_type "vector_l2_ops"; 38 select a, b from t1 order by l2_distance(b, "[1,1,1,0]") limit 4; 39 40 insert into t1 values(10, "[4,4,4,0]", "10"); 41 select a, b from t1 order by l2_distance(b, "[1,1,1,0]") limit 4; 42 43 -- 1.b KNN with pk and embedding and non-indexed columns 44 select a,b,c from t1 order by l2_distance(b, "[1,1,1,0]") limit 4; 45 46 47 insert into t1 values(11, "[1,1,1,1]", "11"); 48 insert into t1 values(12, "[2,2,2,2]", "12"); 49 insert into t1 values(13, "[3,3,3,3]", "13"); 50 51 alter table t1 alter reindex idx1 ivfflat lists=4; 52 select a, b from t1 order by l2_distance(b, "[1,0,0,0]") limit 4; 53 select a, b from t1 order by l2_distance(b, "[1,1,0,0]") limit 4; 54 select a, b from t1 order by l2_distance(b, "[1,1,1,0]") limit 4; 55 select a, b from t1 order by l2_distance(b, "[1,1,1,1]") limit 4; 56 57 58 -- 2.a KNN with embedding alone (composite PK) 59 drop table if exists t3; 60 create table t2(a int,b vecf32(4), c varchar(3), d int, primary key(a,d)); 61 insert into t2 values(1, "[1,0,0,0]" , "1", 1); 62 insert into t2 values(2, "[2,0,0,0]", "2", 2); 63 insert into t2 values(3, "[3,0,0,0]", "3", 3); 64 insert into t2 values(4, "[1,1,0,0]", "4", 4); 65 insert into t2 values(5, "[2,2,0,0]", "5", 5); 66 insert into t2 values(6, "[3,3,0,0]", "6", 6); 67 insert into t2 values(7, "[1,1,1,0]", "7", 7); 68 insert into t2 values(8, "[2,2,2,0]", "8", 8); 69 insert into t2 values(9, "[3,3,3,0]", "9", 9); 70 select a, b from t2 order by l2_distance(normalize_l2(b), normalize_l2("[1,1,1,0]")) limit 3; 71 72 create index idx2 using ivfflat on t2(b) lists=3 op_type "vector_l2_ops"; 73 select b from t2 order by l2_distance(b, "[1,1,1,0]") limit 4; -- speed path (should output only 3 rows) 74 select a, b from t2 order by l2_distance(b, "[1,1,1,0]") limit 4; -- slow path 75 76 insert into t2 values(10, "[4,4,4,0]", "10", 10); 77 select b from t2 order by l2_distance(b, "[1,1,1,0]") limit 4; -- speed path 78 select a, b from t2 order by l2_distance(b, "[1,1,1,0]") limit 4; -- slow path 79 80 -- 2.b KNN with embedding and non-indexed columns (composite PK) 81 select a,b,c from t2 order by l2_distance(b, "[1,1,1,0]") limit 4; 82 83 insert into t2 values(11, "[1,1,1,1]", "11", 11); 84 insert into t2 values(12, "[2,2,2,2]", "12", 12); 85 insert into t2 values(13, "[3,3,3,3]", "13", 13); 86 87 alter table t2 alter reindex idx2 ivfflat lists=4; 88 select a, b from t2 order by l2_distance(b, "[1,0,0,0]") limit 4; 89 select a, b from t2 order by l2_distance(b, "[1,1,0,0]") limit 4; 90 select a, b from t2 order by l2_distance(b, "[1,1,1,0]") limit 4; 91 select a, b from t2 order by l2_distance(b, "[1,1,1,1]") limit 4; 92 93 -- 3.a KNN with no PK and embedding alone 94 drop table if exists t3; 95 create table t3(a int,b vecf32(4), c varchar(3)); 96 insert into t3 values(1, "[1,0,0,0]" , "1"); 97 insert into t3 values(2, "[2,0,0,0]", "2"); 98 insert into t3 values(3, "[3,0,0,0]", "3"); 99 insert into t3 values(4, "[1,1,0,0]", "4"); 100 insert into t3 values(5, "[2,2,0,0]", "5"); 101 insert into t3 values(6, "[3,3,0,0]", "6"); 102 insert into t3 values(7, "[1,1,1,0]", "7"); 103 insert into t3 values(8, "[2,2,2,0]", "8"); 104 insert into t3 values(9, "[3,3,3,0]", "9"); 105 select a, b from t3 order by l2_distance(normalize_l2(b), normalize_l2("[1,1,1,0]")) limit 3; 106 107 create index idx3 using ivfflat on t3(b) lists=3 op_type "vector_l2_ops"; 108 select a, b from t3 order by l2_distance(b, "[1,1,1,0]") limit 4; 109 110 insert into t3 values(10, "[4,4,4,0]", "10"); 111 select a, b from t3 order by l2_distance(b, "[1,1,1,0]") limit 4; 112 113 114 -- 3.b KNN with no PK and embedding and non-indexed columns 115 select a,b,c from t3 order by l2_distance(b, "[1,1,1,0]") limit 4; 116 117 insert into t3 values(11, "[1,1,1,1]", "11"); 118 insert into t3 values(12, "[2,2,2,2]", "12"); 119 insert into t3 values(13, "[3,3,3,3]", "13"); 120 121 alter table t3 alter reindex idx3 ivfflat lists=4; 122 select a, b from t3 order by l2_distance(b, "[1,0,0,0]") limit 4; 123 select a, b from t3 order by l2_distance(b, "[1,1,0,0]") limit 4; 124 select a, b from t3 order by l2_distance(b, "[1,1,1,0]") limit 4; 125 select a, b from t3 order by l2_distance(b, "[1,1,1,1]") limit 4; 126 127 -- 4.a NULL embedding 128 drop table if exists t4; 129 create table t4(a int primary key,b vecf32(4), c varchar(3) ); 130 insert into t4 values(1, "[1,0,0,0]" , "1"); 131 insert into t4 values(2, "[2,0,0,0]", "2"); 132 insert into t4 values(3, "[3,0,0,0]", "3"); 133 insert into t4 values(4, "[1,1,0,0]", "4"); 134 insert into t4 values(5, "[2,2,0,0]", "5"); 135 insert into t4 values(6, "[3,3,0,0]", "6"); 136 insert into t4 values(7, "[1,1,1,0]", "7"); 137 insert into t4 values(8, "[2,2,2,0]", "8"); 138 insert into t4 values(9, NULL, "9"); 139 select a, b from t4 order by l2_distance(normalize_l2(b), normalize_l2("[1,1,1,0]")) limit 3; 140 141 create index idx4 using ivfflat on t4(b) lists=3 op_type "vector_l2_ops"; 142 select a, b from t4 order by l2_distance(b, "[1,1,1,0]") limit 4; 143 144 insert into t4 values(10, "[4,4,4,0]", "10"); 145 select a, b from t4 order by l2_distance(b, "[1,1,1,0]") limit 4; 146 147 -- 5.a Zero Vector 148 drop table if exists t5; 149 create table t5(a int primary key,b vecf32(4), c varchar(3) ); 150 insert into t5 values(1, "[1,0,0,0]" , "1"); 151 insert into t5 values(2, "[2,0,0,0]", "2"); 152 insert into t5 values(3, "[3,0,0,0]", "3"); 153 insert into t5 values(4, "[1,1,0,0]", "4"); 154 insert into t5 values(5, "[2,2,0,0]", "5"); 155 insert into t5 values(6, "[3,3,0,0]", "6"); 156 insert into t5 values(7, "[1,1,1,0]", "7"); 157 insert into t5 values(8, "[2,2,2,0]", "8"); 158 insert into t5 values(9, "[3,3,3,3]", "9"); 159 insert into t5 values(10, "[0,0,0,0]", "10"); 160 select a, b from t5 order by l2_distance(normalize_l2(b), normalize_l2("[1,1,1,0]")) limit 3; 161 162 create index idx5 using ivfflat on t5(b) lists=3 op_type "vector_l2_ops"; 163 --mysql> select * from `__mo_index_secondary_018ebf8c-c9d6-7c4e-b4c3-b6df681e9017`; 164 --+--------------------------------+---------------------------+--------------------+------------------------------+ 165 --| __mo_index_centroid_fk_version | __mo_index_centroid_fk_id | __mo_index_pri_col | __mo_index_centroid_fk_entry | 166 --+--------------------------------+---------------------------+--------------------+------------------------------+ 167 --| 0 | 1 | 1 | [1, 0, 0, 0] | 168 --| 0 | 1 | 2 | [2, 0, 0, 0] | 169 --| 0 | 1 | 3 | [3, 0, 0, 0] | 170 --| 0 | 3 | 4 | [1, 1, 0, 0] | 171 --| 0 | 3 | 5 | [2, 2, 0, 0] | 172 --| 0 | 3 | 6 | [3, 3, 0, 0] | 173 --| 0 | 3 | 7 | [1, 1, 1, 0] | 174 --| 0 | 3 | 8 | [2, 2, 2, 0] | 175 --| 0 | 3 | 9 | [3, 3, 3, 3] | 176 --| 0 | 2 | 10 | [0, 0, 0, 0] | 177 --+--------------------------------+---------------------------+--------------------+------------------------------+ 178 select a, b from t5 order by l2_distance(b, "[1,1,1,0]") limit 7; 179 180 insert into t5 values(11, "[4,4,4,0]", "11"); 181 select a, b from t5 order by l2_distance(b, "[1,1,1,0]") limit 7; 182 183 184 -- post 185 SET GLOBAL experimental_ivf_index = 0; 186 drop database vecdb3; 187