github.com/matrixorigin/matrixone@v1.2.0/test/distributed/cases/array/array_index_knn.sql (about)

     1  -- pre
     2  drop database if exists vecdb3;
     3  create database vecdb3;
     4  use vecdb3;
     5  SET GLOBAL experimental_ivf_index = 1;
     6  
     7  
     8  -- 0.a  Verify Experimental Flag
     9  drop table if exists t6;
    10  create table t6(a int primary key,b vecf32(4), c varchar(3) );
    11  insert into t6 values(1, "[1,0,0,0]" , "1");
    12  insert into t6 values(2, "[2,0,0,0]", "2");
    13  insert into t6 values(3, "[3,0,0,0]", "3");
    14  insert into t6 values(4, "[1,1,0,0]", "4");
    15  insert into t6 values(5, "[2,2,0,0]", "5");
    16  insert into t6 values(6, "[3,3,0,0]", "6");
    17  SET GLOBAL experimental_ivf_index = 0;
    18  create index idx6 using ivfflat on t6(b) lists=2 op_type "vector_l2_ops";
    19  SET GLOBAL experimental_ivf_index = 1;
    20  create index idx6 using ivfflat on t6(b) lists=2 op_type "vector_l2_ops";
    21  select a, b from t6 order by l2_distance(b, "[1,0,0,0]") limit 4;
    22  
    23  -- 1.a KNN with pk and embedding alone
    24  drop table if exists t1;
    25  create table t1(a int primary key,b vecf32(4), c varchar(3) );
    26  insert into t1 values(1, "[1,0,0,0]" , "1");
    27  insert into t1 values(2, "[2,0,0,0]", "2");
    28  insert into t1 values(3, "[3,0,0,0]", "3");
    29  insert into t1 values(4, "[1,1,0,0]", "4");
    30  insert into t1 values(5, "[2,2,0,0]", "5");
    31  insert into t1 values(6, "[3,3,0,0]", "6");
    32  insert into t1 values(7, "[1,1,1,0]", "7");
    33  insert into t1 values(8, "[2,2,2,0]", "8");
    34  insert into t1 values(9, "[3,3,3,0]", "9");
    35  select a, b from t1 order by l2_distance(normalize_l2(b), normalize_l2("[1,1,1,0]")) limit 3;
    36  
    37  create index idx1 using ivfflat on t1(b) lists=3 op_type "vector_l2_ops";
    38  select a, b from t1 order by l2_distance(b, "[1,1,1,0]") limit 4;
    39  
    40  insert into t1 values(10, "[4,4,4,0]", "10");
    41  select a, b from t1 order by l2_distance(b, "[1,1,1,0]") limit 4;
    42  
    43  -- 1.b KNN with pk and embedding and non-indexed columns
    44  select a,b,c from t1 order by l2_distance(b, "[1,1,1,0]") limit 4;
    45  
    46  
    47  insert into t1 values(11, "[1,1,1,1]", "11");
    48  insert into t1 values(12, "[2,2,2,2]", "12");
    49  insert into t1 values(13, "[3,3,3,3]", "13");
    50  
    51  alter table t1 alter reindex idx1 ivfflat lists=4;
    52  select a, b from t1 order by l2_distance(b, "[1,0,0,0]") limit 4;
    53  select a, b from t1 order by l2_distance(b, "[1,1,0,0]") limit 4;
    54  select a, b from t1 order by l2_distance(b, "[1,1,1,0]") limit 4;
    55  select a, b from t1 order by l2_distance(b, "[1,1,1,1]") limit 4;
    56  
    57  
    58  -- 2.a KNN with embedding alone (composite PK)
    59  drop table if exists t3;
    60  create table t2(a int,b vecf32(4), c varchar(3), d int, primary key(a,d));
    61  insert into t2 values(1, "[1,0,0,0]" , "1", 1);
    62  insert into t2 values(2, "[2,0,0,0]", "2", 2);
    63  insert into t2 values(3, "[3,0,0,0]", "3", 3);
    64  insert into t2 values(4, "[1,1,0,0]", "4", 4);
    65  insert into t2 values(5, "[2,2,0,0]", "5", 5);
    66  insert into t2 values(6, "[3,3,0,0]", "6", 6);
    67  insert into t2 values(7, "[1,1,1,0]", "7", 7);
    68  insert into t2 values(8, "[2,2,2,0]", "8", 8);
    69  insert into t2 values(9, "[3,3,3,0]", "9", 9);
    70  select a, b from t2 order by l2_distance(normalize_l2(b), normalize_l2("[1,1,1,0]")) limit 3;
    71  
    72  create index idx2 using ivfflat on t2(b) lists=3 op_type "vector_l2_ops";
    73  select b from t2 order by l2_distance(b, "[1,1,1,0]") limit 4; -- speed path (should output only 3 rows)
    74  select a, b from t2 order by l2_distance(b, "[1,1,1,0]") limit 4; -- slow path
    75  
    76  insert into t2 values(10, "[4,4,4,0]", "10", 10);
    77  select b from t2 order by l2_distance(b, "[1,1,1,0]") limit 4; -- speed path
    78  select a, b from t2 order by l2_distance(b, "[1,1,1,0]") limit 4; -- slow path
    79  
    80  -- 2.b KNN with embedding and non-indexed columns (composite PK)
    81  select a,b,c from t2 order by l2_distance(b, "[1,1,1,0]") limit 4;
    82  
    83  insert into t2 values(11, "[1,1,1,1]", "11", 11);
    84  insert into t2 values(12, "[2,2,2,2]", "12", 12);
    85  insert into t2 values(13, "[3,3,3,3]", "13", 13);
    86  
    87  alter table t2 alter reindex idx2 ivfflat lists=4;
    88  select a, b from t2 order by l2_distance(b, "[1,0,0,0]") limit 4;
    89  select a, b from t2 order by l2_distance(b, "[1,1,0,0]") limit 4;
    90  select a, b from t2 order by l2_distance(b, "[1,1,1,0]") limit 4;
    91  select a, b from t2 order by l2_distance(b, "[1,1,1,1]") limit 4;
    92  
    93  -- 3.a KNN with no PK and embedding alone
    94  drop table if exists t3;
    95  create table t3(a int,b vecf32(4), c varchar(3));
    96  insert into t3 values(1, "[1,0,0,0]" , "1");
    97  insert into t3 values(2, "[2,0,0,0]", "2");
    98  insert into t3 values(3, "[3,0,0,0]", "3");
    99  insert into t3 values(4, "[1,1,0,0]", "4");
   100  insert into t3 values(5, "[2,2,0,0]", "5");
   101  insert into t3 values(6, "[3,3,0,0]", "6");
   102  insert into t3 values(7, "[1,1,1,0]", "7");
   103  insert into t3 values(8, "[2,2,2,0]", "8");
   104  insert into t3 values(9, "[3,3,3,0]", "9");
   105  select a, b from t3 order by l2_distance(normalize_l2(b), normalize_l2("[1,1,1,0]")) limit 3;
   106  
   107  create index idx3 using ivfflat on t3(b) lists=3 op_type "vector_l2_ops";
   108  select a, b from t3 order by l2_distance(b, "[1,1,1,0]") limit 4;
   109  
   110  insert into t3 values(10, "[4,4,4,0]", "10");
   111  select a, b from t3 order by l2_distance(b, "[1,1,1,0]") limit 4;
   112  
   113  
   114  -- 3.b KNN with no PK and embedding and non-indexed columns
   115  select a,b,c from t3 order by l2_distance(b, "[1,1,1,0]") limit 4;
   116  
   117  insert into t3 values(11, "[1,1,1,1]", "11");
   118  insert into t3 values(12, "[2,2,2,2]", "12");
   119  insert into t3 values(13, "[3,3,3,3]", "13");
   120  
   121  alter table t3 alter reindex idx3 ivfflat lists=4;
   122  select a, b from t3 order by l2_distance(b, "[1,0,0,0]") limit 4;
   123  select a, b from t3 order by l2_distance(b, "[1,1,0,0]") limit 4;
   124  select a, b from t3 order by l2_distance(b, "[1,1,1,0]") limit 4;
   125  select a, b from t3 order by l2_distance(b, "[1,1,1,1]") limit 4;
   126  
   127  -- 4.a NULL embedding
   128  drop table if exists t4;
   129  create table t4(a int primary key,b vecf32(4), c varchar(3) );
   130  insert into t4 values(1, "[1,0,0,0]" , "1");
   131  insert into t4 values(2, "[2,0,0,0]", "2");
   132  insert into t4 values(3, "[3,0,0,0]", "3");
   133  insert into t4 values(4, "[1,1,0,0]", "4");
   134  insert into t4 values(5, "[2,2,0,0]", "5");
   135  insert into t4 values(6, "[3,3,0,0]", "6");
   136  insert into t4 values(7, "[1,1,1,0]", "7");
   137  insert into t4 values(8, "[2,2,2,0]", "8");
   138  insert into t4 values(9, NULL, "9");
   139  select a, b from t4 order by l2_distance(normalize_l2(b), normalize_l2("[1,1,1,0]")) limit 3;
   140  
   141  create index idx4 using ivfflat on t4(b) lists=3 op_type "vector_l2_ops";
   142  select a, b from t4 order by l2_distance(b, "[1,1,1,0]") limit 4;
   143  
   144  insert into t4 values(10, "[4,4,4,0]", "10");
   145  select a, b from t4 order by l2_distance(b, "[1,1,1,0]") limit 4;
   146  
   147  -- 5.a Zero Vector
   148  drop table if exists t5;
   149  create table t5(a int primary key,b vecf32(4), c varchar(3) );
   150  insert into t5 values(1, "[1,0,0,0]" , "1");
   151  insert into t5 values(2, "[2,0,0,0]", "2");
   152  insert into t5 values(3, "[3,0,0,0]", "3");
   153  insert into t5 values(4, "[1,1,0,0]", "4");
   154  insert into t5 values(5, "[2,2,0,0]", "5");
   155  insert into t5 values(6, "[3,3,0,0]", "6");
   156  insert into t5 values(7, "[1,1,1,0]", "7");
   157  insert into t5 values(8, "[2,2,2,0]", "8");
   158  insert into t5 values(9, "[3,3,3,3]", "9");
   159  insert into t5 values(10, "[0,0,0,0]", "10");
   160  select a, b from t5 order by l2_distance(normalize_l2(b), normalize_l2("[1,1,1,0]")) limit 3;
   161  
   162  create index idx5 using ivfflat on t5(b) lists=3 op_type "vector_l2_ops";
   163  --mysql> select * from `__mo_index_secondary_018ebf8c-c9d6-7c4e-b4c3-b6df681e9017`;
   164  --+--------------------------------+---------------------------+--------------------+------------------------------+
   165  --| __mo_index_centroid_fk_version | __mo_index_centroid_fk_id | __mo_index_pri_col | __mo_index_centroid_fk_entry |
   166  --+--------------------------------+---------------------------+--------------------+------------------------------+
   167  --|                              0 |                         1 |                  1 | [1, 0, 0, 0]                 |
   168  --|                              0 |                         1 |                  2 | [2, 0, 0, 0]                 |
   169  --|                              0 |                         1 |                  3 | [3, 0, 0, 0]                 |
   170  --|                              0 |                         3 |                  4 | [1, 1, 0, 0]                 |
   171  --|                              0 |                         3 |                  5 | [2, 2, 0, 0]                 |
   172  --|                              0 |                         3 |                  6 | [3, 3, 0, 0]                 |
   173  --|                              0 |                         3 |                  7 | [1, 1, 1, 0]                 |
   174  --|                              0 |                         3 |                  8 | [2, 2, 2, 0]                 |
   175  --|                              0 |                         3 |                  9 | [3, 3, 3, 3]                 |
   176  --|                              0 |                         2 |                 10 | [0, 0, 0, 0]                 |
   177  --+--------------------------------+---------------------------+--------------------+------------------------------+
   178  select a, b from t5 order by l2_distance(b, "[1,1,1,0]") limit 7;
   179  
   180  insert into t5 values(11, "[4,4,4,0]", "11");
   181  select a, b from t5 order by l2_distance(b, "[1,1,1,0]") limit 7;
   182  
   183  
   184  -- post
   185  SET GLOBAL experimental_ivf_index = 0;
   186  drop database vecdb3;
   187