github.com/matrixorigin/matrixone@v1.2.0/test/distributed/cases/array/array.sql (about)

     1  -- pre
     2  drop database if exists vecdb;
     3  create database vecdb;
     4  use vecdb;
     5  drop table if exists vec_table;
     6  
     7  -- standard
     8  create table vec_table(a int, b vecf32(3), c vecf64(3));
     9  desc vec_table;
    10  insert into vec_table values(1, "[1,2,3]", "[4,5,6]");
    11  select * from vec_table;
    12  
    13  -- binary operators
    14  select b+b from vec_table;
    15  select b-b from vec_table;
    16  select b*b from vec_table;
    17  select b/b from vec_table;
    18  select * from vec_table where b> "[1,2,3]";
    19  select * from vec_table where b< "[1,2,3]";
    20  select * from vec_table where b>= "[1,2,3]";
    21  select * from vec_table where b<= "[1,2,3]";
    22  select * from vec_table where b!= "[1,2,3]";
    23  select * from vec_table where b= "[1,2,3]";
    24  select * from vec_table where b= cast("[1,2,3]" as vecf32(3));
    25  select b + "[1,2,3]" from vec_table;
    26  select b + "[1,2]" from vec_table;
    27  select b + "[1,2,3,4]" from vec_table;
    28  
    29  
    30  -- cast
    31  select cast("[1,2,3]" as vecf32(3));
    32  select b + "[1,2,3]" from vec_table;
    33  select b + sqrt(b) from vec_table;
    34  select b + c from vec_table;
    35  
    36  -- vector ops
    37  select abs(b) from vec_table;
    38  select abs(cast("[-1,-2,3]" as vecf32(3)));
    39  select sqrt(b) from vec_table;
    40  select summation(b) from vec_table;
    41  select l1_norm(b) from vec_table;
    42  select l2_norm(b) from vec_table;
    43  select vector_dims(b) from vec_table;
    44  select inner_product(b,"[1,2,3]") from vec_table;
    45  select cosine_similarity(b,"[1,2,3]") from vec_table;
    46  select l2_distance(b,"[1,2,3]") from vec_table;
    47  select cosine_distance(b,"[1,2,3]") from vec_table;
    48  select normalize_l2(b) from vec_table;
    49  
    50  -- top K
    51  select * FROM vec_table ORDER BY cosine_similarity(b, '[3,1,2]') LIMIT 5;
    52  select * FROM vec_table ORDER BY l2_distance(b, '[3,1,2]') LIMIT 5;
    53  select * FROM vec_table ORDER BY inner_product(b, '[3,1,2]') LIMIT 5;
    54  
    55  
    56  -- throw error cases
    57  select b + "[1,2,3" from vec_table;
    58  select b + "1,2,3" from vec_table;
    59  create table t2(a int, b vecf32(3) primary key);
    60  create unique index t3 on vec_table(b);
    61  create table t3(a int, b vecf32(65537));
    62  
    63  -- throw error for Nan/Inf
    64  select sqrt(cast("[1,2,-3]" as vecf32(3)));
    65  select b/(cast("[1,2,0]" as vecf32(3))) from vec_table;
    66  
    67  -- agg
    68  select count(b) from vec_table;
    69  
    70  -- insert test (more dim error)
    71  create table t4(a int, b vecf32(5), c vecf64(5));
    72  insert into t4 values(1, "[1,2,3,4,5]", "[1,2,3,4,5]");
    73  insert into t4 values(1, "[1,2]", "[1,2]");
    74  insert into t4 values(1, "[1,2,3,4,5,6]", "[1,2,3,4,5,6]");
    75  select * from t4;
    76  
    77  -- insert vector as binary
    78  create table t5(a int, b vecf32(3));
    79  insert into t5 values(1, cast(unhex('7e98b23e9e10383b2f41133f') as blob));
    80  insert into t5 values(2, cast(unhex('0363733ff13e0b3f7aa39d3e') as blob));
    81  insert into t5 values(3, cast(unhex('be1ac03e485d083ef6bc723f') as blob));
    82  
    83  insert into t5 values(4, "[0,2,3]");
    84  
    85  insert into t5 values(5, cast(unhex('05486c3f3ee2863e713d503dd58e8e3e7b88743f') as blob)); -- this is float32[5]
    86  insert into t5 values(6, cast(unhex('9be2123fcf92de3e') as blob)); -- this is float32[2]
    87  
    88  select * from t5;
    89  select * from t5 where t5.b > "[0,0,0]";
    90  
    91  -- output vector as binary (the output is little endian hex encoding)
    92  select hex(b) from t5;
    93  
    94  -- insert nulls
    95  create table t6(a int, b vecf32(3));
    96  insert into t6 values(1, null);
    97  insert into t6 (a,b) values (1, '[1,2,3]'), (2, '[4,5,6]'), (3, '[2,1,1]'), (4, '[7,8,9]'), (5, '[0,0,0]'), (6, '[3,1,2]');
    98  select * from t6;
    99  update t6 set b = NULL;
   100  select * from t6;
   101  
   102  -- vector precision/scale check
   103  create table t7(a int, b vecf32(3), c vecf32(5));
   104  insert into t7 values(1, NULL,NULL);
   105  insert into t7 values(2, "[0.8166459, 0.66616553, 0.4886152]", NULL);
   106  insert into t7 values(3, "[0.1726299, 3.2908857, 30.433094]","[0.45052445, 2.1984527, 9.579752, 123.48039, 4635.894]");
   107  insert into t7 values(4, "[8.560689, 6.790359, 821.9778]", "[0.46323407, 23.498016, 563.923, 56.076736, 8732.958]");
   108  select * from t7;
   109  select a, b + b, c + c from t7;
   110  select a, b * b, c * c from t7;
   111  select l2_norm(b), l2_norm(c) from t7;
   112  
   113  
   114  -- insert, flush and select
   115  insert into vec_table values(2, "[0,2,3]", "[4,4,6]");
   116  insert into vec_table values(3, "[1,3,3]", "[4,1,6]");
   117  -- @separator:table
   118  select mo_ctl('dn', 'flush', 'vecdb.vec_table');
   119  -- @separator:table
   120  select mo_ctl('dn', 'flush', 'vecdb.t6');
   121  select * from vec_table where b> "[1,2,3]";
   122  select * from vec_table where b!= "[1,2,3]";
   123  select * from vec_table where b= "[1,2,3]";
   124  
   125  -- create table with PK or UK or No Key (https://github.com/matrixorigin/matrixone/issues/13038)
   126  create table vec_table1(a int, b vecf32(3), c vecf64(3));
   127  insert into vec_table1 values(1, "[1,2,3]", "[4,5,6]");
   128  select * from vec_table1;
   129  create table vec_table2(a int primary key, b vecf32(3), c vecf64(3));
   130  insert into vec_table2 values(1, "[1,2,3]", "[4,5,6]");
   131  select * from vec_table2;
   132  create table vec_table3(a int unique key, b vecf32(3), c vecf64(3));
   133  insert into vec_table3 values(1, "[1,2,3]", "[4,5,6]");
   134  select * from vec_table3;
   135  
   136  -- Scalar Null check
   137  select summation(null);
   138  select l1_norm(null);
   139  select l2_norm(null);
   140  select vector_dims(null);
   141  select inner_product(null, "[1,2,3]");
   142  select cosine_similarity(null, "[1,2,3]");
   143  select l2_distance(null, "[1,2,3]");
   144  select cosine_distance(null, "[1,2,3]");
   145  select normalize_l2(null);
   146  select cast(null as vecf32(3));
   147  select cast(null as vecf64(3));
   148  
   149  -- Precision issue for Cosine Similarity/Distance
   150  create table t8(a int, b vecf32(3), c vecf32(5));
   151  INSERT INTO `t8` VALUES (1,NULL,NULL);
   152  INSERT INTO `t8` VALUES(2,'[0.8166459, 0.66616553, 0.4886152]',NULL);
   153  INSERT INTO `t8` VALUES(3,'[0.1726299, 3.2908857, 30.433094]','[0.45052445, 2.1984527, 9.579752, 123.48039, 4635.894]');
   154  INSERT INTO `t8` VALUES(4,'[8.560689, 6.790359, 821.9778]','[0.46323407, 23.498016, 563.923, 56.076736, 8732.958]');
   155  select cosine_similarity(b,b), cosine_similarity(c,c) from t8;
   156  
   157  create table t9(a int, b vecf64(3), c vecf64(5));
   158  INSERT INTO `t9` VALUES (1,NULL,NULL);
   159  INSERT INTO `t9` VALUES (2,'[0.8166459, 0.66616553, 0.4886152]',NULL);
   160  INSERT INTO `t9` VALUES (3,'[8.5606893, 6.7903588, 821.977768]','[0.46323407, 23.49801546, 563.9229458, 56.07673508, 8732.9583881]');
   161  INSERT INTO `t9` VALUES (4,'[0.9260021, 0.26637346, 0.06567037]','[0.45756745, 65.2996871, 321.623636, 3.60082066, 87.58445764]');
   162  select cosine_similarity(b,b), cosine_similarity(c,c) from t9;
   163  
   164  -- Sub Vector
   165  create table t10(a int, b vecf32(3), c vecf64(3));
   166  insert into t10 values(1, "[1,2.4,3]", "[4.1,5,6]");
   167  insert into t10 values(2, "[3,4,5]", "[6,7.3,8]");
   168  insert into t10 values(3, "[5,6,7]", "[8,9,10]");
   169  select subvector(b,1) from t10;
   170  select subvector(b,2) from t10;
   171  select subvector(b,3) from t10;
   172  select subvector(b,4) from t10;
   173  select subvector(b,-1) from t10;
   174  select subvector(b,-2) from t10;
   175  select subvector(b,-3) from t10;
   176  select subvector(b, 1, 1) from t10;
   177  select subvector(b, 1, 2) from t10;
   178  select subvector(b, 1, 3) from t10;
   179  select subvector(b, 1, 4) from t10;
   180  select subvector(b, -1, 1) from t10;
   181  select subvector(b, -2, 1) from t10;
   182  select subvector(b, -3, 1) from t10;
   183  SELECT SUBVECTOR("[1,2,3]", 2);
   184  SELECT SUBVECTOR("[1,2,3]",2,1);
   185  
   186  -- Arithmetic Operators between Vector and Scalar
   187  select b + 2 from t10;
   188  select b - 2 from t10;
   189  select b * 2 from t10;
   190  select b / 2 from t10;
   191  select 2 + b from t10;
   192  select 2 - b from t10;
   193  select 2 * b from t10;
   194  select 2 / b from t10;
   195  select b + 2.0 from t10;
   196  select b - 2.0 from t10;
   197  select b * 2.0 from t10;
   198  select b / 2.0 from t10;
   199  select 2.0 + b from t10;
   200  select 2.0 - b from t10;
   201  select 2.0 * b from t10;
   202  select 2.0 / b from t10;
   203  select cast("[1,2,3]" as vecf32(3)) + 2;
   204  select cast("[1,2,3]" as vecf32(3)) - 2;
   205  select cast("[1,2,3]" as vecf32(3)) * 2;
   206  select cast("[1,2,3]" as vecf32(3)) / 2;
   207  select 2 + cast("[1,2,3]" as vecf32(3));
   208  select 2 - cast("[1,2,3]" as vecf32(3));
   209  select 2 * cast("[1,2,3]" as vecf32(3));
   210  select 2 / cast("[1,2,3]" as vecf32(3));
   211  select cast("[1,2,3]" as vecf32(3)) + 2.0;
   212  select cast("[1,2,3]" as vecf32(3)) - 2.0;
   213  select cast("[1,2,3]" as vecf32(3)) * 2.0;
   214  select cast("[1,2,3]" as vecf32(3)) / 2.0;
   215  select 2.0 + cast("[1,2,3]" as vecf32(3));
   216  select 2.0 - cast("[1,2,3]" as vecf32(3));
   217  select 2.0 * cast("[1,2,3]" as vecf32(3));
   218  select 2.0 / cast("[1,2,3]" as vecf32(3));
   219  select cast("[1,2,3]" as vecf32(3)) / 0 ;
   220  select 5 + (-1*cast("[1,2,3]" as vecf32(3)));
   221  
   222  -- Distinct SQL
   223  create table t11(a vecf32(2));
   224  insert into t11 values('[1,0]');
   225  insert into t11 values('[1,2]');
   226  select distinct a from t11;
   227  select distinct a,a from t11;
   228  
   229  -- TinyInt + Vector
   230  drop table if exists t1;
   231  create table t1(c1 int,c2 vecf32(5),c3 tinyint unsigned,c4 bigint,c5 decimal(4,2),c6 float,c7 double);
   232  insert into t1 values(10 ,"[1, 0, 1, 6, 6]",3,10,7.1,0.36,2.10);
   233  insert into t1 values(60,"[6, 0, 8, 10,129]",2,5,3.26,4.89,1.26);
   234  insert into t1 values(20,"[ 9, 18, 1, 4, 132]",6,1,9.36,6.9,5.6);
   235  select c2+c3 from t1;
   236  
   237  -- Except
   238  select * from t8 except select * from t9;
   239  
   240  -- infinity scenario
   241  select cast("[76875768584509877574546435800000005,8955885757767774774774774456466]" as vecf32(2)) *623585864455;
   242  
   243  -- post
   244  drop database vecdb;