github.com/matrixorigin/matrixone@v1.2.0/test/distributed/cases/sample/sample_func.sql (about) 1 /* data prepare for sample from single column */ 2 drop table if exists s_t1; 3 create table s_t1 (c1 int, c2 int, c3 int); 4 insert into s_t1 values (1, 1, 0), (1, 1, 1), (1, 1, 2), (1, 1, null); 5 insert into s_t1 values (2, 1, 0); 6 insert into s_t1 values (2, 2, 0), (2, 2, null); 7 insert into s_t1 values (3, 1, null), (3, 2, null), (3, 2, null); 8 9 /* expected failed case */ 10 /* 1. only support one sample function in a sql */ 11 select c1, sample(c2, 1 rows), sample(c3, 1 rows) from s_t1; 12 /* 2. cannot fixed sample function (non-scalar function) and aggregate function (scalar function) */ 13 select c1, max(c2), sample(c3, 1 rows) from s_t1; 14 /* 3. sample(column list, N rows) requires 1 <= N <= 11000 */ 15 select sample(c1, 0 rows) from s_t1; 16 select sample(c1, 11001 rows) from s_t1; 17 /* 4. sample(column list, K percent) requires 0.00 <= K <= 100.00, should watch that 99.994 was accepted but will treat as 99.99 */ 18 select sample(c1, 101 percent) from s_t1; 19 select sample(c1, 100.01 percent) from s_t1; 20 /* 5. sample cannot be used in where clause */ 21 select c1, c2, c3 from s_t1 where sample(c1, 1 rows) = 1; 22 select c1, c2, c3 from s_t1 where sample(c1, 1 percent) = 1; 23 /* 6. cannot sample the group by column */ 24 select sample(c1, 1 rows) from s_t1 group by c1; 25 select sample(*, 1 rows) from s_t1 group by c1; 26 27 /* expected succeed case */ 28 /* 1. sample 2 rows from table by column c1 */ 29 select count(*) from (select sample(c1, 2 rows) from s_t1); 30 /* 2. sample 2 rows from table by column c2 */ 31 select count(*) from (select sample(c2, 2 rows) from s_t1); 32 /* 3. sample 2 rows from table by column c2 group by col1 */ 33 select count(*) from (select sample(c2, 2 rows) from s_t1 group by c1); 34 /* 4. sample 3 rows from table by column c3 where c1 = 2, expected to get only 2 rows because one row's c3 is null */ 35 select sample(c3, 3 rows) from s_t1 where c1 = 2; 36 /* 5. sample 3 rows from table by column c3 where c1 = 1, expected to get 3 rows (0),(1),(2) */ 37 select c1, c2, sample(c3, 3 rows) from s_t1 where c1 = 1 order by c3; 38 /* 6. sample 100 percent from table by column c3, expected to get all rows except null rows */ 39 select sample(c3, 100 percent) from s_t1 order by c3; 40 /* 7. sample 0 percent from table by column c1, expected to get empty */ 41 select sample(c1, 0 percent) from s_t1; 42 /* 8. some case I don't know how to describe it, in short, these cases should be OK */ 43 select count(*) from (select c1, c2, sample(c2, 100 percent), c3 from s_t1); 44 select count(*) from (select c1, sample(c2, 100 percent), c2, c3 from s_t1); 45 /* 9. with limit */ 46 select count(*) from (select sample(c2, 2 rows) from s_t1 limit 1); 47 select count(*) from (select sample(c2, 2 rows) from s_t1 group by c1 limit 2); 48 /* 10. with alias */ 49 select sample(c3, 3 rows) as k from s_t1 where c1 = 2; 50 /* 11. sample from all invalid rows, should get only one invalid row */ 51 select sample(c3, 1 rows) from s_t1 where c1 = 3; 52 select sample(c3, 2 rows) from s_t1 where c1 = 3; 53 select c1, sample(c3, 3 rows) from s_t1 where c1 = 3 group by c1; 54 select c1, sample(c3, 3 rows) from s_t1 group by c1 order by c1, c3; 55 /* 12. sample as and outer filter */ 56 select * from (select c1, sample(c3, 3 rows) as k from s_t1 group by c1) where k < 2 order by c1, k; 57 58 /* data prepare for sample from multi columns */ 59 drop table if exists s_t2; 60 create table s_t2 (cc1 int, cc2 int); 61 insert into s_t2 values (1, 1), (null, 2), (3, null), (null, null); 62 63 /* expected failed case */ 64 /* 1. multi sample with alias */ 65 select sample(cc1, cc2, 1 rows) as k from s_t2; 66 67 /* expected succeed case */ 68 /* 1. sample 2 rows from table by column cc1, cc2, expected to get 3 rows because we should sample 2 not-null value for each column */ 69 select count(*) from (select sample(cc1, cc2, 2 rows) from s_t2); 70 /* 2. sample 100 percent from table by column cc1, cc2, expected to get all rows except (null, null) */ 71 select sample(cc1, cc2, 100 percent) from s_t2 order by cc1 asc; 72 /* 3. sample 0 percent from table by column cc1, cc2, expected to get empty */ 73 select sample(cc1, cc2, 0 percent) from s_t2; 74 /* 4. should support the sample * from table */ 75 select sample(*, 100 percent) from s_t2 order by cc1 asc; 76 select sample(*, 0 percent) from s_t2; 77 select sample(*, 2 rows) from s_t2 order by cc1 asc; 78 79 /* data prepare for expression sample */ 80 drop table if exists s_t3; 81 create table s_t3 (c1 int, c2 int); 82 insert into s_t3 values (1, 3), (2, 5), (3, 6), (4, 7), (5, 8); 83 84 /* expected failed case */ 85 /* 1. sample(aggregate function, N rows) should failed */ 86 select sample(max(c1), 1 rows) from s_t3; 87 88 /* expected succeed case */ 89 /* 1. expression (not only simple column) should be OK */ 90 select c1, sample(c1+1, 100 percent) from s_t3 order by c1; 91 92 -- test `sample(expression, n rows, unit)` syntax. 93 -- it's same as sample(n rows) but will avoid centroids skewed. 94 select count(*) from (select sample(c1, 2 rows, 'row') from s_t3); 95 select count(*) from (select sample(c1, 2 rows, 'block') from s_t3); 96 select c1, sample(c2, 1 rows, 'row') from s_t3 group by c1 order by c1;