github.com/matrixorigin/matrixone@v1.2.0/test/distributed/cases/sample/sample_func.sql (about)

     1  /* data prepare for sample from single column */
     2  drop table if exists s_t1;
     3  create table s_t1 (c1 int, c2 int, c3 int);
     4  insert into s_t1 values (1, 1, 0), (1, 1, 1), (1, 1, 2), (1, 1, null);
     5  insert into s_t1 values (2, 1, 0);
     6  insert into s_t1 values (2, 2, 0), (2, 2, null);
     7  insert into s_t1 values (3, 1, null), (3, 2, null), (3, 2, null);
     8  
     9  /* expected failed case */
    10  /* 1. only support one sample function in a sql */
    11  select c1, sample(c2, 1 rows), sample(c3, 1 rows) from s_t1;
    12  /* 2. cannot fixed sample function (non-scalar function) and aggregate function (scalar function) */
    13  select c1, max(c2), sample(c3, 1 rows) from s_t1;
    14  /* 3. sample(column list, N rows) requires 1 <= N <= 11000 */
    15  select sample(c1, 0 rows) from s_t1;
    16  select sample(c1, 11001 rows) from s_t1;
    17  /* 4. sample(column list, K percent) requires 0.00 <= K <= 100.00, should watch that 99.994 was accepted but will treat as 99.99 */
    18  select sample(c1, 101 percent) from s_t1;
    19  select sample(c1, 100.01 percent) from s_t1;
    20  /* 5. sample cannot be used in where clause */
    21  select c1, c2, c3 from s_t1 where sample(c1, 1 rows) = 1;
    22  select c1, c2, c3 from s_t1 where sample(c1, 1 percent) = 1;
    23  /* 6. cannot sample the group by column */
    24  select sample(c1, 1 rows) from s_t1 group by c1;
    25  select sample(*, 1 rows) from s_t1 group by c1;
    26  
    27  /* expected succeed case */
    28  /* 1. sample 2 rows from table by column c1 */
    29  select count(*) from (select sample(c1, 2 rows) from s_t1);
    30  /* 2. sample 2 rows from table by column c2 */
    31  select count(*) from (select sample(c2, 2 rows) from s_t1);
    32  /* 3. sample 2 rows from table by column c2 group by col1 */
    33  select count(*) from (select sample(c2, 2 rows) from s_t1 group by c1);
    34  /* 4. sample 3 rows from table by column c3 where c1 = 2, expected to get only 2 rows because one row's c3 is null */
    35  select sample(c3, 3 rows) from s_t1 where c1 = 2;
    36  /* 5. sample 3 rows from table by column c3 where c1 = 1, expected to get 3 rows (0),(1),(2) */
    37  select c1, c2, sample(c3, 3 rows) from s_t1 where c1 = 1 order by c3;
    38  /* 6. sample 100 percent from table by column c3, expected to get all rows except null rows */
    39  select sample(c3, 100 percent) from s_t1 order by c3;
    40  /* 7. sample 0 percent from table by column c1, expected to get empty */
    41  select sample(c1, 0 percent) from s_t1;
    42  /* 8. some case I don't know how to describe it, in short, these cases should be OK */
    43  select count(*) from (select c1, c2, sample(c2, 100 percent), c3 from s_t1);
    44  select count(*) from (select c1, sample(c2, 100 percent), c2, c3 from s_t1);
    45  /* 9. with limit */
    46  select count(*) from (select sample(c2, 2 rows) from s_t1 limit 1);
    47  select count(*) from (select sample(c2, 2 rows) from s_t1 group by c1 limit 2);
    48  /* 10. with alias */
    49  select sample(c3, 3 rows) as k from s_t1 where c1 = 2;
    50  /* 11. sample from all invalid rows, should get only one invalid row */
    51  select sample(c3, 1 rows) from s_t1 where c1 = 3;
    52  select sample(c3, 2 rows) from s_t1 where c1 = 3;
    53  select c1, sample(c3, 3 rows) from s_t1 where c1 = 3 group by c1;
    54  select c1, sample(c3, 3 rows) from s_t1 group by c1 order by c1, c3;
    55  /* 12. sample as and outer filter */
    56  select * from (select c1, sample(c3, 3 rows) as k from s_t1 group by c1) where k < 2 order by c1, k;
    57  
    58  /* data prepare for sample from multi columns */
    59  drop table if exists s_t2;
    60  create table s_t2 (cc1 int, cc2 int);
    61  insert into s_t2 values (1, 1), (null, 2), (3, null), (null, null);
    62  
    63  /* expected failed case */
    64  /* 1. multi sample with alias */
    65  select sample(cc1, cc2, 1 rows) as k from s_t2;
    66  
    67  /* expected succeed case */
    68  /* 1. sample 2 rows from table by column cc1, cc2, expected to get 3 rows because we should sample 2 not-null value for each column */
    69  select count(*) from (select sample(cc1, cc2, 2 rows) from s_t2);
    70  /* 2. sample 100 percent from table by column cc1, cc2, expected to get all rows except (null, null) */
    71  select sample(cc1, cc2, 100 percent) from s_t2 order by cc1 asc;
    72  /* 3. sample 0 percent from table by column cc1, cc2, expected to get empty */
    73  select sample(cc1, cc2, 0 percent) from s_t2;
    74  /* 4. should support the sample * from table */
    75  select sample(*, 100 percent) from s_t2 order by cc1 asc;
    76  select sample(*, 0 percent) from s_t2;
    77  select sample(*, 2 rows) from s_t2 order by cc1 asc;
    78  
    79  /* data prepare for expression sample */
    80  drop table if exists s_t3;
    81  create table s_t3 (c1 int, c2 int);
    82  insert into s_t3 values (1, 3), (2, 5), (3, 6), (4, 7), (5, 8);
    83  
    84  /* expected failed case */
    85  /* 1. sample(aggregate function, N rows) should failed */
    86  select sample(max(c1), 1 rows) from s_t3;
    87  
    88  /* expected succeed case */
    89  /* 1. expression (not only simple column) should be OK */
    90  select c1, sample(c1+1, 100 percent) from s_t3 order by c1;
    91  
    92  -- test `sample(expression, n rows, unit)` syntax.
    93  -- it's same as sample(n rows) but will avoid centroids skewed.
    94  select count(*) from (select sample(c1, 2 rows, 'row') from s_t3);
    95  select count(*) from (select sample(c1, 2 rows, 'block') from s_t3);
    96  select c1, sample(c2, 1 rows, 'row') from s_t3 group by c1 order by c1;