github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/dbs/cmd/benchdb/explaintest/t/tpch.test (about)

     1  -- http://www.tpc.org/tpc_documents_current_versions/FIDelf/tpc-h_v2.17.1.FIDelf
     2  CREATE DATABASE IF NOT EXISTS TPCH;
     3  USE TPCH;
     4  CREATE TABLE IF NOT EXISTS nation  ( N_NATIONKEY  INTEGER NOT NULL,
     5                              N_NAME       CHAR(25) NOT NULL,
     6                              N_REGIONKEY  INTEGER NOT NULL,
     7                              N_COMMENT    VARCHAR(152),
     8  			    PRIMARY KEY (N_NATIONKEY));
     9  
    10  CREATE TABLE IF NOT EXISTS region  ( R_REGIONKEY  INTEGER NOT NULL,
    11         	               R_NAME       CHAR(25) NOT NULL,
    12                         R_COMMENT    VARCHAR(152),
    13  	               PRIMARY KEY (R_REGIONKEY));
    14  
    15  CREATE TABLE IF NOT EXISTS part  ( P_PARTKEY     INTEGER NOT NULL,
    16                            P_NAME        VARCHAR(55) NOT NULL,
    17                            P_MFGR        CHAR(25) NOT NULL,
    18                            P_BRAND       CHAR(10) NOT NULL,
    19                            P_TYPE        VARCHAR(25) NOT NULL,
    20                            P_SIZE        INTEGER NOT NULL,
    21                            P_CONTAINER   CHAR(10) NOT NULL,
    22                            P_RETAILPRICE DECIMAL(15,2) NOT NULL,
    23                            P_COMMENT     VARCHAR(23) NOT NULL,
    24  			  PRIMARY KEY (P_PARTKEY));
    25  
    26  CREATE TABLE IF NOT EXISTS supplier  ( S_SUPPKEY     INTEGER NOT NULL,
    27                               S_NAME        CHAR(25) NOT NULL,
    28                               S_ADDRESS     VARCHAR(40) NOT NULL,
    29                               S_NATIONKEY   INTEGER NOT NULL,
    30                               S_PHONE       CHAR(15) NOT NULL,
    31                               S_ACCTBAL     DECIMAL(15,2) NOT NULL,
    32                               S_COMMENT     VARCHAR(101) NOT NULL,
    33  			     PRIMARY KEY (S_SUPPKEY),
    34  			     CONSTRAINT FOREIGN KEY SUPPLIER_FK1 (S_NATIONKEY) references nation(N_NATIONKEY));
    35  
    36  CREATE TABLE IF NOT EXISTS partsupp ( PS_PARTKEY     INTEGER NOT NULL,
    37                               PS_SUPPKEY     INTEGER NOT NULL,
    38                               PS_AVAILQTY    INTEGER NOT NULL,
    39                               PS_SUPPLYCOST  DECIMAL(15,2)  NOT NULL,
    40                               PS_COMMENT     VARCHAR(199) NOT NULL,
    41  			     PRIMARY KEY (PS_PARTKEY,PS_SUPPKEY),
    42  			     CONSTRAINT FOREIGN KEY PARTSUPP_FK1 (PS_SUPPKEY) references supplier(S_SUPPKEY),
    43  			     CONSTRAINT FOREIGN KEY PARTSUPP_FK2 (PS_PARTKEY) references part(P_PARTKEY));
    44  
    45  CREATE TABLE IF NOT EXISTS customer  ( C_CUSTKEY     INTEGER NOT NULL,
    46                               C_NAME        VARCHAR(25) NOT NULL,
    47                               C_ADDRESS     VARCHAR(40) NOT NULL,
    48                               C_NATIONKEY   INTEGER NOT NULL,
    49                               C_PHONE       CHAR(15) NOT NULL,
    50                               C_ACCTBAL     DECIMAL(15,2)   NOT NULL,
    51                               C_MKTSEGMENT  CHAR(10) NOT NULL,
    52                               C_COMMENT     VARCHAR(117) NOT NULL,
    53  			     PRIMARY KEY (C_CUSTKEY),
    54  			     CONSTRAINT FOREIGN KEY CUSTOMER_FK1 (C_NATIONKEY) references nation(N_NATIONKEY));
    55  
    56  CREATE TABLE IF NOT EXISTS orders  ( O_ORDERKEY       INTEGER NOT NULL,
    57                             O_CUSTKEY        INTEGER NOT NULL,
    58                             O_ORDERSTATUS    CHAR(1) NOT NULL,
    59                             O_TOTALPRICE     DECIMAL(15,2) NOT NULL,
    60                             O_ORDERDATE      DATE NOT NULL,
    61                             O_ORDERPRIORITY  CHAR(15) NOT NULL,  
    62                             O_CLERK          CHAR(15) NOT NULL, 
    63                             O_SHIPPRIORITY   INTEGER NOT NULL,
    64                             O_COMMENT        VARCHAR(79) NOT NULL,
    65  			   PRIMARY KEY (O_ORDERKEY),
    66  			   CONSTRAINT FOREIGN KEY ORDERS_FK1 (O_CUSTKEY) references customer(C_CUSTKEY));
    67  
    68  CREATE TABLE IF NOT EXISTS lineitem ( L_ORDERKEY    INTEGER NOT NULL,
    69                               L_PARTKEY     INTEGER NOT NULL,
    70                               L_SUPPKEY     INTEGER NOT NULL,
    71                               L_LINENUMBER  INTEGER NOT NULL,
    72                               L_QUANTITY    DECIMAL(15,2) NOT NULL,
    73                               L_EXTENDEDPRICE  DECIMAL(15,2) NOT NULL,
    74                               L_DISCOUNT    DECIMAL(15,2) NOT NULL,
    75                               L_TAX         DECIMAL(15,2) NOT NULL,
    76                               L_RETURNFLAG  CHAR(1) NOT NULL,
    77                               L_LINESTATUS  CHAR(1) NOT NULL,
    78                               L_SHIFIDelATE    DATE NOT NULL,
    79                               L_COMMITDATE  DATE NOT NULL,
    80                               L_RECEIPTDATE DATE NOT NULL,
    81                               L_SHIPINSTRUCT CHAR(25) NOT NULL,
    82                               L_SHIPMODE     CHAR(10) NOT NULL,
    83                               L_COMMENT      VARCHAR(44) NOT NULL,
    84  			     PRIMARY KEY (L_ORDERKEY,L_LINENUMBER),
    85  			     CONSTRAINT FOREIGN KEY LINEITEM_FK1 (L_ORDERKEY)  references orders(O_ORDERKEY),
    86  			     CONSTRAINT FOREIGN KEY LINEITEM_FK2 (L_PARTKEY,L_SUPPKEY) references partsupp(PS_PARTKEY, PS_SUPPKEY));
    87  -- load stats.
    88  load stats 's/tpch_stats/nation.json';
    89  load stats 's/tpch_stats/region.json';
    90  load stats 's/tpch_stats/part.json';
    91  load stats 's/tpch_stats/supplier.json';
    92  load stats 's/tpch_stats/partsupp.json';
    93  load stats 's/tpch_stats/customer.json';
    94  load stats 's/tpch_stats/orders.json';
    95  load stats 's/tpch_stats/lineitem.json';
    96  
    97  set @@stochastik.milevadb_opt_agg_push_down = 0;
    98  
    99  /*
   100      Q1 Pricing Summary Report
   101      This query reports the amount of business that was billed, shipped, and returned.
   102  
   103      The Pricing Summary Report Query provides a summary pricing report for all lineitems shipped as of a given date.
   104      The date is within 60 - 120 days of the greatest ship date contained in the database. The query lists totals for
   105      extended price, discounted extended price, discounted extended price plus tax, average quantity, average extended
   106      price, and average discount. These aggregates are grouped by RETURNFLAG and LINESTATUS, and listed in
   107      ascending order of RETURNFLAG and LINESTATUS. A count of the number of lineitems in each group is
   108      included.
   109  
   110      CausetAppend enhancement: none.
   111  */
   112  explain
   113  select
   114  	l_returnflag,
   115  	l_linestatus,
   116  	sum(l_quantity) as sum_qty,
   117  	sum(l_extendedprice) as sum_base_price,
   118  	sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
   119  	sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
   120  	avg(l_quantity) as avg_qty,
   121  	avg(l_extendedprice) as avg_price,
   122  	avg(l_discount) as avg_disc,
   123  	count(*) as count_order
   124  from
   125  	lineitem
   126  where
   127  	l_shiFIDelate <= date_sub('1998-12-01', interval 108 day)
   128  group by
   129  	l_returnflag,
   130  	l_linestatus
   131  order by
   132  	l_returnflag,
   133  	l_linestatus;
   134  
   135  /*
   136      Q2 Minimum Cost Supplier Query
   137      This query finds which supplier should be selected to place an order for a given part in a given region.
   138  
   139      The Minimum Cost Supplier Query finds, in a given region, for each part of a certain type and size, the supplier who
   140      can supply it at minimum cost. If several suppliers in that region offer the desired part type and size at the same
   141      (minimum) cost, the query lists the parts from suppliers with the 100 highest account balances. For each supplier,
   142      the query lists the supplier's account balance, name and nation; the part's number and manufacturer; the supplier's
   143      address, phone number and comment information.
   144  
   145      CausetAppend enhancement: join reorder.
   146  */
   147  explain
   148  select
   149  	s_acctbal,
   150  	s_name,
   151  	n_name,
   152  	p_partkey,
   153  	p_mfgr,
   154  	s_address,
   155  	s_phone,
   156  	s_comment
   157  from
   158  	part,
   159  	supplier,
   160  	partsupp,
   161  	nation,
   162  	region
   163  where
   164  	p_partkey = ps_partkey
   165  	and s_suppkey = ps_suppkey
   166  	and p_size = 30
   167  	and p_type like '%STEEL'
   168  	and s_nationkey = n_nationkey
   169  	and n_regionkey = r_regionkey
   170  	and r_name = 'ASIA'
   171  	and ps_supplycost = (
   172  		select
   173  			min(ps_supplycost)
   174  		from
   175  			partsupp,
   176  			supplier,
   177  			nation,
   178  			region
   179  		where
   180  			p_partkey = ps_partkey
   181  			and s_suppkey = ps_suppkey
   182  			and s_nationkey = n_nationkey
   183  			and n_regionkey = r_regionkey
   184  			and r_name = 'ASIA'
   185  	)
   186  order by
   187  	s_acctbal desc,
   188  	n_name,
   189  	s_name,
   190  	p_partkey
   191  limit 100;
   192  
   193  /*
   194      Q3 Shipping Priority Query
   195      This query retrieves the 10 unshipped orders with the highest value.
   196  
   197      The Shipping Priority Query retrieves the shipping priority and potential revenue, defined as the sum of
   198      l_extendedprice * (1-l_discount), of the orders having the largest revenue among those that had not been shipped as
   199      of a given date. Orders are listed in decreasing order of revenue. If more than 10 unshipped orders exist, only the 10
   200      orders with the largest revenue are listed.
   201  
   202      causet enhancement: if group-by item have primary key, non-priamry key is useless.
   203  */
   204  explain
   205  select
   206  	l_orderkey,
   207  	sum(l_extendedprice * (1 - l_discount)) as revenue,
   208  	o_orderdate,
   209  	o_shippriority
   210  from
   211  	customer,
   212  	orders,
   213  	lineitem
   214  where
   215  	c_mktsegment = 'AUTOMOBILE'
   216  	and c_custkey = o_custkey
   217  	and l_orderkey = o_orderkey
   218  	and o_orderdate < '1995-03-13'
   219  	and l_shiFIDelate > '1995-03-13'
   220  group by
   221  	l_orderkey,
   222  	o_orderdate,
   223  	o_shippriority
   224  order by
   225  	revenue desc,
   226  	o_orderdate
   227  limit 10;
   228  
   229  /*
   230      Q4 Order Priority Checking Query
   231      This query determines how well the order priority system is working and gives an assessment of customer satisfaction.
   232  
   233      The Order Priority Checking Query counts the number of orders ordered in a given quarter of a given year in which
   234      at least one lineitem was received by the customer later than its committed date. The query lists the count of such
   235      orders for each order priority sorted in ascending priority order.
   236  */
   237  explain
   238  select
   239  	o_orderpriority,
   240  	count(*) as order_count
   241  from
   242  	orders
   243  where
   244  	o_orderdate >= '1995-01-01'
   245  	and o_orderdate < date_add('1995-01-01', interval '3' month)
   246  	and exists (
   247  		select
   248  			*
   249  		from
   250  			lineitem
   251  		where
   252  			l_orderkey = o_orderkey
   253  			and l_commitdate < l_receiptdate
   254  	)
   255  group by
   256  	o_orderpriority
   257  order by
   258  	o_orderpriority;
   259  
   260  /*
   261      Q5 Local Supplier Volume Query
   262      This query lists the revenue volume done through local suppliers.
   263  
   264      The Local Supplier Volume Query lists for each nation in a region the revenue volume that resulted from lineitem
   265      transactions in which the customer ordering parts and the supplier filling them were both within that nation. The
   266      query is run in order to determine whether to institute local distribution centers in a given region. The query considers
   267      only parts ordered in a given year. The query displays the nations and revenue volume in descending order by
   268      revenue. Revenue volume for all qualifying lineitems in a particular nation is defined as sum(l_extendedprice * (1 -
   269      l_discount)).
   270  
   271      CausetAppend enhancement: join reorder.
   272  */
   273  explain
   274  select
   275  	n_name,
   276  	sum(l_extendedprice * (1 - l_discount)) as revenue
   277  from
   278  	customer,
   279  	orders,
   280  	lineitem,
   281  	supplier,
   282  	nation,
   283  	region
   284  where
   285  	c_custkey = o_custkey
   286  	and l_orderkey = o_orderkey
   287  	and l_suppkey = s_suppkey
   288  	and c_nationkey = s_nationkey
   289  	and s_nationkey = n_nationkey
   290  	and n_regionkey = r_regionkey
   291  	and r_name = 'MIDBSE EAST'
   292  	and o_orderdate >= '1994-01-01'
   293  	and o_orderdate < date_add('1994-01-01', interval '1' year)
   294  group by
   295  	n_name
   296  order by
   297  	revenue desc;
   298  
   299  /*
   300      Q6 Forecasting Revenue Change Query
   301      This query quantifies the amount of revenue increase that would have resulted from eliminating certain companywide
   302      discounts in a given percentage range in a given year. Asking this type of "what if" query can be used to look
   303      for ways to increase revenues.
   304  
   305      The Forecasting Revenue Change Query considers all the lineitems shipped in a given year with discounts between
   306      DISCOUNT-0.01 and DISCOUNT+0.01. The query lists the amount by which the total revenue would have
   307      increased if these discounts had been eliminated for lineitems with l_quantity less than quantity. Note that the
   308      potential revenue increase is equal to the sum of [l_extendedprice * l_discount] for all lineitems with discounts and
   309      quantities in the qualifying range.
   310  */
   311  explain
   312  select
   313  	sum(l_extendedprice * l_discount) as revenue
   314  from
   315  	lineitem
   316  where
   317  	l_shiFIDelate >= '1994-01-01'
   318  	and l_shiFIDelate < date_add('1994-01-01', interval '1' year)
   319  	and l_discount between 0.06 - 0.01 and 0.06 + 0.01
   320  	and l_quantity < 24;
   321  
   322  /*
   323      Q7 Volume Shipping Query
   324      This query determines the value of goods shipped between certain nations to help in the re-negotiation of shipping
   325      contracts.
   326  
   327      The Volume Shipping Query finds, for two given nations, the gross discounted revenues derived from lineitems in
   328      which parts were shipped from a supplier in either nation to a customer in the other nation during 1995 and 1996.
   329      The query lists the supplier nation, the customer nation, the year, and the revenue from shipments that took place in
   330      that year. The query orders the answer by Supplier nation, Customer nation, and year (all ascending).
   331  
   332      CausetAppend enahancement: join reorder.
   333  */
   334  explain
   335  select
   336  	supp_nation,
   337  	cust_nation,
   338  	l_year,
   339  	sum(volume) as revenue
   340  from
   341  	(
   342  		select
   343  			n1.n_name as supp_nation,
   344  			n2.n_name as cust_nation,
   345  			extract(year from l_shiFIDelate) as l_year,
   346  			l_extendedprice * (1 - l_discount) as volume
   347  		from
   348  			supplier,
   349  			lineitem,
   350  			orders,
   351  			customer,
   352  			nation n1,
   353  			nation n2
   354  		where
   355  			s_suppkey = l_suppkey
   356  			and o_orderkey = l_orderkey
   357  			and c_custkey = o_custkey
   358  			and s_nationkey = n1.n_nationkey
   359  			and c_nationkey = n2.n_nationkey
   360  			and (
   361  				(n1.n_name = 'JAPAN' and n2.n_name = 'INDIA')
   362  				or (n1.n_name = 'INDIA' and n2.n_name = 'JAPAN')
   363  			)
   364  			and l_shiFIDelate between '1995-01-01' and '1996-12-31'
   365  	) as shipping
   366  group by
   367  	supp_nation,
   368  	cust_nation,
   369  	l_year
   370  order by
   371  	supp_nation,
   372  	cust_nation,
   373  	l_year;
   374  
   375  /*
   376      Q8 National Market Share Query
   377      This query determines how the market share of a given nation within a given region has changed over two years for
   378      a given part type.
   379  
   380      The market share for a given nation within a given region is defined as the fraction of the revenue, the sum of
   381      [l_extendedprice * (1-l_discount)], from the products of a specified type in that region that was supplied by suppliers
   382      from the given nation. The query determines this for the years 1995 and 1996 presented in this order.
   383  
   384      CausetAppend enhancement: join reorder.
   385  */
   386  explain
   387  select
   388  	o_year,
   389  	sum(case
   390  		when nation = 'INDIA' then volume
   391  		else 0
   392  	end) / sum(volume) as mkt_share
   393  from
   394  	(
   395  		select
   396  			extract(year from o_orderdate) as o_year,
   397  			l_extendedprice * (1 - l_discount) as volume,
   398  			n2.n_name as nation
   399  		from
   400  			part,
   401  			supplier,
   402  			lineitem,
   403  			orders,
   404  			customer,
   405  			nation n1,
   406  			nation n2,
   407  			region
   408  		where
   409  			p_partkey = l_partkey
   410  			and s_suppkey = l_suppkey
   411  			and l_orderkey = o_orderkey
   412  			and o_custkey = c_custkey
   413  			and c_nationkey = n1.n_nationkey
   414  			and n1.n_regionkey = r_regionkey
   415  			and r_name = 'ASIA'
   416  			and s_nationkey = n2.n_nationkey
   417  			and o_orderdate between '1995-01-01' and '1996-12-31'
   418  			and p_type = 'SMALL PLATED COPPER'
   419  	) as all_nations
   420  group by
   421  	o_year
   422  order by
   423  	o_year;
   424  
   425  /*
   426      Q9 Product Type Profit Measure Query
   427      This query determines how much profit is made on a given line of parts, broken out by supplier nation and year.
   428  
   429      The Product Type Profit Measure Query finds, for each nation and each year, the profit for all parts ordered in that
   430      year that contain a specified substring in their names and that were filled by a supplier in that nation. The profit is
   431      defined as the sum of [(l_extendedprice*(1-l_discount)) - (ps_supplycost * l_quantity)] for all lineitems describing
   432      parts in the specified line. The query lists the nations in ascending alphabetical order and, for each nation, the year
   433      and profit in descending order by year (most recent first).
   434  
   435      CausetAppend enhancement: join reorder.
   436  */
   437  explain
   438  select
   439  	nation,
   440  	o_year,
   441  	sum(amount) as sum_profit
   442  from
   443  	(
   444  		select
   445  			n_name as nation,
   446  			extract(year from o_orderdate) as o_year,
   447  			l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
   448  		from
   449  			part,
   450  			supplier,
   451  			lineitem,
   452  			partsupp,
   453  			orders,
   454  			nation
   455  		where
   456  			s_suppkey = l_suppkey
   457  			and ps_suppkey = l_suppkey
   458  			and ps_partkey = l_partkey
   459  			and p_partkey = l_partkey
   460  			and o_orderkey = l_orderkey
   461  			and s_nationkey = n_nationkey
   462  			and p_name like '%dim%'
   463  	) as profit
   464  group by
   465  	nation,
   466  	o_year
   467  order by
   468  	nation,
   469  	o_year desc;
   470  
   471  /*
   472      Q10 Returned Item Reporting Query
   473      The query identifies customers who might be having problems with the parts that are shipped to them.
   474  
   475      The Returned Item Reporting Query finds the top 20 customers, in terms of their effect on lost revenue for a given
   476      quarter, who have returned parts. The query considers only parts that were ordered in the specified quarter. The
   477      query lists the customer's name, address, nation, phone number, account balance, comment information and revenue
   478      lost. The customers are listed in descending order of lost revenue. Revenue lost is defined as
   479      sum(l_extendedprice*(1-l_discount)) for all qualifying lineitems.
   480  
   481      CausetAppend enhancement: join reorder, if group-by item have primary key, non-priamry key is useless.
   482  */
   483  explain
   484  select
   485  	c_custkey,
   486  	c_name,
   487  	sum(l_extendedprice * (1 - l_discount)) as revenue,
   488  	c_acctbal,
   489  	n_name,
   490  	c_address,
   491  	c_phone,
   492  	c_comment
   493  from
   494  	customer,
   495  	orders,
   496  	lineitem,
   497  	nation
   498  where
   499  	c_custkey = o_custkey
   500  	and l_orderkey = o_orderkey
   501  	and o_orderdate >= '1993-08-01'
   502  	and o_orderdate < date_add('1993-08-01', interval '3' month)
   503  	and l_returnflag = 'R'
   504  	and c_nationkey = n_nationkey
   505  group by
   506  	c_custkey,
   507  	c_name,
   508  	c_acctbal,
   509  	c_phone,
   510  	n_name,
   511  	c_address,
   512  	c_comment
   513  order by
   514  	revenue desc
   515  limit 20;
   516  
   517  /*
   518      Q11 Important Stock Identification Query
   519      This query finds the most important subset of suppliers' stock in a given nation.
   520  
   521      The Important Stock Identification Query finds, from scanning the available stock of suppliers in a given nation, all
   522      the parts that represent a significant percentage of the total value of all available parts. The query displays the part
   523      number and the value of those parts in descending order of value.
   524  */
   525  explain
   526  select
   527  	ps_partkey,
   528  	sum(ps_supplycost * ps_availqty) as value
   529  from
   530  	partsupp,
   531  	supplier,
   532  	nation
   533  where
   534  	ps_suppkey = s_suppkey
   535  	and s_nationkey = n_nationkey
   536  	and n_name = 'MOZAMBIQUE'
   537  group by
   538  	ps_partkey having
   539  		sum(ps_supplycost * ps_availqty) > (
   540  			select
   541  				sum(ps_supplycost * ps_availqty) * 0.0001000000
   542  			from
   543  				partsupp,
   544  				supplier,
   545  				nation
   546  			where
   547  				ps_suppkey = s_suppkey
   548  				and s_nationkey = n_nationkey
   549  				and n_name = 'MOZAMBIQUE'
   550  		)
   551  order by
   552  	value desc;
   553  
   554  /*
   555      Q12 Shipping Modes and Order Priority Query
   556      This query determines whether selecting less expensive modes of shipping is negatively affecting the critical-priority
   557      orders by causing more parts to be received by customers after the committed date.
   558  
   559      The Shipping Modes and Order Priority Query counts, by ship mode, for lineitems actually received by customers in
   560      a given year, the number of lineitems belonging to orders for which the l_receiptdate exceeds the l_commitdate for
   561      two different specified ship modes. Only lineitems that were actually shipped before the l_commitdate are considered.
   562      The late lineitems are partitioned into two groups, those with priority URGENT or HIGH, and those with a
   563      priority other than URGENT or HIGH.
   564  */
   565  explain
   566  select
   567  	l_shipmode,
   568  	sum(case
   569  		when o_orderpriority = '1-URGENT'
   570  			or o_orderpriority = '2-HIGH'
   571  			then 1
   572  		else 0
   573  	end) as high_line_count,
   574  	sum(case
   575  		when o_orderpriority <> '1-URGENT'
   576  			and o_orderpriority <> '2-HIGH'
   577  			then 1
   578  		else 0
   579  	end) as low_line_count
   580  from
   581  	orders,
   582  	lineitem
   583  where
   584  	o_orderkey = l_orderkey
   585  	and l_shipmode in ('RAIL', 'FOB')
   586  	and l_commitdate < l_receiptdate
   587  	and l_shiFIDelate < l_commitdate
   588  	and l_receiptdate >= '1997-01-01'
   589  	and l_receiptdate < date_add('1997-01-01', interval '1' year)
   590  group by
   591  	l_shipmode
   592  order by
   593  	l_shipmode;
   594  
   595  /*
   596      Q13 Customer Distribution Query
   597      This query seeks relationships between customers and the size of their orders.
   598  
   599      This query determines the distribution of customers by the number of orders they have made, including customers
   600      who have no record of orders, past or present. It counts and reports how many customers have no orders, how many
   601      have 1, 2, 3, etc. A check is made to ensure that the orders counted do not fall into one of several special categories
   602      of orders. Special categories are identified in the order comment column by looking for a particular pattern.
   603  */
   604  explain
   605  select
   606  	c_count,
   607  	count(*) as custdist
   608  from
   609  	(
   610  		select
   611  			c_custkey,
   612  			count(o_orderkey) as c_count
   613  		from
   614  			customer left outer join orders on
   615  				c_custkey = o_custkey
   616  				and o_comment not like '%pending%deposits%'
   617  		group by
   618  			c_custkey
   619  	) c_orders
   620  group by
   621  	c_count
   622  order by
   623  	custdist desc,
   624  	c_count desc;
   625  
   626  /*
   627      Q14 Promotion Effect Query
   628      This query monitors the market response to a promotion such as TV advertisements or a special campaign.
   629  
   630      The Promotion Effect Query determines what percentage of the revenue in a given year and month was derived from
   631      promotional parts. The query considers only parts actually shipped in that month and gives the percentage. Revenue
   632      is defined as (l_extendedprice * (1-l_discount)).
   633  */
   634  explain
   635  select
   636  	100.00 * sum(case
   637  		when p_type like 'PROMO%'
   638  			then l_extendedprice * (1 - l_discount)
   639  		else 0
   640  	end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
   641  from
   642  	lineitem,
   643  	part
   644  where
   645  	l_partkey = p_partkey
   646  	and l_shiFIDelate >= '1996-12-01'
   647  	and l_shiFIDelate < date_add('1996-12-01', interval '1' month);
   648  
   649  /*
   650      Q15 Top Supplier Query
   651      This query determines the top supplier so it can be rewarded, given more business, or identified for special recognition.
   652  
   653      The Top Supplier Query finds the supplier who contributed the most to the overall revenue for parts shipped during
   654      a given quarter of a given year. In case of a tie, the query lists all suppliers whose contribution was equal to the
   655      maximum, presented in supplier number order.
   656  
   657      CausetAppend enhancement: support view.
   658  
   659  create view revenue0 (supplier_no, total_revenue) as
   660  	select
   661  		l_suppkey,
   662  		sum(l_extendedprice * (1 - l_discount))
   663  	from
   664  		lineitem
   665  	where
   666  		l_shiFIDelate >= '1997-07-01'
   667  		and l_shiFIDelate < date_add('1997-07-01', interval '3' month)
   668  	group by
   669  		l_suppkey
   670  
   671  
   672  select
   673  	s_suppkey,
   674  	s_name,
   675  	s_address,
   676  	s_phone,
   677  	total_revenue
   678  from
   679  	supplier,
   680  	revenue0
   681  where
   682  	s_suppkey = supplier_no
   683  	and total_revenue = (
   684  		select
   685  			max(total_revenue)
   686  		from
   687  			revenue0
   688  	)
   689  order by
   690  	s_suppkey
   691  
   692  drop view revenue0
   693  */
   694  
   695  /*
   696      Q16 Parts/Supplier Relationship Query
   697      This query finds out how many suppliers can supply parts with given attributes. It might be used, for example, to
   698      determine whether there is a sufficient number of suppliers for heavily ordered parts.
   699  
   700      The Parts/Supplier Relationship Query counts the number of suppliers who can supply parts that satisfy a particular
   701      customer's requirements. The customer is interested in parts of eight different sizes as long as they are not of a given
   702      type, not of a given brand, and not from a supplier who has had complaints registered at the Better Business Bureau.
   703      Results must be presented in descending count and ascending brand, type, and size.
   704  */
   705  explain
   706  select
   707  	p_brand,
   708  	p_type,
   709  	p_size,
   710  	count(distinct ps_suppkey) as supplier_cnt
   711  from
   712  	partsupp,
   713  	part
   714  where
   715  	p_partkey = ps_partkey
   716  	and p_brand <> 'Brand#34'
   717  	and p_type not like 'LARGE BRUSHED%'
   718  	and p_size in (48, 19, 12, 4, 41, 7, 21, 39)
   719  	and ps_suppkey not in (
   720  		select
   721  			s_suppkey
   722  		from
   723  			supplier
   724  		where
   725  			s_comment like '%Customer%Complaints%'
   726  	)
   727  group by
   728  	p_brand,
   729  	p_type,
   730  	p_size
   731  order by
   732  	supplier_cnt desc,
   733  	p_brand,
   734  	p_type,
   735  	p_size;
   736  
   737  /*
   738      Q17 Small-Quantity-Order Revenue Query
   739      This query determines how much average yearly revenue would be lost if orders were no longer filled for small
   740      quantities of certain parts. This may reduce overhead expenses by concentrating sales on larger shipments.
   741  
   742      The Small-Quantity-Order Revenue Query considers parts of a given brand and with a given container type and
   743      determines the average lineitem quantity of such parts ordered for all orders (past and pending) in the 7-year database.
   744      What would be the average yearly gross (undiscounted) loss in revenue if orders for these parts with a quantity
   745      of less than 20% of this average were no longer taken?
   746  
   747      CausetAppend enahancement: aggregation pull up through join.
   748  */
   749  explain
   750  select
   751  	sum(l_extendedprice) / 7.0 as avg_yearly
   752  from
   753  	lineitem,
   754  	part
   755  where
   756  	p_partkey = l_partkey
   757  	and p_brand = 'Brand#44'
   758  	and p_container = 'WRAP PKG'
   759  	and l_quantity < (
   760  		select
   761  			0.2 * avg(l_quantity)
   762  		from
   763  			lineitem
   764  		where
   765  			l_partkey = p_partkey
   766  	);
   767  
   768  /*
   769      Q18 Large Volume Customer Query
   770      The Large Volume Customer Query ranks customers based on their having placed a large quantity order. Large
   771      quantity orders are defined as those orders whose total quantity is above a certain level.
   772  
   773      The Large Volume Customer Query finds a list of the top 100 customers who have ever placed large quantity orders.
   774      The query lists the customer name, customer key, the order key, date and total price and the quantity for the order.
   775  
   776      CausetAppend enhancement: cost estimation is not so good, join reorder. The inner subquery's result is only 300+ rows.
   777  */
   778  explain
   779  select
   780  	c_name,
   781  	c_custkey,
   782  	o_orderkey,
   783  	o_orderdate,
   784  	o_totalprice,
   785  	sum(l_quantity)
   786  from
   787  	customer,
   788  	orders,
   789  	lineitem
   790  where
   791  	o_orderkey in (
   792  		select
   793  			l_orderkey
   794  		from
   795  			lineitem
   796  		group by
   797  			l_orderkey having
   798  				sum(l_quantity) > 314
   799  	)
   800  	and c_custkey = o_custkey
   801  	and o_orderkey = l_orderkey
   802  group by
   803  	c_name,
   804  	c_custkey,
   805  	o_orderkey,
   806  	o_orderdate,
   807  	o_totalprice
   808  order by
   809  	o_totalprice desc,
   810  	o_orderdate
   811  limit 100;
   812  
   813  /*
   814      Q19 Discounted Revenue Query
   815      The Discounted Revenue Query reports the gross discounted revenue attributed to the sale of selected parts handled
   816      in a particular manner. This query is an example of code such as might be produced programmatically by a data
   817      mining tool.
   818  
   819      The Discounted Revenue query finds the gross discounted revenue for all orders for three different types of parts
   820      that were shipped by air and delivered in person. Parts are selected based on the combination of specific brands, a
   821      list of containers, and a range of sizes.
   822  */
   823  explain
   824  select
   825  	sum(l_extendedprice* (1 - l_discount)) as revenue
   826  from
   827  	lineitem,
   828  	part
   829  where
   830  	(
   831  		p_partkey = l_partkey
   832  		and p_brand = 'Brand#52'
   833  		and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
   834  		and l_quantity >= 4 and l_quantity <= 4 + 10
   835  		and p_size between 1 and 5
   836  		and l_shipmode in ('AIR', 'AIR REG')
   837  		and l_shipinstruct = 'DELIVER IN PERSON'
   838  	)
   839  	or
   840  	(
   841  		p_partkey = l_partkey
   842  		and p_brand = 'Brand#11'
   843  		and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
   844  		and l_quantity >= 18 and l_quantity <= 18 + 10
   845  		and p_size between 1 and 10
   846  		and l_shipmode in ('AIR', 'AIR REG')
   847  		and l_shipinstruct = 'DELIVER IN PERSON'
   848  	)
   849  	or
   850  	(
   851  		p_partkey = l_partkey
   852  		and p_brand = 'Brand#51'
   853  		and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
   854  		and l_quantity >= 29 and l_quantity <= 29 + 10
   855  		and p_size between 1 and 15
   856  		and l_shipmode in ('AIR', 'AIR REG')
   857  		and l_shipinstruct = 'DELIVER IN PERSON'
   858  	);
   859  
   860  /*
   861      Q20 Potential Part Promotion Query
   862      The Potential Part Promotion Query identifies suppliers in a particular nation having selected parts that may be candidates
   863      for a promotional offer.
   864  
   865      The Potential Part Promotion query identifies suppliers who have an excess of a given part available; an excess is
   866      defined to be more than 50% of the parts like the given part that the supplier shipped in a given year for a given
   867      nation. Only parts whose names share a certain naming convention are considered.
   868  */
   869  explain
   870  select
   871  	s_name,
   872  	s_address
   873  from
   874  	supplier,
   875  	nation
   876  where
   877  	s_suppkey in (
   878  		select
   879  			ps_suppkey
   880  		from
   881  			partsupp
   882  		where
   883  			ps_partkey in (
   884  				select
   885  					p_partkey
   886  				from
   887  					part
   888  				where
   889  					p_name like 'green%'
   890  			)
   891  			and ps_availqty > (
   892  				select
   893  					0.5 * sum(l_quantity)
   894  				from
   895  					lineitem
   896  				where
   897  					l_partkey = ps_partkey
   898  					and l_suppkey = ps_suppkey
   899  					and l_shiFIDelate >= '1993-01-01'
   900  					and l_shiFIDelate < date_add('1993-01-01', interval '1' year)
   901  			)
   902  	)
   903  	and s_nationkey = n_nationkey
   904  	and n_name = 'ALGERIA'
   905  order by
   906  	s_name;
   907  
   908  /*
   909      Q21 Suppliers Who Kept Orders Waiting Query
   910      This query identifies certain suppliers who were not able to ship required parts in a timely manner.
   911  
   912      The Suppliers Who Kept Orders Waiting query identifies suppliers, for a given nation, whose product was part of a
   913      multi-supplier order (with current status of 'F') where they were the only supplier who failed to meet the committed
   914      delivery date.
   915  */
   916  explain
   917  select
   918  	s_name,
   919  	count(*) as numwait
   920  from
   921  	supplier,
   922  	lineitem l1,
   923  	orders,
   924  	nation
   925  where
   926  	s_suppkey = l1.l_suppkey
   927  	and o_orderkey = l1.l_orderkey
   928  	and o_orderstatus = 'F'
   929  	and l1.l_receiptdate > l1.l_commitdate
   930  	and exists (
   931  		select
   932  			*
   933  		from
   934  			lineitem l2
   935  		where
   936  			l2.l_orderkey = l1.l_orderkey
   937  			and l2.l_suppkey <> l1.l_suppkey
   938  	)
   939  	and not exists (
   940  		select
   941  			*
   942  		from
   943  			lineitem l3
   944  		where
   945  			l3.l_orderkey = l1.l_orderkey
   946  			and l3.l_suppkey <> l1.l_suppkey
   947  			and l3.l_receiptdate > l3.l_commitdate
   948  	)
   949  	and s_nationkey = n_nationkey
   950  	and n_name = 'EGYPT'
   951  group by
   952  	s_name
   953  order by
   954  	numwait desc,
   955  	s_name
   956  limit 100;
   957  
   958  /*
   959      Q22 Global Sales Opportunity Query
   960      The Global Sales Opportunity Query identifies geographies where there are customers who may be likely to make a
   961      purchase.
   962  
   963      This query counts how many customers within a specific range of country codes have not placed orders for 7 years
   964      but who have a greater than average “positive” account balance. It also reflects the magnitude of that balance.
   965      Country code is defined as the first two characters of c_phone.
   966  */
   967  explain
   968  select
   969  	cntrycode,
   970  	count(*) as numcust,
   971  	sum(c_acctbal) as totacctbal
   972  from
   973  	(
   974  		select
   975  			substring(c_phone from 1 for 2) as cntrycode,
   976  			c_acctbal
   977  		from
   978  			customer
   979  		where
   980  			substring(c_phone from 1 for 2) in
   981  				('20', '40', '22', '30', '39', '42', '21')
   982  			and c_acctbal > (
   983  				select
   984  					avg(c_acctbal)
   985  				from
   986  					customer
   987  				where
   988  					c_acctbal > 0.00
   989  					and substring(c_phone from 1 for 2) in
   990  						('20', '40', '22', '30', '39', '42', '21')
   991  			)
   992  			and not exists (
   993  				select
   994  					*
   995  				from
   996  					orders
   997  				where
   998  					o_custkey = c_custkey
   999  			)
  1000  	) as custsale
  1001  group by
  1002  	cntrycode
  1003  order by
  1004  	cntrycode;