storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/mint/run/core/s3select/sql_ops.py (about) 1 #!/usr/bin/env python 2 # -*- coding: utf-8 -*- 3 # MinIO Python Library for Amazon S3 Compatible Cloud Storage, 4 # (C) 2020 MinIO, Inc. 5 # 6 # Licensed under the Apache License, Version 2.0 (the "License"); 7 # you may not use this file except in compliance with the License. 8 # You may obtain a copy of the License at 9 # 10 # http://www.apache.org/licenses/LICENSE-2.0 11 # 12 # Unless required by applicable law or agreed to in writing, software 13 # distributed under the License is distributed on an "AS IS" BASIS, 14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 # See the License for the specific language governing permissions and 16 # limitations under the License. 17 18 import io 19 from datetime import datetime 20 21 from minio.select import (FILE_HEADER_INFO_NONE, JSON_TYPE_DOCUMENT, 22 QUOTE_FIELDS_ASNEEDED, CSVInputSerialization, 23 CSVOutputSerialization, JSONInputSerialization, 24 JSONOutputSerialization, SelectRequest) 25 26 from utils import generate_bucket_name, generate_object_name 27 28 29 def test_sql_expressions_custom_input_output(client, input_bytes, sql_input, 30 sql_output, tests, log_output): 31 bucket_name = generate_bucket_name() 32 object_name = generate_object_name() 33 34 log_output.args['total_tests'] = 0 35 log_output.args['total_success'] = 0 36 37 client.make_bucket(bucket_name) 38 try: 39 content = io.BytesIO(bytes(input_bytes, 'utf-8')) 40 client.put_object(bucket_name, object_name, content, len(input_bytes)) 41 42 for idx, (test_name, select_expression, expected_output) in enumerate(tests): 43 if select_expression == '': 44 continue 45 try: 46 log_output.args['total_tests'] += 1 47 sreq = SelectRequest( 48 select_expression, 49 sql_input, 50 sql_output, 51 request_progress=False 52 ) 53 54 data = client.select_object_content( 55 bucket_name, object_name, sreq) 56 57 # Get the records 58 records = io.BytesIO() 59 for d in data.stream(10*1024): 60 records.write(d) 61 got_output = records.getvalue() 62 63 if got_output != expected_output: 64 if type(expected_output) == datetime: 65 # Attempt to parse the date which will throw an exception for any issue 66 datetime.strptime(got_output.decode( 67 "utf-8").strip(), '%Y-%m-%dT%H:%M:%S.%f%z') 68 else: 69 raise ValueError('Test {}: data mismatch. Expected : {}. Received: {}.'.format( 70 idx+1, expected_output, got_output)) 71 72 log_output.args['total_success'] += 1 73 except Exception as err: 74 continue # TODO, raise instead 75 # raise Exception(err) 76 finally: 77 client.remove_object(bucket_name, object_name) 78 client.remove_bucket(bucket_name) 79 80 81 def test_sql_expressions(client, input_json_bytes, tests, log_output): 82 input_serialization = JSONInputSerialization( 83 compression_type="NONE", 84 json_type=JSON_TYPE_DOCUMENT, 85 ) 86 87 output_serialization = CSVOutputSerialization( 88 quote_fields=QUOTE_FIELDS_ASNEEDED) 89 90 test_sql_expressions_custom_input_output(client, input_json_bytes, 91 input_serialization, output_serialization, tests, log_output) 92 93 94 def test_sql_operators(client, log_output): 95 96 json_testfile = """{"id": 1, "name": "John", "age": 3} 97 {"id": 2, "name": "Elliot", "age": 4} 98 {"id": 3, "name": "Yves", "age": 5} 99 {"id": 4, "name": null, "age": 0} 100 """ 101 102 tests = [ 103 # Logical operators 104 ("AND", "select * from S3Object s where s.id = 1 AND s.name = 'John'", b'1,John,3\n'), 105 ("NOT", "select * from S3Object s where NOT s.id = 1", 106 b'2,Elliot,4\n3,Yves,5\n4,,0\n'), 107 ("OR", "select * from S3Object s where s.id = 1 OR s.id = 3", 108 b'1,John,3\n3,Yves,5\n'), 109 # Comparison Operators 110 ("<", "select * from S3Object s where s.age < 4", b'1,John,3\n4,,0\n'), 111 (">", "select * from S3Object s where s.age > 4", b'3,Yves,5\n'), 112 ("<=", "select * from S3Object s where s.age <= 4", 113 b'1,John,3\n2,Elliot,4\n4,,0\n'), 114 (">=", "select * from S3Object s where s.age >= 4", b'2,Elliot,4\n3,Yves,5\n'), 115 ("=", "select * from S3Object s where s.age = 4", b'2,Elliot,4\n'), 116 ("<>", "select * from S3Object s where s.age <> 4", 117 b'1,John,3\n3,Yves,5\n4,,0\n'), 118 ("!=", "select * from S3Object s where s.age != 4", 119 b'1,John,3\n3,Yves,5\n4,,0\n'), 120 ("BETWEEN", "select * from S3Object s where s.age BETWEEN 4 AND 5", 121 b'2,Elliot,4\n3,Yves,5\n'), 122 ("IN", "select * from S3Object s where s.age IN (3,5)", b'1,John,3\n3,Yves,5\n'), 123 # Pattern Matching Operators 124 ("LIKE_", "select * from S3Object s where s.name LIKE '_ves'", b'3,Yves,5\n'), 125 ("LIKE%", "select * from S3Object s where s.name LIKE 'Ell%t'", b'2,Elliot,4\n'), 126 # Unitary Operators 127 ("NULL", "select * from S3Object s where s.name IS NULL", b'4,,0\n'), 128 ("NOT_NULL", "select * from S3Object s where s.age IS NOT NULL", 129 b'1,John,3\n2,Elliot,4\n3,Yves,5\n4,,0\n'), 130 # Math Operators 131 ("+", "select * from S3Object s where s.age = 1+3 ", b'2,Elliot,4\n'), 132 ("-", "select * from S3Object s where s.age = 5-1 ", b'2,Elliot,4\n'), 133 ("*", "select * from S3Object s where s.age = 2*2 ", b'2,Elliot,4\n'), 134 ("%", "select * from S3Object s where s.age = 10%6 ", b'2,Elliot,4\n'), 135 ] 136 137 try: 138 test_sql_expressions(client, json_testfile, tests, log_output) 139 except Exception as select_err: 140 raise select_err 141 # raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err)) 142 # pass 143 144 # Test passes 145 print(log_output.json_report()) 146 147 148 def test_sql_operators_precedence(client, log_output): 149 150 json_testfile = """{"id": 1, "name": "Eric"}""" 151 152 tests = [ 153 ("-_1", "select -3*3 from S3Object", b'-9\n'), 154 ("*", "select 10-3*2 from S3Object", b'4\n'), 155 ("/", "select 13-10/5 from S3Object", b'11\n'), 156 ("%", "select 13-10%5 from S3Object", b'13\n'), 157 ("+", "select 1+1*3 from S3Object", b'4\n'), 158 ("-_2", "select 1-1*3 from S3Object", b'-2\n'), 159 ("=", "select * from S3Object as s where s.id = 13-12", b'1,Eric\n'), 160 ("<>", "select * from S3Object as s where s.id <> 1-1", b'1,Eric\n'), 161 ("NOT", "select * from S3Object where false OR NOT false", b'1,Eric\n'), 162 ("AND", "select * from S3Object where true AND true OR false ", b'1,Eric\n'), 163 ("OR", "select * from S3Object where false OR NOT false", b'1,Eric\n'), 164 ("IN", "select * from S3Object as s where s.id <> -1 AND s.id IN (1,2,3)", b'1,Eric\n'), 165 ("BETWEEN", "select * from S3Object as s where s.id <> -1 AND s.id BETWEEN -1 AND 3", b'1,Eric\n'), 166 ("LIKE", "select * from S3Object as s where s.id <> -1 AND s.name LIKE 'E%'", b'1,Eric\n'), 167 ] 168 169 try: 170 test_sql_expressions(client, json_testfile, tests, log_output) 171 except Exception as select_err: 172 raise select_err 173 # raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err)) 174 # pass 175 176 # Test passes 177 print(log_output.json_report()) 178 179 180 def test_sql_functions_agg_cond_conv(client, log_output): 181 182 json_testfile = """{"id": 1, "name": "John", "age": 3} 183 {"id": 2, "name": "Elliot", "age": 4} 184 {"id": 3, "name": "Yves", "age": 5} 185 {"id": 4, "name": "Christine", "age": null} 186 {"id": 5, "name": "Eric", "age": 0} 187 """ 188 tests = [ 189 # Aggregate functions 190 ("COUNT", "select count(*) from S3Object s", b'5\n'), 191 ("AVG", "select avg(s.age) from S3Object s", b'3\n'), 192 ("MAX", "select max(s.age) from S3Object s", b'5\n'), 193 ("MIN", "select min(s.age) from S3Object s", b'0\n'), 194 ("SUM", "select sum(s.age) from S3Object s", b'12\n'), 195 # Conditional functions 196 ("COALESCE", "SELECT COALESCE(s.age, 99) FROM S3Object s", b'3\n4\n5\n99\n0\n'), 197 ("NULLIF", "SELECT NULLIF(s.age, 0) FROM S3Object s", b'3\n4\n5\n\n\n'), 198 # Conversion functions 199 ("CAST", "SELECT CAST(s.age AS FLOAT) FROM S3Object s", 200 b'3.0\n4.0\n5.0\n\n0.0\n'), 201 202 ] 203 204 try: 205 test_sql_expressions(client, json_testfile, tests, log_output) 206 except Exception as select_err: 207 raise select_err 208 # raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err)) 209 # pass 210 211 # Test passes 212 print(log_output.json_report()) 213 214 215 def test_sql_functions_date(client, log_output): 216 217 json_testfile = """ 218 {"id": 1, "name": "John", "datez": "2017-01-02T03:04:05.006+07:30"} 219 """ 220 221 tests = [ 222 # DATE_ADD 223 ("DATE_ADD_1", "select DATE_ADD(year, 5, TO_TIMESTAMP(s.datez)) from S3Object as s", 224 b'2022-01-02T03:04:05.006+07:30\n'), 225 ("DATE_ADD_2", "select DATE_ADD(month, 1, TO_TIMESTAMP(s.datez)) from S3Object as s", 226 b'2017-02-02T03:04:05.006+07:30\n'), 227 ("DATE_ADD_3", "select DATE_ADD(day, -1, TO_TIMESTAMP(s.datez)) from S3Object as s", 228 b'2017-01-01T03:04:05.006+07:30\n'), 229 ("DATE_ADD_4", "select DATE_ADD(hour, 1, TO_TIMESTAMP(s.datez)) from S3Object as s", 230 b'2017-01-02T04:04:05.006+07:30\n'), 231 ("DATE_ADD_5", "select DATE_ADD(minute, 5, TO_TIMESTAMP(s.datez)) from S3Object as s", 232 b'2017-01-02T03:09:05.006+07:30\n'), 233 ("DATE_ADD_6", "select DATE_ADD(second, 5, TO_TIMESTAMP(s.datez)) from S3Object as s", 234 b'2017-01-02T03:04:10.006+07:30\n'), 235 # DATE_DIFF 236 ("DATE_DIFF_1", "select DATE_DIFF(year, TO_TIMESTAMP(s.datez), TO_TIMESTAMP('2011-01-01T')) from S3Object as s", b'-6\n'), 237 ("DATE_DIFF_2", "select DATE_DIFF(month, TO_TIMESTAMP(s.datez), TO_TIMESTAMP('2011T')) from S3Object as s", b'-72\n'), 238 ("DATE_DIFF_3", "select DATE_DIFF(day, TO_TIMESTAMP(s.datez), TO_TIMESTAMP('2010-01-02T')) from S3Object as s", b'-2556\n'), 239 # EXTRACT 240 ("EXTRACT_1", "select EXTRACT(year FROM TO_TIMESTAMP(s.datez)) from S3Object as s", b'2017\n'), 241 ("EXTRACT_2", "select EXTRACT(month FROM TO_TIMESTAMP(s.datez)) from S3Object as s", b'1\n'), 242 ("EXTRACT_3", "select EXTRACT(hour FROM TO_TIMESTAMP(s.datez)) from S3Object as s", b'3\n'), 243 ("EXTRACT_4", "select EXTRACT(minute FROM TO_TIMESTAMP(s.datez)) from S3Object as s", b'4\n'), 244 ("EXTRACT_5", "select EXTRACT(timezone_hour FROM TO_TIMESTAMP(s.datez)) from S3Object as s", b'7\n'), 245 ("EXTRACT_6", "select EXTRACT(timezone_minute FROM TO_TIMESTAMP(s.datez)) from S3Object as s", b'30\n'), 246 # TO_STRING 247 ("TO_STRING_1", "select TO_STRING(TO_TIMESTAMP(s.datez), 'MMMM d, y') from S3Object as s", 248 b'"January 2, 2017"\n'), 249 ("TO_STRING_2", "select TO_STRING(TO_TIMESTAMP(s.datez), 'MMM d, yyyy') from S3Object as s", b'"Jan 2, 2017"\n'), 250 ("TO_STRING_3", "select TO_STRING(TO_TIMESTAMP(s.datez), 'M-d-yy') from S3Object as s", b'1-2-17\n'), 251 ("TO_STRING_4", "select TO_STRING(TO_TIMESTAMP(s.datez), 'MM-d-y') from S3Object as s", b'01-2-2017\n'), 252 ("TO_STRING_5", "select TO_STRING(TO_TIMESTAMP(s.datez), 'MMMM d, y h:m a') from S3Object as s", 253 b'"January 2, 2017 3:4 AM"\n'), 254 ("TO_STRING_6", "select TO_STRING(TO_TIMESTAMP(s.datez), 'y-MM-dd''T''H:m:ssX') from S3Object as s", 255 b'2017-01-02T3:4:05+0730\n'), 256 ("TO_STRING_7", "select TO_STRING(TO_TIMESTAMP(s.datez), 'y-MM-dd''T''H:m:ssX') from S3Object as s", 257 b'2017-01-02T3:4:05+0730\n'), 258 ("TO_STRING_8", "select TO_STRING(TO_TIMESTAMP(s.datez), 'y-MM-dd''T''H:m:ssXXXX') from S3Object as s", 259 b'2017-01-02T3:4:05+0730\n'), 260 ("TO_STRING_9", "select TO_STRING(TO_TIMESTAMP(s.datez), 'y-MM-dd''T''H:m:ssXXXXX') from S3Object as s", 261 b'2017-01-02T3:4:05+07:30\n'), 262 ("TO_TIMESTAMP", "select TO_TIMESTAMP(s.datez) from S3Object as s", 263 b'2017-01-02T03:04:05.006+07:30\n'), 264 ("UTCNOW", "select UTCNOW() from S3Object", datetime(1, 1, 1)), 265 266 ] 267 268 try: 269 test_sql_expressions(client, json_testfile, tests, log_output) 270 except Exception as select_err: 271 raise select_err 272 # raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err)) 273 # pass 274 275 # Test passes 276 print(log_output.json_report()) 277 278 279 def test_sql_functions_string(client, log_output): 280 281 json_testfile = """ 282 {"id": 1, "name": "John"} 283 {"id": 2, "name": " \tfoobar\t "} 284 {"id": 3, "name": "1112211foobar22211122"} 285 """ 286 287 tests = [ 288 # CHAR_LENGTH 289 ("CHAR_LENGTH", "select CHAR_LENGTH(s.name) from S3Object as s", b'4\n24\n21\n'), 290 ("CHARACTER_LENGTH", 291 "select CHARACTER_LENGTH(s.name) from S3Object as s", b'4\n24\n21\n'), 292 # LOWER 293 ("LOWER", "select LOWER(s.name) from S3Object as s where s.id= 1", b'john\n'), 294 # SUBSTRING 295 ("SUBSTRING_1", "select SUBSTRING(s.name FROM 2) from S3Object as s where s.id = 1", b'ohn\n'), 296 ("SUBSTRING_2", "select SUBSTRING(s.name FROM 2 FOR 2) from S3Object as s where s.id = 1", b'oh\n'), 297 ("SUBSTRING_3", "select SUBSTRING(s.name FROM -1 FOR 2) from S3Object as s where s.id = 1", b'\n'), 298 # TRIM 299 ("TRIM_1", "select TRIM(s.name) from S3Object as s where s.id = 2", b'\tfoobar\t\n'), 300 ("TRIM_2", "select TRIM(LEADING FROM s.name) from S3Object as s where s.id = 2", 301 b'\tfoobar\t \n'), 302 ("TRIM_3", "select TRIM(TRAILING FROM s.name) from S3Object as s where s.id = 2", 303 b' \tfoobar\t\n'), 304 ("TRIM_4", "select TRIM(BOTH FROM s.name) from S3Object as s where s.id = 2", b'\tfoobar\t\n'), 305 ("TRIM_5", "select TRIM(BOTH '12' FROM s.name) from S3Object as s where s.id = 3", b'foobar\n'), 306 # UPPER 307 ("UPPER", "select UPPER(s.name) from S3Object as s where s.id= 1", b'JOHN\n'), 308 ] 309 310 try: 311 test_sql_expressions(client, json_testfile, tests, log_output) 312 except Exception as select_err: 313 raise select_err 314 # raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err)) 315 # pass 316 317 # Test passes 318 print(log_output.json_report()) 319 320 321 def test_sql_datatypes(client, log_output): 322 json_testfile = """ 323 {"name": "John"} 324 """ 325 tests = [ 326 ("bool", "select CAST('true' AS BOOL) from S3Object", b'true\n'), 327 ("int", "select CAST('13' AS INT) from S3Object", b'13\n'), 328 ("integer", "select CAST('13' AS INTEGER) from S3Object", b'13\n'), 329 ("string", "select CAST(true AS STRING) from S3Object", b'true\n'), 330 ("float", "select CAST('13.3' AS FLOAT) from S3Object", b'13.3\n'), 331 ("decimal", "select CAST('14.3' AS FLOAT) from S3Object", b'14.3\n'), 332 ("numeric", "select CAST('14.3' AS FLOAT) from S3Object", b'14.3\n'), 333 ("timestamp", "select CAST('2007-04-05T14:30Z' AS TIMESTAMP) from S3Object", 334 b'2007-04-05T14:30Z\n'), 335 ] 336 337 try: 338 test_sql_expressions(client, json_testfile, tests, log_output) 339 except Exception as select_err: 340 raise select_err 341 # raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err)) 342 # pass 343 344 # Test passes 345 print(log_output.json_report()) 346 347 348 def test_sql_select(client, log_output): 349 350 json_testfile = """{"id": 1, "created": "June 27", "modified": "July 6" } 351 {"id": 2, "Created": "June 28", "Modified": "July 7", "Cast": "Random Date" }""" 352 tests = [ 353 ("select_1", "select * from S3Object", 354 b'1,June 27,July 6\n2,June 28,July 7,Random Date\n'), 355 ("select_2", "select * from S3Object s", 356 b'1,June 27,July 6\n2,June 28,July 7,Random Date\n'), 357 ("select_3", "select * from S3Object as s", 358 b'1,June 27,July 6\n2,June 28,July 7,Random Date\n'), 359 ("select_4", "select s.line from S3Object as s", b'\n\n'), 360 ("select_5", 'select s."Created" from S3Object as s', b'\nJune 28\n'), 361 ("select_5", 'select s."Cast" from S3Object as s', b'\nRandom Date\n'), 362 ("where", 'select s.created from S3Object as s', b'June 27\nJune 28\n'), 363 ("limit", 'select * from S3Object as s LIMIT 1', b'1,June 27,July 6\n'), 364 ] 365 366 try: 367 test_sql_expressions(client, json_testfile, tests, log_output) 368 except Exception as select_err: 369 raise select_err 370 # raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err)) 371 # pass 372 373 # Test passes 374 print(log_output.json_report()) 375 376 377 def test_sql_select_json(client, log_output): 378 json_testcontent = """{ "Rules": [ {"id": "1"}, {"expr": "y > x"}, {"id": "2", "expr": "z = DEBUG"} ]} 379 { "created": "June 27", "modified": "July 6" } 380 """ 381 tests = [ 382 ("select_1", "SELECT id FROM S3Object[*].Rules[*].id", 383 b'{"id":"1"}\n{}\n{"id":"2"}\n{}\n'), 384 ("select_2", 385 "SELECT id FROM S3Object[*].Rules[*].id WHERE id IS NOT MISSING", b'{"id":"1"}\n{"id":"2"}\n'), 386 ("select_3", "SELECT d.created, d.modified FROM S3Object[*] d", 387 b'{}\n{"created":"June 27","modified":"July 6"}\n'), 388 ("select_4", "SELECT _1.created, _1.modified FROM S3Object[*]", 389 b'{}\n{"created":"June 27","modified":"July 6"}\n'), 390 ("select_5", 391 "Select s.rules[1].expr from S3Object s", b'{"expr":"y > x"}\n{}\n'), 392 ] 393 394 input_serialization = JSONInputSerialization(json_type=JSON_TYPE_DOCUMENT) 395 output_serialization = JSONOutputSerialization() 396 try: 397 test_sql_expressions_custom_input_output(client, json_testcontent, 398 input_serialization, output_serialization, tests, log_output) 399 except Exception as select_err: 400 raise select_err 401 # raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err)) 402 # pass 403 404 # Test passes 405 print(log_output.json_report()) 406 407 408 def test_sql_select_csv_no_header(client, log_output): 409 json_testcontent = """val1,val2,val3 410 val4,val5,val6 411 """ 412 tests = [ 413 ("select_1", "SELECT s._2 FROM S3Object as s", b'val2\nval5\n'), 414 ] 415 416 input_serialization = CSVInputSerialization( 417 file_header_info=FILE_HEADER_INFO_NONE, 418 allow_quoted_record_delimiter="FALSE", 419 ) 420 output_serialization = CSVOutputSerialization() 421 try: 422 test_sql_expressions_custom_input_output(client, json_testcontent, 423 input_serialization, output_serialization, tests, log_output) 424 except Exception as select_err: 425 raise select_err 426 # raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err)) 427 # pass 428 429 # Test passes 430 print(log_output.json_report())