#! /usr/bin/env expect -f
#
# Origin: github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cli/interactive_tests/test_sql_mem_monitor.tcl

source [file join [file dirname $argv0] common.tcl]

# Purpose: verify that the SQL memory monitor keeps the server alive
# when a client issues a query that needs a "large" amount of memory.
#
# The scenario has four stages:
#   1. measure a baseline for the server's memory usage;
#   2. cap memory with 'ulimit' and restart the server under that cap;
#   3. show that, while the monitor does not constrain the query, the
#      server really does crash;
#   4. give the monitor a small budget and show that the server now
#      rejects the query instead of crashing.
# Stage 3 exists to prove that the query used in stage 4 genuinely
# pushes memory consumption beyond the 'ulimit' cap.

# Builds the shell command line (terminated by a carriage return) that
# runs a single-node server in the foreground.
#   bin     - the cockroach command, as received in $argv
#   sql_mem - value handed to --max-sql-memory
proc mem_test_server_cmd {bin sql_mem} {
    return "$bin start-single-node --insecure --max-sql-memory=$sql_mem --no-redirect-stderr -s=path=logs/db \r"
}

# Builds the memory-hungry statement (terminated by a carriage return):
# a 4-way cross join of generate_series(1,n) with itself.
#   n - upper bound passed to generate_series
proc mem_test_cross_join {n} {
    return "with a as (select * from generate_series(1,${n})) select * from a as a, a as b, a as c, a as d limit 10;\r"
}

# --- Stage 1: baseline measurement -----------------------------------

# Bring the cluster up once and take it down again so it is initialized.
start_server $argv
stop_server $argv

# A second start gives us a process with fresh memory state.
start_server $argv

# Issue one request so the data is known to be there and the process
# has reached its baseline memory consumption.
system "echo 'select * from system.information_schema.columns;' | $argv sql >/dev/null"

# Record the virtual memory size that 'ps' reports for the server.
set server_pid [exec cat server_pid]
set baseline_vsz [exec ps --no-headers o vsz -p $server_pid]

# The measurement is done; stop this server.
stop_server $argv

# --- Stage 2: restart under a memory cap -----------------------------

# 'ulimit' is a shell builtin, so the server is run from a shell.
spawn /bin/bash
set shell_sid $spawn_id
send "PS1=':''/# '\r"
eexpect ":/# "

# Cap virtual memory at one and a half times the baseline.
set vmem_cap [expr {3*$baseline_vsz/2}]
send "ulimit -v $vmem_cap\r"
eexpect ":/# "

# Run the server in the foreground of the capped shell.
send [mem_test_server_cmd $argv 25%]
eexpect "restarted pre-existing node"
sleep 1

# Open an interactive SQL client.
spawn $argv sql
set client_sid $spawn_id
eexpect root@

# Sanity-check the client with a trivial statement.
send "select 1;\r"
eexpect "1 row"
eexpect root@

# --- Stage 3: without effective monitoring the server dies -----------

start_test "Ensure that memory over-allocation without monitoring crashes the server"
# Run a large-ish query from the client: a 4-way cross join of
# generate_series(1,10000) with itself.
send "set database=system;\r"
eexpect root@
# Turn query distribution off to force in-memory computation.
send "set distsql=off;\r"
eexpect SET
send [mem_test_cross_join 10000]

# The server, running in the shell, must report an allocation failure.
set spawn_id $shell_sid
# The message is either "out of memory" (Go) or "cannot allocate memory" (C++).
expect {
    "out of memory" {}
    "cannot allocate memory" {}
    "std::bad_alloc" {}
    "Resource temporarily unavailable" {}
    # TODO(peter): Pebble's behavior is to segfault on failed manual
    # allocations. We should provide a cleaner signal.
    "signal SIGSEGV" {}
    timeout { handle_timeout "memory allocation error" }
}
eexpect ":/# "

# The client, in turn, must see its connection break.
set spawn_id $client_sid
eexpect "bad connection"
eexpect root@
end_test

# --- Stage 4: with a small SQL budget the server survives ------------

start_test "Ensure that memory monitoring prevents crashes"
# Start the server again, now with a relatively low SQL memory limit.
set spawn_id $shell_sid
send [mem_test_server_cmd $argv 1000K]
eexpect "restarted pre-existing node"
sleep 2

# Retry the large query from the same client.
set spawn_id $client_sid
send "select 1;\r"
eexpect root@
send "set database=system;\r"
eexpect root@
send [mem_test_cross_join 100000]
eexpect "memory budget exceeded"
eexpect root@

# One more statement must succeed -- this shows the server has survived.
send "select 1;\r"
eexpect "1 row"
eexpect root@
end_test

# --- Teardown --------------------------------------------------------

# Close the SQL client.
interrupt
eexpect eof

# Interrupt the foreground server twice, then leave the shell.
set spawn_id $shell_sid
interrupt
interrupt
eexpect ":/# "
send "exit\r"
eexpect eof