github.com/pingcap/tidb-lightning@v5.0.0-rc.0.20210428090220-84b649866577+incompatible/tidb-lightning.toml (about) 1 ### tidb-lightning configuartion 2 [lightning] 3 4 # Listening address for the HTTP server (set to empty string to disable). 5 # The server is responsible for the web interface, submitting import tasks, 6 # serving Prometheus metrics and exposing debug profiling data. 7 status-addr = ":8289" 8 9 # Toggle server mode. 10 # If "false", running Lightning will immediately start the import job, and exits 11 # after the job is finished. 12 # If "true", running Lightning will wait for user to submit tasks, via the HTTP API 13 # (`curl http://lightning-ip:8289/tasks --data-binary @tidb-lightning.toml`). 14 # The program will keep running and waiting for more tasks, until receiving the SIGINT signal. 15 server-mode = false 16 17 # check if the cluster satisfies the minimum requirement before starting 18 # check-requirements = true 19 20 # index-concurrency controls the maximum handled index concurrently while reading Mydumper SQL files. It can affect the tikv-importer disk usage. 21 index-concurrency = 2 22 # table-concurrency controls the maximum handled tables concurrently while reading Mydumper SQL files. It can affect the tikv-importer memory usage. 23 table-concurrency = 6 24 # region-concurrency changes the concurrency number of data. It is set to the number of logical CPU cores by default and needs no configuration. 25 # In mixed configuration, you can set it to 75% of the size of logical CPU cores. 26 # region-concurrency default to runtime.NumCPU() 27 # region-concurrency = 28 # io-concurrency controls the maximum IO concurrency 29 # Excessive IO concurrency causes an increase in IO latency because the disk 30 # internal buffer is frequently refreshed causing a cache miss. For different 31 # disk media, concurrency has different effects on IO latency, which can be 32 # adjusted according to monitoring. 33 # Ref: https://en.wikipedia.org/wiki/Disk_buffer#Read-ahead/read-behind 34 # io-concurrency = 5 35 36 # logging 37 level = "info" 38 # file path for log. If set to empty, log will be written to /tmp/lightning.log.{timestamp} 39 # Set to "-" to write logs to stdout. 40 file = "tidb-lightning.log" 41 max-size = 128 # MB 42 max-days = 28 43 max-backups = 14 44 45 [security] 46 # specifies certificates and keys for TLS connections within the cluster. 47 # public certificate of the CA. Leave empty to disable TLS. 48 # ca-path = "/path/to/ca.pem" 49 # public certificate of this service. 50 # cert-path = "/path/to/lightning.pem" 51 # private key of this service. 52 # key-path = "/path/to/lightning.key" 53 # If set to true, lightning will redact sensitive infomation in log. 54 # redact-info-log = false 55 56 [checkpoint] 57 # Whether to enable checkpoints. 58 # While importing, Lightning will record which tables have been imported, so even if Lightning or other component 59 # crashed, we could start from a known good state instead of redoing everything. 60 enable = true 61 # The schema name (database name) to store the checkpoints 62 schema = "tidb_lightning_checkpoint" 63 # Where to store the checkpoints. 64 # Set to "file" to store as a local file. 65 # Set to "mysql" to store into a remote MySQL-compatible database 66 driver = "file" 67 # The data source name (DSN) indicating the location of the checkpoint storage. 68 # For "file" driver, the DSN is a path. If not specified, Lightning would default to "/tmp/CHKPTSCHEMA.pb". 69 # For "mysql" driver, the DSN is a URL in the form "USER:PASS@tcp(HOST:PORT)/". 70 # If not specified, the TiDB server from the [tidb] section will be used to store the checkpoints. 71 #dsn = "/tmp/tidb_lightning_checkpoint.pb" 72 # Whether to keep the checkpoints after all data are imported. If false, the checkpoints will be deleted. The schema 73 # needs to be dropped manually, however. 74 #keep-after-success = false 75 76 [tikv-importer] 77 # Delivery backend, can be "importer", "local" or "tidb". 78 backend = "importer" 79 # Address of tikv-importer when the backend is 'importer' 80 addr = "127.0.0.1:8287" 81 # What to do on duplicated record (unique key conflict) when the backend is 'tidb'. Possible values are: 82 # - replace: replace the old record by the new record (i.e. insert rows using "REPLACE INTO") 83 # - ignore: keep the old record and ignore the new record (i.e. insert rows using "INSERT IGNORE INTO") 84 # - error: stop Lightning and report an error (i.e. insert rows using "INSERT INTO") 85 #on-duplicate = "replace" 86 # Maximum KV size of SST files produced in the 'local' backend. This should be the same as 87 # the TiKV region size to avoid further region splitting. The default value is 96 MiB. 88 #region-split-size = '96MiB' 89 # write key-values pairs to tikv batch size 90 #send-kv-pairs = 32768 91 # local storage directory used in "local" backend. 92 #sorted-kv-dir = "" 93 # Maximum size of the local storage directory. Periodically, Lightning will check if the total storage size exceeds this 94 # value. If so the "local" backend will block and immediately ingest the largest engines into the target TiKV until the 95 # usage falls below the specified capacity. 96 # Note that the disk-quota IS NOT A HARD LIMIT. There are chances that the usage overshoots the quota before it was 97 # detected. The overshoot is up to 6.3 GiB in default settings (8 open engines, 40 region-concurrency, check quota every 98 # minute). 99 # Setting the disk quota too low may cause engines to overlap each other too much and slow down import. 100 # This setting is ignored in "tidb" and "importer" backends. 101 # The default value of 0 means letting Lightning to automatically pick an appropriate capacity using the free disk space 102 # of sorted-kv-dir, subtracting the overshoot. 103 #disk-quota = 0 104 # range-concurrency controls the maximum ingest concurrently while writing to tikv, It can affect the network traffic. 105 # this default config can make full use of a 10Gib bandwidth network, if the network bandwidth is higher, you can increase 106 # this to gain better performance. Larger value will also increase the memory usage slightly. 107 #range-concurrency = 16 108 109 [mydumper] 110 # block size of file reading 111 read-block-size = '64KiB' 112 # minimum size (in terms of source data file) of each batch of import. 113 # Lightning will split a large table into multiple engine files according to this size. 114 #batch-size = '100GiB' 115 116 # Engine file needs to be imported sequentially. Due to table-concurrency, multiple engines will be 117 # imported nearly the same time, and this will create a queue and this wastes resources. Therefore, 118 # Lightning will slightly increase the size of the first few batches to properly distribute 119 # resources. The scale up is controlled by this parameter, which expresses the ratio of duration 120 # between the "import" and "write" steps with full concurrency. This can be calculated as the ratio 121 # (import duration / write duration) of a single table of size around 1 GB. The exact timing can be 122 # found in the log. If "import" is faster, the batch size anomaly is smaller, and a ratio of 123 # zero means uniform batch size. This value should be in the range (0 <= batch-import-ratio < 1). 124 batch-import-ratio = 0.75 125 126 # mydumper local source data directory 127 data-source-dir = "/tmp/export-20180328-200751" 128 # if no-schema is set true, lightning will get schema information from tidb-server directly without creating them. 129 no-schema=false 130 # the character set of the schema files; only supports one of: 131 # - utf8mb4: the schema files must be encoded as UTF-8, otherwise will emit errors 132 # - gb18030: the schema files must be encoded as GB-18030, otherwise will emit errors 133 # - auto: (default) automatically detect if the schema is UTF-8 or GB-18030, error if the encoding is neither 134 # - binary: do not try to decode the schema files 135 # note that the *data* files are always parsed as binary regardless of schema encoding. 136 #character-set = "auto" 137 138 # make table and database names case-sensitive, i.e. treats `DB`.`TBL` and `db`.`tbl` as two 139 # different objects. Currently only affects [[routes]]. 140 case-sensitive = false 141 142 # if strict-format is ture, lightning will use '\r' and '\n' to determine the end of each line. Make sure your data 143 # doesn't contain '\r' or '\n' if strict-format is enabled, or csv parser may parse incorrect result. 144 strict-format = false 145 # if strict-format is true, large CSV files will be split to multiple chunks, which Lightning 146 # will restore in parallel. The size of each chunk is `max-region-size`, where the default is 256 MiB. 147 #max-region-size = '256MiB' 148 149 # enable file router to use the default rules. By default, it will be set to true if no `mydumper.files` 150 # rule is provided, else false. You can explicitly set it to `true` to enable the default rules, they will 151 # take effect on files that on other rules are match. 152 # The default file routing rules' behavior is the same as former versions without this conf, that is: 153 # {schema}-schema-create.sql --> schema create sql file 154 # {schema}.{table}-schema.sql --> table schema sql file 155 # {schema}.{table}.{0001}.{sql|csv|parquet} --> data source file 156 # *-schema-view.sql, *-schema-trigger.sql, *-schema-post.sql --> ignore all the sql files end with these pattern 157 #default-file-rules = false 158 159 # only import tables if the wildcard rules are matched. See documention for details. 160 filter = ['*.*', '!mysql.*', '!sys.*', '!INFORMATION_SCHEMA.*', '!PERFORMANCE_SCHEMA.*', '!METRICS_SCHEMA.*', '!INSPECTION_SCHEMA.*'] 161 162 # CSV files are imported according to MySQL's LOAD DATA INFILE rules. 163 [mydumper.csv] 164 # separator between fields, can be one or more characters but empty. The value can 165 # not be prefix of `delimiter`. 166 separator = ',' 167 # string delimiter, can either be one or more characters or empty string. If not empty, 168 # the value should not be prefix of `separator` 169 delimiter = '"' 170 # whether the CSV files contain a header. If true, the first line will be skipped 171 header = true 172 # whether the CSV contains any NULL value. If true, all columns from CSV cannot be NULL. 173 not-null = false 174 # if non-null = false (i.e. CSV can contain NULL), fields equal to this value will be treated as NULL 175 null = '\N' 176 # whether to interpret backslash-escape inside strings. 177 backslash-escape = true 178 # if a line ends with a separator, remove it. 179 trim-last-separator = false 180 181 # file level routing rule that map file path to schema,table,type,sort-key 182 # The schema, table , type and key can be either a constant string or template strings 183 # supported by go regexp. 184 #[[mydumper.files]] 185 # pattern and path determine target source files, you can use either of them but not both. 186 # pattern is a regexp in Go syntax that can match one or more files in `source-dir`. 187 #pattern = '(?i)^(?:[^/]*/)(?P<schema>[^/.]+)\.([^/.]+)(?:\.([0-9]+))?\.(sql|csv)$' 188 # path is the target file path, both absolute file path or relative path to `mydump.source-dir` are supported. 189 # the path separator is always converted to '/', regardless of operating system. 190 #path = "schema_name.table_name.00001.sql" 191 # schema(database) name 192 #schema = "$schema" 193 # table name 194 #table = "$2" 195 # file type, can be one of schema-schema, table-schema, sql, csv 196 #type = "$4" 197 # an arbitrary string used to maintain the sort order among the files for row ID allocation and checkpoint resumption 198 #key = "$3" 199 200 # configuration for tidb server address(one is enough) and pd server address(one is enough). 201 [tidb] 202 host = "127.0.0.1" 203 port = 4000 204 user = "root" 205 password = "" 206 # table schema information is fetched from tidb via this status-port. 207 status-port = 10080 208 pd-addr = "127.0.0.1:2379" 209 # lightning uses some code of tidb(used as library), and the flag controls it's log level. 210 log-level = "error" 211 212 # sets maximum packet size allowed for SQL connections. 213 # set this to 0 to automatically fetch the `max_allowed_packet` variable from server on every connection. 214 # max-allowed-packet = 67_108_864 215 216 # whether to use TLS for SQL connections. valid values are: 217 # * "" - force TLS (same as "cluster") if [tidb.security] section is populated, otherwise same as "false" 218 # * "false" - disable TLS 219 # * "cluster" - force TLS and verify the server's certificate with the CA specified in the [tidb.security] section 220 # * "skip-verify" - force TLS but do not verify the server's certificate (insecure!) 221 # * "preferred" - same as "skip-verify", but if the server does not support TLS, fallback to unencrypted connection 222 # tls = "" 223 224 # set tidb session variables to speed up checksum/analyze table. 225 # see https://pingcap.com/docs/sql/statistics/#control-analyze-concurrency for the meaning of each setting 226 build-stats-concurrency = 20 227 distsql-scan-concurrency = 15 228 index-serial-scan-concurrency = 20 229 # checksum-table-concurrency controls the maximum checksum table tasks to run concurrently. 230 checksum-table-concurrency = 2 231 232 # specifies certificates and keys for TLS-enabled MySQL connections. 233 # defaults to a copy of the [security] section. 234 #[tidb.security] 235 # public certificate of the CA. Set to empty string to disable TLS. 236 # ca-path = "/path/to/ca.pem" 237 # public certificate of this service. Default to copy of `security.cert-path` 238 # cert-path = "/path/to/lightning.pem" 239 # private key of this service. Default to copy of `security.key-path` 240 # key-path = "/path/to/lightning.key" 241 242 # post-restore provide some options which will be executed after all kv data has been imported into the tikv cluster. 243 # the execution order are(if set true): checksum -> analyze 244 [post-restore] 245 # config whether to do `ADMIN CHECKSUM TABLE <table>` after restore finished for each table. 246 # valid options: 247 # - "off". do not do checksum. 248 # - "optional". do execute admin checksum, but will ignore any error if checksum fails. 249 # - "required". default option. do execute admin checksum, if checksum fails, lightning will exit with failure. 250 # NOTE: for backward compatibility, bool values `true` and `false` is also allowed for this field. `true` is 251 # equivalent to "required" and `false` is equivalent to "off". 252 checksum = "required" 253 # if set true, analyze will do `ANALYZE TABLE <table>` for each table. 254 # the config options is the same as 'post-restore.checksum'. 255 analyze = "optional" 256 # if set to true, compact will do level 1 compaction to tikv data. 257 # if this setting is missing, the default value is false. 258 level-1-compact = false 259 # if set true, compact will do full compaction to tikv data. 260 # if this setting is missing, the default value is false. 261 compact = false 262 # if set to true, lightning will run checksum and analyze for all tables together at last 263 post-process-at-last = true 264 265 # cron performs some periodic actions in background 266 [cron] 267 # duration between which Lightning will automatically refresh the import mode status. 268 # should be shorter than the corresponding TiKV setting 269 switch-mode = "5m" 270 # the duration which the an import progress will be printed to the log. 271 log-progress = "5m" 272 # the duration which tikv-importer.sorted-kv-dir-capacity is checked. 273 check-disk-quota = "1m"