github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/tidb-lightning.toml (about) 1 ### tidb-lightning configuration 2 [lightning] 3 4 # Listening address for the HTTP server (set to empty string to disable). 5 # The server is responsible for the web interface, submitting import tasks, 6 # serving Prometheus metrics and exposing debug profiling data. 7 status-addr = ":8289" 8 9 # Toggle server mode. 10 # If "false", running Lightning will immediately start the import job, and exits 11 # after the job is finished. 12 # If "true", running Lightning will wait for user to submit tasks, via the HTTP API 13 # (`curl http://lightning-ip:8289/tasks --data-binary @tidb-lightning.toml`). 14 # The program will keep running and waiting for more tasks, until receiving the SIGINT signal. 15 server-mode = false 16 17 # check if the cluster satisfies the minimum requirement before starting 18 # check-requirements = true 19 20 # index-concurrency controls the maximum handled index concurrently while reading Mydumper SQL files. It can affect the tikv-importer disk usage. 21 index-concurrency = 2 22 # table-concurrency controls the maximum handled tables concurrently while reading Mydumper SQL files. It can affect the tikv-importer memory usage. 23 table-concurrency = 6 24 # region-concurrency changes the concurrency number of data. It is set to the number of logical CPU cores by default and needs no configuration. 25 # In mixed configuration, you can set it to 75% of the size of logical CPU cores. 26 # region-concurrency defaults to runtime.NumCPU() 27 # region-concurrency = 28 # io-concurrency controls the maximum IO concurrency 29 # Excessive IO concurrency causes an increase in IO latency because the disk 30 # internal buffer is frequently refreshed causing a cache miss. For different 31 # disk media, concurrency has different effects on IO latency, which can be 32 # adjusted according to monitoring. 
33 # Ref: https://en.wikipedia.org/wiki/Disk_buffer#Read-ahead/read-behind 34 # io-concurrency = 5 35 # meta-schema-name is the schema name (database name) used to store lightning task and table metadata. 36 # the meta schema and tables are stored in the target tidb cluster. 37 # this config is only used in "local" and "importer" backend. 38 # meta-schema-name = "lightning_metadata" 39 40 # logging 41 level = "info" 42 # file path for log. If set to empty, log will be written to /tmp/lightning.log.{timestamp} 43 # Set to "-" to write logs to stdout. 44 file = "tidb-lightning.log" 45 max-size = 128 # MB 46 max-days = 28 47 max-backups = 14 48 49 [security] 50 # specifies certificates and keys for TLS connections within the cluster. 51 # public certificate of the CA. Leave empty to disable TLS. 52 # ca-path = "/path/to/ca.pem" 53 # public certificate of this service. 54 # cert-path = "/path/to/lightning.pem" 55 # private key of this service. 56 # key-path = "/path/to/lightning.key" 57 # If set to true, lightning will redact sensitive information in log. 58 # redact-info-log = false 59 60 [checkpoint] 61 # Whether to enable checkpoints. 62 # While importing, Lightning will record which tables have been imported, so even if Lightning or other component 63 # crashed, we could start from a known good state instead of redoing everything. 64 enable = true 65 # The schema name (database name) to store the checkpoints 66 schema = "tidb_lightning_checkpoint" 67 # Where to store the checkpoints. 68 # Set to "file" to store as a local file. 69 # Set to "mysql" to store into a remote MySQL-compatible database 70 driver = "file" 71 # The data source name (DSN) indicating the location of the checkpoint storage. 72 # For "file" driver, the DSN is a path. If not specified, Lightning would default to "/tmp/CHKPTSCHEMA.pb". 73 # For "mysql" driver, the DSN is a URL in the form "USER:PASS@tcp(HOST:PORT)/". 74 # If not specified, the TiDB server from the [tidb] section will be used to store the checkpoints. 
75 #dsn = "/tmp/tidb_lightning_checkpoint.pb" 76 # Whether to keep the checkpoints after all data are imported. If false, the checkpoints will be deleted. The schema 77 # needs to be dropped manually, however. 78 #keep-after-success = false 79 80 [tikv-importer] 81 # Delivery backend, can be "importer", "local" or "tidb". 82 backend = "importer" 83 # Address of tikv-importer when the backend is 'importer' 84 addr = "127.0.0.1:8287" 85 # What to do on duplicated record (unique key conflict) when the backend is 'tidb'. Possible values are: 86 # - replace: replace the old record by the new record (i.e. insert rows using "REPLACE INTO") 87 # - ignore: keep the old record and ignore the new record (i.e. insert rows using "INSERT IGNORE INTO") 88 # - error: stop Lightning and report an error (i.e. insert rows using "INSERT INTO") 89 #on-duplicate = "replace" 90 # Maximum KV size of SST files produced in the 'local' backend. This should be the same as 91 # the TiKV region size to avoid further region splitting. The default value is 96 MiB. 92 #region-split-size = '96MiB' 93 # write key-values pairs to tikv batch size 94 #send-kv-pairs = 32768 95 # local storage directory used in "local" backend. 96 #sorted-kv-dir = "" 97 # Maximum size of the local storage directory. Periodically, Lightning will check if the total storage size exceeds this 98 # value. If so the "local" backend will block and immediately ingest the largest engines into the target TiKV until the 99 # usage falls below the specified capacity. 100 # Note that the disk-quota IS NOT A HARD LIMIT. There are chances that the usage overshoots the quota before it was 101 # detected. The overshoot is up to 6.3 GiB in default settings (8 open engines, 40 region-concurrency, check quota every 102 # minute). 103 # Setting the disk quota too low may cause engines to overlap each other too much and slow down import. 104 # This setting is ignored in "tidb" and "importer" backends. 
105 # The default value of 0 means letting Lightning automatically pick an appropriate capacity using the free disk space 106 # of sorted-kv-dir, subtracting the overshoot. 107 #disk-quota = 0 108 # range-concurrency controls the maximum ingest concurrently while writing to tikv. It can affect the network traffic. 109 # this default config can make full use of a 10Gib bandwidth network, if the network bandwidth is higher, you can increase 110 # this to gain better performance. Larger value will also increase the memory usage slightly. 111 #range-concurrency = 16 112 # The memory cache used in local backend for each engine. The memory usage during write-KV phase by the engines is bound 113 # by (index-concurrency + table-concurrency) * engine-mem-cache-size. 114 #engine-mem-cache-size = '512MiB' 115 # The memory cache used for local sorting during the encode-KV phase before flushing into the engines. The memory 116 # usage is bound by region-concurrency * local-writer-mem-cache-size. 117 #local-writer-mem-cache-size = '128MiB' 118 119 [mydumper] 120 # block size of file reading 121 read-block-size = '64KiB' 122 # minimum size (in terms of source data file) of each batch of import. 123 # Lightning will split a large table into multiple engine files according to this size. 124 #batch-size = '100GiB' 125 126 # Engine file needs to be imported sequentially. Due to table-concurrency, multiple engines will be 127 # imported nearly the same time, and this will create a queue and this wastes resources. Therefore, 128 # Lightning will slightly increase the size of the first few batches to properly distribute 129 # resources. The scale up is controlled by this parameter, which expresses the ratio of duration 130 # between the "import" and "write" steps with full concurrency. This can be calculated as the ratio 131 # (import duration / write duration) of a single table of size around 1 GB. The exact timing can be 132 # found in the log. 
If "import" is faster, the batch size anomaly is smaller, and a ratio of 133 # zero means uniform batch size. This value should be in the range (0 <= batch-import-ratio < 1). 134 batch-import-ratio = 0.75 135 136 # mydumper local source data directory 137 data-source-dir = "/tmp/export-20180328-200751" 138 # if no-schema is set true, lightning will get schema information from tidb-server directly without creating them. 139 no-schema=false 140 # the character set of the schema files; only supports one of: 141 # - utf8mb4: the schema files must be encoded as UTF-8, otherwise will emit errors 142 # - gb18030: the schema files must be encoded as GB-18030, otherwise will emit errors 143 # - auto: (default) automatically detect if the schema is UTF-8 or GB-18030, error if the encoding is neither 144 # - binary: do not try to decode the schema files 145 # note that the *data* files are always parsed as binary regardless of schema encoding. 146 #character-set = "auto" 147 148 # make table and database names case-sensitive, i.e. treats `DB`.`TBL` and `db`.`tbl` as two 149 # different objects. Currently only affects [[routes]]. 150 case-sensitive = false 151 152 # if strict-format is true, lightning will use '\r' and '\n' to determine the end of each line. Make sure your data 153 # doesn't contain '\r' or '\n' if strict-format is enabled, or csv parser may parse incorrect result. 154 strict-format = false 155 # if strict-format is true, large CSV files will be split to multiple chunks, which Lightning 156 # will restore in parallel. The size of each chunk is `max-region-size`, where the default is 256 MiB. 157 #max-region-size = '256MiB' 158 159 # enable file router to use the default rules. By default, it will be set to true if no `mydumper.files` 160 # rule is provided, else false. You can explicitly set it to `true` to enable the default rules, they will 161 # take effect on files that no other rules match. 
162 # The default file routing rules' behavior is the same as former versions without this conf, that is: 163 # {schema}-schema-create.sql --> schema create sql file 164 # {schema}.{table}-schema.sql --> table schema sql file 165 # {schema}.{table}.{0001}.{sql|csv|parquet} --> data source file 166 # *-schema-view.sql, *-schema-trigger.sql, *-schema-post.sql --> ignore all the sql files end with these pattern 167 #default-file-rules = false 168 169 # only import tables if the wildcard rules are matched. See documentation for details. 170 filter = ['*.*', '!mysql.*', '!sys.*', '!INFORMATION_SCHEMA.*', '!PERFORMANCE_SCHEMA.*', '!METRICS_SCHEMA.*', '!INSPECTION_SCHEMA.*'] 171 172 # CSV files are imported according to MySQL's LOAD DATA INFILE rules. 173 [mydumper.csv] 174 # separator between fields, can be one or more characters but not empty. The value can 175 # not be prefix of `delimiter`. 176 separator = ',' 177 # string delimiter, can either be one or more characters or empty string. If not empty, 178 # the value should not be prefix of `separator` 179 delimiter = '"' 180 # row terminator, can be an empty string or not. 181 # An empty string means both \r and \n are considered a terminator. This is the normal CSV behavior. 182 # A non-empty string means the row ends only when such terminator is matched exactly (or reaching the end of file). 183 # If the file content matches both the terminator and separator, the terminator takes precedence. 184 terminator = '' 185 # whether the CSV files contain a header. If true, the first line will be skipped 186 header = true 187 # whether the CSV contains any NULL value. If true, all columns from CSV cannot be NULL. 188 not-null = false 189 # if not-null = false (i.e. CSV can contain NULL), fields equal to this value will be treated as NULL 190 null = '\N' 191 # whether to interpret backslash-escape inside strings. 192 backslash-escape = true 193 # if a line ends with a separator, remove it. 
194 # deprecated - consider using the terminator option instead. 195 #trim-last-separator = false 196 197 # file level routing rule that maps file path to schema,table,type,sort-key 198 # The schema, table , type and key can be either a constant string or template strings 199 # supported by go regexp. 200 #[[mydumper.files]] 201 # pattern and path determine target source files, you can use either of them but not both. 202 # pattern is a regexp in Go syntax that can match one or more files in `source-dir`. 203 #pattern = '(?i)^(?:[^/]*/)(?P<schema>[^/.]+)\.([^/.]+)(?:\.([0-9]+))?\.(sql|csv)$' 204 # path is the target file path, both absolute file path and relative path to `mydump.source-dir` are supported. 205 # the path separator is always converted to '/', regardless of operating system. 206 #path = "schema_name.table_name.00001.sql" 207 # schema(database) name 208 #schema = "$schema" 209 # table name 210 #table = "$2" 211 # file type, can be one of schema-schema, table-schema, sql, csv 212 #type = "$4" 213 # an arbitrary string used to maintain the sort order among the files for row ID allocation and checkpoint resumption 214 #key = "$3" 215 216 # configuration for tidb server address(one is enough) and pd server address(one is enough). 217 [tidb] 218 host = "127.0.0.1" 219 port = 4000 220 user = "root" 221 password = "" 222 # table schema information is fetched from tidb via this status-port. 223 status-port = 10080 224 pd-addr = "127.0.0.1:2379" 225 # lightning uses some code of tidb(used as library), and the flag controls its log level. 226 log-level = "error" 227 228 # sets maximum packet size allowed for SQL connections. 229 # set this to 0 to automatically fetch the `max_allowed_packet` variable from server on every connection. 230 # max-allowed-packet = 67_108_864 231 232 # whether to use TLS for SQL connections. 
valid values are: 233 # * "" - force TLS (same as "cluster") if [tidb.security] section is populated, otherwise same as "false" 234 # * "false" - disable TLS 235 # * "cluster" - force TLS and verify the server's certificate with the CA specified in the [tidb.security] section 236 # * "skip-verify" - force TLS but do not verify the server's certificate (insecure!) 237 # * "preferred" - same as "skip-verify", but if the server does not support TLS, fallback to unencrypted connection 238 # tls = "" 239 240 # set tidb session variables to speed up checksum/analyze table. 241 # see https://pingcap.com/docs/sql/statistics/#control-analyze-concurrency for the meaning of each setting 242 build-stats-concurrency = 20 243 distsql-scan-concurrency = 15 244 index-serial-scan-concurrency = 20 245 # checksum-table-concurrency controls the maximum checksum table tasks to run concurrently. 246 checksum-table-concurrency = 2 247 248 # specifies certificates and keys for TLS-enabled MySQL connections. 249 # defaults to a copy of the [security] section. 250 #[tidb.security] 251 # public certificate of the CA. Set to empty string to disable TLS. 252 # ca-path = "/path/to/ca.pem" 253 # public certificate of this service. Default to copy of `security.cert-path` 254 # cert-path = "/path/to/lightning.pem" 255 # private key of this service. Default to copy of `security.key-path` 256 # key-path = "/path/to/lightning.key" 257 258 # post-restore provide some options which will be executed after all kv data has been imported into the tikv cluster. 259 # the execution order are(if set true): checksum -> analyze 260 [post-restore] 261 # config whether to do `ADMIN CHECKSUM TABLE <table>` after restore finished for each table. 262 # valid options: 263 # - "off". do not do checksum. 264 # - "optional". do execute admin checksum, but will ignore any error if checksum fails. 265 # - "required". default option. do execute admin checksum, if checksum fails, lightning will exit with failure. 
266 # NOTE: for backward compatibility, bool values `true` and `false` are also allowed for this field. `true` is 267 # equivalent to "required" and `false` is equivalent to "off". 268 checksum = "required" 269 # if set true, analyze will do `ANALYZE TABLE <table>` for each table. 270 # the config options are the same as 'post-restore.checksum'. 271 analyze = "optional" 272 # if set to true, compact will do level 1 compaction to tikv data. 273 # if this setting is missing, the default value is false. 274 level-1-compact = false 275 # if set true, compact will do full compaction to tikv data. 276 # if this setting is missing, the default value is false. 277 compact = false 278 # if set to true, lightning will run checksum and analyze for all tables together at last 279 post-process-at-last = true 280 281 # cron performs some periodic actions in background 282 [cron] 283 # duration between which Lightning will automatically refresh the import mode status. 284 # should be shorter than the corresponding TiKV setting 285 switch-mode = "5m" 286 # the duration which an import progress will be printed to the log. 287 log-progress = "5m" 288 # the duration which tikv-importer.sorted-kv-dir-capacity is checked. 289 check-disk-quota = "1m"