github.com/pingcap/tidb-lightning@v5.0.0-rc.0.20210428090220-84b649866577+incompatible/tidb-lightning.toml (about)

     1  ### tidb-lightning configuration
     2  [lightning]
     3  
     4  # Listening address for the HTTP server (set to empty string to disable).
     5  # The server is responsible for the web interface, submitting import tasks,
     6  # serving Prometheus metrics and exposing debug profiling data.
     7  status-addr = ":8289"
     8  
     9  # Toggle server mode.
    10  # If "false", running Lightning will immediately start the import job, and exits
    11  # after the job is finished.
    12  # If "true", running Lightning will wait for user to submit tasks, via the HTTP API
    13  # (`curl http://lightning-ip:8289/tasks --data-binary @tidb-lightning.toml`).
    14  # The program will keep running and waiting for more tasks, until receiving the SIGINT signal.
    15  server-mode = false
    16  
    17  # check if the cluster satisfies the minimum requirement before starting
    18  # check-requirements = true
    19  
    20  # index-concurrency controls the maximum handled index concurrently while reading Mydumper SQL files. It can affect the tikv-importer disk usage.
    21  index-concurrency = 2
    22  # table-concurrency controls the maximum handled tables concurrently while reading Mydumper SQL files. It can affect the tikv-importer memory usage.
    23  table-concurrency = 6
    24  # region-concurrency changes the concurrency number of data. It is set to the number of logical CPU cores by default and needs no configuration.
    25  # In mixed configuration, you can set it to 75% of the size of logical CPU cores.
    26  # region-concurrency default to runtime.NumCPU()
    27  # region-concurrency =
    28  # io-concurrency controls the maximum IO concurrency
    29  # Excessive IO concurrency causes an increase in IO latency because the disk
    30  # internal buffer is frequently refreshed causing a cache miss. For different
    31  # disk media, concurrency has different effects on IO latency, which can be
    32  # adjusted according to monitoring.
    33  # Ref: https://en.wikipedia.org/wiki/Disk_buffer#Read-ahead/read-behind
    34  # io-concurrency = 5
    35  
    36  # logging
    37  level = "info"
    38  # file path for log. If set to empty, log will be written to /tmp/lightning.log.{timestamp}
    39  # Set to "-" to write logs to stdout.
    40  file = "tidb-lightning.log"
    41  max-size = 128 # MB
    42  max-days = 28
    43  max-backups = 14
    44  
    45  [security]
    46  # specifies certificates and keys for TLS connections within the cluster.
    47  # public certificate of the CA. Leave empty to disable TLS.
    48  # ca-path = "/path/to/ca.pem"
    49  # public certificate of this service.
    50  # cert-path = "/path/to/lightning.pem"
    51  # private key of this service.
    52  # key-path = "/path/to/lightning.key"
    53  # If set to true, lightning will redact sensitive information in log.
    54  # redact-info-log = false
    55  
    56  [checkpoint]
    57  # Whether to enable checkpoints.
    58  # While importing, Lightning will record which tables have been imported, so even if Lightning or another component
    59  # crashes, we can start from a known good state instead of redoing everything.
    60  enable = true
    61  # The schema name (database name) to store the checkpoints
    62  schema = "tidb_lightning_checkpoint"
    63  # Where to store the checkpoints.
    64  # Set to "file" to store as a local file.
    65  # Set to "mysql" to store into a remote MySQL-compatible database
    66  driver = "file"
    67  # The data source name (DSN) indicating the location of the checkpoint storage.
    68  # For "file" driver, the DSN is a path. If not specified, Lightning would default to "/tmp/CHKPTSCHEMA.pb".
    69  # For "mysql" driver, the DSN is a URL in the form "USER:PASS@tcp(HOST:PORT)/".
    70  # If not specified, the TiDB server from the [tidb] section will be used to store the checkpoints.
    71  #dsn = "/tmp/tidb_lightning_checkpoint.pb"
    72  # Whether to keep the checkpoints after all data are imported. If false, the checkpoints will be deleted. The schema
    73  # needs to be dropped manually, however.
    74  #keep-after-success = false
    75  
    76  [tikv-importer]
    77  # Delivery backend, can be "importer", "local" or "tidb".
    78  backend = "importer"
    79  # Address of tikv-importer when the backend is 'importer'
    80  addr = "127.0.0.1:8287"
    81  # What to do on duplicated record (unique key conflict) when the backend is 'tidb'. Possible values are:
    82  #  - replace: replace the old record by the new record (i.e. insert rows using "REPLACE INTO")
    83  #  - ignore: keep the old record and ignore the new record (i.e. insert rows using "INSERT IGNORE INTO")
    84  #  - error: stop Lightning and report an error (i.e. insert rows using "INSERT INTO")
    85  #on-duplicate = "replace"
    86  # Maximum KV size of SST files produced in the 'local' backend. This should be the same as
    87  # the TiKV region size to avoid further region splitting. The default value is 96 MiB.
    88  #region-split-size = '96MiB'
    89  # write key-values pairs to tikv batch size
    90  #send-kv-pairs = 32768
    91  # local storage directory used in "local" backend.
    92  #sorted-kv-dir = ""
    93  # Maximum size of the local storage directory. Periodically, Lightning will check if the total storage size exceeds this
    94  # value. If so the "local" backend will block and immediately ingest the largest engines into the target TiKV until the
    95  # usage falls below the specified capacity.
    96  # Note that the disk-quota IS NOT A HARD LIMIT. There are chances that the usage overshoots the quota before it was
    97  # detected. The overshoot is up to 6.3 GiB in default settings (8 open engines, 40 region-concurrency, check quota every
    98  # minute).
    99  # Setting the disk quota too low may cause engines to overlap each other too much and slow down import.
   100  # This setting is ignored in "tidb" and "importer" backends.
   101  # The default value of 0 means letting Lightning to automatically pick an appropriate capacity using the free disk space
   102  # of sorted-kv-dir, subtracting the overshoot.
   103  #disk-quota = 0
   104  # range-concurrency controls the maximum ingest concurrently while writing to tikv, It can affect the network traffic.
   105  # this default config can make full use of a 10Gib bandwidth network, if the network bandwidth is higher, you can increase
   106  # this to gain better performance. Larger value will also increase the memory usage slightly.
   107  #range-concurrency = 16
   108  
   109  [mydumper]
   110  # block size of file reading
   111  read-block-size = '64KiB'
   112  # minimum size (in terms of source data file) of each batch of import.
   113  # Lightning will split a large table into multiple engine files according to this size.
   114  #batch-size = '100GiB'
   115  
   116  # Engine file needs to be imported sequentially. Due to table-concurrency, multiple engines will be
   117  # imported nearly the same time, and this will create a queue and this wastes resources. Therefore,
   118  # Lightning will slightly increase the size of the first few batches to properly distribute
   119  # resources. The scale up is controlled by this parameter, which expresses the ratio of duration
   120  # between the "import" and "write" steps with full concurrency. This can be calculated as the ratio
   121  # (import duration / write duration) of a single table of size around 1 GB. The exact timing can be
   122  # found in the log. If "import" is faster, the batch size anomaly is smaller, and a ratio of
   123  # zero means uniform batch size. This value should be in the range (0 <= batch-import-ratio < 1).
   124  batch-import-ratio = 0.75
   125  
   126  # mydumper local source data directory
   127  data-source-dir = "/tmp/export-20180328-200751"
   128  # if no-schema is set true, lightning will get schema information from tidb-server directly without creating them.
   129  no-schema=false
   130  # the character set of the schema files; only supports one of:
   131  #  - utf8mb4: the schema files must be encoded as UTF-8, otherwise will emit errors
   132  #  - gb18030: the schema files must be encoded as GB-18030, otherwise will emit errors
   133  #  - auto:    (default) automatically detect if the schema is UTF-8 or GB-18030, error if the encoding is neither
   134  #  - binary:  do not try to decode the schema files
   135  # note that the *data* files are always parsed as binary regardless of schema encoding.
   136  #character-set = "auto"
   137  
   138  # make table and database names case-sensitive, i.e. treats `DB`.`TBL` and `db`.`tbl` as two
   139  # different objects. Currently only affects [[routes]].
   140  case-sensitive = false
   141  
   142  # if strict-format is true, lightning will use '\r' and '\n' to determine the end of each line. Make sure your data
   143  # doesn't contain '\r' or '\n' if strict-format is enabled, or csv parser may parse incorrect result.
   144  strict-format = false
   145  # if strict-format is true, large CSV files will be split to multiple chunks, which Lightning
   146  # will restore in parallel. The size of each chunk is `max-region-size`, where the default is 256 MiB.
   147  #max-region-size = '256MiB'
   148  
   149  # enable file router to use the default rules. By default, it will be set to true if no `mydumper.files`
   150  # rule is provided, else false. You can explicitly set it to `true` to enable the default rules, they will
   151  # take effect on files that no other rules match.
   152  # The default file routing rules' behavior is the same as former versions without this conf, that is:
   153  #   {schema}-schema-create.sql --> schema create sql file
   154  #   {schema}.{table}-schema.sql --> table schema sql file
   155  #   {schema}.{table}.{0001}.{sql|csv|parquet} --> data source file
   156  #   *-schema-view.sql, *-schema-trigger.sql, *-schema-post.sql --> ignore all the sql files end with these pattern
   157  #default-file-rules = false
   158  
   159  # only import tables if the wildcard rules are matched. See documentation for details.
   160  filter = ['*.*', '!mysql.*', '!sys.*', '!INFORMATION_SCHEMA.*', '!PERFORMANCE_SCHEMA.*', '!METRICS_SCHEMA.*', '!INSPECTION_SCHEMA.*']
   161  
   162  # CSV files are imported according to MySQL's LOAD DATA INFILE rules.
   163  [mydumper.csv]
   164  # separator between fields, can be one or more characters but not empty. The value
   165  # must not be a prefix of `delimiter`.
   166  separator = ','
   167  # string delimiter, can either be one or more characters or empty string. If not empty,
   168  # the value should not be a prefix of `separator`
   169  delimiter = '"'
   170  # whether the CSV files contain a header. If true, the first line will be skipped
   171  header = true
   172  # whether the CSV contains any NULL value. If true, all columns from CSV cannot be NULL.
   173  not-null = false
   174  # if not-null = false (i.e. CSV can contain NULL), fields equal to this value will be treated as NULL
   175  null = '\N'
   176  # whether to interpret backslash-escape inside strings.
   177  backslash-escape = true
   178  # if a line ends with a separator, remove it.
   179  trim-last-separator = false
   180  
   181  # file level routing rule that map file path to schema,table,type,sort-key
   182  # The schema, table, type and key can be either a constant string or template strings
   183  # supported by go regexp.
   184  #[[mydumper.files]]
   185  # pattern and path determine target source files, you can use either of them but not both.
   186  # pattern is a regexp in Go syntax that can match one or more files in `source-dir`.
   187  #pattern = '(?i)^(?:[^/]*/)(?P<schema>[^/.]+)\.([^/.]+)(?:\.([0-9]+))?\.(sql|csv)$'
   188  # path is the target file path, both absolute file path or relative path to `mydump.source-dir` are supported.
   189  # the path separator is always converted to '/', regardless of operating system.
   190  #path = "schema_name.table_name.00001.sql"
   191  # schema(database) name
   192  #schema = "$schema"
   193  # table name
   194  #table = "$2"
   195  # file type, can be one of schema-schema, table-schema, sql, csv
   196  #type = "$4"
   197  # an arbitrary string used to maintain the sort order among the files for row ID allocation and checkpoint resumption
   198  #key = "$3"
   199  
   200  # configuration for tidb server address(one is enough) and pd server address(one is enough).
   201  [tidb]
   202  host = "127.0.0.1"
   203  port = 4000
   204  user = "root"
   205  password = ""
   206  # table schema information is fetched from tidb via this status-port.
   207  status-port = 10080
   208  pd-addr = "127.0.0.1:2379"
   209  # lightning uses some code of tidb (used as a library), and the flag controls its log level.
   210  log-level = "error"
   211  
   212  # sets maximum packet size allowed for SQL connections.
   213  # set this to 0 to automatically fetch the `max_allowed_packet` variable from server on every connection.
   214  # max-allowed-packet = 67_108_864
   215  
   216  # whether to use TLS for SQL connections. valid values are:
   217  #  * ""            - force TLS (same as "cluster") if [tidb.security] section is populated, otherwise same as "false"
   218  #  * "false"       - disable TLS
   219  #  * "cluster"     - force TLS and verify the server's certificate with the CA specified in the [tidb.security] section
   220  #  * "skip-verify" - force TLS but do not verify the server's certificate (insecure!)
   221  #  * "preferred"   - same as "skip-verify", but if the server does not support TLS, fallback to unencrypted connection
   222  # tls = ""
   223  
   224  # set tidb session variables to speed up checksum/analyze table.
   225  # see https://pingcap.com/docs/sql/statistics/#control-analyze-concurrency for the meaning of each setting
   226  build-stats-concurrency = 20
   227  distsql-scan-concurrency = 15
   228  index-serial-scan-concurrency = 20
   229  # checksum-table-concurrency controls the maximum checksum table tasks to run concurrently.
   230  checksum-table-concurrency = 2
   231  
   232  # specifies certificates and keys for TLS-enabled MySQL connections.
   233  # defaults to a copy of the [security] section.
   234  #[tidb.security]
   235  # public certificate of the CA. Set to empty string to disable TLS.
   236  # ca-path = "/path/to/ca.pem"
   237  # public certificate of this service. Default to copy of `security.cert-path`
   238  # cert-path = "/path/to/lightning.pem"
   239  # private key of this service. Default to copy of `security.key-path`
   240  # key-path = "/path/to/lightning.key"
   241  
   242  # post-restore provide some options which will be executed after all kv data has been imported into the tikv cluster.
   243  # the execution order are(if set true): checksum -> analyze
   244  [post-restore]
   245  # config whether to do `ADMIN CHECKSUM TABLE <table>` after restore finished for each table.
   246  # valid options:
   247  # - "off". do not do checksum.
   248  # - "optional". do execute admin checksum, but will ignore any error if checksum fails.
   249  # - "required". default option. do execute admin checksum, if checksum fails, lightning will exit with failure.
   250  # NOTE: for backward compatibility, bool values `true` and `false` is also allowed for this field. `true` is
   251  # equivalent to "required" and `false` is equivalent to "off".
   252  checksum = "required"
   253  # if set true, analyze will do `ANALYZE TABLE <table>` for each table.
   254  # the config options is the same as 'post-restore.checksum'.
   255  analyze = "optional"
   256  # if set to true, compact will do level 1 compaction to tikv data.
   257  # if this setting is missing, the default value is false.
   258  level-1-compact = false
   259  # if set true, compact will do full compaction to tikv data.
   260  # if this setting is missing, the default value is false.
   261  compact = false
   262  # if set to true, lightning will run checksum and analyze for all tables together at last
   263  post-process-at-last = true
   264  
   265  # cron performs some periodic actions in background
   266  [cron]
   267  # duration between which Lightning will automatically refresh the import mode status.
   268  # should be shorter than the corresponding TiKV setting
   269  switch-mode = "5m"
   270  # the interval at which import progress will be printed to the log.
   271  log-progress = "5m"
   272  # the duration which tikv-importer.sorted-kv-dir-capacity is checked.
   273  check-disk-quota = "1m"