github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/tidb-lightning.toml

### tidb-lightning configuration
[lightning]

# Listening address for the HTTP server (set to an empty string to disable).
# The server is responsible for the web interface, submitting import tasks,
# serving Prometheus metrics and exposing debug profiling data.
status-addr = ":8289"

# Toggle server mode.
# If "false", running Lightning starts the import job immediately and exits
# after the job is finished.
# If "true", running Lightning waits for users to submit tasks via the HTTP API
# (`curl http://lightning-ip:8289/tasks --data-binary @tidb-lightning.toml`).
# The program keeps running and waiting for more tasks until it receives the SIGINT signal.
server-mode = false

# check if the cluster satisfies the minimum requirements before starting
# check-requirements = true

# index-concurrency controls the maximum number of indexes handled concurrently while reading Mydumper SQL files. It can affect the tikv-importer disk usage.
index-concurrency = 2
# table-concurrency controls the maximum number of tables handled concurrently while reading Mydumper SQL files. It can affect the tikv-importer memory usage.
table-concurrency = 6
# region-concurrency controls the concurrency of data import. It defaults to the number of logical CPU cores and usually needs no configuration.
# In a mixed deployment, you can set it to 75% of the number of logical CPU cores.
# region-concurrency defaults to runtime.NumCPU()
# region-concurrency =
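# For example (an illustrative calculation, not a recommendation): on a mixed-
# deployment host with 16 logical CPU cores, 75% would give 0.75 * 16 = 12:
# region-concurrency = 12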
# io-concurrency controls the maximum IO concurrency.
# Excessive IO concurrency increases IO latency because the disk's internal
# buffer is frequently refreshed, causing cache misses. Concurrency affects IO
# latency differently for different disk media, so it can be adjusted according
# to monitoring.
# Ref: https://en.wikipedia.org/wiki/Disk_buffer#Read-ahead/read-behind
# io-concurrency = 5
# meta-schema-name is the schema (database) name used to store Lightning task and table metadata.
# The meta schema and tables are stored in the target TiDB cluster.
# This config is only used by the "local" and "importer" backends.
# meta-schema-name = "lightning_metadata"

# logging
level = "info"
# Log file path. If set to empty, logs are written to /tmp/lightning.log.{timestamp}
# Set to "-" to write logs to stdout.
file = "tidb-lightning.log"
max-size = 128 # MB
max-days = 28
max-backups = 14

[security]
# specifies certificates and keys for TLS connections within the cluster.
# public certificate of the CA. Leave empty to disable TLS.
# ca-path = "/path/to/ca.pem"
# public certificate of this service.
# cert-path = "/path/to/lightning.pem"
# private key of this service.
# key-path = "/path/to/lightning.key"
# If set to true, Lightning will redact sensitive information in the logs.
# redact-info-log = false

[checkpoint]
# Whether to enable checkpoints.
# While importing, Lightning records which tables have been imported, so even if Lightning
# or another component crashes, the import can resume from a known good state instead of
# redoing everything.
enable = true
# The schema name (database name) to store the checkpoints.
schema = "tidb_lightning_checkpoint"
# Where to store the checkpoints.
# Set to "file" to store as a local file.
# Set to "mysql" to store into a remote MySQL-compatible database.
driver = "file"
# The data source name (DSN) indicating the location of the checkpoint storage.
# For the "file" driver, the DSN is a path. If not specified, Lightning defaults to "/tmp/CHKPTSCHEMA.pb".
# For the "mysql" driver, the DSN is a URL in the form "USER:PASS@tcp(HOST:PORT)/".
# If not specified, the TiDB server from the [tidb] section will be used to store the checkpoints.
#dsn = "/tmp/tidb_lightning_checkpoint.pb"
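# As an illustration (not part of the original file), a "mysql" driver DSN
# following the URL form above might look like the commented value below;
# the user, host and port here are placeholders, not recommendations.
#dsn = "root:@tcp(127.0.0.1:4000)/"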
# Whether to keep the checkpoints after all data are imported. If false, the checkpoints
# will be deleted. The schema needs to be dropped manually, however.
#keep-after-success = false

[tikv-importer]
# Delivery backend, can be "importer", "local" or "tidb".
backend = "importer"
# Address of tikv-importer when the backend is "importer".
addr = "127.0.0.1:8287"
# What to do on a duplicated record (unique key conflict) when the backend is "tidb". Possible values are:
#  - replace: replace the old record with the new record (i.e. insert rows using "REPLACE INTO")
#  - ignore: keep the old record and ignore the new record (i.e. insert rows using "INSERT IGNORE INTO")
#  - error: stop Lightning and report an error (i.e. insert rows using "INSERT INTO")
#on-duplicate = "replace"
# Maximum KV size of SST files produced in the "local" backend. This should be the same as
# the TiKV region size to avoid further region splitting. The default value is 96 MiB.
#region-split-size = '96MiB'
# batch size for writing key-value pairs to TiKV.
#send-kv-pairs = 32768
# local storage directory used in the "local" backend.
#sorted-kv-dir = ""
# Maximum size of the local storage directory. Periodically, Lightning checks whether the total
# storage size exceeds this value. If so, the "local" backend blocks and immediately ingests the
# largest engines into the target TiKV until the usage falls below the specified capacity.
# Note that the disk quota IS NOT A HARD LIMIT. There are chances that the usage overshoots the
# quota before it is detected. The overshoot is up to 6.3 GiB in the default settings (8 open
# engines, 40 region-concurrency, check quota every minute).
# Setting the disk quota too low may cause engines to overlap each other too much and slow down the import.
# This setting is ignored in the "tidb" and "importer" backends.
# The default value of 0 means letting Lightning automatically pick an appropriate capacity using
# the free disk space of sorted-kv-dir, subtracting the overshoot.
#disk-quota = 0
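# As an illustrative sketch (assuming disk-quota accepts the same human-readable
# byte-size strings as region-split-size above), an explicit 100 GiB quota could
# be written as:
#disk-quota = '100GiB'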
# range-concurrency controls the maximum number of concurrent ingests while writing to TiKV. It can
# affect the network traffic. The default can make full use of a 10 Gigabit network; if your network
# bandwidth is higher, you can increase this value for better performance. Larger values also
# increase memory usage slightly.
#range-concurrency = 16
# The memory cache used in the local backend for each engine. The memory usage during the write-KV
# phase by the engines is bound by (index-concurrency + table-concurrency) * engine-mem-cache-size.
#engine-mem-cache-size = '512MiB'
# The memory cache used for local sorting during the encode-KV phase before flushing into the
# engines. The memory usage is bound by region-concurrency * local-writer-mem-cache-size.
#local-writer-mem-cache-size = '128MiB'
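# To illustrate the two memory bounds above using the defaults in this file
# (index-concurrency = 2, table-concurrency = 6) and an assumed 16-core host
# (so region-concurrency = 16):
#   engine cache bound: (2 + 6) * 512 MiB = 4 GiB
#   writer cache bound: 16 * 128 MiB = 2 GiB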

[mydumper]
# block size for file reading
read-block-size = '64KiB'
# minimum size (in terms of source data file) of each batch of import.
# Lightning will split a large table into multiple engine files according to this size.
#batch-size = '100GiB'

# Engine files need to be imported sequentially. Due to table-concurrency, multiple engines will be
# imported at nearly the same time, which creates a queue and wastes resources. Therefore,
# Lightning slightly increases the size of the first few batches to properly distribute
# resources. The scale-up is controlled by this parameter, which expresses the ratio of duration
# between the "import" and "write" steps with full concurrency. This can be calculated as the ratio
# (import duration / write duration) of a single table of size around 1 GB. The exact timing can be
# found in the log. If "import" is faster, the batch size anomaly is smaller, and a ratio of
# zero means uniform batch size. This value should be in the range (0 <= batch-import-ratio < 1).
batch-import-ratio = 0.75
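# As a worked example with illustrative (not measured) timings: if importing a
# ~1 GB table takes 75 seconds and writing it takes 100 seconds, the ratio is
# 75 / 100 = 0.75, matching the value set above.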

# mydumper local source data directory
data-source-dir = "/tmp/export-20180328-200751"
# if no-schema is set to true, Lightning will get schema information from tidb-server directly,
# without creating the schemas.
no-schema = false
# the character set of the schema files; only supports one of:
#  - utf8mb4: the schema files must be encoded as UTF-8, otherwise errors will be emitted
#  - gb18030: the schema files must be encoded as GB-18030, otherwise errors will be emitted
#  - auto:    (default) automatically detect whether the schema is UTF-8 or GB-18030; error if the encoding is neither
#  - binary:  do not try to decode the schema files
# note that the *data* files are always parsed as binary regardless of the schema encoding.
#character-set = "auto"

# make table and database names case-sensitive, i.e. treat `DB`.`TBL` and `db`.`tbl` as two
# different objects. Currently only affects [[routes]].
case-sensitive = false

# if strict-format is true, Lightning will use '\r' and '\n' to determine the end of each line. Make sure your data
# doesn't contain '\r' or '\n' if strict-format is enabled, or the CSV parser may produce incorrect results.
strict-format = false
# if strict-format is true, large CSV files will be split into multiple chunks, which Lightning
# will restore in parallel. The size of each chunk is `max-region-size`, where the default is 256 MiB.
#max-region-size = '256MiB'

# enable the file router to use the default rules. By default, it is set to true if no `mydumper.files`
# rule is provided, else false. You can explicitly set it to `true` to enable the default rules, so that
# they also take effect on files that no other rule matches.
# The default file routing rules behave the same as in former versions without this conf, that is:
#   {schema}-schema-create.sql --> schema create sql file
#   {schema}.{table}-schema.sql --> table schema sql file
#   {schema}.{table}.{0001}.{sql|csv|parquet} --> data source file
#   *-schema-view.sql, *-schema-trigger.sql, *-schema-post.sql --> ignore all SQL files ending with these patterns
#default-file-rules = false

# only import tables if the wildcard rules are matched. See documentation for details.
filter = ['*.*', '!mysql.*', '!sys.*', '!INFORMATION_SCHEMA.*', '!PERFORMANCE_SCHEMA.*', '!METRICS_SCHEMA.*', '!INSPECTION_SCHEMA.*']

# CSV files are imported according to MySQL's LOAD DATA INFILE rules.
[mydumper.csv]
# separator between fields, can be one or more characters but not empty. The value
# cannot be a prefix of `delimiter`.
separator = ','
# string delimiter, can be one or more characters or an empty string. If not empty,
# the value should not be a prefix of `separator`.
delimiter = '"'
# row terminator, which can be an empty or non-empty string.
# An empty string means both \r and \n are considered a terminator. This is the normal CSV behavior.
# A non-empty string means the row ends only when such a terminator is matched exactly (or upon reaching the end of file).
# If the file content matches both the terminator and the separator, the terminator takes precedence.
terminator = ''
# whether the CSV files contain a header. If true, the first line will be skipped.
header = true
# whether the CSV contains any NULL value. If true, all columns from CSV cannot be NULL.
not-null = false
# if not-null = false (i.e. CSV can contain NULL), fields equal to this value will be treated as NULL
null = '\N'
# whether to interpret backslash escapes inside strings.
backslash-escape = true
# if a line ends with a separator, remove it.
# deprecated - consider using the terminator option instead.
#trim-last-separator = false
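# As an illustration of the settings above (separator = ',', delimiter = '"',
# null = '\N', header = true), a hypothetical file whose first data line is
#   1,"foo",\N
# would have its header line skipped and this line parsed as the three fields
# (1, foo, NULL).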

# file-level routing rules that map a file path to schema, table, type and sort key.
# The schema, table, type and key can each be either a constant string or a template string
# supported by Go regexp.
#[[mydumper.files]]
# pattern and path determine the target source files; you can use either of them, but not both.
# pattern is a regexp in Go syntax that can match one or more files in `source-dir`.
#pattern = '(?i)^(?:[^/]*/)(?P<schema>[^/.]+)\.([^/.]+)(?:\.([0-9]+))?\.(sql|csv)$'
# path is the target file path; both an absolute path and a path relative to `mydumper.data-source-dir` are supported.
# the path separator is always converted to '/', regardless of operating system.
#path = "schema_name.table_name.00001.sql"
# schema (database) name
#schema = "$schema"
# table name
#table = "$2"
# file type, can be one of schema-schema, table-schema, sql, csv
#type = "$4"
# an arbitrary string used to maintain the sort order among the files for row ID allocation and checkpoint resumption
#key = "$3"
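# As an illustration of how the capture groups above resolve: for a hypothetical
# source file "data/mydb.users.0001.csv", the pattern yields $schema = "mydb",
# $2 = "users", $3 = "0001" and $4 = "csv", so its rows would be routed to table
# `mydb`.`users` as CSV data.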

# configuration for the TiDB server address (one is enough) and PD server address (one is enough).
[tidb]
host = "127.0.0.1"
port = 4000
user = "root"
password = ""
# table schema information is fetched from TiDB via this status port.
status-port = 10080
pd-addr = "127.0.0.1:2379"
# Lightning uses some TiDB code as a library, and this flag controls its log level.
log-level = "error"

# sets the maximum packet size allowed for SQL connections.
# set this to 0 to automatically fetch the `max_allowed_packet` variable from the server on every connection.
# max-allowed-packet = 67_108_864

# whether to use TLS for SQL connections. valid values are:
#  * ""            - force TLS (same as "cluster") if the [tidb.security] section is populated, otherwise same as "false"
#  * "false"       - disable TLS
#  * "cluster"     - force TLS and verify the server's certificate with the CA specified in the [tidb.security] section
#  * "skip-verify" - force TLS but do not verify the server's certificate (insecure!)
#  * "preferred"   - same as "skip-verify", but if the server does not support TLS, fall back to an unencrypted connection
# tls = ""

# set TiDB session variables to speed up checksum/analyze table.
# see https://pingcap.com/docs/sql/statistics/#control-analyze-concurrency for the meaning of each setting
build-stats-concurrency = 20
distsql-scan-concurrency = 15
index-serial-scan-concurrency = 20
# checksum-table-concurrency controls the maximum number of checksum table tasks run concurrently.
checksum-table-concurrency = 2

# specifies certificates and keys for TLS-enabled MySQL connections.
# defaults to a copy of the [security] section.
#[tidb.security]
# public certificate of the CA. Set to an empty string to disable TLS.
# ca-path = "/path/to/ca.pem"
# public certificate of this service. Defaults to a copy of `security.cert-path`.
# cert-path = "/path/to/lightning.pem"
# private key of this service. Defaults to a copy of `security.key-path`.
# key-path = "/path/to/lightning.key"

# post-restore provides options that are executed after all KV data has been imported into the TiKV cluster.
# the execution order (when enabled) is: checksum -> analyze
[post-restore]
# configures whether to run `ADMIN CHECKSUM TABLE <table>` for each table after the restore finishes.
# valid options:
# - "off": do not run checksum.
# - "optional": run admin checksum, but ignore any error if the checksum fails.
# - "required": the default option. run admin checksum; if the checksum fails, Lightning exits with failure.
# NOTE: for backward compatibility, the bool values `true` and `false` are also allowed for this field.
# `true` is equivalent to "required" and `false` is equivalent to "off".
checksum = "required"
# if enabled, runs `ANALYZE TABLE <table>` for each table.
# the valid options are the same as for 'post-restore.checksum'.
analyze = "optional"
# if set to true, performs a level-1 compaction on the TiKV data.
# if this setting is missing, the default value is false.
level-1-compact = false
# if set to true, performs a full compaction on the TiKV data.
# if this setting is missing, the default value is false.
compact = false
# if set to true, Lightning runs checksum and analyze for all tables together at the end.
post-process-at-last = true

# cron performs some periodic actions in the background
[cron]
# interval at which Lightning automatically refreshes the import mode status.
# should be shorter than the corresponding TiKV setting.
switch-mode = "5m"
# interval at which the import progress is printed to the log.
log-progress = "5m"
# interval at which the disk usage of tikv-importer.sorted-kv-dir is checked against the disk quota.
check-disk-quota = "1m"