github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/sink/cloudstorage/config.go (about) 1 // Copyright 2022 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package cloudstorage 15 16 import ( 17 "context" 18 "fmt" 19 "net/http" 20 "net/url" 21 "strings" 22 "time" 23 24 "github.com/gin-gonic/gin/binding" 25 "github.com/imdario/mergo" 26 "github.com/pingcap/log" 27 "github.com/pingcap/tiflow/pkg/config" 28 cerror "github.com/pingcap/tiflow/pkg/errors" 29 psink "github.com/pingcap/tiflow/pkg/sink" 30 "github.com/pingcap/tiflow/pkg/util" 31 "go.uber.org/zap" 32 ) 33 34 const ( 35 // defaultWorkerCount is the default value of worker-count. 36 defaultWorkerCount = 16 37 // the upper limit of worker-count. 38 maxWorkerCount = 512 39 // defaultFlushInterval is the default value of flush-interval. 40 defaultFlushInterval = 5 * time.Second 41 // the lower limit of flush-interval. 42 minFlushInterval = 2 * time.Second 43 // the upper limit of flush-interval. 44 maxFlushInterval = 10 * time.Minute 45 // defaultFlushConcurrency is the default value of flush-concurrency. 46 defaultFlushConcurrency = 1 47 // the lower limit of flush-concurrency. 48 minFlushConcurrency = 1 49 // the upper limit of flush-concurrency. 50 maxFlushConcurrency = 512 51 // defaultFileSize is the default value of file-size. 52 defaultFileSize = 64 * 1024 * 1024 53 // the lower limit of file size 54 minFileSize = 1024 * 1024 55 // the upper limit of file size 56 maxFileSize = 512 * 1024 * 1024 57 58 // disable file cleanup by default 59 defaultFileExpirationDays = 0 60 // Second | Minute | Hour | Dom | Month | DowOptional 61 // `0 0 2 * * ?` means 2:00:00 AM every day 62 defaultFileCleanupCronSpec = "0 0 2 * * *" 63 ) 64 65 type urlConfig struct { 66 WorkerCount *int `form:"worker-count"` 67 FlushInterval *string `form:"flush-interval"` 68 FileSize *int `form:"file-size"` 69 } 70 71 // Config is the configuration for cloud storage sink. 72 type Config struct { 73 WorkerCount int 74 FlushInterval time.Duration 75 FileSize int 76 FileIndexWidth int 77 DateSeparator string 78 FileExpirationDays int 79 FileCleanupCronSpec string 80 EnablePartitionSeparator bool 81 OutputColumnID bool 82 FlushConcurrency int 83 } 84 85 // NewConfig returns the default cloud storage sink config. 86 func NewConfig() *Config { 87 return &Config{ 88 WorkerCount: defaultWorkerCount, 89 FlushInterval: defaultFlushInterval, 90 FileSize: defaultFileSize, 91 FileExpirationDays: defaultFileExpirationDays, 92 FileCleanupCronSpec: defaultFileCleanupCronSpec, 93 } 94 } 95 96 // Apply applies the sink URI parameters to the config. 97 func (c *Config) Apply( 98 ctx context.Context, 99 sinkURI *url.URL, 100 replicaConfig *config.ReplicaConfig, 101 ) (err error) { 102 if sinkURI == nil { 103 return cerror.ErrStorageSinkInvalidConfig.GenWithStack( 104 "failed to open cloud storage sink, empty SinkURI") 105 } 106 107 scheme := strings.ToLower(sinkURI.Scheme) 108 if !psink.IsStorageScheme(scheme) { 109 return cerror.ErrStorageSinkInvalidConfig.GenWithStack( 110 "can't create cloud storage sink with unsupported scheme: %s", scheme) 111 } 112 req := &http.Request{URL: sinkURI} 113 urlParameter := &urlConfig{} 114 if err := binding.Query.Bind(req, urlParameter); err != nil { 115 return cerror.WrapError(cerror.ErrStorageSinkInvalidConfig, err) 116 } 117 if urlParameter, err = mergeConfig(replicaConfig, urlParameter); err != nil { 118 return err 119 } 120 if err = getWorkerCount(urlParameter, &c.WorkerCount); err != nil { 121 return err 122 } 123 err = getFlushInterval(urlParameter, &c.FlushInterval) 124 if err != nil { 125 return err 126 } 127 err = getFileSize(urlParameter, &c.FileSize) 128 if err != nil { 129 return err 130 } 131 132 c.DateSeparator = util.GetOrZero(replicaConfig.Sink.DateSeparator) 133 c.EnablePartitionSeparator = util.GetOrZero(replicaConfig.Sink.EnablePartitionSeparator) 134 c.FileIndexWidth = util.GetOrZero(replicaConfig.Sink.FileIndexWidth) 135 if replicaConfig.Sink.CloudStorageConfig != nil { 136 c.OutputColumnID = util.GetOrZero(replicaConfig.Sink.CloudStorageConfig.OutputColumnID) 137 if replicaConfig.Sink.CloudStorageConfig.FileExpirationDays != nil { 138 c.FileExpirationDays = *replicaConfig.Sink.CloudStorageConfig.FileExpirationDays 139 } 140 if replicaConfig.Sink.CloudStorageConfig.FileCleanupCronSpec != nil { 141 c.FileCleanupCronSpec = *replicaConfig.Sink.CloudStorageConfig.FileCleanupCronSpec 142 } 143 c.FlushConcurrency = util.GetOrZero(replicaConfig.Sink.CloudStorageConfig.FlushConcurrency) 144 } 145 146 if c.FileIndexWidth < config.MinFileIndexWidth || c.FileIndexWidth > config.MaxFileIndexWidth { 147 c.FileIndexWidth = config.DefaultFileIndexWidth 148 } 149 if c.FlushConcurrency < minFlushConcurrency || c.FlushConcurrency > maxFlushConcurrency { 150 c.FlushConcurrency = defaultFlushConcurrency 151 } 152 153 return nil 154 } 155 156 func mergeConfig( 157 replicaConfig *config.ReplicaConfig, 158 urlParameters *urlConfig, 159 ) (*urlConfig, error) { 160 dest := &urlConfig{} 161 if replicaConfig.Sink != nil && replicaConfig.Sink.CloudStorageConfig != nil { 162 dest.WorkerCount = replicaConfig.Sink.CloudStorageConfig.WorkerCount 163 dest.FlushInterval = replicaConfig.Sink.CloudStorageConfig.FlushInterval 164 dest.FileSize = replicaConfig.Sink.CloudStorageConfig.FileSize 165 } 166 if err := mergo.Merge(dest, urlParameters, mergo.WithOverride); err != nil { 167 return nil, cerror.WrapError(cerror.ErrStorageSinkInvalidConfig, err) 168 } 169 return dest, nil 170 } 171 172 func getWorkerCount(values *urlConfig, workerCount *int) error { 173 if values.WorkerCount == nil { 174 return nil 175 } 176 177 c := *values.WorkerCount 178 if c <= 0 { 179 return cerror.WrapError(cerror.ErrStorageSinkInvalidConfig, 180 fmt.Errorf("invalid worker-count %d, it must be greater than 0", c)) 181 } 182 if c > maxWorkerCount { 183 log.Warn("worker-count is too large", 184 zap.Int("original", c), zap.Int("override", maxWorkerCount)) 185 c = maxWorkerCount 186 } 187 188 *workerCount = c 189 return nil 190 } 191 192 func getFlushInterval(values *urlConfig, flushInterval *time.Duration) error { 193 if values.FlushInterval == nil || len(*values.FlushInterval) == 0 { 194 return nil 195 } 196 197 d, err := time.ParseDuration(*values.FlushInterval) 198 if err != nil { 199 return cerror.WrapError(cerror.ErrStorageSinkInvalidConfig, err) 200 } 201 202 if d > maxFlushInterval { 203 log.Warn("flush-interval is too large", zap.Duration("original", d), 204 zap.Duration("override", maxFlushInterval)) 205 d = maxFlushInterval 206 } 207 if d < minFlushInterval { 208 log.Warn("flush-interval is too small", zap.Duration("original", d), 209 zap.Duration("override", minFlushInterval)) 210 d = minFlushInterval 211 } 212 213 *flushInterval = d 214 return nil 215 } 216 217 func getFileSize(values *urlConfig, fileSize *int) error { 218 if values.FileSize == nil { 219 return nil 220 } 221 222 sz := *values.FileSize 223 if sz > maxFileSize { 224 log.Warn("file-size is too large", 225 zap.Int("original", sz), zap.Int("override", maxFileSize)) 226 sz = maxFileSize 227 } 228 if sz < minFileSize { 229 log.Warn("file-size is too small", 230 zap.Int("original", sz), zap.Int("override", minFileSize)) 231 sz = minFileSize 232 } 233 *fileSize = sz 234 return nil 235 }