github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/cdc/sink/codec/schema_registry.go (about) 1 // Copyright 2020 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package codec 15 16 import ( 17 "bytes" 18 "context" 19 "encoding/json" 20 "io/ioutil" 21 "net/http" 22 "net/url" 23 "regexp" 24 "strings" 25 "sync" 26 "time" 27 28 "github.com/cenkalti/backoff" 29 "github.com/linkedin/goavro/v2" 30 "github.com/pingcap/errors" 31 "github.com/pingcap/log" 32 "github.com/pingcap/ticdc/cdc/model" 33 cerror "github.com/pingcap/ticdc/pkg/errors" 34 "github.com/pingcap/ticdc/pkg/httputil" 35 "github.com/pingcap/ticdc/pkg/security" 36 "go.uber.org/zap" 37 ) 38 39 // AvroSchemaManager is used to register Avro Schemas to the Registry server, 40 // look up local cache according to the table's name, and fetch from the Registry 41 // in case the local cache entry is missing. 
42 type AvroSchemaManager struct { 43 registryURL string 44 subjectSuffix string 45 46 credential *security.Credential 47 48 cacheRWLock sync.RWMutex 49 cache map[string]*schemaCacheEntry 50 } 51 52 type schemaCacheEntry struct { 53 tiSchemaID uint64 54 registryID int 55 codec *goavro.Codec 56 } 57 58 type registerRequest struct { 59 Schema string `json:"schema"` 60 // Commented out for compatibility with Confluent 5.4.x 61 // SchemaType string `json:"schemaType"` 62 } 63 64 type registerResponse struct { 65 ID int `json:"id"` 66 } 67 68 type lookupResponse struct { 69 Name string `json:"name"` 70 RegistryID int `json:"id"` 71 Schema string `json:"schema"` 72 } 73 74 // NewAvroSchemaManager creates a new AvroSchemaManager 75 func NewAvroSchemaManager( 76 ctx context.Context, credential *security.Credential, registryURL string, subjectSuffix string, 77 ) (*AvroSchemaManager, error) { 78 registryURL = strings.TrimRight(registryURL, "/") 79 // Test connectivity to the Schema Registry 80 req, err := http.NewRequestWithContext(ctx, "GET", registryURL, nil) 81 if err != nil { 82 return nil, cerror.WrapError(cerror.ErrAvroSchemaAPIError, err) 83 } 84 httpCli, err := httputil.NewClient(credential) 85 if err != nil { 86 return nil, errors.Trace(err) 87 } 88 resp, err := httpCli.Do(req) 89 if err != nil { 90 return nil, errors.Annotate( 91 cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "Test connection to Schema Registry failed") 92 } 93 defer resp.Body.Close() 94 95 text, err := ioutil.ReadAll(resp.Body) 96 if err != nil { 97 return nil, errors.Annotate( 98 cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "Reading response from Schema Registry failed") 99 } 100 101 if string(text[:]) != "{}" { 102 return nil, cerror.ErrAvroSchemaAPIError.GenWithStack("Unexpected response from Schema Registry") 103 } 104 105 log.Info("Successfully tested connectivity to Schema Registry", zap.String("registryURL", registryURL)) 106 107 return &AvroSchemaManager{ 108 registryURL: 
registryURL, 109 cache: make(map[string]*schemaCacheEntry, 1), 110 subjectSuffix: subjectSuffix, 111 credential: credential, 112 }, nil 113 } 114 115 var regexRemoveSpaces = regexp.MustCompile(`\s`) 116 117 // Register the latest schema for a table to the Registry, by passing in a Codec 118 // Returns the Schema's ID and err 119 func (m *AvroSchemaManager) Register(ctx context.Context, tableName model.TableName, codec *goavro.Codec) (int, error) { 120 // The Schema Registry expects the JSON to be without newline characters 121 reqBody := registerRequest{ 122 Schema: regexRemoveSpaces.ReplaceAllString(codec.Schema(), ""), 123 // Commented out for compatibility with Confluent 5.4.x 124 // SchemaType: "AVRO", 125 } 126 payload, err := json.Marshal(&reqBody) 127 if err != nil { 128 return 0, errors.Annotate( 129 cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "Could not marshal request to the Registry") 130 } 131 uri := m.registryURL + "/subjects/" + url.QueryEscape(m.tableNameToSchemaSubject(tableName)) + "/versions" 132 log.Debug("Registering schema", zap.String("uri", uri), zap.ByteString("payload", payload)) 133 134 req, err := http.NewRequestWithContext(ctx, "POST", uri, bytes.NewReader(payload)) 135 if err != nil { 136 return 0, cerror.ErrAvroSchemaAPIError.GenWithStackByArgs() 137 } 138 req.Header.Add("Accept", "application/vnd.schemaregistry.v1+json") 139 resp, err := httpRetry(ctx, m.credential, req, false) 140 if err != nil { 141 return 0, err 142 } 143 defer resp.Body.Close() 144 145 body, err := ioutil.ReadAll(resp.Body) 146 if err != nil { 147 return 0, errors.Annotate(err, "Failed to read response from Registry") 148 } 149 150 if resp.StatusCode != 200 { 151 log.Warn("Failed to register schema to the Registry, HTTP error", 152 zap.Int("status", resp.StatusCode), 153 zap.String("uri", uri), 154 zap.ByteString("requestBody", payload), 155 zap.ByteString("responseBody", body)) 156 return 0, cerror.ErrAvroSchemaAPIError.GenWithStack("Failed to register 
schema to the Registry, HTTP error") 157 } 158 159 var jsonResp registerResponse 160 err = json.Unmarshal(body, &jsonResp) 161 162 if err != nil { 163 return 0, errors.Annotate( 164 cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "Failed to parse result from Registry") 165 } 166 167 if jsonResp.ID == 0 { 168 return 0, cerror.ErrAvroSchemaAPIError.GenWithStack("Illegal schema ID returned from Registry %d", jsonResp.ID) 169 } 170 171 log.Info("Registered schema successfully", 172 zap.Int("id", jsonResp.ID), 173 zap.String("uri", uri), 174 zap.ByteString("body", body)) 175 176 return jsonResp.ID, nil 177 } 178 179 // Lookup the latest schema and the Registry designated ID for that schema. 180 // TiSchemaId is only used to trigger fetching from the Registry server. 181 // Calling this method with a tiSchemaID other than that used last time will invariably trigger a RESTful request to the Registry. 182 // Returns (codec, registry schema ID, error) 183 // NOT USED for now, reserved for future use. 
184 func (m *AvroSchemaManager) Lookup(ctx context.Context, tableName model.TableName, tiSchemaID uint64) (*goavro.Codec, int, error) { 185 key := m.tableNameToSchemaSubject(tableName) 186 m.cacheRWLock.RLock() 187 if entry, exists := m.cache[key]; exists && entry.tiSchemaID == tiSchemaID { 188 log.Info("Avro schema lookup cache hit", 189 zap.String("key", key), 190 zap.Uint64("tiSchemaID", tiSchemaID), 191 zap.Int("registryID", entry.registryID)) 192 m.cacheRWLock.RUnlock() 193 return entry.codec, entry.registryID, nil 194 } 195 m.cacheRWLock.RUnlock() 196 197 log.Info("Avro schema lookup cache miss", 198 zap.String("key", key), 199 zap.Uint64("tiSchemaID", tiSchemaID)) 200 201 uri := m.registryURL + "/subjects/" + url.QueryEscape(m.tableNameToSchemaSubject(tableName)) + "/versions/latest" 202 log.Debug("Querying for latest schema", zap.String("uri", uri)) 203 204 req, err := http.NewRequestWithContext(ctx, "GET", uri, nil) 205 if err != nil { 206 return nil, 0, errors.Annotate( 207 cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "Error constructing request for Registry lookup") 208 } 209 req.Header.Add("Accept", "application/vnd.schemaregistry.v1+json, application/vnd.schemaregistry+json, application/json") 210 211 resp, err := httpRetry(ctx, m.credential, req, true) 212 if err != nil { 213 return nil, 0, err 214 } 215 defer resp.Body.Close() 216 217 body, err := ioutil.ReadAll(resp.Body) 218 if err != nil { 219 return nil, 0, errors.Annotate( 220 cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "Failed to read response from Registry") 221 } 222 223 if resp.StatusCode != 200 && resp.StatusCode != 404 { 224 log.Warn("Failed to query schema from the Registry, HTTP error", 225 zap.Int("status", resp.StatusCode), 226 zap.String("uri", uri), 227 228 zap.ByteString("responseBody", body)) 229 return nil, 0, cerror.ErrAvroSchemaAPIError.GenWithStack("Failed to query schema from the Registry, HTTP error") 230 } 231 232 if resp.StatusCode == 404 { 233 
log.Warn("Specified schema not found in Registry", 234 zap.String("key", key), 235 zap.Uint64("tiSchemaID", tiSchemaID)) 236 237 return nil, 0, cerror.ErrAvroSchemaAPIError.GenWithStackByArgs("Schema not found in Registry") 238 } 239 240 var jsonResp lookupResponse 241 err = json.Unmarshal(body, &jsonResp) 242 if err != nil { 243 return nil, 0, errors.Annotate( 244 cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "Failed to parse result from Registry") 245 } 246 247 cacheEntry := new(schemaCacheEntry) 248 cacheEntry.codec, err = goavro.NewCodec(jsonResp.Schema) 249 if err != nil { 250 return nil, 0, errors.Annotate( 251 cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "Creating Avro codec failed") 252 } 253 cacheEntry.registryID = jsonResp.RegistryID 254 cacheEntry.tiSchemaID = tiSchemaID 255 256 m.cacheRWLock.Lock() 257 m.cache[m.tableNameToSchemaSubject(tableName)] = cacheEntry 258 m.cacheRWLock.Unlock() 259 260 log.Info("Avro schema lookup successful with cache miss", 261 zap.Uint64("tiSchemaID", cacheEntry.tiSchemaID), 262 zap.Int("registryID", cacheEntry.registryID), 263 zap.String("schema", cacheEntry.codec.Schema())) 264 265 return cacheEntry.codec, cacheEntry.registryID, nil 266 } 267 268 // SchemaGenerator represents a function that returns an Avro schema in JSON. 269 // Used for lazy evaluation 270 type SchemaGenerator func() (string, error) 271 272 // GetCachedOrRegister checks if the suitable Avro schema has been cached. 273 // If not, a new schema is generated, registered and cached. 
274 func (m *AvroSchemaManager) GetCachedOrRegister(ctx context.Context, tableName model.TableName, tiSchemaID uint64, schemaGen SchemaGenerator) (*goavro.Codec, int, error) { 275 key := m.tableNameToSchemaSubject(tableName) 276 m.cacheRWLock.RLock() 277 if entry, exists := m.cache[key]; exists && entry.tiSchemaID == tiSchemaID { 278 log.Debug("Avro schema GetCachedOrRegister cache hit", 279 zap.String("key", key), 280 zap.Uint64("tiSchemaID", tiSchemaID), 281 zap.Int("registryID", entry.registryID)) 282 m.cacheRWLock.RUnlock() 283 return entry.codec, entry.registryID, nil 284 } 285 m.cacheRWLock.RUnlock() 286 287 log.Info("Avro schema lookup cache miss", 288 zap.String("key", key), 289 zap.Uint64("tiSchemaID", tiSchemaID)) 290 291 schema, err := schemaGen() 292 if err != nil { 293 return nil, 0, errors.Annotate(err, "GetCachedOrRegister: SchemaGen failed") 294 } 295 296 codec, err := goavro.NewCodec(schema) 297 if err != nil { 298 return nil, 0, errors.Annotate( 299 cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "GetCachedOrRegister: Could not make goavro codec") 300 } 301 302 id, err := m.Register(ctx, tableName, codec) 303 if err != nil { 304 return nil, 0, errors.Annotate( 305 cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "GetCachedOrRegister: Could not register schema") 306 } 307 308 cacheEntry := new(schemaCacheEntry) 309 cacheEntry.codec = codec 310 cacheEntry.registryID = id 311 cacheEntry.tiSchemaID = tiSchemaID 312 313 m.cacheRWLock.Lock() 314 m.cache[m.tableNameToSchemaSubject(tableName)] = cacheEntry 315 m.cacheRWLock.Unlock() 316 317 log.Info("Avro schema GetCachedOrRegister successful with cache miss", 318 zap.Uint64("tiSchemaID", cacheEntry.tiSchemaID), 319 zap.Int("registryID", cacheEntry.registryID), 320 zap.String("schema", cacheEntry.codec.Schema())) 321 322 return codec, id, nil 323 } 324 325 // ClearRegistry clears the Registry subject for the given table. Should be idempotent. 326 // Exported for testing. 
327 // NOT USED for now, reserved for future use. 328 func (m *AvroSchemaManager) ClearRegistry(ctx context.Context, tableName model.TableName) error { 329 uri := m.registryURL + "/subjects/" + url.QueryEscape(m.tableNameToSchemaSubject(tableName)) 330 req, err := http.NewRequestWithContext(ctx, "DELETE", uri, nil) 331 if err != nil { 332 log.Error("Could not construct request for clearRegistry", zap.String("uri", uri)) 333 return cerror.WrapError(cerror.ErrAvroSchemaAPIError, err) 334 } 335 req.Header.Add("Accept", "application/vnd.schemaregistry.v1+json, application/vnd.schemaregistry+json, application/json") 336 resp, err := httpRetry(ctx, m.credential, req, true) 337 if err != nil { 338 return err 339 } 340 341 if resp.StatusCode == 200 { 342 log.Info("Clearing Registry successful") 343 return nil 344 } 345 346 if resp.StatusCode == 404 { 347 log.Info("Registry already cleaned") 348 return nil 349 } 350 351 log.Error("Error when clearing Registry", zap.Int("status", resp.StatusCode)) 352 return cerror.ErrAvroSchemaAPIError.GenWithStack("Error when clearing Registry, status = %d", resp.StatusCode) 353 } 354 355 func httpRetry(ctx context.Context, credential *security.Credential, r *http.Request, allow404 bool) (*http.Response, error) { 356 var ( 357 err error 358 resp *http.Response 359 data []byte 360 ) 361 362 expBackoff := backoff.NewExponentialBackOff() 363 expBackoff.MaxInterval = time.Second * 30 364 httpCli, err := httputil.NewClient(credential) 365 366 if r.Body != nil { 367 data, err = ioutil.ReadAll(r.Body) 368 _ = r.Body.Close() 369 } 370 371 if err != nil { 372 return nil, cerror.WrapError(cerror.ErrAvroSchemaAPIError, err) 373 } 374 for { 375 if data != nil { 376 r.Body = ioutil.NopCloser(bytes.NewReader(data)) 377 } 378 resp, err = httpCli.Do(r) 379 380 if err != nil { 381 log.Warn("HTTP request failed", zap.String("msg", err.Error())) 382 goto checkCtx 383 } 384 385 if resp.StatusCode >= 200 && resp.StatusCode < 300 || (resp.StatusCode == 404 && 
allow404) { 386 break 387 } 388 log.Warn("HTTP server returned with error", zap.Int("status", resp.StatusCode)) 389 _ = resp.Body.Close() 390 391 checkCtx: 392 select { 393 case <-ctx.Done(): 394 return nil, errors.New("HTTP retry cancelled") 395 396 default: 397 } 398 399 time.Sleep(expBackoff.NextBackOff()) 400 } 401 402 return resp, nil 403 } 404 405 func (m *AvroSchemaManager) tableNameToSchemaSubject(tableName model.TableName) string { 406 // We should guarantee unique names for subjects 407 return tableName.Schema + "_" + tableName.Table + m.subjectSuffix 408 }