github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/cdc/sink/codec/schema_registry.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package codec
    15  
    16  import (
    17  	"bytes"
    18  	"context"
    19  	"encoding/json"
    20  	"io/ioutil"
    21  	"net/http"
    22  	"net/url"
    23  	"regexp"
    24  	"strings"
    25  	"sync"
    26  	"time"
    27  
    28  	"github.com/cenkalti/backoff"
    29  	"github.com/linkedin/goavro/v2"
    30  	"github.com/pingcap/errors"
    31  	"github.com/pingcap/log"
    32  	"github.com/pingcap/ticdc/cdc/model"
    33  	cerror "github.com/pingcap/ticdc/pkg/errors"
    34  	"github.com/pingcap/ticdc/pkg/httputil"
    35  	"github.com/pingcap/ticdc/pkg/security"
    36  	"go.uber.org/zap"
    37  )
    38  
// AvroSchemaManager is used to register Avro Schemas to the Registry server,
// look up local cache according to the table's name, and fetch from the Registry
// in case the local cache entry is missing.
type AvroSchemaManager struct {
	// registryURL is the base URL of the Schema Registry; NewAvroSchemaManager
	// stores it with trailing slashes removed.
	registryURL   string
	// subjectSuffix is appended to "<schema>_<table>" when a subject name is built.
	subjectSuffix string

	// credential is handed to httputil.NewClient for requests to the Registry.
	credential *security.Credential

	// cacheRWLock guards cache.
	cacheRWLock sync.RWMutex
	// cache maps a subject name to the entry most recently stored for it.
	cache       map[string]*schemaCacheEntry
}
    51  
// schemaCacheEntry is the value stored in AvroSchemaManager.cache for one subject.
type schemaCacheEntry struct {
	// tiSchemaID is the caller-supplied ID the entry was stored under; a lookup
	// with a different tiSchemaID is treated as a cache miss.
	tiSchemaID uint64
	// registryID is the schema ID reported by the Registry.
	registryID int
	// codec is the goavro codec built from the schema text.
	codec      *goavro.Codec
}
    57  
// registerRequest is the JSON body that Register POSTs to the Registry.
type registerRequest struct {
	// Schema is the Avro schema text, sent with all whitespace stripped.
	Schema string `json:"schema"`
	// Commented out for compatibility with Confluent 5.4.x
	// SchemaType string `json:"schemaType"`
}
    63  
// registerResponse is the JSON body Register decodes from the Registry's reply.
type registerResponse struct {
	// ID is the schema ID in the reply; Register rejects a value of 0.
	ID int `json:"id"`
}
    67  
// lookupResponse is the JSON body Lookup decodes from the Registry's reply to a
// "latest version" query.
type lookupResponse struct {
	// Name is decoded from the "name" field; Lookup does not read it.
	Name       string `json:"name"`
	// RegistryID is the schema ID decoded from the "id" field.
	RegistryID int    `json:"id"`
	// Schema is the schema text that Lookup turns into a goavro codec.
	Schema     string `json:"schema"`
}
    73  
    74  // NewAvroSchemaManager creates a new AvroSchemaManager
    75  func NewAvroSchemaManager(
    76  	ctx context.Context, credential *security.Credential, registryURL string, subjectSuffix string,
    77  ) (*AvroSchemaManager, error) {
    78  	registryURL = strings.TrimRight(registryURL, "/")
    79  	// Test connectivity to the Schema Registry
    80  	req, err := http.NewRequestWithContext(ctx, "GET", registryURL, nil)
    81  	if err != nil {
    82  		return nil, cerror.WrapError(cerror.ErrAvroSchemaAPIError, err)
    83  	}
    84  	httpCli, err := httputil.NewClient(credential)
    85  	if err != nil {
    86  		return nil, errors.Trace(err)
    87  	}
    88  	resp, err := httpCli.Do(req)
    89  	if err != nil {
    90  		return nil, errors.Annotate(
    91  			cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "Test connection to Schema Registry failed")
    92  	}
    93  	defer resp.Body.Close()
    94  
    95  	text, err := ioutil.ReadAll(resp.Body)
    96  	if err != nil {
    97  		return nil, errors.Annotate(
    98  			cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "Reading response from Schema Registry failed")
    99  	}
   100  
   101  	if string(text[:]) != "{}" {
   102  		return nil, cerror.ErrAvroSchemaAPIError.GenWithStack("Unexpected response from Schema Registry")
   103  	}
   104  
   105  	log.Info("Successfully tested connectivity to Schema Registry", zap.String("registryURL", registryURL))
   106  
   107  	return &AvroSchemaManager{
   108  		registryURL:   registryURL,
   109  		cache:         make(map[string]*schemaCacheEntry, 1),
   110  		subjectSuffix: subjectSuffix,
   111  		credential:    credential,
   112  	}, nil
   113  }
   114  
// regexRemoveSpaces matches a single whitespace character. Register uses it to
// strip every whitespace character from a schema's text before sending it.
var regexRemoveSpaces = regexp.MustCompile(`\s`)
   116  
   117  // Register the latest schema for a table to the Registry, by passing in a Codec
   118  // Returns the Schema's ID and err
   119  func (m *AvroSchemaManager) Register(ctx context.Context, tableName model.TableName, codec *goavro.Codec) (int, error) {
   120  	// The Schema Registry expects the JSON to be without newline characters
   121  	reqBody := registerRequest{
   122  		Schema: regexRemoveSpaces.ReplaceAllString(codec.Schema(), ""),
   123  		// Commented out for compatibility with Confluent 5.4.x
   124  		// SchemaType: "AVRO",
   125  	}
   126  	payload, err := json.Marshal(&reqBody)
   127  	if err != nil {
   128  		return 0, errors.Annotate(
   129  			cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "Could not marshal request to the Registry")
   130  	}
   131  	uri := m.registryURL + "/subjects/" + url.QueryEscape(m.tableNameToSchemaSubject(tableName)) + "/versions"
   132  	log.Debug("Registering schema", zap.String("uri", uri), zap.ByteString("payload", payload))
   133  
   134  	req, err := http.NewRequestWithContext(ctx, "POST", uri, bytes.NewReader(payload))
   135  	if err != nil {
   136  		return 0, cerror.ErrAvroSchemaAPIError.GenWithStackByArgs()
   137  	}
   138  	req.Header.Add("Accept", "application/vnd.schemaregistry.v1+json")
   139  	resp, err := httpRetry(ctx, m.credential, req, false)
   140  	if err != nil {
   141  		return 0, err
   142  	}
   143  	defer resp.Body.Close()
   144  
   145  	body, err := ioutil.ReadAll(resp.Body)
   146  	if err != nil {
   147  		return 0, errors.Annotate(err, "Failed to read response from Registry")
   148  	}
   149  
   150  	if resp.StatusCode != 200 {
   151  		log.Warn("Failed to register schema to the Registry, HTTP error",
   152  			zap.Int("status", resp.StatusCode),
   153  			zap.String("uri", uri),
   154  			zap.ByteString("requestBody", payload),
   155  			zap.ByteString("responseBody", body))
   156  		return 0, cerror.ErrAvroSchemaAPIError.GenWithStack("Failed to register schema to the Registry, HTTP error")
   157  	}
   158  
   159  	var jsonResp registerResponse
   160  	err = json.Unmarshal(body, &jsonResp)
   161  
   162  	if err != nil {
   163  		return 0, errors.Annotate(
   164  			cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "Failed to parse result from Registry")
   165  	}
   166  
   167  	if jsonResp.ID == 0 {
   168  		return 0, cerror.ErrAvroSchemaAPIError.GenWithStack("Illegal schema ID returned from Registry %d", jsonResp.ID)
   169  	}
   170  
   171  	log.Info("Registered schema successfully",
   172  		zap.Int("id", jsonResp.ID),
   173  		zap.String("uri", uri),
   174  		zap.ByteString("body", body))
   175  
   176  	return jsonResp.ID, nil
   177  }
   178  
   179  // Lookup the latest schema and the Registry designated ID for that schema.
   180  // TiSchemaId is only used to trigger fetching from the Registry server.
   181  // Calling this method with a tiSchemaID other than that used last time will invariably trigger a RESTful request to the Registry.
   182  // Returns (codec, registry schema ID, error)
   183  // NOT USED for now, reserved for future use.
   184  func (m *AvroSchemaManager) Lookup(ctx context.Context, tableName model.TableName, tiSchemaID uint64) (*goavro.Codec, int, error) {
   185  	key := m.tableNameToSchemaSubject(tableName)
   186  	m.cacheRWLock.RLock()
   187  	if entry, exists := m.cache[key]; exists && entry.tiSchemaID == tiSchemaID {
   188  		log.Info("Avro schema lookup cache hit",
   189  			zap.String("key", key),
   190  			zap.Uint64("tiSchemaID", tiSchemaID),
   191  			zap.Int("registryID", entry.registryID))
   192  		m.cacheRWLock.RUnlock()
   193  		return entry.codec, entry.registryID, nil
   194  	}
   195  	m.cacheRWLock.RUnlock()
   196  
   197  	log.Info("Avro schema lookup cache miss",
   198  		zap.String("key", key),
   199  		zap.Uint64("tiSchemaID", tiSchemaID))
   200  
   201  	uri := m.registryURL + "/subjects/" + url.QueryEscape(m.tableNameToSchemaSubject(tableName)) + "/versions/latest"
   202  	log.Debug("Querying for latest schema", zap.String("uri", uri))
   203  
   204  	req, err := http.NewRequestWithContext(ctx, "GET", uri, nil)
   205  	if err != nil {
   206  		return nil, 0, errors.Annotate(
   207  			cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "Error constructing request for Registry lookup")
   208  	}
   209  	req.Header.Add("Accept", "application/vnd.schemaregistry.v1+json, application/vnd.schemaregistry+json, application/json")
   210  
   211  	resp, err := httpRetry(ctx, m.credential, req, true)
   212  	if err != nil {
   213  		return nil, 0, err
   214  	}
   215  	defer resp.Body.Close()
   216  
   217  	body, err := ioutil.ReadAll(resp.Body)
   218  	if err != nil {
   219  		return nil, 0, errors.Annotate(
   220  			cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "Failed to read response from Registry")
   221  	}
   222  
   223  	if resp.StatusCode != 200 && resp.StatusCode != 404 {
   224  		log.Warn("Failed to query schema from the Registry, HTTP error",
   225  			zap.Int("status", resp.StatusCode),
   226  			zap.String("uri", uri),
   227  
   228  			zap.ByteString("responseBody", body))
   229  		return nil, 0, cerror.ErrAvroSchemaAPIError.GenWithStack("Failed to query schema from the Registry, HTTP error")
   230  	}
   231  
   232  	if resp.StatusCode == 404 {
   233  		log.Warn("Specified schema not found in Registry",
   234  			zap.String("key", key),
   235  			zap.Uint64("tiSchemaID", tiSchemaID))
   236  
   237  		return nil, 0, cerror.ErrAvroSchemaAPIError.GenWithStackByArgs("Schema not found in Registry")
   238  	}
   239  
   240  	var jsonResp lookupResponse
   241  	err = json.Unmarshal(body, &jsonResp)
   242  	if err != nil {
   243  		return nil, 0, errors.Annotate(
   244  			cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "Failed to parse result from Registry")
   245  	}
   246  
   247  	cacheEntry := new(schemaCacheEntry)
   248  	cacheEntry.codec, err = goavro.NewCodec(jsonResp.Schema)
   249  	if err != nil {
   250  		return nil, 0, errors.Annotate(
   251  			cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "Creating Avro codec failed")
   252  	}
   253  	cacheEntry.registryID = jsonResp.RegistryID
   254  	cacheEntry.tiSchemaID = tiSchemaID
   255  
   256  	m.cacheRWLock.Lock()
   257  	m.cache[m.tableNameToSchemaSubject(tableName)] = cacheEntry
   258  	m.cacheRWLock.Unlock()
   259  
   260  	log.Info("Avro schema lookup successful with cache miss",
   261  		zap.Uint64("tiSchemaID", cacheEntry.tiSchemaID),
   262  		zap.Int("registryID", cacheEntry.registryID),
   263  		zap.String("schema", cacheEntry.codec.Schema()))
   264  
   265  	return cacheEntry.codec, cacheEntry.registryID, nil
   266  }
   267  
// SchemaGenerator represents a function that returns an Avro schema in JSON.
// Used for lazy evaluation: GetCachedOrRegister only calls it on a cache miss.
type SchemaGenerator func() (string, error)
   271  
   272  // GetCachedOrRegister checks if the suitable Avro schema has been cached.
   273  // If not, a new schema is generated, registered and cached.
   274  func (m *AvroSchemaManager) GetCachedOrRegister(ctx context.Context, tableName model.TableName, tiSchemaID uint64, schemaGen SchemaGenerator) (*goavro.Codec, int, error) {
   275  	key := m.tableNameToSchemaSubject(tableName)
   276  	m.cacheRWLock.RLock()
   277  	if entry, exists := m.cache[key]; exists && entry.tiSchemaID == tiSchemaID {
   278  		log.Debug("Avro schema GetCachedOrRegister cache hit",
   279  			zap.String("key", key),
   280  			zap.Uint64("tiSchemaID", tiSchemaID),
   281  			zap.Int("registryID", entry.registryID))
   282  		m.cacheRWLock.RUnlock()
   283  		return entry.codec, entry.registryID, nil
   284  	}
   285  	m.cacheRWLock.RUnlock()
   286  
   287  	log.Info("Avro schema lookup cache miss",
   288  		zap.String("key", key),
   289  		zap.Uint64("tiSchemaID", tiSchemaID))
   290  
   291  	schema, err := schemaGen()
   292  	if err != nil {
   293  		return nil, 0, errors.Annotate(err, "GetCachedOrRegister: SchemaGen failed")
   294  	}
   295  
   296  	codec, err := goavro.NewCodec(schema)
   297  	if err != nil {
   298  		return nil, 0, errors.Annotate(
   299  			cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "GetCachedOrRegister: Could not make goavro codec")
   300  	}
   301  
   302  	id, err := m.Register(ctx, tableName, codec)
   303  	if err != nil {
   304  		return nil, 0, errors.Annotate(
   305  			cerror.WrapError(cerror.ErrAvroSchemaAPIError, err), "GetCachedOrRegister: Could not register schema")
   306  	}
   307  
   308  	cacheEntry := new(schemaCacheEntry)
   309  	cacheEntry.codec = codec
   310  	cacheEntry.registryID = id
   311  	cacheEntry.tiSchemaID = tiSchemaID
   312  
   313  	m.cacheRWLock.Lock()
   314  	m.cache[m.tableNameToSchemaSubject(tableName)] = cacheEntry
   315  	m.cacheRWLock.Unlock()
   316  
   317  	log.Info("Avro schema GetCachedOrRegister successful with cache miss",
   318  		zap.Uint64("tiSchemaID", cacheEntry.tiSchemaID),
   319  		zap.Int("registryID", cacheEntry.registryID),
   320  		zap.String("schema", cacheEntry.codec.Schema()))
   321  
   322  	return codec, id, nil
   323  }
   324  
   325  // ClearRegistry clears the Registry subject for the given table. Should be idempotent.
   326  // Exported for testing.
   327  // NOT USED for now, reserved for future use.
   328  func (m *AvroSchemaManager) ClearRegistry(ctx context.Context, tableName model.TableName) error {
   329  	uri := m.registryURL + "/subjects/" + url.QueryEscape(m.tableNameToSchemaSubject(tableName))
   330  	req, err := http.NewRequestWithContext(ctx, "DELETE", uri, nil)
   331  	if err != nil {
   332  		log.Error("Could not construct request for clearRegistry", zap.String("uri", uri))
   333  		return cerror.WrapError(cerror.ErrAvroSchemaAPIError, err)
   334  	}
   335  	req.Header.Add("Accept", "application/vnd.schemaregistry.v1+json, application/vnd.schemaregistry+json, application/json")
   336  	resp, err := httpRetry(ctx, m.credential, req, true)
   337  	if err != nil {
   338  		return err
   339  	}
   340  
   341  	if resp.StatusCode == 200 {
   342  		log.Info("Clearing Registry successful")
   343  		return nil
   344  	}
   345  
   346  	if resp.StatusCode == 404 {
   347  		log.Info("Registry already cleaned")
   348  		return nil
   349  	}
   350  
   351  	log.Error("Error when clearing Registry", zap.Int("status", resp.StatusCode))
   352  	return cerror.ErrAvroSchemaAPIError.GenWithStack("Error when clearing Registry, status = %d", resp.StatusCode)
   353  }
   354  
   355  func httpRetry(ctx context.Context, credential *security.Credential, r *http.Request, allow404 bool) (*http.Response, error) {
   356  	var (
   357  		err  error
   358  		resp *http.Response
   359  		data []byte
   360  	)
   361  
   362  	expBackoff := backoff.NewExponentialBackOff()
   363  	expBackoff.MaxInterval = time.Second * 30
   364  	httpCli, err := httputil.NewClient(credential)
   365  
   366  	if r.Body != nil {
   367  		data, err = ioutil.ReadAll(r.Body)
   368  		_ = r.Body.Close()
   369  	}
   370  
   371  	if err != nil {
   372  		return nil, cerror.WrapError(cerror.ErrAvroSchemaAPIError, err)
   373  	}
   374  	for {
   375  		if data != nil {
   376  			r.Body = ioutil.NopCloser(bytes.NewReader(data))
   377  		}
   378  		resp, err = httpCli.Do(r)
   379  
   380  		if err != nil {
   381  			log.Warn("HTTP request failed", zap.String("msg", err.Error()))
   382  			goto checkCtx
   383  		}
   384  
   385  		if resp.StatusCode >= 200 && resp.StatusCode < 300 || (resp.StatusCode == 404 && allow404) {
   386  			break
   387  		}
   388  		log.Warn("HTTP server returned with error", zap.Int("status", resp.StatusCode))
   389  		_ = resp.Body.Close()
   390  
   391  	checkCtx:
   392  		select {
   393  		case <-ctx.Done():
   394  			return nil, errors.New("HTTP retry cancelled")
   395  
   396  		default:
   397  		}
   398  
   399  		time.Sleep(expBackoff.NextBackOff())
   400  	}
   401  
   402  	return resp, nil
   403  }
   404  
   405  func (m *AvroSchemaManager) tableNameToSchemaSubject(tableName model.TableName) string {
   406  	// We should guarantee unique names for subjects
   407  	return tableName.Schema + "_" + tableName.Table + m.subjectSuffix
   408  }