github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/pdutil/api_client.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package pdutil
    15  
    16  import (
    17  	"bytes"
    18  	"context"
    19  	"encoding/hex"
    20  	"encoding/json"
    21  	"fmt"
    22  	"io"
    23  	"net/http"
    24  	"net/url"
    25  	"strconv"
    26  	"strings"
    27  	"time"
    28  
    29  	"github.com/pingcap/errors"
    30  	"github.com/pingcap/log"
    31  	"github.com/pingcap/tiflow/cdc/processor/tablepb"
    32  	cerror "github.com/pingcap/tiflow/pkg/errors"
    33  	"github.com/pingcap/tiflow/pkg/httputil"
    34  	"github.com/pingcap/tiflow/pkg/retry"
    35  	"github.com/pingcap/tiflow/pkg/security"
    36  	"github.com/pingcap/tiflow/pkg/spanz"
    37  	pd "github.com/tikv/pd/client"
    38  	"go.uber.org/zap"
    39  )
    40  
const (
	// regionLabelPrefix is the PD HTTP path that patchMetaLabel sends the
	// region label rules to.
	regionLabelPrefix = "/pd/api/v1/config/region-label/rules"
	// gcServiceSafePointURL is the PD HTTP path queried by
	// listGcServiceSafePoint.
	gcServiceSafePointURL = "/pd/api/v1/gc/safepoint"
	// healthyAPI is the PD HTTP path requested by Healthy.
	healthyAPI = "/pd/api/v1/health"
	// scanRegionAPI is the PD HTTP path used by scanRegions to list regions
	// within a key range.
	scanRegionAPI = "/pd/api/v1/regions/key"

	// addMetaJSON is the request body sent with the PATCH request to
	// regionLabelPrefix. It declares two key-range label rules, both labeled
	// `data-type=meta`, to keep metadata region isolated from the normal data
	// area.
	//
	// * `ticdc/meta`: from `6d00000000000000f8` to `6e00000000000000f8`,
	//   i.e. keys that start with "m" (0x6d).
	// * `ticdc/meta_tidb_ddl_job`: from
	//   `748000fffffffffffffe00000000000000f8` to
	//   `748000ffffffffffffff00000000000000f8`, the table prefix of
	//   `tidb_ddl_job` table, which has the table ID 281474976710654,
	//   see "github.com/pingcap/tidb/pkg/ddl.JobTableID"
	addMetaJSON = `{
		"sets": [
			{
				"id": "ticdc/meta",
				"labels": [
					{
						"key": "data-type",
						"value": "meta"
					}
				],
				"rule_type": "key-range",
				"data": [
					{
						"start_key": "6d00000000000000f8",
						"end_key": "6e00000000000000f8"
					}
				]
			},
			{
				"id": "ticdc/meta_tidb_ddl_job",
				"labels": [
					{
						"key": "data-type",
						"value": "meta"
					}
				],
				"rule_type": "key-range",
				"data": [
					{
						"start_key": "748000fffffffffffffe00000000000000f8",
						"end_key":   "748000ffffffffffffff00000000000000f8"
					}
				]
			}
		]
	}`
)
    91  
const (
	// defaultMaxRetry is the maximum number of tries passed to
	// retry.WithMaxTries by UpdateMetaLabel and ListGcServiceSafePoint.
	defaultMaxRetry = 3
	// defaultRequestTimeout bounds every single attempt made by
	// UpdateMetaLabel and ListGcServiceSafePoint.
	defaultRequestTimeout = 5 * time.Second
)
    96  
// PDAPIClient is client for PD http API.
type PDAPIClient interface {
	// UpdateMetaLabel patches the meta-region label rules into PD.
	UpdateMetaLabel(ctx context.Context) error
	// ListGcServiceSafePoint lists the gc service safepoints known to PD.
	ListGcServiceSafePoint(ctx context.Context) (*ListServiceGCSafepoint, error)
	// CollectMemberEndpoints returns one client URL per PD member.
	CollectMemberEndpoints(ctx context.Context) ([]string, error)
	// Healthy sends a request to the health API of the given endpoint and
	// returns an error if that request fails.
	Healthy(ctx context.Context, endpoint string) error
	// ScanRegions returns the regions that cover the given span.
	ScanRegions(ctx context.Context, span tablepb.Span) ([]RegionInfo, error)
	// Close releases the resources held by the client.
	Close()
}
   106  
// pdAPIClient is the api client of Placement Driver, include grpc client and http client.
type pdAPIClient struct {
	// grpcClient is used to look up the PD leader URL and the member list.
	grpcClient pd.Client
	// httpClient issues the HTTP requests against the PD API.
	httpClient *httputil.Client
}
   112  
   113  // NewPDAPIClient create a new pdAPIClient.
   114  func NewPDAPIClient(pdClient pd.Client, conf *security.Credential) (PDAPIClient, error) {
   115  	dialClient, err := httputil.NewClient(conf)
   116  	if err != nil {
   117  		return nil, errors.Trace(err)
   118  	}
   119  	return &pdAPIClient{
   120  		grpcClient: pdClient,
   121  		httpClient: dialClient,
   122  	}, nil
   123  }
   124  
   125  // Close the pd api client, at the moment only close idle http connections if there is any.
   126  func (pc *pdAPIClient) Close() {
   127  	pc.httpClient.CloseIdleConnections()
   128  }
   129  
   130  // UpdateMetaLabel is a reentrant function that updates the meta-region label of upstream cluster.
   131  func (pc *pdAPIClient) UpdateMetaLabel(ctx context.Context) error {
   132  	err := retry.Do(ctx, func() error {
   133  		ctx, cancel := context.WithTimeout(ctx, defaultRequestTimeout)
   134  		defer cancel()
   135  
   136  		err := pc.patchMetaLabel(ctx)
   137  		if err != nil {
   138  			log.Error("Fail to add meta region label to PD", zap.Error(err))
   139  			return err
   140  		}
   141  
   142  		log.Info("Succeed to add meta region label to PD")
   143  		return nil
   144  	}, retry.WithMaxTries(defaultMaxRetry), retry.WithIsRetryableErr(func(err error) bool {
   145  		switch errors.Cause(err) {
   146  		case context.Canceled:
   147  			return false
   148  		}
   149  		return true
   150  	}))
   151  	return err
   152  }
   153  
   154  // NewTestRegionInfo creates a new RegionInfo for test purpose.
   155  func NewTestRegionInfo(regionID uint64, start, end []byte, writtenKeys uint64) RegionInfo {
   156  	return RegionInfo{
   157  		ID:          regionID,
   158  		StartKey:    hex.EncodeToString(start),
   159  		EndKey:      hex.EncodeToString(end),
   160  		WrittenKeys: writtenKeys,
   161  	}
   162  }
   163  
// RegionInfo records detail region info for api usage.
// NOTE: This type is a copy of github.com/tikv/pd/server/api.RegionInfo.
// To reduce dependency tree, we do not import the api package directly.
type RegionInfo struct {
	// ID is the region ID.
	ID uint64 `json:"id"`
	// StartKey is the hex-encoded start key of the region.
	StartKey string `json:"start_key"`
	// EndKey is the hex-encoded end key of the region.
	EndKey string `json:"end_key"`
	// WrittenKeys is decoded from the "written_keys" field of the response.
	// NOTE(review): presumably the number of keys written to the region as
	// reported by PD; confirm against the PD API.
	WrittenKeys uint64 `json:"written_keys"`
}
   173  
// RegionsInfo contains some regions with the detailed region info.
// NOTE: This type is a copy of github.com/tikv/pd/server/api.RegionsInfo.
// To reduce dependency tree, we do not import the api package directly.
type RegionsInfo struct {
	// Count is decoded from the "count" field of the response.
	Count int `json:"count"`
	// Regions holds the regions decoded from the "regions" field.
	Regions []RegionInfo `json:"regions"`
}
   181  
   182  // ScanRegions is a reentrant function that updates the meta-region label of upstream cluster.
   183  func (pc *pdAPIClient) ScanRegions(ctx context.Context, span tablepb.Span) ([]RegionInfo, error) {
   184  	scanLimit := 1024
   185  	endpoints, err := pc.CollectMemberEndpoints(ctx)
   186  	if err != nil {
   187  		log.Warn("fail to collec pd member endpoints")
   188  		return nil, errors.Trace(err)
   189  	}
   190  	return pc.scanRegions(ctx, span, endpoints, scanLimit)
   191  }
   192  
   193  func (pc *pdAPIClient) scanRegions(
   194  	ctx context.Context, span tablepb.Span, endpoints []string, scanLimit int,
   195  ) ([]RegionInfo, error) {
   196  	scan := func(endpoint string, startKey, endKey []byte) ([]RegionInfo, error) {
   197  		query := url.Values{}
   198  		query.Add("key", string(startKey))
   199  		query.Add("end_key", string(endKey))
   200  		query.Add("limit", strconv.Itoa(scanLimit))
   201  		u, _ := url.Parse(endpoint + scanRegionAPI)
   202  		u.RawQuery = query.Encode()
   203  		resp, err := pc.httpClient.Get(ctx, u.String())
   204  		if err != nil {
   205  			log.Warn("fail to scan regions",
   206  				zap.String("endpoint", endpoint), zap.Any("span", span))
   207  			return nil, errors.Trace(err)
   208  		}
   209  		defer resp.Body.Close()
   210  		data, err := io.ReadAll(resp.Body)
   211  		if err != nil {
   212  			log.Warn("fail to scan regions",
   213  				zap.String("endpoint", endpoint), zap.Any("span", span))
   214  			return nil, errors.Trace(err)
   215  		}
   216  		regions := &RegionsInfo{}
   217  		err = json.Unmarshal(data, regions)
   218  		if err != nil {
   219  			log.Warn("fail to scan regions",
   220  				zap.String("endpoint", endpoint), zap.Any("span", span))
   221  			return nil, errors.Trace(err)
   222  		}
   223  		return regions.Regions, nil
   224  	}
   225  
   226  	regions := []RegionInfo{}
   227  	startKey := span.StartKey
   228  	startKeyHex := strings.ToUpper(hex.EncodeToString(startKey))
   229  	isFirstStartKey := true
   230  	for spanz.EndCompare(startKey, span.EndKey) < 0 || (len(startKey) == 0 && isFirstStartKey) {
   231  		for i, endpoint := range endpoints {
   232  			r, err := scan(endpoint, startKey, span.EndKey)
   233  			if err != nil && i+1 == len(endpoints) {
   234  				return nil, errors.Trace(err)
   235  			}
   236  
   237  			if len(r) == 0 {
   238  				// Because start key is less than end key, there must be some regions.
   239  				log.Error("fail to scan region, missing region",
   240  					zap.String("endpoint", endpoint))
   241  				return nil, cerror.WrapError(cerror.ErrInternalServerError,
   242  					fmt.Errorf("fail to scan region, missing region"))
   243  			}
   244  			if r[0].StartKey != startKeyHex {
   245  				r[0].StartKey = strings.ToUpper(hex.EncodeToString(startKey))
   246  				log.Info("start key mismatch, adjust start key",
   247  					zap.String("startKey", startKeyHex),
   248  					zap.String("regionStartKey", r[0].StartKey),
   249  					zap.Uint64("regionID", r[0].ID))
   250  			}
   251  			regions = append(regions, r...)
   252  			key, err := hex.DecodeString(regions[len(regions)-1].EndKey)
   253  			if err != nil {
   254  				log.Info("fail to decode region end key",
   255  					zap.String("endKey", regions[len(regions)-1].EndKey),
   256  					zap.Uint64("regionID", r[len(regions)-1].ID))
   257  				return nil, errors.Trace(err)
   258  			}
   259  			startKey = tablepb.Key(key)
   260  			startKeyHex = strings.ToUpper(hex.EncodeToString(startKey))
   261  			isFirstStartKey = false
   262  			break
   263  		}
   264  	}
   265  	if regions[len(regions)-1].EndKey != string(span.EndKey) {
   266  		regions[len(regions)-1].EndKey = strings.ToUpper(hex.EncodeToString(span.EndKey))
   267  		log.Info("end key mismatch, adjust end key",
   268  			zap.String("endKey", strings.ToUpper(hex.EncodeToString(span.EndKey))),
   269  			zap.String("regionEndKey", regions[len(regions)-1].EndKey),
   270  			zap.Uint64("regionID", regions[len(regions)-1].ID))
   271  	}
   272  
   273  	return regions, nil
   274  }
   275  
// ServiceSafePoint contains gc service safe point
type ServiceSafePoint struct {
	// ServiceID is decoded from the "service_id" field.
	ServiceID string `json:"service_id"`
	// ExpiredAt is decoded from the "expired_at" field.
	// NOTE(review): presumably a unix timestamp; confirm against PD.
	ExpiredAt int64 `json:"expired_at"`
	// SafePoint is decoded from the "safe_point" field.
	SafePoint uint64 `json:"safe_point"`
}
   282  
// ListServiceGCSafepoint is the response of pd list gc service safe point API
type ListServiceGCSafepoint struct {
	// ServiceGCSafepoints holds the entries decoded from the
	// "service_gc_safe_points" field.
	ServiceGCSafepoints []*ServiceSafePoint `json:"service_gc_safe_points"`
	// GCSafePoint is decoded from the "gc_safe_point" field.
	GCSafePoint uint64 `json:"gc_safe_point"`
}
   288  
   289  // ListGcServiceSafePoint list gc service safepoint from PD
   290  func (pc *pdAPIClient) ListGcServiceSafePoint(
   291  	ctx context.Context,
   292  ) (*ListServiceGCSafepoint, error) {
   293  	var (
   294  		resp *ListServiceGCSafepoint
   295  		err  error
   296  	)
   297  	err = retry.Do(ctx, func() error {
   298  		ctx, cancel := context.WithTimeout(ctx, defaultRequestTimeout)
   299  		defer cancel()
   300  
   301  		resp, err = pc.listGcServiceSafePoint(ctx)
   302  		if err != nil {
   303  			return err
   304  		}
   305  		return nil
   306  	}, retry.WithMaxTries(defaultMaxRetry), retry.WithIsRetryableErr(func(err error) bool {
   307  		switch errors.Cause(err) {
   308  		case context.Canceled:
   309  			return false
   310  		}
   311  		return true
   312  	}))
   313  	return resp, err
   314  }
   315  
   316  func (pc *pdAPIClient) patchMetaLabel(ctx context.Context) error {
   317  	url := pc.grpcClient.GetLeaderURL() + regionLabelPrefix
   318  	header := http.Header{"Content-Type": {"application/json"}}
   319  	content := []byte(addMetaJSON)
   320  
   321  	_, err := pc.httpClient.DoRequest(ctx, url, http.MethodPatch,
   322  		header, bytes.NewReader(content))
   323  	return errors.Trace(err)
   324  }
   325  
   326  func (pc *pdAPIClient) listGcServiceSafePoint(
   327  	ctx context.Context,
   328  ) (*ListServiceGCSafepoint, error) {
   329  	url := pc.grpcClient.GetLeaderURL() + gcServiceSafePointURL
   330  
   331  	respData, err := pc.httpClient.DoRequest(ctx, url, http.MethodGet,
   332  		nil, nil)
   333  	if err != nil {
   334  		return nil, errors.Trace(err)
   335  	}
   336  	resp := ListServiceGCSafepoint{}
   337  	err = json.Unmarshal(respData, &resp)
   338  	if err != nil {
   339  		return nil, errors.Trace(err)
   340  	}
   341  	return &resp, nil
   342  }
   343  
   344  // CollectMemberEndpoints return all members' endpoint
   345  func (pc *pdAPIClient) CollectMemberEndpoints(ctx context.Context) ([]string, error) {
   346  	members, err := pc.grpcClient.GetAllMembers(ctx)
   347  	if err != nil {
   348  		return nil, errors.Trace(err)
   349  	}
   350  	result := make([]string, 0, len(members))
   351  	for _, m := range members {
   352  		clientUrls := m.GetClientUrls()
   353  		if len(clientUrls) > 0 {
   354  			result = append(result, clientUrls[0])
   355  		}
   356  	}
   357  	return result, nil
   358  }
   359  
   360  // Healthy return error if the member corresponding to the endpoint is unhealthy
   361  func (pc *pdAPIClient) Healthy(ctx context.Context, endpoint string) error {
   362  	url := endpoint + healthyAPI
   363  	resp, err := pc.httpClient.Get(ctx, fmt.Sprintf("%s/", url))
   364  	if err != nil {
   365  		return errors.Trace(err)
   366  	}
   367  	_ = resp.Body.Close()
   368  	return nil
   369  }