istio.io/istio@v0.0.0-20240520182934-d79c90f27776/security/pkg/nodeagent/sds/sdsservice.go (about)

     1  // Copyright Istio Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package sds implements secret discovery service in NodeAgent.
    16  package sds
    17  
    18  import (
    19  	"context"
    20  	"fmt"
    21  	"strconv"
    22  	"sync"
    23  	"sync/atomic"
    24  	"time"
    25  
    26  	cryptomb "github.com/envoyproxy/go-control-plane/contrib/envoy/extensions/private_key_providers/cryptomb/v3alpha"
    27  	qat "github.com/envoyproxy/go-control-plane/contrib/envoy/extensions/private_key_providers/qat/v3alpha"
    28  	core "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
    29  	tls "github.com/envoyproxy/go-control-plane/envoy/extensions/transport_sockets/tls/v3"
    30  	discovery "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
    31  	sds "github.com/envoyproxy/go-control-plane/envoy/service/secret/v3"
    32  	"github.com/google/uuid"
    33  	uberatomic "go.uber.org/atomic"
    34  	"google.golang.org/grpc"
    35  	"google.golang.org/grpc/codes"
    36  	"google.golang.org/grpc/status"
    37  	"google.golang.org/protobuf/types/known/durationpb"
    38  
    39  	mesh "istio.io/api/mesh/v1alpha1"
    40  	"istio.io/istio/pilot/pkg/util/protoconv"
    41  	"istio.io/istio/pkg/backoff"
    42  	"istio.io/istio/pkg/log"
    43  	"istio.io/istio/pkg/model"
    44  	"istio.io/istio/pkg/security"
    45  	"istio.io/istio/pkg/xds"
    46  )
    47  
// sdsServiceLog is the logging scope ("sds") used by the SDS service in this package.
var sdsServiceLog = log.RegisterScope("sds", "SDS service debugging")
    49  
// sdsservice serves Envoy SDS requests using secrets produced by a
// security.SecretManager, and tracks the currently connected clients so
// that secret updates can be pushed to them.
type sdsservice struct {
	// st generates the secrets that are returned to clients.
	st security.SecretManager

	// stop is closed by Close; the certificate warm-up started in
	// newSDSService stops retrying once it is closed.
	stop chan struct{}
	// rootCaPath is taken from security.Options.CARootPath and forwarded to toEnvoySecret.
	rootCaPath string
	// pkpConf is the private key provider configuration forwarded to toEnvoySecret.
	pkpConf *mesh.PrivateKeyProvider

	// The embedded mutex guards clients.
	sync.Mutex
	// clients maps a connection ID to its Context. Entries are added in
	// Context.Initialize and removed in Context.Close.
	clients map[string]*Context
}
    60  
// Context is the per-stream state of one SDS client connection.
type Context struct {
	// BaseConnection is the underlying xds connection for the stream.
	BaseConnection xds.Connection
	// s is the owning SDS service, used to generate secrets and to
	// register/unregister this connection.
	s *sdsservice
	// w records which resources the client is currently watching.
	w *Watch
}
    66  
// Watch stores the single watched resource of a connection.
type Watch struct {
	// The embedded mutex guards watch.
	sync.Mutex
	// watch is the currently watched resource, or nil when nothing is watched.
	watch *xds.WatchedResource
}
    71  
    72  // newSDSService creates Secret Discovery Service which implements envoy SDS API.
    73  func newSDSService(st security.SecretManager, options *security.Options, pkpConf *mesh.PrivateKeyProvider) *sdsservice {
    74  	ret := &sdsservice{
    75  		st:      st,
    76  		stop:    make(chan struct{}),
    77  		pkpConf: pkpConf,
    78  		clients: make(map[string]*Context),
    79  	}
    80  
    81  	ret.rootCaPath = options.CARootPath
    82  
    83  	if options.FileMountedCerts {
    84  		return ret
    85  	}
    86  
    87  	// Pre-generate workload certificates to improve startup latency and ensure that for OUTPUT_CERTS
    88  	// case we always write a certificate. A workload can technically run without any mTLS/CA
    89  	// configured, in which case this will fail; if it becomes noisy we should disable the entire SDS
    90  	// server in these cases.
    91  	go func() {
    92  		// TODO: do we need max timeout for retry, seems meaningless to retry forever if it never succeed
    93  		b := backoff.NewExponentialBackOff(backoff.DefaultOption())
    94  		// context for both timeout and channel, whichever stops first, the context will be done
    95  		ctx, cancel := context.WithCancel(context.Background())
    96  		go func() {
    97  			select {
    98  			case <-ret.stop:
    99  				cancel()
   100  			case <-ctx.Done():
   101  			}
   102  		}()
   103  		defer cancel()
   104  		_ = b.RetryWithContext(ctx, func() error {
   105  			_, err := st.GenerateSecret(security.WorkloadKeyCertResourceName)
   106  			if err != nil {
   107  				sdsServiceLog.Warnf("failed to warm certificate: %v", err)
   108  				return err
   109  			}
   110  
   111  			_, err = st.GenerateSecret(security.RootCertReqResourceName)
   112  			if err != nil {
   113  				sdsServiceLog.Warnf("failed to warm root certificate: %v", err)
   114  				return err
   115  			}
   116  
   117  			return nil
   118  		})
   119  	}()
   120  
   121  	return ret
   122  }
   123  
// version is a process-wide counter; generate increments it for every response
// and appends the new value to the response's VersionInfo.
var version uberatomic.Uint64
   125  
   126  func (s *sdsservice) generate(resourceNames []string) (*discovery.DiscoveryResponse, error) {
   127  	resources := xds.Resources{}
   128  	for _, resourceName := range resourceNames {
   129  		secret, err := s.st.GenerateSecret(resourceName)
   130  		if err != nil {
   131  			// Typically, in Istiod, we do not return an error for a failure to generate a resource
   132  			// However, here it makes sense, because we are generally streaming a single resource,
   133  			// so sending an error will not cause a single failure to prevent the entire multiplex stream
   134  			// of resources, and failures here are generally due to temporary networking issues to the CA
   135  			// rather than a result of configuration issues, which trigger updates in Istiod when resolved.
   136  			// Instead, we rely on the client to retry (with backoff) on failures.
   137  			return nil, fmt.Errorf("failed to generate secret for %v: %v", resourceName, err)
   138  		}
   139  
   140  		res := protoconv.MessageToAny(toEnvoySecret(secret, s.rootCaPath, s.pkpConf))
   141  		resources = append(resources, &discovery.Resource{
   142  			Name:     resourceName,
   143  			Resource: res,
   144  		})
   145  	}
   146  	return &discovery.DiscoveryResponse{
   147  		TypeUrl:     model.SecretType,
   148  		VersionInfo: time.Now().Format(time.RFC3339) + "/" + strconv.FormatUint(version.Inc(), 10),
   149  		Nonce:       uuid.New().String(),
   150  		Resources:   xds.ResourcesToAny(resources),
   151  	}, nil
   152  }
   153  
   154  // register adds the SDS handle to the grpc server
   155  func (s *sdsservice) register(rpcs *grpc.Server) {
   156  	sds.RegisterSecretDiscoveryServiceServer(rpcs, s)
   157  }
   158  
   159  func (s *sdsservice) push(secretName string) {
   160  	s.Lock()
   161  	defer s.Unlock()
   162  	for _, client := range s.clients {
   163  		go func(client *Context) {
   164  			select {
   165  			case client.XdsConnection().PushCh() <- secretName:
   166  			case <-client.XdsConnection().StreamDone():
   167  			}
   168  		}(client)
   169  	}
   170  }
   171  
   172  func (c Context) XdsConnection() *xds.Connection {
   173  	return &c.BaseConnection
   174  }
   175  
// connectionNumber is a process-wide counter, incremented atomically in
// Context.Initialize, whose value is used as the ID of each new connection.
var connectionNumber = int64(0)
   177  
   178  func (c *Context) Initialize(_ *core.Node) error {
   179  	id := atomic.AddInt64(&connectionNumber, 1)
   180  	con := c.XdsConnection()
   181  	con.SetID(strconv.FormatInt(id, 10))
   182  
   183  	c.s.Lock()
   184  	c.s.clients[con.ID()] = c
   185  	c.s.Unlock()
   186  
   187  	con.MarkInitialized()
   188  	return nil
   189  }
   190  
   191  func (c *Context) Close() {
   192  	c.s.Lock()
   193  	defer c.s.Unlock()
   194  	delete(c.s.clients, c.XdsConnection().ID())
   195  }
   196  
   197  func (c *Context) Watcher() xds.Watcher {
   198  	return c.w
   199  }
   200  
   201  func (w *Watch) DeleteWatchedResource(string) {
   202  	w.Lock()
   203  	defer w.Unlock()
   204  	w.watch = nil
   205  }
   206  
   207  func (w *Watch) GetWatchedResource(string) *xds.WatchedResource {
   208  	w.Lock()
   209  	defer w.Unlock()
   210  	return w.watch
   211  }
   212  
   213  func (w *Watch) NewWatchedResource(typeURL string, names []string) {
   214  	w.Lock()
   215  	defer w.Unlock()
   216  	w.watch = &xds.WatchedResource{TypeUrl: typeURL, ResourceNames: names}
   217  }
   218  
   219  func (w *Watch) UpdateWatchedResource(_ string, f func(*xds.WatchedResource) *xds.WatchedResource) {
   220  	w.Lock()
   221  	defer w.Unlock()
   222  	w.watch = f(w.watch)
   223  }
   224  
   225  func (w *Watch) GetID() string {
   226  	// This always maps to the same local Envoy instance.
   227  	return ""
   228  }
   229  
   230  func (w *Watch) requested(secretName string) bool {
   231  	w.Lock()
   232  	defer w.Unlock()
   233  	if w.watch != nil {
   234  		for _, res := range w.watch.ResourceNames {
   235  			if res == secretName {
   236  				return true
   237  			}
   238  		}
   239  	}
   240  	return false
   241  }
   242  
   243  func (c *Context) Process(req *discovery.DiscoveryRequest) error {
   244  	shouldRespond, delta := xds.ShouldRespond(c.Watcher(), c.XdsConnection().ID(), req)
   245  	if !shouldRespond {
   246  		return nil
   247  	}
   248  	resources := req.ResourceNames
   249  	if !delta.IsEmpty() {
   250  		resources = delta.Subscribed.UnsortedList()
   251  	}
   252  	res, err := c.s.generate(resources)
   253  	if err != nil {
   254  		return err
   255  	}
   256  	return xds.Send(c, res)
   257  }
   258  
   259  func (c *Context) Push(ev any) error {
   260  	secretName := ev.(string)
   261  	if !c.w.requested(secretName) {
   262  		return nil
   263  	}
   264  	res, err := c.s.generate([]string{secretName})
   265  	if err != nil {
   266  		return err
   267  	}
   268  	return xds.Send(c, res)
   269  }
   270  
   271  // StreamSecrets serves SDS discovery requests and SDS push requests
   272  func (s *sdsservice) StreamSecrets(stream sds.SecretDiscoveryService_StreamSecretsServer) error {
   273  	return xds.Stream(&Context{
   274  		BaseConnection: xds.NewConnection("", stream),
   275  		s:              s,
   276  		w:              &Watch{},
   277  	})
   278  }
   279  
   280  func (s *sdsservice) DeltaSecrets(stream sds.SecretDiscoveryService_DeltaSecretsServer) error {
   281  	return status.Error(codes.Unimplemented, "DeltaSecrets not implemented")
   282  }
   283  
   284  func (s *sdsservice) FetchSecrets(ctx context.Context, discReq *discovery.DiscoveryRequest) (*discovery.DiscoveryResponse, error) {
   285  	return nil, status.Error(codes.Unimplemented, "FetchSecrets not implemented")
   286  }
   287  
   288  func (s *sdsservice) Close() {
   289  	close(s.stop)
   290  }
   291  
   292  // toEnvoySecret converts a security.SecretItem to an Envoy tls.Secret
   293  func toEnvoySecret(s *security.SecretItem, caRootPath string, pkpConf *mesh.PrivateKeyProvider) *tls.Secret {
   294  	secret := &tls.Secret{
   295  		Name: s.ResourceName,
   296  	}
   297  	var cfg security.SdsCertificateConfig
   298  	ok := false
   299  	if s.ResourceName == security.FileRootSystemCACert {
   300  		cfg, ok = security.SdsCertificateConfigFromResourceNameForOSCACert(caRootPath)
   301  	} else {
   302  		cfg, ok = security.SdsCertificateConfigFromResourceName(s.ResourceName)
   303  	}
   304  	if s.ResourceName == security.RootCertReqResourceName || (ok && cfg.IsRootCertificate()) {
   305  		secret.Type = &tls.Secret_ValidationContext{
   306  			ValidationContext: &tls.CertificateValidationContext{
   307  				TrustedCa: &core.DataSource{
   308  					Specifier: &core.DataSource_InlineBytes{
   309  						InlineBytes: s.RootCert,
   310  					},
   311  				},
   312  			},
   313  		}
   314  	} else {
   315  		switch pkpConf.GetProvider().(type) {
   316  		case *mesh.PrivateKeyProvider_Cryptomb:
   317  			crypto := pkpConf.GetCryptomb()
   318  			msg := protoconv.MessageToAny(&cryptomb.CryptoMbPrivateKeyMethodConfig{
   319  				PollDelay: durationpb.New(time.Duration(crypto.GetPollDelay().Nanos)),
   320  				PrivateKey: &core.DataSource{
   321  					Specifier: &core.DataSource_InlineBytes{
   322  						InlineBytes: s.PrivateKey,
   323  					},
   324  				},
   325  			})
   326  			secret.Type = &tls.Secret_TlsCertificate{
   327  				TlsCertificate: &tls.TlsCertificate{
   328  					CertificateChain: &core.DataSource{
   329  						Specifier: &core.DataSource_InlineBytes{
   330  							InlineBytes: s.CertificateChain,
   331  						},
   332  					},
   333  					PrivateKeyProvider: &tls.PrivateKeyProvider{
   334  						ProviderName: "cryptomb",
   335  						ConfigType: &tls.PrivateKeyProvider_TypedConfig{
   336  							TypedConfig: msg,
   337  						},
   338  						Fallback: crypto.GetFallback().GetValue(),
   339  					},
   340  				},
   341  			}
   342  		case *mesh.PrivateKeyProvider_Qat:
   343  			qatConf := pkpConf.GetQat()
   344  			msg := protoconv.MessageToAny(&qat.QatPrivateKeyMethodConfig{
   345  				PollDelay: durationpb.New(time.Duration(qatConf.GetPollDelay().Nanos)),
   346  				PrivateKey: &core.DataSource{
   347  					Specifier: &core.DataSource_InlineBytes{
   348  						InlineBytes: s.PrivateKey,
   349  					},
   350  				},
   351  			})
   352  			secret.Type = &tls.Secret_TlsCertificate{
   353  				TlsCertificate: &tls.TlsCertificate{
   354  					CertificateChain: &core.DataSource{
   355  						Specifier: &core.DataSource_InlineBytes{
   356  							InlineBytes: s.CertificateChain,
   357  						},
   358  					},
   359  					PrivateKeyProvider: &tls.PrivateKeyProvider{
   360  						ProviderName: "qat",
   361  						ConfigType: &tls.PrivateKeyProvider_TypedConfig{
   362  							TypedConfig: msg,
   363  						},
   364  						Fallback: qatConf.GetFallback().GetValue(),
   365  					},
   366  				},
   367  			}
   368  		default:
   369  			secret.Type = &tls.Secret_TlsCertificate{
   370  				TlsCertificate: &tls.TlsCertificate{
   371  					CertificateChain: &core.DataSource{
   372  						Specifier: &core.DataSource_InlineBytes{
   373  							InlineBytes: s.CertificateChain,
   374  						},
   375  					},
   376  					PrivateKey: &core.DataSource{
   377  						Specifier: &core.DataSource_InlineBytes{
   378  							InlineBytes: s.PrivateKey,
   379  						},
   380  					},
   381  				},
   382  			}
   383  		}
   384  	}
   385  	return secret
   386  }