github.com/google/fleetspeak@v0.1.15-0.20240426164851-4f31f62c1aea/fleetspeak/src/server/internal/services/system_service.go (about)

     1  // Copyright 2017 Google Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     https://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package services
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  	"sync"
    22  	"time"
    23  
    24  	log "github.com/golang/glog"
    25  	"golang.org/x/time/rate"
    26  
    27  	"github.com/google/fleetspeak/fleetspeak/src/common"
    28  	"github.com/google/fleetspeak/fleetspeak/src/server/db"
    29  	"github.com/google/fleetspeak/fleetspeak/src/server/internal/cache"
    30  	"github.com/google/fleetspeak/fleetspeak/src/server/service"
    31  	"github.com/google/fleetspeak/fleetspeak/src/server/stats"
    32  
    33  	fspb "github.com/google/fleetspeak/fleetspeak/src/common/proto/fleetspeak"
    34  	mpb "github.com/google/fleetspeak/fleetspeak/src/common/proto/fleetspeak_monitoring"
    35  	anypb "google.golang.org/protobuf/types/known/anypb"
    36  )
    37  
    38  const (
    39  	clientServiceName = "client"
    40  )
    41  
    42  // Allow spot-checking of clients that send kill-notifications, but avoid logspam (at most one entry every 15 minutes).
    43  var killNotificationLogLimiter = rate.NewLimiter(rate.Every(15*time.Minute), 1)
    44  
// A systemService contains references to all the components we need to
// operate. It is populated directly by MakeServer as a special case, as the
// Datastore isn't provided to normal services.
type systemService struct {
	// sctx is set by Start and is used to look up client data before
	// reporting to stats.
	sctx service.Context
	// stats receives resource-usage and kill-notification events.
	stats stats.Collector
	// datastore gives direct access to messages, client labels and
	// resource-usage records.
	datastore db.Store
	// NOTE(review): w is not referenced by any method visible in this file;
	// confirm whether it is still needed.
	w sync.WaitGroup
	// cc is the client cache; the entry for a client is reset after its
	// labels have been updated from a ClientInfo message.
	cc *cache.Clients
}
    55  
// Start stores the provided service context so that the message handlers can
// use it later. It always returns nil.
func (s *systemService) Start(sctx service.Context) error {
	s.sctx = sctx
	return nil
}
    60  
// Stop is a no-op: it performs no cleanup and always returns nil.
func (s *systemService) Stop() error {
	return nil
}
    64  
    65  func (s *systemService) ProcessMessage(ctx context.Context, m *fspb.Message) error {
    66  	mid, _ := common.BytesToMessageID(m.MessageId)
    67  	if m.Source == nil {
    68  		return errors.New("source is nil")
    69  	}
    70  	cid, err := common.BytesToClientID(m.Source.ClientId)
    71  	if err != nil || cid.IsNil() {
    72  		return fmt.Errorf("invalid source client id[%v]: %v", m.Source.ClientId, err)
    73  	}
    74  	// all of our messages should have data
    75  	if m.Data == nil {
    76  		return errors.New("no Data present")
    77  	}
    78  
    79  	switch m.MessageType {
    80  	case "MessageAck":
    81  		return s.processMessageAck(ctx, mid, cid, m.Data)
    82  	case "MessageError":
    83  		return s.processMessageError(ctx, cid, m.Data)
    84  	case "ClientInfo":
    85  		return s.processClientInfo(ctx, cid, m.Data)
    86  	case "ResourceUsage":
    87  		return s.processResourceUsage(ctx, cid, m.Data, m.ValidationInfo)
    88  	case "KillNotification":
    89  		return s.processKillNotification(ctx, cid, m.Data)
    90  	default:
    91  	}
    92  
    93  	return fmt.Errorf("unknown system message type: %v", m.MessageType)
    94  }
    95  
    96  // processMessageAck processes a message MessageAck from a client.
    97  func (s *systemService) processMessageAck(ctx context.Context, mid common.MessageID, cid common.ClientID, d *anypb.Any) error {
    98  	data := &fspb.MessageAckData{}
    99  	if err := d.UnmarshalTo(data); err != nil {
   100  		return fmt.Errorf("unable to unmarshal data as MessageAckData: %v", err)
   101  	}
   102  
   103  	ids := make([]common.MessageID, 0, len(data.MessageIds))
   104  	for _, b := range data.MessageIds {
   105  		id, err := common.BytesToMessageID(b)
   106  		if err != nil {
   107  			return fmt.Errorf("MessageAckData contains invalid message id[%v]: %v", b, err)
   108  		}
   109  		ids = append(ids, id)
   110  	}
   111  
   112  	msgs, err := s.datastore.GetMessages(ctx, ids, false)
   113  	if err != nil {
   114  		return service.TemporaryError{E: fmt.Errorf("unable to retrieve messages to ack: %v", err)}
   115  	}
   116  
   117  	for _, msg := range msgs {
   118  		if msg.Result == nil {
   119  			mmid, err := common.BytesToMessageID(msg.MessageId)
   120  			if err != nil {
   121  				log.Errorf("%v: retrieved message with bad message id[%v]: %v", mid, msg.MessageId, err)
   122  				continue
   123  			}
   124  			mcid, err := common.BytesToClientID(msg.Destination.ClientId)
   125  			if err != nil {
   126  				log.Errorf("%v: retrieved message[%v] with bad client id[%v]: %v", mid, mmid, msg.Destination.ClientId, err)
   127  				continue
   128  			}
   129  			if cid != mcid {
   130  				if msg.Source != nil && msg.Source.ServiceName == "system" && msg.MessageType == "RekeyRequest" {
   131  					// RekeyRequests are special - they are acked by the new client ID. Since
   132  					// the mcid is a random number, we'll assume that this client really did
   133  					// receive the RekeyRequest under its previous id.
   134  					log.Infof("%v: client [%v] acked RekeyRequest sent to [%v] - rekey complete.", mid, cid, mcid)
   135  				} else {
   136  					log.Errorf("%v: attempt by client [%v] to ack a message meant for client [%v]", mid, cid, mcid)
   137  					continue
   138  				}
   139  			}
   140  			if err := s.datastore.SetMessageResult(ctx, mcid, mmid, &fspb.MessageResult{ProcessedTime: db.NowProto()}); err != nil {
   141  				log.Errorf("%v: unable to mark message [%v] processed: %v", mid, mmid, err)
   142  			}
   143  		}
   144  	}
   145  	return nil
   146  }
   147  
   148  // processMessageError processes a MessageError message.
   149  func (s *systemService) processMessageError(ctx context.Context, cid common.ClientID, d *anypb.Any) error {
   150  	data := &fspb.MessageErrorData{}
   151  	if err := d.UnmarshalTo(data); err != nil {
   152  		return fmt.Errorf("unable to unmarshal data as MessageErrorData: %v", err)
   153  	}
   154  
   155  	id, err := common.BytesToMessageID(data.MessageId)
   156  	if err != nil {
   157  		return fmt.Errorf("MessageErr Data contains bad message id[%v]: %v", data.MessageId, err)
   158  	}
   159  
   160  	msgs, err := s.datastore.GetMessages(ctx, []common.MessageID{id}, false)
   161  	if err != nil {
   162  		return service.TemporaryError{E: fmt.Errorf("error from GetMessage([]{%v}): %v", id, err)}
   163  	}
   164  	if len(msgs) != 1 {
   165  		return fmt.Errorf("expected one result from GetMessages, got %v", len(msgs))
   166  	}
   167  	msg := msgs[0]
   168  	mcid, err := common.BytesToClientID(msg.Destination.ClientId)
   169  	if err != nil {
   170  		return fmt.Errorf("retrieved message [%v] has bad client id[%v]: %v", id, msg.Destination.ClientId, err)
   171  	}
   172  	if mcid != cid {
   173  		return fmt.Errorf("attempt by client [%v] to ack a message meant for client [%v]", cid, mcid)
   174  	}
   175  	if err := s.datastore.SetMessageResult(ctx, mcid, id,
   176  		&fspb.MessageResult{
   177  			ProcessedTime: db.NowProto(),
   178  			Failed:        true,
   179  			FailedReason:  data.Error,
   180  		}); err != nil {
   181  		return service.TemporaryError{E: fmt.Errorf("unable to mark message [%v] as failed: %v", id, err)}
   182  	}
   183  	return nil
   184  }
   185  
   186  // processClientInfo processes a ClientInfo message.
   187  func (s *systemService) processClientInfo(ctx context.Context, cid common.ClientID, d *anypb.Any) error {
   188  	data := &fspb.ClientInfoData{}
   189  	if err := d.UnmarshalTo(data); err != nil {
   190  		return fmt.Errorf("unable to unmarshal data as ClientInfoData: %v", err)
   191  	}
   192  	cd, err := s.datastore.GetClientData(ctx, cid)
   193  	if err != nil {
   194  		return service.TemporaryError{E: fmt.Errorf("GetClientData(%v) failed: %v", cid, err)}
   195  	}
   196  
   197  	// We create a set of the new client labels.
   198  	nl := make(map[string]bool)
   199  	for _, l := range data.Labels {
   200  		if l.ServiceName != clientServiceName {
   201  			log.Errorf("attempt to set non-client label: %v", l)
   202  			continue
   203  		}
   204  		nl[l.Label] = true
   205  	}
   206  
   207  	// Remove labels not in nl, remember labels already present.
   208  	ol := make(map[string]bool)
   209  	for _, l := range cd.Labels {
   210  		if l.ServiceName == clientServiceName {
   211  			if !nl[l.Label] {
   212  				if err = s.datastore.RemoveClientLabel(ctx, cid, l); err != nil {
   213  					return service.TemporaryError{E: fmt.Errorf("unable to remove label[%v]: %v", l, err)}
   214  				}
   215  			} else {
   216  				ol[l.Label] = true
   217  			}
   218  		}
   219  	}
   220  
   221  	// Add labels from nl which are not yet present.
   222  	for _, l := range data.Labels {
   223  		if l.ServiceName != clientServiceName {
   224  			continue
   225  		}
   226  		if !ol[l.Label] {
   227  			if err = s.datastore.AddClientLabel(ctx, cid, l); err != nil {
   228  				return service.TemporaryError{E: fmt.Errorf("unable to add label[%v]: %v", l, err)}
   229  			}
   230  		}
   231  	}
   232  	// Forget anything we know about this client. Other servers could have
   233  	// now-stale data, but this client is likely to stick with us due to
   234  	// connection reuse.
   235  	s.cc.Update(cid, nil)
   236  	return nil
   237  }
   238  
   239  // processResourceUsage processes a ResourceUsageData message.
   240  func (s *systemService) processResourceUsage(ctx context.Context, cid common.ClientID, d *anypb.Any, v *fspb.ValidationInfo) error {
   241  	rud := &mpb.ResourceUsageData{}
   242  	if err := d.UnmarshalTo(rud); err != nil {
   243  		return fmt.Errorf("unable to unmarshal data as ResourceUsageData: %v", err)
   244  	}
   245  
   246  	cd, err := s.sctx.GetClientData(ctx, cid)
   247  	if err != nil {
   248  		return fmt.Errorf("failed to get client data for %v: %v", cid, err)
   249  	}
   250  	s.stats.ResourceUsageDataReceived(cd, rud, v)
   251  	if err := s.datastore.RecordResourceUsageData(ctx, cid, rud); err != nil {
   252  		err = fmt.Errorf("failed to write resource-usage data: %v", err)
   253  		return err
   254  	}
   255  	return nil
   256  }
   257  
   258  // processKillNotification handles kill-notifications sent by clients.
   259  func (s *systemService) processKillNotification(ctx context.Context, cid common.ClientID, d *anypb.Any) error {
   260  	kn := &mpb.KillNotification{}
   261  	if err := d.UnmarshalTo(kn); err != nil {
   262  		return fmt.Errorf("unable to unmarshal KillNotification: %v", err)
   263  	}
   264  
   265  	if killNotificationLogLimiter.Allow() {
   266  		log.Warningf("Received kill notification from %s: [service: %s, reason: %s]", cid, kn.Service, kn.Reason)
   267  	}
   268  
   269  	cd, err := s.sctx.GetClientData(ctx, cid)
   270  	if err != nil {
   271  		return fmt.Errorf("failed to get client data for %v: %v", cid, err)
   272  	}
   273  	s.stats.KillNotificationReceived(cd, kn)
   274  	return nil
   275  }