github.com/google/fleetspeak@v0.1.15-0.20240426164851-4f31f62c1aea/fleetspeak/src/client/system_service.go (about)

     1  // Copyright 2017 Google Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     https://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package client
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"io"
    21  	"os"
    22  	"sync"
    23  	"time"
    24  
    25  	anypb "google.golang.org/protobuf/types/known/anypb"
    26  
    27  	log "github.com/golang/glog"
    28  	"google.golang.org/protobuf/proto"
    29  
    30  	"github.com/google/fleetspeak/fleetspeak/src/client/internal/monitoring"
    31  	"github.com/google/fleetspeak/fleetspeak/src/client/service"
    32  
    33  	fspb "github.com/google/fleetspeak/fleetspeak/src/common/proto/fleetspeak"
    34  )
    35  
    36  var (
    37  	// StatsSamplePeriod is the frequency with which resource-usage data for the Fleetspeak
    38  	// process will be fetched from the OS.
    39  	StatsSamplePeriod = 30 * time.Second
    40  
    41  	// StatsSampleSize is the number of resource-usage query results that get aggregated into
    42  	// a single resource-usage report sent to Fleetspeak servers.
    43  	StatsSampleSize = 20
    44  )
    45  
    46  const (
    47  	// SuicideExitCode is used as a distinctive exit code to signify a client committing suicide.
    48  	SuicideExitCode = 7
    49  )
    50  
    51  // systemService implements Service. It handles messages for the built in
    52  // 'system' service. It is installed directly by client.New and is given direct
    53  // access to the resulting Client object.
    54  type systemService struct {
    55  	client        *Client
    56  	sc            service.Context
    57  	configChanges <-chan *fspb.ClientInfoData
    58  	close         func()
    59  }
    60  
    61  func (s *systemService) Start(sc service.Context) error {
    62  	if s.close != nil {
    63  		return fmt.Errorf("system service is already started")
    64  	}
    65  
    66  	s.sc = sc
    67  
    68  	ctx, cancel := context.WithCancel(context.Background())
    69  	var wg sync.WaitGroup
    70  	s.close = func() {
    71  		cancel()
    72  		wg.Wait()
    73  	}
    74  
    75  	rum, err := monitoring.New(s.sc, monitoring.ResourceUsageMonitorParams{
    76  		Scope:            "system",
    77  		Pid:              s.client.pid,
    78  		ProcessStartTime: s.client.startTime,
    79  		MaxSamplePeriod:  StatsSamplePeriod,
    80  		SampleSize:       StatsSampleSize,
    81  	})
    82  	if err != nil {
    83  		rum = nil
    84  		log.Errorf("Failed to start resource-usage monitor: %v", err)
    85  	}
    86  	wg.Add(4)
    87  	// TODO: call pollRevokedCerts on startup.
    88  	go func() {
    89  		defer wg.Done()
    90  		s.ackLoop(ctx)
    91  	}()
    92  	go func() {
    93  		defer wg.Done()
    94  		s.errLoop(ctx)
    95  	}()
    96  	go func() {
    97  		defer wg.Done()
    98  		s.cfgLoop(ctx)
    99  	}()
   100  	go func() {
   101  		defer wg.Done()
   102  		if rum != nil {
   103  			rum.Run(ctx)
   104  		}
   105  	}()
   106  	return nil
   107  }
   108  
   109  func (s *systemService) ProcessMessage(_ context.Context, m *fspb.Message) error {
   110  	switch m.MessageType {
   111  	case "RekeyRequest":
   112  		if err := s.client.config.Rekey(); err != nil {
   113  			// Very unlikely.
   114  			return fmt.Errorf("unable to rekey client: %v", err)
   115  		}
   116  		s.client.config.SendConfigUpdate()
   117  	case "Die":
   118  		dr := &fspb.DieRequest{}
   119  		if err := m.Data.UnmarshalTo(dr); err != nil {
   120  			return fmt.Errorf("can't unmarshal DieRequest: %v", err)
   121  		}
   122  		if dr.Force {
   123  			log.Info("Committing forced suicide on request.")
   124  			os.Exit(SuicideExitCode)
   125  		} else {
   126  			log.Info("Committing graceful suicide on request.")
   127  			// Stop the service and exit in a goroutine. As the "system" service
   128  			// is currently processing the "Die" message, trying to stop it would
   129  			// deadlock. We have to let the ProcessMessage return in order for
   130  			// s.client.Stop() to complete.
   131  			go func() {
   132  				s.client.Stop()
   133  				os.Exit(SuicideExitCode)
   134  			}()
   135  		}
   136  
   137  	case "RestartService":
   138  		rs := &fspb.RestartServiceRequest{}
   139  		if err := m.Data.UnmarshalTo(rs); err != nil {
   140  			return fmt.Errorf("can't unmarshal RestartServiceRequest: %v", err)
   141  		}
   142  		log.Infof("Restarting service %s", rs.Name)
   143  
   144  		if err := s.client.sc.RestartService(rs.Name); err != nil {
   145  			log.Errorf("Failed to restart service '%s': %v", rs.Name, err)
   146  			return err
   147  		}
   148  		log.Infof("Restarted service '%s'", rs.Name)
   149  	default:
   150  		return fmt.Errorf("unable to process message of type: %v", m.MessageType)
   151  	}
   152  
   153  	return nil
   154  }
   155  
   156  func (s *systemService) Stop() error {
   157  	if s.close != nil {
   158  		s.close()
   159  		s.close = nil
   160  	}
   161  	return nil
   162  }
   163  
   164  func (s *systemService) ackLoop(ctx context.Context) {
   165  	for {
   166  		select {
   167  		case <-ctx.Done():
   168  			return
   169  		case mid := <-s.client.acks:
   170  			a := &fspb.MessageAckData{MessageIds: [][]byte{mid.Bytes()}}
   171  			t := time.NewTimer(time.Second)
   172  		groupLoop:
   173  			for {
   174  				select {
   175  				case <-ctx.Done():
   176  					t.Stop()
   177  					return
   178  				case mid = <-s.client.acks:
   179  					a.MessageIds = append(a.MessageIds, mid.Bytes())
   180  				case <-t.C:
   181  					break groupLoop
   182  				}
   183  			}
   184  			d, err := anypb.New(a)
   185  			if err != nil {
   186  				log.Fatalf("Unable to marshal MessageAckData: %v", err)
   187  			}
   188  			ctx, c := context.WithTimeout(context.Background(), 5*time.Second)
   189  			if err := s.sc.Send(ctx, service.AckMessage{
   190  				M: &fspb.Message{
   191  					Destination: &fspb.Address{ServiceName: "system"},
   192  					MessageType: "MessageAck",
   193  					Priority:    fspb.Message_HIGH,
   194  					Data:        d,
   195  					Background:  true,
   196  				},
   197  			}); err != nil {
   198  				log.Errorf("Error acknowledging message: %v", err)
   199  			}
   200  			c()
   201  		}
   202  	}
   203  }
   204  
   205  func (s *systemService) errLoop(ctx context.Context) {
   206  	for {
   207  		select {
   208  		case <-ctx.Done():
   209  			return
   210  		case e := <-s.client.errs:
   211  			d, err := anypb.New(e)
   212  			if err != nil {
   213  				log.Fatalf("Unable to marshal MessageErrData: %v", err)
   214  			}
   215  			ctx, c := context.WithTimeout(context.Background(), 5*time.Second)
   216  			if err := s.sc.Send(ctx, service.AckMessage{
   217  				M: &fspb.Message{
   218  					Destination: &fspb.Address{ServiceName: "system"},
   219  					MessageType: "MessageError",
   220  					Priority:    fspb.Message_HIGH,
   221  					Data:        d,
   222  					Background:  true,
   223  				},
   224  			}); err != nil {
   225  				log.Errorf("Error reporting message error: %v", err)
   226  			}
   227  			c()
   228  		}
   229  	}
   230  }
   231  
   232  func (s *systemService) cfgLoop(ctx context.Context) {
   233  	certTicker := time.NewTicker(time.Hour)
   234  	defer certTicker.Stop()
   235  	for {
   236  		select {
   237  		case <-ctx.Done():
   238  			return
   239  		case <-certTicker.C:
   240  			s.pollRevokedCerts()
   241  		case chg := <-s.configChanges:
   242  			d, err := anypb.New(chg)
   243  			if err != nil {
   244  				log.Fatalf("Unable to marshal ClientInfoData: %v", err)
   245  			}
   246  			ctx, c := context.WithTimeout(context.Background(), 5*time.Minute)
   247  			if err := s.sc.Send(ctx, service.AckMessage{
   248  				M: &fspb.Message{
   249  					Destination: &fspb.Address{ServiceName: "system"},
   250  					MessageType: "ClientInfo",
   251  					Priority:    fspb.Message_HIGH,
   252  					Data:        d,
   253  					Background:  true,
   254  				},
   255  			}); err != nil {
   256  				log.Errorf("Error reporting configuration change: %v", err)
   257  			}
   258  			c()
   259  		}
   260  	}
   261  }
   262  
   263  func (s *systemService) pollRevokedCerts() {
   264  	ctx, c := context.WithTimeout(context.Background(), 30*time.Second)
   265  	defer c()
   266  	data, _, err := s.sc.GetFileIfModified(ctx, "RevokedCertificates", time.Time{})
   267  	if err != nil {
   268  		log.Errorf("Unable to get revoked certificate list: %v", err)
   269  		return
   270  	}
   271  	defer data.Close()
   272  
   273  	b, err := io.ReadAll(data)
   274  	if err != nil {
   275  		log.Errorf("Unable to read revoked certificate list: %v", err)
   276  		return
   277  	}
   278  	if len(b) == 0 {
   279  		return
   280  	}
   281  	var l fspb.RevokedCertificateList
   282  	if err := proto.Unmarshal(b, &l); err != nil {
   283  		log.Errorf("Unable to parse revoked certificate list: %v", err)
   284  		return
   285  	}
   286  	s.client.config.AddRevokedSerials(l.Serials)
   287  }