github.com/google/fleetspeak@v0.1.15-0.20240426164851-4f31f62c1aea/fleetspeak/src/client/system_service.go (about) 1 // Copyright 2017 Google Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package client 16 17 import ( 18 "context" 19 "fmt" 20 "io" 21 "os" 22 "sync" 23 "time" 24 25 anypb "google.golang.org/protobuf/types/known/anypb" 26 27 log "github.com/golang/glog" 28 "google.golang.org/protobuf/proto" 29 30 "github.com/google/fleetspeak/fleetspeak/src/client/internal/monitoring" 31 "github.com/google/fleetspeak/fleetspeak/src/client/service" 32 33 fspb "github.com/google/fleetspeak/fleetspeak/src/common/proto/fleetspeak" 34 ) 35 36 var ( 37 // StatsSamplePeriod is the frequency with which resource-usage data for the Fleetspeak 38 // process will be fetched from the OS. 39 StatsSamplePeriod = 30 * time.Second 40 41 // StatsSampleSize is the number of resource-usage query results that get aggregated into 42 // a single resource-usage report sent to Fleetspeak servers. 43 StatsSampleSize = 20 44 ) 45 46 const ( 47 // SuicideExitCode is used as a distinctive exit code to signify a client committing suicide. 48 SuicideExitCode = 7 49 ) 50 51 // systemService implements Service. It handles messages for the built in 52 // 'system' service. It is installed directly by client.New and is given direct 53 // access to the resulting Client object. 54 type systemService struct { 55 client *Client 56 sc service.Context 57 configChanges <-chan *fspb.ClientInfoData 58 close func() 59 } 60 61 func (s *systemService) Start(sc service.Context) error { 62 if s.close != nil { 63 return fmt.Errorf("system service is already started") 64 } 65 66 s.sc = sc 67 68 ctx, cancel := context.WithCancel(context.Background()) 69 var wg sync.WaitGroup 70 s.close = func() { 71 cancel() 72 wg.Wait() 73 } 74 75 rum, err := monitoring.New(s.sc, monitoring.ResourceUsageMonitorParams{ 76 Scope: "system", 77 Pid: s.client.pid, 78 ProcessStartTime: s.client.startTime, 79 MaxSamplePeriod: StatsSamplePeriod, 80 SampleSize: StatsSampleSize, 81 }) 82 if err != nil { 83 rum = nil 84 log.Errorf("Failed to start resource-usage monitor: %v", err) 85 } 86 wg.Add(4) 87 // TODO: call pollRevokedCerts on startup. 88 go func() { 89 defer wg.Done() 90 s.ackLoop(ctx) 91 }() 92 go func() { 93 defer wg.Done() 94 s.errLoop(ctx) 95 }() 96 go func() { 97 defer wg.Done() 98 s.cfgLoop(ctx) 99 }() 100 go func() { 101 defer wg.Done() 102 if rum != nil { 103 rum.Run(ctx) 104 } 105 }() 106 return nil 107 } 108 109 func (s *systemService) ProcessMessage(_ context.Context, m *fspb.Message) error { 110 switch m.MessageType { 111 case "RekeyRequest": 112 if err := s.client.config.Rekey(); err != nil { 113 // Very unlikely. 114 return fmt.Errorf("unable to rekey client: %v", err) 115 } 116 s.client.config.SendConfigUpdate() 117 case "Die": 118 dr := &fspb.DieRequest{} 119 if err := m.Data.UnmarshalTo(dr); err != nil { 120 return fmt.Errorf("can't unmarshal DieRequest: %v", err) 121 } 122 if dr.Force { 123 log.Info("Committing forced suicide on request.") 124 os.Exit(SuicideExitCode) 125 } else { 126 log.Info("Committing graceful suicide on request.") 127 // Stop the service and exit in a goroutine. As the "system" service 128 // is currently processing the "Die" message, trying to stop it would 129 // deadlock. We have to let the ProcessMessage return in order for 130 // s.client.Stop() to complete. 131 go func() { 132 s.client.Stop() 133 os.Exit(SuicideExitCode) 134 }() 135 } 136 137 case "RestartService": 138 rs := &fspb.RestartServiceRequest{} 139 if err := m.Data.UnmarshalTo(rs); err != nil { 140 return fmt.Errorf("can't unmarshal RestartServiceRequest: %v", err) 141 } 142 log.Infof("Restarting service %s", rs.Name) 143 144 if err := s.client.sc.RestartService(rs.Name); err != nil { 145 log.Errorf("Failed to restart service '%s': %v", rs.Name, err) 146 return err 147 } 148 log.Infof("Restarted service '%s'", rs.Name) 149 default: 150 return fmt.Errorf("unable to process message of type: %v", m.MessageType) 151 } 152 153 return nil 154 } 155 156 func (s *systemService) Stop() error { 157 if s.close != nil { 158 s.close() 159 s.close = nil 160 } 161 return nil 162 } 163 164 func (s *systemService) ackLoop(ctx context.Context) { 165 for { 166 select { 167 case <-ctx.Done(): 168 return 169 case mid := <-s.client.acks: 170 a := &fspb.MessageAckData{MessageIds: [][]byte{mid.Bytes()}} 171 t := time.NewTimer(time.Second) 172 groupLoop: 173 for { 174 select { 175 case <-ctx.Done(): 176 t.Stop() 177 return 178 case mid = <-s.client.acks: 179 a.MessageIds = append(a.MessageIds, mid.Bytes()) 180 case <-t.C: 181 break groupLoop 182 } 183 } 184 d, err := anypb.New(a) 185 if err != nil { 186 log.Fatalf("Unable to marshal MessageAckData: %v", err) 187 } 188 ctx, c := context.WithTimeout(context.Background(), 5*time.Second) 189 if err := s.sc.Send(ctx, service.AckMessage{ 190 M: &fspb.Message{ 191 Destination: &fspb.Address{ServiceName: "system"}, 192 MessageType: "MessageAck", 193 Priority: fspb.Message_HIGH, 194 Data: d, 195 Background: true, 196 }, 197 }); err != nil { 198 log.Errorf("Error acknowledging message: %v", err) 199 } 200 c() 201 } 202 } 203 } 204 205 func (s *systemService) errLoop(ctx context.Context) { 206 for { 207 select { 208 case <-ctx.Done(): 209 return 210 case e := <-s.client.errs: 211 d, err := anypb.New(e) 212 if err != nil { 213 log.Fatalf("Unable to marshal MessageErrData: %v", err) 214 } 215 ctx, c := context.WithTimeout(context.Background(), 5*time.Second) 216 if err := s.sc.Send(ctx, service.AckMessage{ 217 M: &fspb.Message{ 218 Destination: &fspb.Address{ServiceName: "system"}, 219 MessageType: "MessageError", 220 Priority: fspb.Message_HIGH, 221 Data: d, 222 Background: true, 223 }, 224 }); err != nil { 225 log.Errorf("Error reporting message error: %v", err) 226 } 227 c() 228 } 229 } 230 } 231 232 func (s *systemService) cfgLoop(ctx context.Context) { 233 certTicker := time.NewTicker(time.Hour) 234 defer certTicker.Stop() 235 for { 236 select { 237 case <-ctx.Done(): 238 return 239 case <-certTicker.C: 240 s.pollRevokedCerts() 241 case chg := <-s.configChanges: 242 d, err := anypb.New(chg) 243 if err != nil { 244 log.Fatalf("Unable to marshal ClientInfoData: %v", err) 245 } 246 ctx, c := context.WithTimeout(context.Background(), 5*time.Minute) 247 if err := s.sc.Send(ctx, service.AckMessage{ 248 M: &fspb.Message{ 249 Destination: &fspb.Address{ServiceName: "system"}, 250 MessageType: "ClientInfo", 251 Priority: fspb.Message_HIGH, 252 Data: d, 253 Background: true, 254 }, 255 }); err != nil { 256 log.Errorf("Error reporting configuration change: %v", err) 257 } 258 c() 259 } 260 } 261 } 262 263 func (s *systemService) pollRevokedCerts() { 264 ctx, c := context.WithTimeout(context.Background(), 30*time.Second) 265 defer c() 266 data, _, err := s.sc.GetFileIfModified(ctx, "RevokedCertificates", time.Time{}) 267 if err != nil { 268 log.Errorf("Unable to get revoked certificate list: %v", err) 269 return 270 } 271 defer data.Close() 272 273 b, err := io.ReadAll(data) 274 if err != nil { 275 log.Errorf("Unable to read revoked certificate list: %v", err) 276 return 277 } 278 if len(b) == 0 { 279 return 280 } 281 var l fspb.RevokedCertificateList 282 if err := proto.Unmarshal(b, &l); err != nil { 283 log.Errorf("Unable to parse revoked certificate list: %v", err) 284 return 285 } 286 s.client.config.AddRevokedSerials(l.Serials) 287 }