github.com/google/fleetspeak@v0.1.15-0.20240426164851-4f31f62c1aea/fleetspeak/src/server/internal/services/system_service.go (about) 1 // Copyright 2017 Google Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package services 16 17 import ( 18 "context" 19 "errors" 20 "fmt" 21 "sync" 22 "time" 23 24 log "github.com/golang/glog" 25 "golang.org/x/time/rate" 26 27 "github.com/google/fleetspeak/fleetspeak/src/common" 28 "github.com/google/fleetspeak/fleetspeak/src/server/db" 29 "github.com/google/fleetspeak/fleetspeak/src/server/internal/cache" 30 "github.com/google/fleetspeak/fleetspeak/src/server/service" 31 "github.com/google/fleetspeak/fleetspeak/src/server/stats" 32 33 fspb "github.com/google/fleetspeak/fleetspeak/src/common/proto/fleetspeak" 34 mpb "github.com/google/fleetspeak/fleetspeak/src/common/proto/fleetspeak_monitoring" 35 anypb "google.golang.org/protobuf/types/known/anypb" 36 ) 37 38 const ( 39 clientServiceName = "client" 40 ) 41 42 // Allow spot-checking of clients that send kill-notifications, but avoid logspam (at most one entry every 15 minutes). 43 var killNotificationLogLimiter = rate.NewLimiter(rate.Every(15*time.Minute), 1) 44 45 // A systemService contains references to all the components we need to 46 // operate. It is populated directly by MakeServer as a special case, as the 47 // Datastore isn't provided to normal services. 48 type systemService struct { 49 sctx service.Context 50 stats stats.Collector 51 datastore db.Store 52 w sync.WaitGroup 53 cc *cache.Clients 54 } 55 56 func (s *systemService) Start(sctx service.Context) error { 57 s.sctx = sctx 58 return nil 59 } 60 61 func (s *systemService) Stop() error { 62 return nil 63 } 64 65 func (s *systemService) ProcessMessage(ctx context.Context, m *fspb.Message) error { 66 mid, _ := common.BytesToMessageID(m.MessageId) 67 if m.Source == nil { 68 return errors.New("source is nil") 69 } 70 cid, err := common.BytesToClientID(m.Source.ClientId) 71 if err != nil || cid.IsNil() { 72 return fmt.Errorf("invalid source client id[%v]: %v", m.Source.ClientId, err) 73 } 74 // all of our messages should have data 75 if m.Data == nil { 76 return errors.New("no Data present") 77 } 78 79 switch m.MessageType { 80 case "MessageAck": 81 return s.processMessageAck(ctx, mid, cid, m.Data) 82 case "MessageError": 83 return s.processMessageError(ctx, cid, m.Data) 84 case "ClientInfo": 85 return s.processClientInfo(ctx, cid, m.Data) 86 case "ResourceUsage": 87 return s.processResourceUsage(ctx, cid, m.Data, m.ValidationInfo) 88 case "KillNotification": 89 return s.processKillNotification(ctx, cid, m.Data) 90 default: 91 } 92 93 return fmt.Errorf("unknown system message type: %v", m.MessageType) 94 } 95 96 // processMessageAck processes a message MessageAck from a client. 97 func (s *systemService) processMessageAck(ctx context.Context, mid common.MessageID, cid common.ClientID, d *anypb.Any) error { 98 data := &fspb.MessageAckData{} 99 if err := d.UnmarshalTo(data); err != nil { 100 return fmt.Errorf("unable to unmarshal data as MessageAckData: %v", err) 101 } 102 103 ids := make([]common.MessageID, 0, len(data.MessageIds)) 104 for _, b := range data.MessageIds { 105 id, err := common.BytesToMessageID(b) 106 if err != nil { 107 return fmt.Errorf("MessageAckData contains invalid message id[%v]: %v", b, err) 108 } 109 ids = append(ids, id) 110 } 111 112 msgs, err := s.datastore.GetMessages(ctx, ids, false) 113 if err != nil { 114 return service.TemporaryError{E: fmt.Errorf("unable to retrieve messages to ack: %v", err)} 115 } 116 117 for _, msg := range msgs { 118 if msg.Result == nil { 119 mmid, err := common.BytesToMessageID(msg.MessageId) 120 if err != nil { 121 log.Errorf("%v: retrieved message with bad message id[%v]: %v", mid, msg.MessageId, err) 122 continue 123 } 124 mcid, err := common.BytesToClientID(msg.Destination.ClientId) 125 if err != nil { 126 log.Errorf("%v: retrieved message[%v] with bad client id[%v]: %v", mid, mmid, msg.Destination.ClientId, err) 127 continue 128 } 129 if cid != mcid { 130 if msg.Source != nil && msg.Source.ServiceName == "system" && msg.MessageType == "RekeyRequest" { 131 // RekeyRequests are special - they are acked by the new client ID. Since 132 // the mcid is a random number, we'll assume that this client really did 133 // receive the RekeyRequest under its previous id. 134 log.Infof("%v: client [%v] acked RekeyRequest sent to [%v] - rekey complete.", mid, cid, mcid) 135 } else { 136 log.Errorf("%v: attempt by client [%v] to ack a message meant for client [%v]", mid, cid, mcid) 137 continue 138 } 139 } 140 if err := s.datastore.SetMessageResult(ctx, mcid, mmid, &fspb.MessageResult{ProcessedTime: db.NowProto()}); err != nil { 141 log.Errorf("%v: unable to mark message [%v] processed: %v", mid, mmid, err) 142 } 143 } 144 } 145 return nil 146 } 147 148 // processMessageError processes a MessageError message. 149 func (s *systemService) processMessageError(ctx context.Context, cid common.ClientID, d *anypb.Any) error { 150 data := &fspb.MessageErrorData{} 151 if err := d.UnmarshalTo(data); err != nil { 152 return fmt.Errorf("unable to unmarshal data as MessageErrorData: %v", err) 153 } 154 155 id, err := common.BytesToMessageID(data.MessageId) 156 if err != nil { 157 return fmt.Errorf("MessageErr Data contains bad message id[%v]: %v", data.MessageId, err) 158 } 159 160 msgs, err := s.datastore.GetMessages(ctx, []common.MessageID{id}, false) 161 if err != nil { 162 return service.TemporaryError{E: fmt.Errorf("error from GetMessage([]{%v}): %v", id, err)} 163 } 164 if len(msgs) != 1 { 165 return fmt.Errorf("expected one result from GetMessages, got %v", len(msgs)) 166 } 167 msg := msgs[0] 168 mcid, err := common.BytesToClientID(msg.Destination.ClientId) 169 if err != nil { 170 return fmt.Errorf("retrieved message [%v] has bad client id[%v]: %v", id, msg.Destination.ClientId, err) 171 } 172 if mcid != cid { 173 return fmt.Errorf("attempt by client [%v] to ack a message meant for client [%v]", cid, mcid) 174 } 175 if err := s.datastore.SetMessageResult(ctx, mcid, id, 176 &fspb.MessageResult{ 177 ProcessedTime: db.NowProto(), 178 Failed: true, 179 FailedReason: data.Error, 180 }); err != nil { 181 return service.TemporaryError{E: fmt.Errorf("unable to mark message [%v] as failed: %v", id, err)} 182 } 183 return nil 184 } 185 186 // processClientInfo processes a ClientInfo message. 187 func (s *systemService) processClientInfo(ctx context.Context, cid common.ClientID, d *anypb.Any) error { 188 data := &fspb.ClientInfoData{} 189 if err := d.UnmarshalTo(data); err != nil { 190 return fmt.Errorf("unable to unmarshal data as ClientInfoData: %v", err) 191 } 192 cd, err := s.datastore.GetClientData(ctx, cid) 193 if err != nil { 194 return service.TemporaryError{E: fmt.Errorf("GetClientData(%v) failed: %v", cid, err)} 195 } 196 197 // We create a set of the new client labels. 198 nl := make(map[string]bool) 199 for _, l := range data.Labels { 200 if l.ServiceName != clientServiceName { 201 log.Errorf("attempt to set non-client label: %v", l) 202 continue 203 } 204 nl[l.Label] = true 205 } 206 207 // Remove labels not in nl, remember labels already present. 208 ol := make(map[string]bool) 209 for _, l := range cd.Labels { 210 if l.ServiceName == clientServiceName { 211 if !nl[l.Label] { 212 if err = s.datastore.RemoveClientLabel(ctx, cid, l); err != nil { 213 return service.TemporaryError{E: fmt.Errorf("unable to remove label[%v]: %v", l, err)} 214 } 215 } else { 216 ol[l.Label] = true 217 } 218 } 219 } 220 221 // Add labels from nl which are not yet present. 222 for _, l := range data.Labels { 223 if l.ServiceName != clientServiceName { 224 continue 225 } 226 if !ol[l.Label] { 227 if err = s.datastore.AddClientLabel(ctx, cid, l); err != nil { 228 return service.TemporaryError{E: fmt.Errorf("unable to add label[%v]: %v", l, err)} 229 } 230 } 231 } 232 // Forget anything we know about this client. Other servers could have 233 // now-stale data, but this client is likely to stick with us due to 234 // connection reuse. 235 s.cc.Update(cid, nil) 236 return nil 237 } 238 239 // processResourceUsage processes a ResourceUsageData message. 240 func (s *systemService) processResourceUsage(ctx context.Context, cid common.ClientID, d *anypb.Any, v *fspb.ValidationInfo) error { 241 rud := &mpb.ResourceUsageData{} 242 if err := d.UnmarshalTo(rud); err != nil { 243 return fmt.Errorf("unable to unmarshal data as ResourceUsageData: %v", err) 244 } 245 246 cd, err := s.sctx.GetClientData(ctx, cid) 247 if err != nil { 248 return fmt.Errorf("failed to get client data for %v: %v", cid, err) 249 } 250 s.stats.ResourceUsageDataReceived(cd, rud, v) 251 if err := s.datastore.RecordResourceUsageData(ctx, cid, rud); err != nil { 252 err = fmt.Errorf("failed to write resource-usage data: %v", err) 253 return err 254 } 255 return nil 256 } 257 258 // processKillNotification handles kill-notifications sent by clients. 259 func (s *systemService) processKillNotification(ctx context.Context, cid common.ClientID, d *anypb.Any) error { 260 kn := &mpb.KillNotification{} 261 if err := d.UnmarshalTo(kn); err != nil { 262 return fmt.Errorf("unable to unmarshal KillNotification: %v", err) 263 } 264 265 if killNotificationLogLimiter.Allow() { 266 log.Warningf("Received kill notification from %s: [service: %s, reason: %s]", cid, kn.Service, kn.Reason) 267 } 268 269 cd, err := s.sctx.GetClientData(ctx, cid) 270 if err != nil { 271 return fmt.Errorf("failed to get client data for %v: %v", cid, err) 272 } 273 s.stats.KillNotificationReceived(cd, kn) 274 return nil 275 }