github.com/google/fleetspeak@v0.1.15-0.20240426164851-4f31f62c1aea/fleetspeak/src/admin/history/history.go (about)

     1  // Copyright 2017 Google Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     https://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package history analyzes client contact history to compute statistics and
    16  // find anomalies.
    17  package history
    18  
    19  import (
    20  	"fmt"
    21  	"net"
    22  	"sort"
    23  	"time"
    24  
    25  	log "github.com/golang/glog"
    26  	tspb "google.golang.org/protobuf/types/known/timestamppb"
    27  
    28  	spb "github.com/google/fleetspeak/fleetspeak/src/server/proto/fleetspeak_server"
    29  )
    30  
    31  // Summary describes the result of analyzing a sequence of contacts made by
    32  // a single client id.
    33  //
    34  // The Splits, SplitPoints and Skips fields work together to recognize when a machine
    35  // is restored from backup or cloned:
    36  //
    37  // In normal operation they will all be 0.
    38  //
    39  // When a machine is restored from a backup, restarted from a fixed VM image or
    40  // otherwise caused to use old FS state, we will count 1 Split and 1 Skip for
    41  // every restore. We also count 1 SplitPoint for every image that we restore
    42  // from.
    43  //
    44  // NOTE: All SplitPoints occurring before the time range of contacts we are
    45  // given are merged together. This this allows us to more accurately count past
    46  // Splits, but means we might under count SplitPoints.
    47  //
    48  // When a machine is cloned n ways, Splits, SplitPoints and Skips will be
    49  // counted as we would for n restores. However, we'll also see ~n Skips per poll
    50  // interval (default poll interval is 5 min). Therefore Skips > Splits is
    51  // evidence that a machine has been cloned.
    52  type Summary struct {
    53  	Start, End  time.Time // First and last contact analyzed.
    54  	Count       int       // Number of contacts analyzed.
    55  	IPCount     int       // Number of distinct IPs observed.
    56  	Splits      int       // Number of excess references to nonces.
    57  	SplitPoints int       // Number of distinct nonces with more than 1 reference.
    58  	Skips       int       // Number of points which reference a nonce other than the immediately previous contact.
    59  }
    60  
    61  type contactSlice []*spb.ClientContact
    62  
    63  func (s contactSlice) Len() int      { return len(s) }
    64  func (s contactSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
    65  func (s contactSlice) Less(i, j int) bool {
    66  	return tsLess(s[i].Timestamp, s[j].Timestamp)
    67  }
    68  
    69  func tsLess(i, j *tspb.Timestamp) bool {
    70  	return i.Seconds < j.Seconds || (i.Seconds == j.Seconds && i.Nanos < j.Nanos)
    71  }
    72  
    73  // Summarize computes a Summary for list of contacts.
    74  func Summarize(cs []*spb.ClientContact) (*Summary, error) {
    75  	if err := validate(cs); err != nil {
    76  		return nil, err
    77  	}
    78  	if len(cs) == 0 {
    79  		return &Summary{}, nil
    80  	}
    81  
    82  	sort.Sort(contactSlice(cs))
    83  
    84  	nm := nonceMap(cs)
    85  	if err := validateTime(nm); err != nil {
    86  		return nil, err
    87  	}
    88  
    89  	splits, splitPoints := countSplits(nm)
    90  	skips := countSkips(cs)
    91  
    92  	start := cs[0].Timestamp.AsTime()
    93  	end := cs[len(cs)-1].Timestamp.AsTime()
    94  
    95  	return &Summary{
    96  		Start:       start,
    97  		End:         end,
    98  		Count:       len(cs),
    99  		IPCount:     countIPs(cs),
   100  		Splits:      splits,
   101  		SplitPoints: splitPoints,
   102  		Skips:       skips,
   103  	}, nil
   104  }
   105  
   106  func validate(contacts []*spb.ClientContact) error {
   107  	times := make(map[int64]bool)
   108  	nonces := make(map[uint64]bool)
   109  	for _, c := range contacts {
   110  		// The datastore assigns and is responsible for preventing duplicate contact
   111  		// timestamps. If we see a duplicate, it is a data error that might confuse
   112  		// other analysis.
   113  		if err := c.Timestamp.CheckValid(); err != nil {
   114  			return fmt.Errorf("bad timestamp proto [%v]: %v", c.Timestamp, err)
   115  		}
   116  		t := c.Timestamp.AsTime()
   117  		ts := t.UnixNano()
   118  		if times[ts] {
   119  			return fmt.Errorf("duplicate timestamp: %v", t)
   120  		}
   121  		times[ts] = true
   122  
   123  		// The sent nonce is a 64 bit number that should have been chosen by the
   124  		// server for each contact using a strong RNG. If we see a duplicate it is a
   125  		// data error that might confuse other analysis.
   126  		if nonces[c.SentNonce] {
   127  			return fmt.Errorf("duplicate sent nonce: %d", c.SentNonce)
   128  		}
   129  		nonces[c.SentNonce] = true
   130  	}
   131  	return nil
   132  }
   133  
   134  func validateTime(nm map[uint64]*spb.ClientContact) error {
   135  
   136  	for _, i := range nm {
   137  		if i.ReceivedNonce == 0 {
   138  			continue
   139  		}
   140  		if p := nm[i.ReceivedNonce]; p != nil {
   141  			if !tsLess(p.Timestamp, i.Timestamp) {
   142  				// The nonce received from a client cannot reference a nonce produced by
   143  				// the server in the future. If this seems to have happened, it is a
   144  				// data error that could confuse other analysis.
   145  				return fmt.Errorf("nonce at [%v] references future time [%v]", i.Timestamp, p.Timestamp)
   146  			}
   147  		}
   148  	}
   149  	return nil
   150  }
   151  
   152  func countIPs(contacts []*spb.ClientContact) int {
   153  	m := make(map[string]bool)
   154  	for _, c := range contacts {
   155  		h, _, err := net.SplitHostPort(c.ObservedAddress)
   156  		if err != nil {
   157  			log.Warningf("Unable to parse ObservedAddress [%s], ignoring: %v", c.ObservedAddress, err)
   158  			continue
   159  		}
   160  		m[h] = true
   161  	}
   162  	return len(m)
   163  }
   164  
   165  func nonceMap(cs []*spb.ClientContact) map[uint64]*spb.ClientContact {
   166  	n := make(map[uint64]*spb.ClientContact)
   167  	for _, c := range cs {
   168  		n[c.SentNonce] = c
   169  	}
   170  	return n
   171  }
   172  
   173  func countSplits(nm map[uint64]*spb.ClientContact) (splits, splitPoints int) {
   174  	// A count of how many contacts target each nonce.
   175  	ts := make(map[uint64]int)
   176  	for _, c := range nm {
   177  		// Anything not in nm must be too old - coalesce with the 0 target.
   178  		if c.ReceivedNonce == 0 || nm[c.ReceivedNonce] == nil {
   179  			ts[0]++
   180  		} else {
   181  			ts[c.ReceivedNonce]++
   182  		}
   183  	}
   184  	for _, t := range ts {
   185  		if t > 1 {
   186  			splitPoints++
   187  			splits += t - 1
   188  		}
   189  	}
   190  	return splits, splitPoints
   191  }
   192  
   193  func countSkips(cs []*spb.ClientContact) int {
   194  	var s int
   195  	for i := 1; i < len(cs); i++ {
   196  		if cs[i].ReceivedNonce != cs[i-1].SentNonce {
   197  			s++
   198  		}
   199  	}
   200  	return s
   201  }