github.com/google/cadvisor@v0.49.1/summary/summary.go (about)

     1  // Copyright 2015 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
// Package summary maintains the summary of aggregated minute, hour, and day stats.
// For a container running for more than a day, the amount of tracked data can go up to
// 40 KB when cpu and memory are tracked. We'll start by enabling collection for the
// node, followed by docker, and then all containers as we understand the usage pattern
// better.
// TODO(rjnagal): Optimize the size if we start running it for every container.
    21  package summary
    22  
    23  import (
    24  	"fmt"
    25  	"sync"
    26  	"time"
    27  
    28  	v1 "github.com/google/cadvisor/info/v1"
    29  	info "github.com/google/cadvisor/info/v2"
    30  )
    31  
    32  // Usage fields we track for generating percentiles.
    33  type secondSample struct {
    34  	Timestamp time.Time // time when the sample was recorded.
    35  	Cpu       uint64    // cpu usage
    36  	Memory    uint64    // memory usage
    37  }
    38  
    39  type availableResources struct {
    40  	Cpu    bool
    41  	Memory bool
    42  }
    43  
    44  type StatsSummary struct {
    45  	// Resources being tracked for this container.
    46  	available availableResources
    47  	// list of second samples. The list is cleared when a new minute samples is generated.
    48  	secondSamples []*secondSample
    49  	// minute percentiles. We track 24 * 60 maximum samples.
    50  	minuteSamples *SamplesBuffer
    51  	// latest derived instant, minute, hour, and day stats. Instant sample updated every second.
    52  	// Others updated every minute.
    53  	derivedStats info.DerivedStats // Guarded by dataLock.
    54  	dataLock     sync.RWMutex
    55  }
    56  
    57  // Adds a new seconds sample.
    58  // If enough seconds samples are collected, a minute sample is generated and derived
    59  // stats are updated.
    60  func (s *StatsSummary) AddSample(stat v1.ContainerStats) error {
    61  	sample := secondSample{}
    62  	sample.Timestamp = stat.Timestamp
    63  	if s.available.Cpu {
    64  		sample.Cpu = stat.Cpu.Usage.Total
    65  	}
    66  	if s.available.Memory {
    67  		sample.Memory = stat.Memory.WorkingSet
    68  	}
    69  	s.secondSamples = append(s.secondSamples, &sample)
    70  	s.updateLatestUsage()
    71  	// TODO(jnagal): Use 'available' to avoid unnecessary computation.
    72  	numSamples := len(s.secondSamples)
    73  	elapsed := time.Nanosecond
    74  	if numSamples > 1 {
    75  		start := s.secondSamples[0].Timestamp
    76  		end := s.secondSamples[numSamples-1].Timestamp
    77  		elapsed = end.Sub(start)
    78  	}
    79  	if elapsed > 60*time.Second {
    80  		// Make a minute sample. This works with dynamic housekeeping as long
    81  		// as we keep max dynamic housekeeping period close to a minute.
    82  		minuteSample := GetMinutePercentiles(s.secondSamples)
    83  		// Clear seconds samples. Keep the latest sample for continuity.
    84  		// Copying and resizing helps avoid slice re-allocation.
    85  		s.secondSamples[0] = s.secondSamples[numSamples-1]
    86  		s.secondSamples = s.secondSamples[:1]
    87  		s.minuteSamples.Add(minuteSample)
    88  		err := s.updateDerivedStats()
    89  		if err != nil {
    90  			return err
    91  		}
    92  	}
    93  	return nil
    94  }
    95  
    96  func (s *StatsSummary) updateLatestUsage() {
    97  	usage := info.InstantUsage{}
    98  	numStats := len(s.secondSamples)
    99  	if numStats < 1 {
   100  		return
   101  	}
   102  	latest := s.secondSamples[numStats-1]
   103  	usage.Memory = latest.Memory
   104  	if numStats > 1 {
   105  		previous := s.secondSamples[numStats-2]
   106  		cpu, err := getCPURate(*latest, *previous)
   107  		if err == nil {
   108  			usage.Cpu = cpu
   109  		}
   110  	}
   111  
   112  	s.dataLock.Lock()
   113  	defer s.dataLock.Unlock()
   114  	s.derivedStats.LatestUsage = usage
   115  	s.derivedStats.Timestamp = latest.Timestamp
   116  }
   117  
   118  // Generate new derived stats based on current minute stats samples.
   119  func (s *StatsSummary) updateDerivedStats() error {
   120  	derived := info.DerivedStats{}
   121  	derived.Timestamp = time.Now()
   122  	minuteSamples := s.minuteSamples.RecentStats(1)
   123  	if len(minuteSamples) != 1 {
   124  		return fmt.Errorf("failed to retrieve minute stats")
   125  	}
   126  	derived.MinuteUsage = *minuteSamples[0]
   127  	hourUsage, err := s.getDerivedUsage(60)
   128  	if err != nil {
   129  		return fmt.Errorf("failed to compute hour stats: %v", err)
   130  	}
   131  	dayUsage, err := s.getDerivedUsage(60 * 24)
   132  	if err != nil {
   133  		return fmt.Errorf("failed to compute day usage: %v", err)
   134  	}
   135  	derived.HourUsage = hourUsage
   136  	derived.DayUsage = dayUsage
   137  
   138  	s.dataLock.Lock()
   139  	defer s.dataLock.Unlock()
   140  	derived.LatestUsage = s.derivedStats.LatestUsage
   141  	s.derivedStats = derived
   142  
   143  	return nil
   144  }
   145  
   146  // helper method to get hour and daily derived stats
   147  func (s *StatsSummary) getDerivedUsage(n int) (info.Usage, error) {
   148  	if n < 1 {
   149  		return info.Usage{}, fmt.Errorf("invalid number of samples requested: %d", n)
   150  	}
   151  	samples := s.minuteSamples.RecentStats(n)
   152  	numSamples := len(samples)
   153  	if numSamples < 1 {
   154  		return info.Usage{}, fmt.Errorf("failed to retrieve any minute stats")
   155  	}
   156  	// We generate derived stats even with partial data.
   157  	usage := GetDerivedPercentiles(samples)
   158  	// Assumes we have equally placed minute samples.
   159  	usage.PercentComplete = int32(numSamples * 100 / n)
   160  	return usage, nil
   161  }
   162  
   163  // Return the latest calculated derived stats.
   164  func (s *StatsSummary) DerivedStats() (info.DerivedStats, error) {
   165  	s.dataLock.RLock()
   166  	defer s.dataLock.RUnlock()
   167  
   168  	return s.derivedStats, nil
   169  }
   170  
   171  func New(spec v1.ContainerSpec) (*StatsSummary, error) {
   172  	summary := StatsSummary{}
   173  	if spec.HasCpu {
   174  		summary.available.Cpu = true
   175  	}
   176  	if spec.HasMemory {
   177  		summary.available.Memory = true
   178  	}
   179  	if !summary.available.Cpu && !summary.available.Memory {
   180  		return nil, fmt.Errorf("none of the resources are being tracked")
   181  	}
   182  	summary.minuteSamples = NewSamplesBuffer(60 /* one hour */)
   183  	return &summary, nil
   184  }