github.com/letsencrypt/boulder@v0.20251208.0/log/validator/validator.go

github.com/letsencrypt/boulder@v0.20251208.0/log/validator/validator.go (about)

     1  package validator
     2  
     3  import (
     4  	"context"
     5  	"encoding/base64"
     6  	"errors"
     7  	"fmt"
     8  	"os"
     9  	"path/filepath"
    10  	"strings"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/nxadm/tail"
    15  	"github.com/prometheus/client_golang/prometheus"
    16  	"github.com/prometheus/client_golang/prometheus/promauto"
    17  
    18  	"github.com/letsencrypt/boulder/log"
    19  )
    20  
    21  var errInvalidChecksum = errors.New("invalid checksum length")
    22  
    23  type Validator struct {
    24  	// mu guards patterns and tailers to prevent Shutdown racing monitor
    25  	mu sync.Mutex
    26  
    27  	// patterns is the list of glob patterns to monitor with filepath.Glob for logs
    28  	patterns []string
    29  
    30  	// tailers is a map of filenames to the tailer which are currently being tailed
    31  	tailers map[string]*tail.Tail
    32  
    33  	// monitorCancel cancels the monitor's context, so it exits
    34  	monitorCancel context.CancelFunc
    35  
    36  	lineCounter *prometheus.CounterVec
    37  	log         log.Logger
    38  }
    39  
    40  // New Validator monitoring paths, which is a list of file globs.
    41  func New(patterns []string, logger log.Logger, stats prometheus.Registerer) *Validator {
    42  	lineCounter := promauto.With(stats).NewCounterVec(prometheus.CounterOpts{
    43  		Name: "log_lines",
    44  		Help: "A counter of log lines processed, with status",
    45  	}, []string{"filename", "status"})
    46  
    47  	monitorContext, monitorCancel := context.WithCancel(context.Background())
    48  
    49  	v := &Validator{
    50  		patterns:      patterns,
    51  		tailers:       map[string]*tail.Tail{},
    52  		log:           logger,
    53  		monitorCancel: monitorCancel,
    54  		lineCounter:   lineCounter,
    55  	}
    56  
    57  	go v.monitor(monitorContext)
    58  
    59  	return v
    60  }
    61  
    62  // pollPaths expands v.patterns and calls v.tailValidateFile on each resulting file
    63  func (v *Validator) pollPaths() {
    64  	v.mu.Lock()
    65  	defer v.mu.Unlock()
    66  	for _, pattern := range v.patterns {
    67  		paths, err := filepath.Glob(pattern)
    68  		if err != nil {
    69  			v.log.Err(err.Error())
    70  		}
    71  
    72  		for _, path := range paths {
    73  			if _, ok := v.tailers[path]; ok {
    74  				// We are already tailing this file
    75  				continue
    76  			}
    77  
    78  			t, err := tail.TailFile(path, tail.Config{
    79  				ReOpen:        true,
    80  				MustExist:     false, // sometimes files won't exist, so we must tolerate that
    81  				Follow:        true,
    82  				Logger:        tailLogger{v.log},
    83  				CompleteLines: true,
    84  			})
    85  			if err != nil {
    86  				// TailFile shouldn't error when MustExist is false
    87  				v.log.Errf("unexpected error from TailFile: %v", err)
    88  			}
    89  
    90  			go v.tailValidate(path, t.Lines)
    91  
    92  			v.tailers[path] = t
    93  		}
    94  	}
    95  }
    96  
    97  // Monitor calls v.pollPaths every minute until its context is cancelled
    98  func (v *Validator) monitor(ctx context.Context) {
    99  	for {
   100  		v.pollPaths()
   101  
   102  		// Wait a minute, unless cancelled
   103  		timer := time.NewTimer(time.Minute)
   104  		select {
   105  		case <-ctx.Done():
   106  			return
   107  		case <-timer.C:
   108  		}
   109  	}
   110  }
   111  
   112  func (v *Validator) tailValidate(filename string, lines chan *tail.Line) {
   113  	// Emit no more than 1 error line per second. This prevents consuming large
   114  	// amounts of disk space in case there is problem that causes all log lines to
   115  	// be invalid.
   116  	outputLimiter := time.NewTicker(time.Second)
   117  	defer outputLimiter.Stop()
   118  
   119  	for line := range lines {
   120  		if line.Err != nil {
   121  			v.log.Errf("error while tailing %s: %s", filename, line.Err)
   122  			continue
   123  		}
   124  		err := lineValid(line.Text)
   125  		if err != nil {
   126  			if errors.Is(err, errInvalidChecksum) {
   127  				v.lineCounter.WithLabelValues(filename, "invalid checksum length").Inc()
   128  			} else {
   129  				v.lineCounter.WithLabelValues(filename, "bad").Inc()
   130  			}
   131  			select {
   132  			case <-outputLimiter.C:
   133  				v.log.Errf("%s: %s %q", filename, err, line.Text)
   134  			default:
   135  			}
   136  		} else {
   137  			v.lineCounter.WithLabelValues(filename, "ok").Inc()
   138  		}
   139  	}
   140  }
   141  
   142  // Shutdown should be called before process shutdown
   143  func (v *Validator) Shutdown() {
   144  	v.mu.Lock()
   145  	defer v.mu.Unlock()
   146  
   147  	v.monitorCancel()
   148  
   149  	for _, t := range v.tailers {
   150  		// The tail module seems to have a race condition that will generate
   151  		// errors like this on shutdown:
   152  		// failed to stop tailing file: <filename>: Failed to detect creation of
   153  		// <filename>: inotify watcher has been closed
   154  		// This is probably related to the module's shutdown logic triggering the
   155  		// "reopen" code path for files that are removed and then recreated.
   156  		// These errors are harmless so we ignore them to allow clean shutdown.
   157  		_ = t.Stop()
   158  		t.Cleanup()
   159  	}
   160  }
   161  
   162  func lineValid(text string) error {
   163  	// Line format should match the following rsyslog omfile template:
   164  	//
   165  	//   template( name="LELogFormat" type="list" ) {
   166  	//  	property(name="timereported" dateFormat="rfc3339")
   167  	//  	constant(value=" ")
   168  	//  	property(name="hostname" field.delimiter="46" field.number="1")
   169  	//  	constant(value=" datacenter ")
   170  	//  	property(name="syslogseverity")
   171  	//  	constant(value=" ")
   172  	//  	property(name="syslogtag")
   173  	//  	property(name="msg" spifno1stsp="on" )
   174  	//  	property(name="msg" droplastlf="on" )
   175  	//  	constant(value="\n")
   176  	//   }
   177  	//
   178  	// This should result in a log line that looks like this:
   179  	//   timestamp hostname datacenter syslogseverity binary-name[pid]: checksum msg
   180  
   181  	fields := strings.Split(text, " ")
   182  	const errorPrefix = "log-validator:"
   183  	// Extract checksum from line
   184  	if len(fields) < 6 {
   185  		return fmt.Errorf("%s line doesn't match expected format", errorPrefix)
   186  	}
   187  	checksum := fields[5]
   188  	_, err := base64.RawURLEncoding.DecodeString(checksum)
   189  	if err != nil || len(checksum) != 6 {
   190  		return fmt.Errorf(
   191  			"%s expected a 6 character base64 raw URL decodable string, got %q: %w",
   192  			errorPrefix,
   193  			checksum,
   194  			errInvalidChecksum,
   195  		)
   196  	}
   197  
   198  	// Reconstruct just the message portion of the line
   199  	line := strings.Join(fields[6:], " ")
   200  
   201  	// If we are fed our own output, treat it as always valid. This
   202  	// prevents runaway scenarios where we generate ever-longer output.
   203  	if strings.Contains(text, errorPrefix) {
   204  		return nil
   205  	}
   206  	// Check the extracted checksum against the computed checksum
   207  	computedChecksum := log.LogLineChecksum(line)
   208  	if checksum != computedChecksum {
   209  		return fmt.Errorf("%s invalid checksum (expected %q, got %q)", errorPrefix, computedChecksum, checksum)
   210  	}
   211  	return nil
   212  }
   213  
   214  // ValidateFile validates a single file and returns
   215  func ValidateFile(filename string) error {
   216  	file, err := os.ReadFile(filename)
   217  	if err != nil {
   218  		return err
   219  	}
   220  	badFile := false
   221  	for i, line := range strings.Split(string(file), "\n") {
   222  		if line == "" {
   223  			continue
   224  		}
   225  		err := lineValid(line)
   226  		if err != nil {
   227  			badFile = true
   228  			fmt.Fprintf(os.Stderr, "[line %d] %s: %s\n", i+1, err, line)
   229  		}
   230  	}
   231  
   232  	if badFile {
   233  		return errors.New("file contained invalid lines")
   234  	}
   235  	return nil
   236  }