github.com/letsencrypt/boulder@v0.20251208.0/log/validator/validator.go (about) 1 package validator 2 3 import ( 4 "context" 5 "encoding/base64" 6 "errors" 7 "fmt" 8 "os" 9 "path/filepath" 10 "strings" 11 "sync" 12 "time" 13 14 "github.com/nxadm/tail" 15 "github.com/prometheus/client_golang/prometheus" 16 "github.com/prometheus/client_golang/prometheus/promauto" 17 18 "github.com/letsencrypt/boulder/log" 19 ) 20 21 var errInvalidChecksum = errors.New("invalid checksum length") 22 23 type Validator struct { 24 // mu guards patterns and tailers to prevent Shutdown racing monitor 25 mu sync.Mutex 26 27 // patterns is the list of glob patterns to monitor with filepath.Glob for logs 28 patterns []string 29 30 // tailers is a map of filenames to the tailer which are currently being tailed 31 tailers map[string]*tail.Tail 32 33 // monitorCancel cancels the monitor's context, so it exits 34 monitorCancel context.CancelFunc 35 36 lineCounter *prometheus.CounterVec 37 log log.Logger 38 } 39 40 // New Validator monitoring paths, which is a list of file globs. 41 func New(patterns []string, logger log.Logger, stats prometheus.Registerer) *Validator { 42 lineCounter := promauto.With(stats).NewCounterVec(prometheus.CounterOpts{ 43 Name: "log_lines", 44 Help: "A counter of log lines processed, with status", 45 }, []string{"filename", "status"}) 46 47 monitorContext, monitorCancel := context.WithCancel(context.Background()) 48 49 v := &Validator{ 50 patterns: patterns, 51 tailers: map[string]*tail.Tail{}, 52 log: logger, 53 monitorCancel: monitorCancel, 54 lineCounter: lineCounter, 55 } 56 57 go v.monitor(monitorContext) 58 59 return v 60 } 61 62 // pollPaths expands v.patterns and calls v.tailValidateFile on each resulting file 63 func (v *Validator) pollPaths() { 64 v.mu.Lock() 65 defer v.mu.Unlock() 66 for _, pattern := range v.patterns { 67 paths, err := filepath.Glob(pattern) 68 if err != nil { 69 v.log.Err(err.Error()) 70 } 71 72 for _, path := range paths { 73 if _, ok := v.tailers[path]; ok { 74 // We are already tailing this file 75 continue 76 } 77 78 t, err := tail.TailFile(path, tail.Config{ 79 ReOpen: true, 80 MustExist: false, // sometimes files won't exist, so we must tolerate that 81 Follow: true, 82 Logger: tailLogger{v.log}, 83 CompleteLines: true, 84 }) 85 if err != nil { 86 // TailFile shouldn't error when MustExist is false 87 v.log.Errf("unexpected error from TailFile: %v", err) 88 } 89 90 go v.tailValidate(path, t.Lines) 91 92 v.tailers[path] = t 93 } 94 } 95 } 96 97 // Monitor calls v.pollPaths every minute until its context is cancelled 98 func (v *Validator) monitor(ctx context.Context) { 99 for { 100 v.pollPaths() 101 102 // Wait a minute, unless cancelled 103 timer := time.NewTimer(time.Minute) 104 select { 105 case <-ctx.Done(): 106 return 107 case <-timer.C: 108 } 109 } 110 } 111 112 func (v *Validator) tailValidate(filename string, lines chan *tail.Line) { 113 // Emit no more than 1 error line per second. This prevents consuming large 114 // amounts of disk space in case there is problem that causes all log lines to 115 // be invalid. 116 outputLimiter := time.NewTicker(time.Second) 117 defer outputLimiter.Stop() 118 119 for line := range lines { 120 if line.Err != nil { 121 v.log.Errf("error while tailing %s: %s", filename, line.Err) 122 continue 123 } 124 err := lineValid(line.Text) 125 if err != nil { 126 if errors.Is(err, errInvalidChecksum) { 127 v.lineCounter.WithLabelValues(filename, "invalid checksum length").Inc() 128 } else { 129 v.lineCounter.WithLabelValues(filename, "bad").Inc() 130 } 131 select { 132 case <-outputLimiter.C: 133 v.log.Errf("%s: %s %q", filename, err, line.Text) 134 default: 135 } 136 } else { 137 v.lineCounter.WithLabelValues(filename, "ok").Inc() 138 } 139 } 140 } 141 142 // Shutdown should be called before process shutdown 143 func (v *Validator) Shutdown() { 144 v.mu.Lock() 145 defer v.mu.Unlock() 146 147 v.monitorCancel() 148 149 for _, t := range v.tailers { 150 // The tail module seems to have a race condition that will generate 151 // errors like this on shutdown: 152 // failed to stop tailing file: <filename>: Failed to detect creation of 153 // <filename>: inotify watcher has been closed 154 // This is probably related to the module's shutdown logic triggering the 155 // "reopen" code path for files that are removed and then recreated. 156 // These errors are harmless so we ignore them to allow clean shutdown. 157 _ = t.Stop() 158 t.Cleanup() 159 } 160 } 161 162 func lineValid(text string) error { 163 // Line format should match the following rsyslog omfile template: 164 // 165 // template( name="LELogFormat" type="list" ) { 166 // property(name="timereported" dateFormat="rfc3339") 167 // constant(value=" ") 168 // property(name="hostname" field.delimiter="46" field.number="1") 169 // constant(value=" datacenter ") 170 // property(name="syslogseverity") 171 // constant(value=" ") 172 // property(name="syslogtag") 173 // property(name="msg" spifno1stsp="on" ) 174 // property(name="msg" droplastlf="on" ) 175 // constant(value="\n") 176 // } 177 // 178 // This should result in a log line that looks like this: 179 // timestamp hostname datacenter syslogseverity binary-name[pid]: checksum msg 180 181 fields := strings.Split(text, " ") 182 const errorPrefix = "log-validator:" 183 // Extract checksum from line 184 if len(fields) < 6 { 185 return fmt.Errorf("%s line doesn't match expected format", errorPrefix) 186 } 187 checksum := fields[5] 188 _, err := base64.RawURLEncoding.DecodeString(checksum) 189 if err != nil || len(checksum) != 6 { 190 return fmt.Errorf( 191 "%s expected a 6 character base64 raw URL decodable string, got %q: %w", 192 errorPrefix, 193 checksum, 194 errInvalidChecksum, 195 ) 196 } 197 198 // Reconstruct just the message portion of the line 199 line := strings.Join(fields[6:], " ") 200 201 // If we are fed our own output, treat it as always valid. This 202 // prevents runaway scenarios where we generate ever-longer output. 203 if strings.Contains(text, errorPrefix) { 204 return nil 205 } 206 // Check the extracted checksum against the computed checksum 207 computedChecksum := log.LogLineChecksum(line) 208 if checksum != computedChecksum { 209 return fmt.Errorf("%s invalid checksum (expected %q, got %q)", errorPrefix, computedChecksum, checksum) 210 } 211 return nil 212 } 213 214 // ValidateFile validates a single file and returns 215 func ValidateFile(filename string) error { 216 file, err := os.ReadFile(filename) 217 if err != nil { 218 return err 219 } 220 badFile := false 221 for i, line := range strings.Split(string(file), "\n") { 222 if line == "" { 223 continue 224 } 225 err := lineValid(line) 226 if err != nil { 227 badFile = true 228 fmt.Fprintf(os.Stderr, "[line %d] %s: %s\n", i+1, err, line) 229 } 230 } 231 232 if badFile { 233 return errors.New("file contained invalid lines") 234 } 235 return nil 236 }