github.com/quay/claircore@v1.5.28/debian/updater.go (about) 1 package debian 2 3 import ( 4 "bufio" 5 "bytes" 6 "context" 7 "encoding/json" 8 "fmt" 9 "io" 10 "net/http" 11 "net/textproto" 12 "net/url" 13 "path" 14 "regexp" 15 "strconv" 16 "strings" 17 18 "github.com/quay/zlog" 19 20 "github.com/quay/claircore/libvuln/driver" 21 "github.com/quay/claircore/pkg/tmp" 22 ) 23 24 //doc:url updater 25 const ( 26 defaultMirror = `https://deb.debian.org/` 27 defaultJSON = `https://security-tracker.debian.org/tracker/data/json` 28 ) 29 30 var ( 31 _ driver.UpdaterSetFactory = (*Factory)(nil) 32 _ driver.Configurable = (*Factory)(nil) 33 _ driver.Updater = (*updater)(nil) 34 _ driver.Configurable = (*updater)(nil) 35 ) 36 37 // Factory creates Updaters for all Debian distributions that exist 38 // in the mirror, and have entries in the JSON security tracker. 39 // 40 // [Configure] must be called before [UpdaterSet]. 41 type Factory struct { 42 c *http.Client 43 mirror *url.URL 44 json *url.URL 45 } 46 47 // NewFactory constructs a Factory. 48 // 49 // [Configure] must be called before [UpdaterSet]. 50 func NewFactory(_ context.Context) (*Factory, error) { 51 f := &Factory{} 52 return f, nil 53 } 54 55 // Configure implements [driver.Configurable]. 56 func (f *Factory) Configure(_ context.Context, cf driver.ConfigUnmarshaler, c *http.Client) error { 57 f.c = c 58 var cfg FactoryConfig 59 if err := cf(&cfg); err != nil { 60 return fmt.Errorf("debian: factory configuration error: %w", err) 61 } 62 63 if cfg.ArchiveURL != "" || cfg.OVALURL != "" { 64 return fmt.Errorf("debian: neither archive_url nor oval_url should be populated anymore; use json_url and mirror_url instead") 65 } 66 67 u, err := url.Parse(defaultMirror) 68 if cfg.MirrorURL != "" { 69 u, err = url.Parse(cfg.MirrorURL) 70 } 71 if err != nil { 72 return fmt.Errorf("debian: bad mirror URL: %w", err) 73 } 74 f.mirror, err = u.Parse("debian/") 75 if err != nil { 76 return fmt.Errorf("debian: bad mirror URL: %w", err) 77 } 78 79 f.json, err = url.Parse(defaultJSON) 80 if cfg.JSONURL != "" { 81 f.json, err = url.Parse(cfg.JSONURL) 82 } 83 if err != nil { 84 return fmt.Errorf("debian: bad JSON URL: %w", err) 85 } 86 87 return nil 88 } 89 90 // FactoryConfig is the configuration honored by the Factory. 91 // 92 // The "mirror" URLs expect to find HTML at "dists/" formatted like 93 // the HTML from the Debian project (that is to say, HTML containing relative links 94 // to distribution directories). 95 // 96 // The "mirror" URL needs a trailing slash. 97 // 98 // The "JSON" URL expects to find a JSON array of packages mapped to related vulnerabilities. 99 type FactoryConfig struct { 100 // ArchiveURL is a URL to a Debian archive. 101 // 102 // Deprecated: Only MirrorURL should be used. 103 ArchiveURL string `json:"archive_url" yaml:"archive_url"` 104 MirrorURL string `json:"mirror_url" yaml:"mirror_url"` 105 // OVALURL is a URL to a collection of OVAL XML documents. 106 // 107 // Deprecated: Use JSONURL instead. 108 OVALURL string `json:"oval_url" yaml:"oval_url"` 109 // JSONURL is a URL to a JSON vulnerability feed. 110 JSONURL string `json:"json_url" yaml:"json_url"` 111 } 112 113 var ( 114 // LinkRegexp is a bad regexp to extract link targets. 115 // This will break if Debian's codenames include a double-quote in the future. 116 linkRegexp = regexp.MustCompile(`href="([^"]+)"`) 117 // SkipList is a list of strings that, experimentally, indicate the string 118 // is not a codename. 119 skipList = []string{ 120 "-", "Debian", "sid", "stable", "testing", "experimental", "README", "updates", "backports", 121 } 122 ) 123 124 // UpdaterSet implements [driver.UpdaterSetFactory]. 125 func (f *Factory) UpdaterSet(ctx context.Context) (driver.UpdaterSet, error) { 126 s := driver.NewUpdaterSet() 127 128 if err := f.findReleases(ctx, f.mirror); err != nil { 129 return s, fmt.Errorf("debian: examining remote: %w", err) 130 } 131 132 // TODO: Consider returning stub if Last-Modified has not updated. 133 u := &updater{ 134 jsonURL: f.json.String(), 135 } 136 137 if err := s.Add(u); err != nil { 138 return s, fmt.Errorf("debian: unable to add updater: %w", err) 139 } 140 141 return s, nil 142 } 143 144 // FindReleases is split out as a method to make it easier to examine the mirror and the archive. 145 func (f *Factory) findReleases(ctx context.Context, u *url.URL) error { 146 dir, err := u.Parse("dists/") 147 if err != nil { 148 return fmt.Errorf("debian: unable to construct URL: %w", err) 149 } 150 req, err := http.NewRequestWithContext(ctx, http.MethodGet, dir.String(), nil) 151 if err != nil { 152 return fmt.Errorf("debian: unable to construct request: %w", err) 153 } 154 res, err := f.c.Do(req) 155 if err != nil { 156 return fmt.Errorf("debian: unable to do request: %w", err) 157 } 158 defer res.Body.Close() 159 switch res.StatusCode { 160 case http.StatusOK: 161 default: 162 return fmt.Errorf("debian: unexpected status fetching %q: %s", dir.String(), res.Status) 163 } 164 var buf bytes.Buffer 165 if _, err := buf.ReadFrom(res.Body); err != nil { 166 return fmt.Errorf("debian: unable to read dists listing: %w", err) 167 } 168 ms := linkRegexp.FindAllStringSubmatch(buf.String(), -1) 169 170 Listing: 171 for _, m := range ms { 172 dist := m[1] 173 switch { 174 case dist == "": 175 continue 176 case dist[0] == '/', dist[0] == '?': 177 continue 178 } 179 for _, s := range skipList { 180 if strings.Contains(dist, s) { 181 continue Listing 182 } 183 } 184 dist = strings.Trim(dist, "/") 185 rf, err := dir.Parse(path.Join(dist, `Release`)) 186 if err != nil { 187 zlog.Info(ctx). 188 Err(err). 189 Stringer("context", dir). 190 Str("target", path.Join(dist, `Release`)). 191 Msg("unable to construct URL") 192 continue 193 } 194 req, err := http.NewRequestWithContext(ctx, http.MethodGet, rf.String(), nil) 195 if err != nil { 196 zlog.Info(ctx). 197 Err(err). 198 Stringer("url", rf). 199 Msg("unable to construct request") 200 continue 201 } 202 req.Header.Set("range", "bytes=0-512") 203 res, err := f.c.Do(req) 204 if err != nil { 205 zlog.Info(ctx). 206 Err(err). 207 Stringer("url", rf). 208 Msg("unable to do request") 209 continue 210 } 211 buf.Reset() 212 buf.ReadFrom(res.Body) 213 res.Body.Close() 214 switch res.StatusCode { 215 case http.StatusPartialContent, http.StatusOK: 216 case http.StatusNotFound: // Probably extremely old, it's fine. 217 continue 218 default: 219 zlog.Info(ctx). 220 Str("status", res.Status). 221 Stringer("url", rf). 222 Msg("unexpected response") 223 continue 224 } 225 tp := textproto.NewReader(bufio.NewReader(io.MultiReader(&buf, bytes.NewReader([]byte("\r\n\r\n"))))) 226 h, err := tp.ReadMIMEHeader() 227 if err != nil { 228 zlog.Info(ctx).Err(err).Msg("unable to read MIME-ish headers") 229 continue 230 } 231 sv := h.Get("Version") 232 if sv == "" { 233 zlog.Debug(ctx).Str("dist", dist).Msg("no version assigned, skipping") 234 continue 235 } 236 vs := strings.Split(sv, ".") 237 if len(vs) == 1 { 238 zlog.Debug(ctx).Str("dist", dist).Msg("no version assigned, skipping") 239 continue 240 } 241 ver, err := strconv.ParseInt(vs[0], 10, 32) 242 if err != nil { 243 zlog.Info(ctx).Err(err).Msg("unable to parse version") 244 continue 245 } 246 247 mkDist(dist, int(ver)) 248 } 249 250 return nil 251 } 252 253 // Updater implements [driver.updater]. 254 type updater struct { 255 // jsonURL is the URL from which to fetch JSON vulnerability data 256 jsonURL string 257 258 c *http.Client 259 } 260 261 // UpdaterConfig is the configuration for the updater. 262 type UpdaterConfig struct { 263 // Deprecated: Use JSONURL instead. 264 OVALURL string `json:"url" yaml:"url"` 265 JSONURL string `json:"json_url" yaml:"json_url"` 266 // Deprecated: DistURL and DistsURLs are unused. 267 DistsURL string `json:"dists_url" yaml:"dists_url"` 268 DistsURLs []json.RawMessage `json:"dists_urls" yaml:"dists_urls"` 269 } 270 271 // Name implements [driver.Updater]. 272 func (u *updater) Name() string { 273 return "debian/updater" 274 } 275 276 // Configure implements [driver.Configurable]. 277 func (u *updater) Configure(ctx context.Context, f driver.ConfigUnmarshaler, c *http.Client) error { 278 ctx = zlog.ContextWithValues(ctx, "component", "debian/Updater.Configure") 279 u.c = c 280 var cfg UpdaterConfig 281 if err := f(&cfg); err != nil { 282 return err 283 } 284 285 if cfg.DistsURL != "" || cfg.OVALURL != "" { 286 zlog.Error(ctx).Msg("configured with deprecated URLs") 287 return fmt.Errorf("debian: neither url nor dists_url should be used anymore; use json_url and dists_urls instead") 288 } 289 290 if cfg.JSONURL != "" { 291 u.jsonURL = cfg.JSONURL 292 zlog.Info(ctx). 293 Msg("configured JSON database URL") 294 } 295 296 return nil 297 } 298 299 // Fetch implements [driver.Fetcher]. 300 func (u *updater) Fetch(ctx context.Context, fingerprint driver.Fingerprint) (io.ReadCloser, driver.Fingerprint, error) { 301 ctx = zlog.ContextWithValues(ctx, 302 "component", "debian/Updater.Fetch", 303 "database", u.jsonURL) 304 305 req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.jsonURL, nil) 306 if err != nil { 307 return nil, "", fmt.Errorf("failed to create request") 308 } 309 if fingerprint != "" { 310 req.Header.Set("If-Modified-Since", string(fingerprint)) 311 } 312 313 // fetch JSON database 314 resp, err := u.c.Do(req) 315 if resp != nil { 316 defer resp.Body.Close() 317 } 318 if err != nil { 319 return nil, "", fmt.Errorf("failed to retrieve JSON database: %v", err) 320 } 321 322 fp := resp.Header.Get("Last-Modified") 323 324 switch resp.StatusCode { 325 case http.StatusOK: 326 if fingerprint == "" || fp != string(fingerprint) { 327 zlog.Info(ctx).Msg("fetching latest JSON database") 328 break 329 } 330 fallthrough 331 case http.StatusNotModified: 332 return nil, fingerprint, driver.Unchanged 333 default: 334 return nil, "", fmt.Errorf("unexpected response: %v", resp.Status) 335 } 336 337 f, err := tmp.NewFile("", "debian.") 338 if err != nil { 339 return nil, "", err 340 } 341 342 var success bool 343 defer func() { 344 if !success { 345 if err := f.Close(); err != nil { 346 zlog.Warn(ctx).Err(err).Msg("unable to close spool") 347 } 348 } 349 }() 350 if _, err := io.Copy(f, resp.Body); err != nil { 351 return nil, "", fmt.Errorf("failed to read http body: %w", err) 352 } 353 if _, err := f.Seek(0, io.SeekStart); err != nil { 354 return nil, "", fmt.Errorf("failed to seek body: %w", err) 355 } 356 zlog.Info(ctx).Msg("fetched latest json database successfully") 357 358 success = true 359 return f, driver.Fingerprint(fp), err 360 }