go.mondoo.com/cnquery@v0.0.0-20231005093811-59568235f6ea/explorer/bundle.go (about) 1 // Copyright (c) Mondoo, Inc. 2 // SPDX-License-Identifier: BUSL-1.1 3 4 package explorer 5 6 import ( 7 "context" 8 "errors" 9 "io/fs" 10 "os" 11 "path/filepath" 12 "strings" 13 14 "github.com/rs/zerolog/log" 15 "github.com/segmentio/ksuid" 16 "go.mondoo.com/cnquery/checksums" 17 llx "go.mondoo.com/cnquery/llx" 18 "go.mondoo.com/cnquery/mrn" 19 "go.mondoo.com/cnquery/utils/multierr" 20 "sigs.k8s.io/yaml" 21 ) 22 23 const ( 24 MRN_RESOURCE_QUERY = "queries" 25 MRN_RESOURCE_QUERYPACK = "querypacks" 26 MRN_RESOURCE_ASSET = "assets" 27 ) 28 29 // BundleMap is a Bundle with easier access to its data 30 type BundleMap struct { 31 OwnerMrn string `json:"owner_mrn,omitempty"` 32 Packs map[string]*QueryPack `json:"packs,omitempty"` 33 Queries map[string]*Mquery `json:"queries,omitempty"` 34 Props map[string]*Mquery `json:"props,omitempty"` 35 } 36 37 // NewBundleMap creates a new empty initialized map 38 // dataLake (optional) connects an additional data layer which may provide queries/packs 39 func NewBundleMap(ownerMrn string) *BundleMap { 40 return &BundleMap{ 41 OwnerMrn: ownerMrn, 42 Packs: make(map[string]*QueryPack), 43 Queries: make(map[string]*Mquery), 44 Props: make(map[string]*Mquery), 45 } 46 } 47 48 // BundleFromPaths loads a single bundle file or a bundle that 49 // was split into multiple files into a single Bundle struct 50 func BundleFromPaths(paths ...string) (*Bundle, error) { 51 // load all the source files 52 resolvedFilenames, err := walkBundleFiles(paths) 53 if err != nil { 54 log.Error().Err(err).Msg("could not resolve bundle files") 55 return nil, err 56 } 57 58 // aggregate all files into a single bundle 59 aggregatedBundle, err := aggregateFilesToBundle(resolvedFilenames) 60 if err != nil { 61 log.Error().Err(err).Msg("could merge bundle files") 62 return nil, err 63 } 64 return aggregatedBundle, nil 65 } 66 67 // walkBundleFiles iterates over all provided filenames and 68 // checks if the name is a file or a directory. If the filename 69 // is a directory, it walks the directory recursively 70 func walkBundleFiles(filenames []string) ([]string, error) { 71 // resolve file names 72 resolvedFilenames := []string{} 73 for i := range filenames { 74 filename := filenames[i] 75 fi, err := os.Stat(filename) 76 if err != nil { 77 return nil, multierr.Wrap(err, "could not load bundle file: "+filename) 78 } 79 80 if fi.IsDir() { 81 filepath.WalkDir(filename, func(path string, d fs.DirEntry, err error) error { 82 if err != nil { 83 return err 84 } 85 // we ignore nested directories 86 if d.IsDir() { 87 return nil 88 } 89 90 // only consider .yaml|.yml files 91 if strings.HasSuffix(d.Name(), ".yaml") || strings.HasSuffix(d.Name(), ".yml") { 92 resolvedFilenames = append(resolvedFilenames, path) 93 } 94 95 return nil 96 }) 97 } else { 98 resolvedFilenames = append(resolvedFilenames, filename) 99 } 100 } 101 102 return resolvedFilenames, nil 103 } 104 105 // aggregateFilesToBundle iterates over all provided files and loads its content. 106 // It assumes that all provided files are checked upfront and are not a directory 107 func aggregateFilesToBundle(paths []string) (*Bundle, error) { 108 // iterate over all files, load them and merge them 109 mergedBundle := &Bundle{} 110 111 for i := range paths { 112 path := paths[i] 113 bundle, err := bundleFromSingleFile(path) 114 if err != nil { 115 return nil, multierr.Wrap(err, "could not load file: "+path) 116 } 117 combineBundles(mergedBundle, bundle) 118 } 119 120 return mergedBundle, nil 121 } 122 123 // Combine two bundles, even if they aren't compiled yet. 124 // Uses the existing owner MRN if it is set, otherwise the other is used. 125 func combineBundles(into *Bundle, other *Bundle) { 126 if into.OwnerMrn == "" { 127 into.OwnerMrn = other.OwnerMrn 128 } 129 130 into.Packs = append(into.Packs, other.Packs...) 131 into.Queries = append(into.Queries, other.Queries...) 132 } 133 134 // bundleFromSingleFile loads a bundle from a single file 135 func bundleFromSingleFile(path string) (*Bundle, error) { 136 bundleData, err := os.ReadFile(path) 137 if err != nil { 138 return nil, err 139 } 140 141 return BundleFromYAML(bundleData) 142 } 143 144 // BundleFromYAML create a bundle from yaml contents 145 func BundleFromYAML(data []byte) (*Bundle, error) { 146 var res Bundle 147 err := yaml.Unmarshal(data, &res) 148 res.EnsureUIDs() 149 return &res, err 150 } 151 152 // ToYAML returns the bundle as yaml 153 func (p *Bundle) ToYAML() ([]byte, error) { 154 return yaml.Marshal(p) 155 } 156 157 func (p *Bundle) SourceHash() (string, error) { 158 raw, err := p.ToYAML() 159 if err != nil { 160 return "", err 161 } 162 c := checksums.New 163 c = c.Add(string(raw)) 164 return c.String(), nil 165 } 166 167 // ToMap turns the Bundle into a BundleMap 168 func (p *Bundle) ToMap() *BundleMap { 169 res := NewBundleMap(p.OwnerMrn) 170 171 for i := range p.Queries { 172 q := p.Queries[i] 173 res.Queries[q.Mrn] = q 174 } 175 176 for i := range p.Packs { 177 c := p.Packs[i] 178 res.Packs[c.Mrn] = c 179 } 180 181 return res 182 } 183 184 // Add another bundle into this. No duplicate packs, queries, or 185 // properties are allowed and will lead to an error. Both bundles must have 186 // MRNs for everything. OwnerMRNs must be identical as well. 187 func (p *Bundle) AddBundle(other *Bundle) error { 188 if p.OwnerMrn == "" { 189 p.OwnerMrn = other.OwnerMrn 190 } else if p.OwnerMrn != other.OwnerMrn { 191 return errors.New("when combining bundles the owner MRNs must be identical") 192 } 193 194 for i := range other.Packs { 195 c := other.Packs[i] 196 if c.Mrn == "" { 197 return errors.New("source bundle that is added has missing query pack MRNs") 198 } 199 200 for j := range p.Packs { 201 if p.Packs[j].Mrn == c.Mrn { 202 return errors.New("cannot combine query packs, duplicate query packs: " + c.Mrn) 203 } 204 } 205 206 p.Packs = append(p.Packs, c) 207 } 208 209 return nil 210 } 211 212 // Compile a bundle. See CompileExt for a full description. 213 func (p *Bundle) Compile(ctx context.Context, schema llx.Schema) (*BundleMap, error) { 214 return p.CompileExt(ctx, BundleCompileConf{ 215 Schema: schema, 216 }) 217 } 218 219 type BundleCompileConf struct { 220 Schema llx.Schema 221 RemoveFailing bool 222 } 223 224 // Compile a bundle 225 // Does a few things: 226 // 1. turns it into a map for easier access 227 // 2. compile all queries and validates them 228 // 3. validation of all contents 229 // 4. generate MRNs for all packs, queries, and updates referencing local fields 230 // 5. snapshot all queries into the packs 231 // 6. make queries public that are only embedded 232 func (bundle *Bundle) CompileExt(ctx context.Context, conf BundleCompileConf) (*BundleMap, error) { 233 ownerMrn := bundle.OwnerMrn 234 if ownerMrn == "" { 235 // this only happens for local bundles where queries have no mrn yet 236 ownerMrn = "//local.cnquery.io/run/local-execution" 237 } 238 239 cache := &bundleCache{ 240 ownerMrn: ownerMrn, 241 bundle: bundle, 242 uid2mrn: map[string]string{}, 243 removeQueries: map[string]struct{}{}, 244 lookupProp: map[string]PropertyRef{}, 245 lookupQuery: map[string]*Mquery{}, 246 conf: conf, 247 } 248 249 if err := cache.compileQueries(bundle.Queries, nil); err != nil { 250 return nil, err 251 } 252 253 // index packs + update MRNs and checksums, link properties via MRNs 254 for i := range bundle.Packs { 255 pack := bundle.Packs[i] 256 257 // !this is very important to prevent user overrides! vv 258 pack.InvalidateAllChecksums() 259 pack.ComputedFilters = &Filters{ 260 Items: map[string]*Mquery{}, 261 } 262 263 err := pack.RefreshMRN(ownerMrn) 264 if err != nil { 265 return nil, multierr.Wrap(err, "failed to refresh query pack "+pack.Mrn) 266 } 267 268 if err = pack.Filters.Compile(ownerMrn, conf.Schema); err != nil { 269 return nil, multierr.Wrap(err, "failed to compile querypack filters") 270 } 271 pack.ComputedFilters.AddFilters(pack.Filters) 272 273 if err := cache.compileQueries(pack.Queries, pack); err != nil { 274 return nil, err 275 } 276 277 for i := range pack.Groups { 278 group := pack.Groups[i] 279 280 // When filters are initially added they haven't been compiled 281 if err = group.Filters.Compile(ownerMrn, conf.Schema); err != nil { 282 return nil, multierr.Wrap(err, "failed to compile querypack filters") 283 } 284 pack.ComputedFilters.AddFilters(group.Filters) 285 286 if err := cache.compileQueries(group.Queries, pack); err != nil { 287 return nil, err 288 } 289 } 290 } 291 292 // Removing any failing queries happens at the very end, when everything is 293 // set to go. We do this to the original bundle, because the intent is to 294 // clean it up with this option. 295 cache.removeFailing(bundle) 296 297 return bundle.ToMap(), cache.error() 298 } 299 300 type bundleCache struct { 301 ownerMrn string 302 lookupQuery map[string]*Mquery 303 lookupProp map[string]PropertyRef 304 uid2mrn map[string]string 305 removeQueries map[string]struct{} 306 bundle *Bundle 307 errors []error 308 conf BundleCompileConf 309 } 310 311 type PropertyRef struct { 312 *Property 313 Name string 314 } 315 316 func (c *bundleCache) removeFailing(res *Bundle) { 317 if !c.conf.RemoveFailing { 318 return 319 } 320 321 filtered := []*Mquery{} 322 for i := range res.Queries { 323 cur := res.Queries[i] 324 if _, ok := c.removeQueries[cur.Mrn]; !ok { 325 filtered = append(filtered, cur) 326 } 327 } 328 res.Queries = filtered 329 330 for i := range res.Packs { 331 pack := res.Packs[i] 332 333 filtered := []*Mquery{} 334 for i := range pack.Queries { 335 cur := pack.Queries[i] 336 if _, ok := c.removeQueries[cur.Mrn]; !ok { 337 filtered = append(filtered, cur) 338 } 339 } 340 pack.Queries = filtered 341 342 groups := []*QueryGroup{} 343 for j := range pack.Groups { 344 group := pack.Groups[j] 345 filtered := []*Mquery{} 346 for k := range group.Queries { 347 cur := group.Queries[k] 348 if _, ok := c.removeQueries[cur.Mrn]; !ok { 349 filtered = append(filtered, cur) 350 } 351 } 352 group.Queries = filtered 353 if len(group.Queries) != 0 { 354 groups = append(groups, group) 355 } 356 } 357 pack.Groups = groups 358 } 359 } 360 361 func (c *bundleCache) hasErrors() bool { 362 return len(c.errors) != 0 363 } 364 365 func (c *bundleCache) error() error { 366 if len(c.errors) == 0 { 367 return nil 368 } 369 370 var msg strings.Builder 371 for i := range c.errors { 372 msg.WriteString(c.errors[i].Error()) 373 msg.WriteString("\n") 374 } 375 return errors.New(msg.String()) 376 } 377 378 func (c *bundleCache) compileQueries(queries []*Mquery, pack *QueryPack) error { 379 for i := range queries { 380 c.precompileQuery(queries[i], pack) 381 } 382 383 // After the first pass we may have errors. We try to collect as many errors 384 // as we can before returning, so more problems can be fixed at once. 385 // We have to return at this point, because these errors will prevent us from 386 // compiling the queries. 387 if c.hasErrors() { 388 return c.error() 389 } 390 391 for i := range queries { 392 c.compileQuery(queries[i]) 393 } 394 395 // The second pass on errors is done after we have compiled as much as possible. 396 // Since shared queries may be used in other places, any errors here will prevent 397 // us from compiling further. 398 return c.error() 399 } 400 401 // precompileQuery indexes the query, turns UIDs into MRNs, compiles properties 402 // and filters, and pre-processes variants. Also makes sure the query isn't nil. 403 func (c *bundleCache) precompileQuery(query *Mquery, pack *QueryPack) { 404 if query == nil { 405 c.errors = append(c.errors, errors.New("received null query")) 406 return 407 } 408 409 // remove leading and trailing whitespace of docs, refs and tags 410 query.Sanitize() 411 412 // ensure the correct mrn is set 413 uid := query.Uid 414 if err := query.RefreshMRN(c.ownerMrn); err != nil { 415 c.errors = append(c.errors, errors.New("failed to refresh MRN for query "+query.Uid)) 416 return 417 } 418 if uid != "" { 419 c.uid2mrn[uid] = query.Mrn 420 } 421 422 // the pack is only nil if we are dealing with shared queries 423 if pack == nil { 424 c.lookupQuery[query.Mrn] = query 425 } else if existing, ok := c.lookupQuery[query.Mrn]; ok { 426 query.AddBase(existing) 427 } else { 428 // Any other query that is in a pack, that does not exist globally, 429 // we share out to be available in the bundle. 430 c.bundle.Queries = append(c.bundle.Queries, query) 431 c.lookupQuery[query.Mrn] = query 432 } 433 434 // ensure MRNs for properties 435 for i := range query.Props { 436 if err := c.compileProp(query.Props[i]); err != nil { 437 c.errors = append(c.errors, errors.New("failed to compile properties for query "+query.Mrn)) 438 return 439 } 440 } 441 442 // filters have no dependencies, so we can compile them early 443 if err := query.Filters.Compile(c.ownerMrn, c.conf.Schema); err != nil { 444 c.errors = append(c.errors, errors.New("failed to compile filters for query "+query.Mrn)) 445 return 446 } 447 448 // filters will need to be aggregated into the pack's filters 449 if pack != nil { 450 if err := pack.ComputedFilters.AddQueryFilters(query, c.lookupQuery); err != nil { 451 c.errors = append(c.errors, errors.New("failed to register filters for query "+query.Mrn)) 452 return 453 } 454 } 455 456 // ensure MRNs for variants 457 for i := range query.Variants { 458 variant := query.Variants[i] 459 uid := variant.Uid 460 if err := variant.RefreshMRN(c.ownerMrn); err != nil { 461 c.errors = append(c.errors, errors.New("failed to refresh MRN for variant in query "+query.Uid)) 462 return 463 } 464 if uid != "" { 465 c.uid2mrn[uid] = variant.Mrn 466 } 467 } 468 } 469 470 // Note: you only want to run this, after you are sure that all connected 471 // dependencies have been processed. Properties must be compiled. Connected 472 // queries may not be ready yet, but we have to have precompiled them. 473 func (c *bundleCache) compileQuery(query *Mquery) { 474 _, err := query.RefreshChecksumAndType(c.lookupQuery, c.lookupProp, c.conf.Schema) 475 if err != nil { 476 if c.conf.RemoveFailing { 477 c.removeQueries[query.Mrn] = struct{}{} 478 } else { 479 c.errors = append(c.errors, multierr.Wrap(err, "failed to validate query '"+query.Mrn+"'")) 480 } 481 } 482 } 483 484 func (c *bundleCache) compileProp(prop *Property) error { 485 var name string 486 487 if prop.Mrn == "" { 488 uid := prop.Uid 489 if err := prop.RefreshMRN(c.ownerMrn); err != nil { 490 return err 491 } 492 if uid != "" { 493 c.uid2mrn[uid] = prop.Mrn 494 } 495 496 // TODO: uid's can be namespaced, extract the name 497 name = uid 498 } else { 499 m, err := mrn.NewMRN(prop.Mrn) 500 if err != nil { 501 return multierr.Wrap(err, "failed to compile prop, invalid mrn: "+prop.Mrn) 502 } 503 504 name = m.Basename() 505 } 506 507 if _, err := prop.RefreshChecksumAndType(c.conf.Schema); err != nil { 508 return err 509 } 510 511 c.lookupProp[prop.Mrn] = PropertyRef{ 512 Property: prop, 513 Name: name, 514 } 515 516 return nil 517 } 518 519 // FilterQueryPacks only keeps the given UIDs or MRNs and removes every other one. 520 // If a given query pack has a MRN set (but no UID) it will try to get the UID from the MRN 521 // and also filter by that criteria. 522 // If the list of IDs is empty this function doesn't do anything. 523 // If all packs in the bundles were filtered out, return true. 524 func (p *Bundle) FilterQueryPacks(IDs []string) bool { 525 if len(IDs) == 0 { 526 return false 527 } 528 529 if p == nil { 530 return true 531 } 532 533 valid := make(map[string]struct{}, len(IDs)) 534 for i := range IDs { 535 valid[IDs[i]] = struct{}{} 536 } 537 538 var res []*QueryPack 539 for i := range p.Packs { 540 cur := p.Packs[i] 541 542 if cur.Mrn != "" { 543 if _, ok := valid[cur.Mrn]; ok { 544 res = append(res, cur) 545 continue 546 } 547 548 uid, _ := mrn.GetResource(cur.Mrn, MRN_RESOURCE_QUERYPACK) 549 if _, ok := valid[uid]; ok { 550 res = append(res, cur) 551 } 552 553 // if we have a MRN we do not check the UID 554 continue 555 } 556 557 if _, ok := valid[cur.Uid]; ok { 558 res = append(res, cur) 559 } 560 } 561 562 p.Packs = res 563 564 return len(res) == 0 565 } 566 567 // Makes sure every query in the bundle and every query pack has a UID set, 568 // IF the MRN is empty. Otherwise MRNs suffice. 569 func (p *Bundle) EnsureUIDs() { 570 for i := range p.Packs { 571 pack := p.Packs[i] 572 if pack.Mrn == "" && pack.Uid == "" { 573 pack.Uid = ksuid.New().String() 574 } 575 576 for j := range pack.Queries { 577 query := pack.Queries[j] 578 if query.Mrn == "" && query.Uid == "" { 579 query.Uid = ksuid.New().String() 580 } 581 } 582 } 583 } 584 585 // Filters retrieves the aggregated filters for all querypacks in this bundle. 586 func (p *Bundle) Filters() []*Mquery { 587 uniq := map[string]*Mquery{} 588 for i := range p.Packs { 589 // TODO: Currently we don't process the difference between local pack filters 590 // and their group filters correctly. These need aggregation. 591 592 pack := p.Packs[i] 593 if pack.ComputedFilters != nil { 594 for k, v := range pack.ComputedFilters.Items { 595 uniq[k] = v 596 } 597 } 598 } 599 600 res := make([]*Mquery, len(uniq)) 601 i := 0 602 for _, v := range uniq { 603 res[i] = v 604 i++ 605 } 606 607 return res 608 }