go.mondoo.com/cnquery@v0.0.0-20231005093811-59568235f6ea/explorer/bundle.go (about)

     1  // Copyright (c) Mondoo, Inc.
     2  // SPDX-License-Identifier: BUSL-1.1
     3  
     4  package explorer
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"io/fs"
    10  	"os"
    11  	"path/filepath"
    12  	"strings"
    13  
    14  	"github.com/rs/zerolog/log"
    15  	"github.com/segmentio/ksuid"
    16  	"go.mondoo.com/cnquery/checksums"
    17  	llx "go.mondoo.com/cnquery/llx"
    18  	"go.mondoo.com/cnquery/mrn"
    19  	"go.mondoo.com/cnquery/utils/multierr"
    20  	"sigs.k8s.io/yaml"
    21  )
    22  
// Resource type names as they appear in MRNs. In this file only
// MRN_RESOURCE_QUERYPACK is used: it is passed to mrn.GetResource to extract
// a query pack's ID from its MRN.
// NOTE(review): the query and asset names are presumably used the same way
// by other files in this package — confirm.
const (
	MRN_RESOURCE_QUERY     = "queries"
	MRN_RESOURCE_QUERYPACK = "querypacks"
	MRN_RESOURCE_ASSET     = "assets"
)
    28  
// BundleMap is a Bundle with easier access to its data: packs, queries, and
// props are held in maps instead of lists.
type BundleMap struct {
	// OwnerMrn is the owner MRN taken over from the bundle.
	OwnerMrn string                `json:"owner_mrn,omitempty"`
	// Packs holds the query packs. Bundle.ToMap keys them by their MRN.
	Packs    map[string]*QueryPack `json:"packs,omitempty"`
	// Queries holds the queries. Bundle.ToMap keys them by their MRN.
	Queries  map[string]*Mquery    `json:"queries,omitempty"`
	// Props is initialized by NewBundleMap, but Bundle.ToMap does not fill it.
	// NOTE(review): presumably keyed by MRN like the other maps — confirm.
	Props    map[string]*Mquery    `json:"props,omitempty"`
}
    36  
    37  // NewBundleMap creates a new empty initialized map
    38  // dataLake (optional) connects an additional data layer which may provide queries/packs
    39  func NewBundleMap(ownerMrn string) *BundleMap {
    40  	return &BundleMap{
    41  		OwnerMrn: ownerMrn,
    42  		Packs:    make(map[string]*QueryPack),
    43  		Queries:  make(map[string]*Mquery),
    44  		Props:    make(map[string]*Mquery),
    45  	}
    46  }
    47  
    48  // BundleFromPaths loads a single bundle file or a bundle that
    49  // was split into multiple files into a single Bundle struct
    50  func BundleFromPaths(paths ...string) (*Bundle, error) {
    51  	// load all the source files
    52  	resolvedFilenames, err := walkBundleFiles(paths)
    53  	if err != nil {
    54  		log.Error().Err(err).Msg("could not resolve bundle files")
    55  		return nil, err
    56  	}
    57  
    58  	// aggregate all files into a single bundle
    59  	aggregatedBundle, err := aggregateFilesToBundle(resolvedFilenames)
    60  	if err != nil {
    61  		log.Error().Err(err).Msg("could merge bundle files")
    62  		return nil, err
    63  	}
    64  	return aggregatedBundle, nil
    65  }
    66  
    67  // walkBundleFiles iterates over all provided filenames and
    68  // checks if the name is a file or a directory. If the filename
    69  // is a directory, it walks the directory recursively
    70  func walkBundleFiles(filenames []string) ([]string, error) {
    71  	// resolve file names
    72  	resolvedFilenames := []string{}
    73  	for i := range filenames {
    74  		filename := filenames[i]
    75  		fi, err := os.Stat(filename)
    76  		if err != nil {
    77  			return nil, multierr.Wrap(err, "could not load bundle file: "+filename)
    78  		}
    79  
    80  		if fi.IsDir() {
    81  			filepath.WalkDir(filename, func(path string, d fs.DirEntry, err error) error {
    82  				if err != nil {
    83  					return err
    84  				}
    85  				// we ignore nested directories
    86  				if d.IsDir() {
    87  					return nil
    88  				}
    89  
    90  				// only consider .yaml|.yml files
    91  				if strings.HasSuffix(d.Name(), ".yaml") || strings.HasSuffix(d.Name(), ".yml") {
    92  					resolvedFilenames = append(resolvedFilenames, path)
    93  				}
    94  
    95  				return nil
    96  			})
    97  		} else {
    98  			resolvedFilenames = append(resolvedFilenames, filename)
    99  		}
   100  	}
   101  
   102  	return resolvedFilenames, nil
   103  }
   104  
   105  // aggregateFilesToBundle iterates over all provided files and loads its content.
   106  // It assumes that all provided files are checked upfront and are not a directory
   107  func aggregateFilesToBundle(paths []string) (*Bundle, error) {
   108  	// iterate over all files, load them and merge them
   109  	mergedBundle := &Bundle{}
   110  
   111  	for i := range paths {
   112  		path := paths[i]
   113  		bundle, err := bundleFromSingleFile(path)
   114  		if err != nil {
   115  			return nil, multierr.Wrap(err, "could not load file: "+path)
   116  		}
   117  		combineBundles(mergedBundle, bundle)
   118  	}
   119  
   120  	return mergedBundle, nil
   121  }
   122  
   123  // Combine two bundles, even if they aren't compiled yet.
   124  // Uses the existing owner MRN if it is set, otherwise the other is used.
   125  func combineBundles(into *Bundle, other *Bundle) {
   126  	if into.OwnerMrn == "" {
   127  		into.OwnerMrn = other.OwnerMrn
   128  	}
   129  
   130  	into.Packs = append(into.Packs, other.Packs...)
   131  	into.Queries = append(into.Queries, other.Queries...)
   132  }
   133  
   134  // bundleFromSingleFile loads a bundle from a single file
   135  func bundleFromSingleFile(path string) (*Bundle, error) {
   136  	bundleData, err := os.ReadFile(path)
   137  	if err != nil {
   138  		return nil, err
   139  	}
   140  
   141  	return BundleFromYAML(bundleData)
   142  }
   143  
   144  // BundleFromYAML create a bundle from yaml contents
   145  func BundleFromYAML(data []byte) (*Bundle, error) {
   146  	var res Bundle
   147  	err := yaml.Unmarshal(data, &res)
   148  	res.EnsureUIDs()
   149  	return &res, err
   150  }
   151  
// ToYAML returns the bundle serialized as YAML. It passes through whatever
// yaml.Marshal returns, including its error.
func (p *Bundle) ToYAML() ([]byte, error) {
	return yaml.Marshal(p)
}
   156  
   157  func (p *Bundle) SourceHash() (string, error) {
   158  	raw, err := p.ToYAML()
   159  	if err != nil {
   160  		return "", err
   161  	}
   162  	c := checksums.New
   163  	c = c.Add(string(raw))
   164  	return c.String(), nil
   165  }
   166  
   167  // ToMap turns the Bundle into a BundleMap
   168  func (p *Bundle) ToMap() *BundleMap {
   169  	res := NewBundleMap(p.OwnerMrn)
   170  
   171  	for i := range p.Queries {
   172  		q := p.Queries[i]
   173  		res.Queries[q.Mrn] = q
   174  	}
   175  
   176  	for i := range p.Packs {
   177  		c := p.Packs[i]
   178  		res.Packs[c.Mrn] = c
   179  	}
   180  
   181  	return res
   182  }
   183  
   184  // Add another bundle into this. No duplicate packs, queries, or
   185  // properties are allowed and will lead to an error. Both bundles must have
   186  // MRNs for everything. OwnerMRNs must be identical as well.
   187  func (p *Bundle) AddBundle(other *Bundle) error {
   188  	if p.OwnerMrn == "" {
   189  		p.OwnerMrn = other.OwnerMrn
   190  	} else if p.OwnerMrn != other.OwnerMrn {
   191  		return errors.New("when combining bundles the owner MRNs must be identical")
   192  	}
   193  
   194  	for i := range other.Packs {
   195  		c := other.Packs[i]
   196  		if c.Mrn == "" {
   197  			return errors.New("source bundle that is added has missing query pack MRNs")
   198  		}
   199  
   200  		for j := range p.Packs {
   201  			if p.Packs[j].Mrn == c.Mrn {
   202  				return errors.New("cannot combine query packs, duplicate query packs: " + c.Mrn)
   203  			}
   204  		}
   205  
   206  		p.Packs = append(p.Packs, c)
   207  	}
   208  
   209  	return nil
   210  }
   211  
   212  // Compile a bundle. See CompileExt for a full description.
   213  func (p *Bundle) Compile(ctx context.Context, schema llx.Schema) (*BundleMap, error) {
   214  	return p.CompileExt(ctx, BundleCompileConf{
   215  		Schema: schema,
   216  	})
   217  }
   218  
// BundleCompileConf configures how CompileExt compiles a bundle.
type BundleCompileConf struct {
	// Schema is handed to the compile steps for filters, properties, and
	// queries.
	Schema        llx.Schema
	// RemoveFailing changes how queries that fail validation are treated:
	// if set, they are removed from the bundle at the end of the compile
	// instead of being reported as errors.
	RemoveFailing bool
}
   223  
// CompileExt compiles a bundle with the given configuration. The bundle is
// modified in place.
// Does a few things:
//  1. falls back to a local-execution owner MRN if the bundle has none
//  2. compiles and validates the bundle's shared (top-level) queries
//  3. for every pack: invalidates its checksums, resets its computed filters,
//     refreshes its MRN, compiles its filters, and compiles the queries of
//     the pack and of all its groups
//  4. removes failing queries from the bundle, if conf.RemoveFailing is set
//  5. turns the bundle into a map for easier access
//
// Queries that only exist inside of a pack are added to the bundle's shared
// queries during compilation (see precompileQuery).
//
// The compile aborts on the first step that reports errors, in which case no
// BundleMap is returned. The ctx argument is not used by this function.
func (bundle *Bundle) CompileExt(ctx context.Context, conf BundleCompileConf) (*BundleMap, error) {
	ownerMrn := bundle.OwnerMrn
	if ownerMrn == "" {
		// this only happens for local bundles where queries have no mrn yet
		ownerMrn = "//local.cnquery.io/run/local-execution"
	}

	// the cache carries all lookup tables and collected errors for this run
	cache := &bundleCache{
		ownerMrn:      ownerMrn,
		bundle:        bundle,
		uid2mrn:       map[string]string{},
		removeQueries: map[string]struct{}{},
		lookupProp:    map[string]PropertyRef{},
		lookupQuery:   map[string]*Mquery{},
		conf:          conf,
	}

	// shared queries come first (no pack), so packs can refer to them later
	if err := cache.compileQueries(bundle.Queries, nil); err != nil {
		return nil, err
	}

	// index packs + update MRNs and checksums, link properties via MRNs
	for i := range bundle.Packs {
		pack := bundle.Packs[i]

		// !this is very important to prevent user overrides! vv
		pack.InvalidateAllChecksums()
		pack.ComputedFilters = &Filters{
			Items: map[string]*Mquery{},
		}

		err := pack.RefreshMRN(ownerMrn)
		if err != nil {
			return nil, multierr.Wrap(err, "failed to refresh query pack "+pack.Mrn)
		}

		if err = pack.Filters.Compile(ownerMrn, conf.Schema); err != nil {
			return nil, multierr.Wrap(err, "failed to compile querypack filters")
		}
		pack.ComputedFilters.AddFilters(pack.Filters)

		if err := cache.compileQueries(pack.Queries, pack); err != nil {
			return nil, err
		}

		// groups contribute their own filters and queries to the pack
		for i := range pack.Groups {
			group := pack.Groups[i]

			// When filters are initially added they haven't been compiled
			if err = group.Filters.Compile(ownerMrn, conf.Schema); err != nil {
				return nil, multierr.Wrap(err, "failed to compile querypack filters")
			}
			pack.ComputedFilters.AddFilters(group.Filters)

			if err := cache.compileQueries(group.Queries, pack); err != nil {
				return nil, err
			}
		}
	}

	// Removing any failing queries happens at the very end, when everything is
	// set to go. We do this to the original bundle, because the intent is to
	// clean it up with this option.
	cache.removeFailing(bundle)

	return bundle.ToMap(), cache.error()
}
   299  
// bundleCache holds the state of a single bundle compile run: lookup tables
// for everything that was processed so far, the errors that were collected,
// and the configuration of the run.
type bundleCache struct {
	// ownerMrn is used to refresh MRNs and to compile filters
	ownerMrn      string
	// lookupQuery indexes precompiled queries by their MRN
	lookupQuery   map[string]*Mquery
	// lookupProp indexes compiled properties by their MRN
	lookupProp    map[string]PropertyRef
	// uid2mrn maps the UID an object had before its MRN was refreshed to
	// the MRN it has afterwards
	uid2mrn       map[string]string
	// removeQueries is the set of MRNs of queries that failed validation
	// while conf.RemoveFailing was set
	removeQueries map[string]struct{}
	// bundle is the bundle that is being compiled; queries that only exist
	// in packs are appended to its list of queries
	bundle        *Bundle
	// errors collects problems, so that many can be reported at once
	errors        []error
	// conf is the configuration of this compile run
	conf          BundleCompileConf
}
   310  
// PropertyRef is a Property together with a name. When created by the bundle
// compiler, the name is the UID of the property or, if the property already
// came with an MRN, the basename of that MRN.
type PropertyRef struct {
	*Property
	Name string
}
   315  
   316  func (c *bundleCache) removeFailing(res *Bundle) {
   317  	if !c.conf.RemoveFailing {
   318  		return
   319  	}
   320  
   321  	filtered := []*Mquery{}
   322  	for i := range res.Queries {
   323  		cur := res.Queries[i]
   324  		if _, ok := c.removeQueries[cur.Mrn]; !ok {
   325  			filtered = append(filtered, cur)
   326  		}
   327  	}
   328  	res.Queries = filtered
   329  
   330  	for i := range res.Packs {
   331  		pack := res.Packs[i]
   332  
   333  		filtered := []*Mquery{}
   334  		for i := range pack.Queries {
   335  			cur := pack.Queries[i]
   336  			if _, ok := c.removeQueries[cur.Mrn]; !ok {
   337  				filtered = append(filtered, cur)
   338  			}
   339  		}
   340  		pack.Queries = filtered
   341  
   342  		groups := []*QueryGroup{}
   343  		for j := range pack.Groups {
   344  			group := pack.Groups[j]
   345  			filtered := []*Mquery{}
   346  			for k := range group.Queries {
   347  				cur := group.Queries[k]
   348  				if _, ok := c.removeQueries[cur.Mrn]; !ok {
   349  					filtered = append(filtered, cur)
   350  				}
   351  			}
   352  			group.Queries = filtered
   353  			if len(group.Queries) != 0 {
   354  				groups = append(groups, group)
   355  			}
   356  		}
   357  		pack.Groups = groups
   358  	}
   359  }
   360  
   361  func (c *bundleCache) hasErrors() bool {
   362  	return len(c.errors) != 0
   363  }
   364  
   365  func (c *bundleCache) error() error {
   366  	if len(c.errors) == 0 {
   367  		return nil
   368  	}
   369  
   370  	var msg strings.Builder
   371  	for i := range c.errors {
   372  		msg.WriteString(c.errors[i].Error())
   373  		msg.WriteString("\n")
   374  	}
   375  	return errors.New(msg.String())
   376  }
   377  
   378  func (c *bundleCache) compileQueries(queries []*Mquery, pack *QueryPack) error {
   379  	for i := range queries {
   380  		c.precompileQuery(queries[i], pack)
   381  	}
   382  
   383  	// After the first pass we may have errors. We try to collect as many errors
   384  	// as we can before returning, so more problems can be fixed at once.
   385  	// We have to return at this point, because these errors will prevent us from
   386  	// compiling the queries.
   387  	if c.hasErrors() {
   388  		return c.error()
   389  	}
   390  
   391  	for i := range queries {
   392  		c.compileQuery(queries[i])
   393  	}
   394  
   395  	// The second pass on errors is done after we have compiled as much as possible.
   396  	// Since shared queries may be used in other places, any errors here will prevent
   397  	// us from compiling further.
   398  	return c.error()
   399  }
   400  
   401  // precompileQuery indexes the query, turns UIDs into MRNs, compiles properties
   402  // and filters, and pre-processes variants. Also makes sure the query isn't nil.
   403  func (c *bundleCache) precompileQuery(query *Mquery, pack *QueryPack) {
   404  	if query == nil {
   405  		c.errors = append(c.errors, errors.New("received null query"))
   406  		return
   407  	}
   408  
   409  	// remove leading and trailing whitespace of docs, refs and tags
   410  	query.Sanitize()
   411  
   412  	// ensure the correct mrn is set
   413  	uid := query.Uid
   414  	if err := query.RefreshMRN(c.ownerMrn); err != nil {
   415  		c.errors = append(c.errors, errors.New("failed to refresh MRN for query "+query.Uid))
   416  		return
   417  	}
   418  	if uid != "" {
   419  		c.uid2mrn[uid] = query.Mrn
   420  	}
   421  
   422  	// the pack is only nil if we are dealing with shared queries
   423  	if pack == nil {
   424  		c.lookupQuery[query.Mrn] = query
   425  	} else if existing, ok := c.lookupQuery[query.Mrn]; ok {
   426  		query.AddBase(existing)
   427  	} else {
   428  		// Any other query that is in a pack, that does not exist globally,
   429  		// we share out to be available in the bundle.
   430  		c.bundle.Queries = append(c.bundle.Queries, query)
   431  		c.lookupQuery[query.Mrn] = query
   432  	}
   433  
   434  	// ensure MRNs for properties
   435  	for i := range query.Props {
   436  		if err := c.compileProp(query.Props[i]); err != nil {
   437  			c.errors = append(c.errors, errors.New("failed to compile properties for query "+query.Mrn))
   438  			return
   439  		}
   440  	}
   441  
   442  	// filters have no dependencies, so we can compile them early
   443  	if err := query.Filters.Compile(c.ownerMrn, c.conf.Schema); err != nil {
   444  		c.errors = append(c.errors, errors.New("failed to compile filters for query "+query.Mrn))
   445  		return
   446  	}
   447  
   448  	// filters will need to be aggregated into the pack's filters
   449  	if pack != nil {
   450  		if err := pack.ComputedFilters.AddQueryFilters(query, c.lookupQuery); err != nil {
   451  			c.errors = append(c.errors, errors.New("failed to register filters for query "+query.Mrn))
   452  			return
   453  		}
   454  	}
   455  
   456  	// ensure MRNs for variants
   457  	for i := range query.Variants {
   458  		variant := query.Variants[i]
   459  		uid := variant.Uid
   460  		if err := variant.RefreshMRN(c.ownerMrn); err != nil {
   461  			c.errors = append(c.errors, errors.New("failed to refresh MRN for variant in query "+query.Uid))
   462  			return
   463  		}
   464  		if uid != "" {
   465  			c.uid2mrn[uid] = variant.Mrn
   466  		}
   467  	}
   468  }
   469  
   470  // Note: you only want to run this, after you are sure that all connected
   471  // dependencies have been processed. Properties must be compiled. Connected
   472  // queries may not be ready yet, but we have to have precompiled them.
   473  func (c *bundleCache) compileQuery(query *Mquery) {
   474  	_, err := query.RefreshChecksumAndType(c.lookupQuery, c.lookupProp, c.conf.Schema)
   475  	if err != nil {
   476  		if c.conf.RemoveFailing {
   477  			c.removeQueries[query.Mrn] = struct{}{}
   478  		} else {
   479  			c.errors = append(c.errors, multierr.Wrap(err, "failed to validate query '"+query.Mrn+"'"))
   480  		}
   481  	}
   482  }
   483  
   484  func (c *bundleCache) compileProp(prop *Property) error {
   485  	var name string
   486  
   487  	if prop.Mrn == "" {
   488  		uid := prop.Uid
   489  		if err := prop.RefreshMRN(c.ownerMrn); err != nil {
   490  			return err
   491  		}
   492  		if uid != "" {
   493  			c.uid2mrn[uid] = prop.Mrn
   494  		}
   495  
   496  		// TODO: uid's can be namespaced, extract the name
   497  		name = uid
   498  	} else {
   499  		m, err := mrn.NewMRN(prop.Mrn)
   500  		if err != nil {
   501  			return multierr.Wrap(err, "failed to compile prop, invalid mrn: "+prop.Mrn)
   502  		}
   503  
   504  		name = m.Basename()
   505  	}
   506  
   507  	if _, err := prop.RefreshChecksumAndType(c.conf.Schema); err != nil {
   508  		return err
   509  	}
   510  
   511  	c.lookupProp[prop.Mrn] = PropertyRef{
   512  		Property: prop,
   513  		Name:     name,
   514  	}
   515  
   516  	return nil
   517  }
   518  
   519  // FilterQueryPacks only keeps the given UIDs or MRNs and removes every other one.
   520  // If a given query pack has a MRN set (but no UID) it will try to get the UID from the MRN
   521  // and also filter by that criteria.
   522  // If the list of IDs is empty this function doesn't do anything.
   523  // If all packs in the bundles were filtered out, return true.
   524  func (p *Bundle) FilterQueryPacks(IDs []string) bool {
   525  	if len(IDs) == 0 {
   526  		return false
   527  	}
   528  
   529  	if p == nil {
   530  		return true
   531  	}
   532  
   533  	valid := make(map[string]struct{}, len(IDs))
   534  	for i := range IDs {
   535  		valid[IDs[i]] = struct{}{}
   536  	}
   537  
   538  	var res []*QueryPack
   539  	for i := range p.Packs {
   540  		cur := p.Packs[i]
   541  
   542  		if cur.Mrn != "" {
   543  			if _, ok := valid[cur.Mrn]; ok {
   544  				res = append(res, cur)
   545  				continue
   546  			}
   547  
   548  			uid, _ := mrn.GetResource(cur.Mrn, MRN_RESOURCE_QUERYPACK)
   549  			if _, ok := valid[uid]; ok {
   550  				res = append(res, cur)
   551  			}
   552  
   553  			// if we have a MRN we do not check the UID
   554  			continue
   555  		}
   556  
   557  		if _, ok := valid[cur.Uid]; ok {
   558  			res = append(res, cur)
   559  		}
   560  	}
   561  
   562  	p.Packs = res
   563  
   564  	return len(res) == 0
   565  }
   566  
   567  // Makes sure every query in the bundle and every query pack has a UID set,
   568  // IF the MRN is empty. Otherwise MRNs suffice.
   569  func (p *Bundle) EnsureUIDs() {
   570  	for i := range p.Packs {
   571  		pack := p.Packs[i]
   572  		if pack.Mrn == "" && pack.Uid == "" {
   573  			pack.Uid = ksuid.New().String()
   574  		}
   575  
   576  		for j := range pack.Queries {
   577  			query := pack.Queries[j]
   578  			if query.Mrn == "" && query.Uid == "" {
   579  				query.Uid = ksuid.New().String()
   580  			}
   581  		}
   582  	}
   583  }
   584  
   585  // Filters retrieves the aggregated filters for all querypacks in this bundle.
   586  func (p *Bundle) Filters() []*Mquery {
   587  	uniq := map[string]*Mquery{}
   588  	for i := range p.Packs {
   589  		// TODO: Currently we don't process the difference between local pack filters
   590  		// and their group filters correctly. These need aggregation.
   591  
   592  		pack := p.Packs[i]
   593  		if pack.ComputedFilters != nil {
   594  			for k, v := range pack.ComputedFilters.Items {
   595  				uniq[k] = v
   596  			}
   597  		}
   598  	}
   599  
   600  	res := make([]*Mquery, len(uniq))
   601  	i := 0
   602  	for _, v := range uniq {
   603  		res[i] = v
   604  		i++
   605  	}
   606  
   607  	return res
   608  }