github.com/turbot/steampipe@v1.7.0-rc.0.0.20240517123944-7cef272d4458/pkg/control/controlexecute/execution_tree.go

github.com/turbot/steampipe@v1.7.0-rc.0.0.20240517123944-7cef272d4458/pkg/control/controlexecute/execution_tree.go (about)

     1  package controlexecute
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"log"
     7  	"sort"
     8  	"time"
     9  
    10  	"github.com/spf13/viper"
    11  	"github.com/turbot/go-kit/helpers"
    12  	"github.com/turbot/steampipe-plugin-sdk/v5/sperr"
    13  	"github.com/turbot/steampipe/pkg/connection_sync"
    14  	"github.com/turbot/steampipe/pkg/constants"
    15  	"github.com/turbot/steampipe/pkg/control/controlstatus"
    16  	"github.com/turbot/steampipe/pkg/db/db_common"
    17  	"github.com/turbot/steampipe/pkg/query/queryresult"
    18  	"github.com/turbot/steampipe/pkg/statushooks"
    19  	"github.com/turbot/steampipe/pkg/steampipeconfig/modconfig"
    20  	"github.com/turbot/steampipe/pkg/utils"
    21  	"github.com/turbot/steampipe/pkg/workspace"
    22  	"golang.org/x/sync/semaphore"
    23  )
    24  
// ExecutionTree is a structure representing the control execution hierarchy.
// It owns the root result group, a flat list of every control run added to the tree,
// the start/end times and progress of an execution, and the database client used to
// run the control filter query and to execute the tree.
type ExecutionTree struct {
	// the top-level result group; Execute runs this and it traverses the rest of the tree
	Root *ResultGroup `json:"root"`
	// flat list of all control runs (appended to by AddControl); excluded from JSON
	ControlRuns []*ControlRun                  `json:"-"`
	StartTime   time.Time                      `json:"start_time"`
	EndTime     time.Time                      `json:"end_time"`
	Progress    *controlstatus.ControlProgress `json:"progress"`
	// map of dimension property name to property value to color map
	// (created and populated at the end of Execute); excluded from JSON
	DimensionColorGenerator *DimensionColorGenerator `json:"-"`
	// the current session search path (stored unquoted), followed by the workspace
	// the tree was built from and the database client; none of these are serialised
	SearchPath []string             `json:"-"`
	Workspace  *workspace.Workspace `json:"-"`
	client     db_common.Client
	// an optional map of control names used to filter the controls which are run
	// - a nil map means no filtering is applied (see ShouldIncludeControl)
	controlNameFilterMap map[string]bool
}
    42  
    43  func NewExecutionTree(ctx context.Context, workspace *workspace.Workspace, client db_common.Client, controlFilterWhereClause string, args ...string) (*ExecutionTree, error) {
    44  	if len(args) < 1 {
    45  		return nil, sperr.New("need at least one argument to create a check execution tree")
    46  	}
    47  
    48  	searchPath := client.GetRequiredSessionSearchPath()
    49  
    50  	// now populate the ExecutionTree
    51  	executionTree := &ExecutionTree{
    52  		Workspace:  workspace,
    53  		client:     client,
    54  		SearchPath: utils.UnquoteStringArray(searchPath),
    55  	}
    56  	// if a "--where" or "--tag" parameter was passed, build a map of control names used to filter the controls to run
    57  	// create a context with status hooks disabled
    58  	noStatusCtx := statushooks.DisableStatusHooks(ctx)
    59  	err := executionTree.populateControlFilterMap(noStatusCtx, controlFilterWhereClause)
    60  	if err != nil {
    61  		return nil, err
    62  	}
    63  
    64  	var resolvedItem modconfig.ModTreeItem
    65  
    66  	// if only one argument is provided, add this as execution root
    67  	if len(args) == 1 {
    68  		resolvedItem, err = executionTree.getExecutionRootFromArg(args[0])
    69  		if err != nil {
    70  			return nil, err
    71  		}
    72  	} else {
    73  		// for multiple items, use a root benchmark as the parent of the items
    74  		// this root benchmark will be converted to a ResultGroup that can be worked with
    75  		// this is necessary because snapshots only support a single tree item as the child of the root
    76  		items := []modconfig.ModTreeItem{}
    77  		for _, arg := range args {
    78  			item, err := executionTree.getExecutionRootFromArg(arg)
    79  			if err != nil {
    80  				return nil, err
    81  			}
    82  			items = append(items, item)
    83  		}
    84  
    85  		// create a root benchmark with `items` as it's children
    86  		resolvedItem = modconfig.NewRootBenchmarkWithChildren(workspace.Mod, items).(modconfig.ModTreeItem)
    87  	}
    88  	// build tree of result groups, starting with a synthetic 'root' node
    89  	executionTree.Root = NewRootResultGroup(ctx, executionTree, resolvedItem)
    90  
    91  	// after tree has built, ControlCount will be set - create progress rendered
    92  	executionTree.Progress = controlstatus.NewControlProgress(len(executionTree.ControlRuns))
    93  
    94  	return executionTree, nil
    95  }
    96  
    97  // IsExportSourceData implements ExportSourceData
    98  func (*ExecutionTree) IsExportSourceData() {}
    99  
   100  // AddControl checks whether control should be included in the tree
   101  // if so, creates a ControlRun, which is added to the parent group
   102  func (e *ExecutionTree) AddControl(ctx context.Context, control *modconfig.Control, group *ResultGroup) {
   103  	// note we use short name to determine whether to include a control
   104  	if e.ShouldIncludeControl(control.ShortName) {
   105  		// create new ControlRun with treeItem as the parent
   106  		controlRun := NewControlRun(control, group, e)
   107  		// add it into the group
   108  		group.addControl(controlRun)
   109  
   110  		// also add it into the execution tree control run list
   111  		e.ControlRuns = append(e.ControlRuns, controlRun)
   112  	}
   113  }
   114  
   115  func (e *ExecutionTree) Execute(ctx context.Context) error {
   116  	log.Println("[TRACE]", "begin ExecutionTree.Execute")
   117  	defer log.Println("[TRACE]", "end ExecutionTree.Execute")
   118  	e.StartTime = time.Now()
   119  	e.Progress.Start(ctx)
   120  
   121  	defer func() {
   122  		e.EndTime = time.Now()
   123  		e.Progress.Finish(ctx)
   124  	}()
   125  
   126  	// TODO should we always wait even with non custom search path?
   127  	// if there is a custom search path, wait until the first connection of each plugin has loaded
   128  	if customSearchPath := e.client.GetCustomSearchPath(); customSearchPath != nil {
   129  		if err := connection_sync.WaitForSearchPathSchemas(ctx, e.client, customSearchPath); err != nil {
   130  			return err
   131  		}
   132  	}
   133  
   134  	// the number of goroutines parallel to start
   135  	var maxParallelGoRoutines int64 = constants.DefaultMaxConnections
   136  	if viper.IsSet(constants.ArgMaxParallel) {
   137  		maxParallelGoRoutines = viper.GetInt64(constants.ArgMaxParallel)
   138  	}
   139  
   140  	// to limit the number of parallel controls go routines started
   141  	parallelismLock := semaphore.NewWeighted(maxParallelGoRoutines)
   142  
   143  	// just execute the root - it will traverse the tree
   144  	e.Root.execute(ctx, e.client, parallelismLock)
   145  
   146  	if err := e.waitForActiveRunsToComplete(ctx, parallelismLock, maxParallelGoRoutines); err != nil {
   147  		log.Printf("[WARN] timed out waiting for active runs to complete")
   148  	}
   149  
   150  	// now build map of dimension property name to property value to color map
   151  	e.DimensionColorGenerator, _ = NewDimensionColorGenerator(4, 27)
   152  	e.DimensionColorGenerator.populate(e)
   153  
   154  	return nil
   155  }
   156  
   157  func (e *ExecutionTree) waitForActiveRunsToComplete(ctx context.Context, parallelismLock *semaphore.Weighted, maxParallelGoRoutines int64) error {
   158  	waitCtx := ctx
   159  	// if the context was already cancelled, we must creat ea new one to use  when waiting to acquire the lock
   160  	if ctx.Err() != nil {
   161  		// use a Background context - since the original context has been cancelled
   162  		// this lets us wait for the active control queries to cancel
   163  		c, cancel := context.WithTimeout(context.Background(), constants.ControlQueryCancellationTimeoutSecs*time.Second)
   164  		waitCtx = c
   165  		defer cancel()
   166  	}
   167  	// wait till we can acquire all semaphores - meaning that all active runs have finished
   168  	return parallelismLock.Acquire(waitCtx, maxParallelGoRoutines)
   169  }
   170  
   171  func (e *ExecutionTree) populateControlFilterMap(ctx context.Context, controlFilterWhereClause string) error {
   172  	// if we derived or were passed a where clause, run the filter
   173  	if len(controlFilterWhereClause) > 0 {
   174  		log.Println("[TRACE]", "filtering controls with", controlFilterWhereClause)
   175  		var err error
   176  		e.controlNameFilterMap, err = e.getControlMapFromWhereClause(ctx, controlFilterWhereClause)
   177  		if err != nil {
   178  			return err
   179  		}
   180  	}
   181  
   182  	return nil
   183  }
   184  
   185  func (e *ExecutionTree) ShouldIncludeControl(controlName string) bool {
   186  	if e.controlNameFilterMap == nil {
   187  		return true
   188  	}
   189  	_, ok := e.controlNameFilterMap[controlName]
   190  	return ok
   191  }
   192  
   193  // getExecutionRootFromArg resolves the arg into the execution root
   194  // - if the arg is a control name, the root will be the Control with that name
   195  // - if the arg is a benchmark name, the root will be the Benchmark with that name
   196  // - if the arg is a mod name, the root will be the Mod with that name
   197  // - if the arg is 'all' the root will be a node with all Mods as children
   198  func (e *ExecutionTree) getExecutionRootFromArg(arg string) (modconfig.ModTreeItem, error) {
   199  	// special case handling for the string "all"
   200  	if arg == "all" {
   201  		// if the arg is "all", we want to execute all _direct_ children of the Mod
   202  		// but NOT children which come from dependency mods
   203  
   204  		// to achieve this, use a  DirectChildrenModDecorator
   205  		return &DirectChildrenModDecorator{Mod: e.Workspace.Mod}, nil
   206  	}
   207  
   208  	// if the arg is the name of one of the workspace dependendencies, wrap it in DirectChildrenModDecorator
   209  	// so we only execute _its_ direct children
   210  	for _, mod := range e.Workspace.Mods {
   211  		if mod.ShortName == arg {
   212  			return &DirectChildrenModDecorator{Mod: mod}, nil
   213  		}
   214  	}
   215  
   216  	// what resource type is arg?
   217  	parsedName, err := modconfig.ParseResourceName(arg)
   218  	if err != nil {
   219  		// just log error
   220  		return nil, fmt.Errorf("failed to parse check argument '%s': %v", arg, err)
   221  	}
   222  
   223  	resource, found := e.Workspace.GetResource(parsedName)
   224  
   225  	root, ok := resource.(modconfig.ModTreeItem)
   226  	if !found || !ok {
   227  		return nil, fmt.Errorf("no resources found matching argument '%s'", arg)
   228  	}
   229  	// root item must be either a benchmark or a control
   230  	if !helpers.StringSliceContains([]string{modconfig.BlockTypeControl, modconfig.BlockTypeBenchmark}, root.BlockType()) {
   231  		return nil, fmt.Errorf("cannot execute '%s' using check, only controls and benchmarks may be run", resource.Name())
   232  	}
   233  	return root, nil
   234  }
   235  
   236  // Get a map of control names from the introspection table steampipe_control
   237  // This is used to implement the 'where' control filtering
   238  func (e *ExecutionTree) getControlMapFromWhereClause(ctx context.Context, whereClause string) (map[string]bool, error) {
   239  	// query may either be a 'where' clause, or a named query
   240  	resolvedQuery, _, err := e.Workspace.ResolveQueryAndArgsFromSQLString(whereClause)
   241  	if err != nil {
   242  		return nil, err
   243  	}
   244  	// did we in fact resolve a named query, or just return the 'name' as the query
   245  	isNamedQuery := resolvedQuery.ExecuteSQL != whereClause
   246  
   247  	// if the query is NOT a named query, we need to construct a full query by adding a select
   248  	if !isNamedQuery {
   249  		resolvedQuery.ExecuteSQL = fmt.Sprintf("select resource_name from %s where %s", constants.IntrospectionTableControl, whereClause)
   250  	}
   251  
   252  	res, err := e.client.ExecuteSync(ctx, resolvedQuery.ExecuteSQL, resolvedQuery.Args...)
   253  	if err != nil {
   254  		return nil, err
   255  	}
   256  
   257  	//
   258  	// find the "resource_name" column index
   259  	resourceNameColumnIndex := -1
   260  
   261  	for i, c := range res.Cols {
   262  		if c.Name == "resource_name" {
   263  			resourceNameColumnIndex = i
   264  		}
   265  	}
   266  	if resourceNameColumnIndex == -1 {
   267  		return nil, fmt.Errorf("the named query passed in the 'where' argument must return the 'resource_name' column")
   268  	}
   269  
   270  	var controlNames = make(map[string]bool)
   271  	for _, row := range res.Rows {
   272  		rowResult := row.(*queryresult.RowResult)
   273  		controlName := rowResult.Data[resourceNameColumnIndex].(string)
   274  		controlNames[controlName] = true
   275  	}
   276  	return controlNames, nil
   277  }
   278  
   279  func (e *ExecutionTree) GetAllTags() []string {
   280  	// map keep track which tags have been added as columns
   281  	tagColumnMap := make(map[string]bool)
   282  	var tagColumns []string
   283  	for _, r := range e.ControlRuns {
   284  		if r.Control.Tags != nil {
   285  			for tag := range r.Control.Tags {
   286  				if !tagColumnMap[tag] {
   287  					tagColumns = append(tagColumns, tag)
   288  					tagColumnMap[tag] = true
   289  				}
   290  			}
   291  		}
   292  	}
   293  	sort.Strings(tagColumns)
   294  	return tagColumns
   295  }