github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/transform/staticlark/dataflow.go (about)

     1  package staticlark
     2  
     3  import (
     4  	"fmt"
     5  )
     6  
     7  // analyze the call graph to detect sensitive data being used incorrectly.
     8  // Assume that some functions return sensitive data (such as private datasets)
     9  // while others do dangerous things with data (such as uploading it with http).
    10  // These are declared using the axioms map.
    11  // Traverse the call graph, starting from lowest leaf functions first, and
    12  // for each function, build a control flow graph. Perform dataflow analysis
    13  // on this graph to see which variables are influenced by which values. The
    14  // rule is that sensitive data may not be passed to a dangerous function
    15  func analyzeSensitiveDataflow(graph *callGraph, axioms map[string]*funcNode) ([]Diagnostic, error) {
    16  	dataflowAnalyzer := &dataflowAnalyzer{
    17  		graph:  graph,
    18  		axioms: axioms,
    19  		seen:   make(map[string]struct{}),
    20  	}
    21  	for _, fn := range graph.nodes {
    22  		if err := dataflowAnalyzer.traverseNode(fn); err != nil {
    23  			return nil, err
    24  		}
    25  	}
    26  	return dataflowAnalyzer.diags, nil
    27  }
    28  
    29  type dataflowAnalyzer struct {
    30  	graph  *callGraph
    31  	axioms map[string]*funcNode
    32  	seen   map[string]struct{}
    33  	diags  []Diagnostic
    34  	// a stack of sources: values that influence any assignments created
    35  	// within the current control structure
    36  	controlSrcStack [][]string
    37  }
    38  
    39  // recursively call this function until all leaf functions are handled
    40  func (da *dataflowAnalyzer) traverseNode(fn *funcNode) error {
    41  	// Only check a given function once
    42  	if _, ok := da.seen[fn.name]; ok {
    43  		return nil
    44  	}
    45  	// Have to check the invoked functions first
    46  	for _, call := range fn.calls {
    47  		if err := da.traverseNode(call); err != nil {
    48  			return err
    49  		}
    50  	}
    51  	// Mark this as being visited
    52  	da.seen[fn.name] = struct{}{}
    53  	// Perhaps it is handled as an axiom
    54  	if satisfiesAxiom(fn, da.axioms) {
    55  		return nil
    56  	}
    57  	return da.analyzeFunction(fn)
    58  }
    59  
    60  func satisfiesAxiom(fn *funcNode, axioms map[string]*funcNode) bool {
    61  	fname := fn.name
    62  	if axioms == nil {
    63  		return false
    64  	}
    65  	if lookup, ok := axioms[fname]; ok {
    66  		fn.dangerousParams = lookup.dangerousParams
    67  		fn.sensitiveReturn = lookup.sensitiveReturn
    68  		fn.reasonParams = lookup.reasonParams
    69  		return true
    70  	}
    71  	return false
    72  }
    73  
    74  func (da *dataflowAnalyzer) analyzeFunction(fn *funcNode) error {
    75  	fname := fn.name
    76  	f := da.graph.lookup[fname]
    77  	if f == nil {
    78  		return fmt.Errorf("showing control flow, function %q not found", fname)
    79  	}
    80  	params := f.params
    81  
    82  	controlFlow, err := newControlFlowFromFunc(f)
    83  	if err != nil {
    84  		return err
    85  	}
    86  
    87  	env := newEnvironment()
    88  	env.markParams(params)
    89  
    90  	if err := da.analyzeSequence(0, len(controlFlow.blocks), controlFlow, env, fn); err != nil {
    91  		return err
    92  	}
    93  
    94  	dangerousParams, reasonParams := env.getHighSensitive(params)
    95  	fn.dangerousParams = dangerousParams
    96  	fn.reasonParams = reasonParams
    97  	return nil
    98  }
    99  
   100  // analyze the sequence of blocks beginning at start, until finish
   101  func (da *dataflowAnalyzer) analyzeSequence(start, finish int, cf *controlFlow, env *environment, fn *funcNode) error {
   102  
   103  	index := start
   104  	for index >= 0 && index < finish {
   105  		block := cf.blocks[index]
   106  
   107  		if block.isLinear() {
   108  			// linear flow simply analyzes the block, then follows the edge
   109  			if err := da.analyzeBlock(block, env, fn); err != nil {
   110  				return err
   111  			}
   112  
   113  			if len(block.edges) == 1 {
   114  				index = block.edges[0]
   115  			} else {
   116  				break
   117  			}
   118  
   119  		} else if block.isIfCondition() {
   120  			// if statements will analyze both true and false branches, then union
   121  			// the environments from each
   122  			trueIdx := block.edges[0]
   123  			falseIdx := block.edges[1]
   124  			joinIdx := block.join
   125  
   126  			trueEnv := env.clone()
   127  			falseEnv := env.clone()
   128  
   129  			// the data sources for the if condition will influence any assignments
   130  			// that happen in either branch
   131  			da.pushControlBindings(block.units[0].DataSources())
   132  
   133  			if err := da.analyzeSequence(trueIdx, joinIdx, cf, trueEnv, fn); err != nil {
   134  				return err
   135  			}
   136  			if err := da.analyzeSequence(falseIdx, joinIdx, cf, falseEnv, fn); err != nil {
   137  				return err
   138  			}
   139  
   140  			da.popControlBindings()
   141  
   142  			env.copyFrom(trueEnv.union(falseEnv))
   143  			index = joinIdx
   144  
   145  		} else {
   146  			// TODO(dustmop): Handle loops also - run the loop repeatedly
   147  			// until the environment reaches a fixed-point
   148  			return fmt.Errorf("TODO: block type %v not implemented", block)
   149  		}
   150  	}
   151  
   152  	return nil
   153  }
   154  
   155  func (da *dataflowAnalyzer) analyzeBlock(block *codeBlock, env *environment, fn *funcNode) error {
   156  	fname := fn.name
   157  	// iterate each unit. Could be an assignment, or function call, etc
   158  	for _, unit := range block.units {
   159  
   160  		// get data sources that are not function calls
   161  		sources := []string{}
   162  		for _, src := range unit.DataSources() {
   163  			if _, ok := da.graph.lookup[src]; !ok {
   164  				sources = append(sources, src)
   165  			}
   166  		}
   167  
   168  		invokes := unit.Invocations()
   169  		if dest := unit.AssignsTo(); dest != "" {
   170  			// if this variable is being assigned the output of a function
   171  			// that returns secret data, mark it as secret itself
   172  			for _, inv := range invokes {
   173  				if fn, ok := da.graph.lookup[inv.Name]; ok && fn.sensitiveReturn {
   174  					sources = append(sources, sensitiveVarName)
   175  				}
   176  			}
   177  			// assign the data sources to this variable
   178  			sources = append(sources, da.controlSources()...)
   179  			env.assign(dest, sources)
   180  		}
   181  
   182  		if unit.IsReturn() {
   183  			// if the unit is a return statement, which is returning secret
   184  			// data, mark the return of this function as being sensitive
   185  			for _, src := range unit.DataSources() {
   186  				_, ok := da.graph.lookup[src]
   187  				if !ok {
   188  					if env.isSecret(src) {
   189  						fn.sensitiveReturn = true
   190  					}
   191  				}
   192  			}
   193  		}
   194  
   195  		// check if any secret data is being passed to sensitive function
   196  		// arguments
   197  		for _, inv := range invokes {
   198  			// skip built-in functions and control structures
   199  			if da.builtinOrControlStructure(inv.Name) {
   200  				continue
   201  			}
   202  
   203  			fn, ok := da.graph.lookup[inv.Name]
   204  			if !ok {
   205  				return fmt.Errorf("invoked function %s not found", inv.Name)
   206  			}
   207  
   208  			for i, arg := range inv.Args {
   209  				danger := fn.dangerousParams
   210  				if danger != nil && i < len(danger) && danger[i] {
   211  					// receiving param can potentially be dangerous
   212  					if env.isSecret(arg) {
   213  						// secret being sent to dangerous param
   214  						prev := fn.reasonParams[i]
   215  						msg := fmt.Sprintf("secrets may leak, variable %s is secret\n%s", arg, prev.String())
   216  						d := Diagnostic{
   217  							Pos:      unit.where,
   218  							Category: "leak",
   219  							Message:  msg,
   220  						}
   221  						da.diags = append(da.diags, d)
   222  					}
   223  					// taint vars so that the sources become dangerous
   224  					prev := fn.reasonParams[i]
   225  					reason := makeReason(unit.where, fname, arg, inv.Name, fn.params[i], prev)
   226  					env.taint(arg, reason)
   227  				}
   228  			}
   229  		}
   230  	}
   231  
   232  	return nil
   233  }
   234  
   235  func (da *dataflowAnalyzer) builtinOrControlStructure(name string) bool {
   236  	return name == "if"
   237  }
   238  
   239  func (da *dataflowAnalyzer) pushControlBindings(sources []string) {
   240  	da.controlSrcStack = append(da.controlSrcStack, sources)
   241  }
   242  
   243  func (da *dataflowAnalyzer) popControlBindings() {
   244  	lastIndex := len(da.controlSrcStack) - 1
   245  	da.controlSrcStack = da.controlSrcStack[:lastIndex]
   246  }
   247  
   248  func (da *dataflowAnalyzer) controlSources() []string {
   249  	result := make([]string, 0)
   250  	for _, row := range da.controlSrcStack {
   251  		result = append(result, row...)
   252  	}
   253  	return result
   254  }