github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/transform/staticlark/dataflow.go (about) 1 package staticlark 2 3 import ( 4 "fmt" 5 ) 6 7 // analyze the call graph to detect sensitive data being used incorrectly. 8 // Assume that some functions return sensitive data (such as private datasets) 9 // while others do dangerous things with data (such as uploading it with http). 10 // These are declared using the axioms map. 11 // Traverse the call graph, starting from lowest leaf functions first, and 12 // for each function, build a control flow graph. Perform dataflow analysis 13 // on this graph to see which variables are influenced by which values. The 14 // rule is that sensitive data may not be passed to a dangerous function 15 func analyzeSensitiveDataflow(graph *callGraph, axioms map[string]*funcNode) ([]Diagnostic, error) { 16 dataflowAnalyzer := &dataflowAnalyzer{ 17 graph: graph, 18 axioms: axioms, 19 seen: make(map[string]struct{}), 20 } 21 for _, fn := range graph.nodes { 22 if err := dataflowAnalyzer.traverseNode(fn); err != nil { 23 return nil, err 24 } 25 } 26 return dataflowAnalyzer.diags, nil 27 } 28 29 type dataflowAnalyzer struct { 30 graph *callGraph 31 axioms map[string]*funcNode 32 seen map[string]struct{} 33 diags []Diagnostic 34 // a stack of sources: values that influence any assignments created 35 // within the current control structure 36 controlSrcStack [][]string 37 } 38 39 // recursively call this function until all leaf functions are handled 40 func (da *dataflowAnalyzer) traverseNode(fn *funcNode) error { 41 // Only check a given function once 42 if _, ok := da.seen[fn.name]; ok { 43 return nil 44 } 45 // Have to check the invoked functions first 46 for _, call := range fn.calls { 47 if err := da.traverseNode(call); err != nil { 48 return err 49 } 50 } 51 // Mark this as being visited 52 da.seen[fn.name] = struct{}{} 53 // Perhaps it is handled as an axiom 54 if satisfiesAxiom(fn, da.axioms) { 55 return nil 56 } 57 return da.analyzeFunction(fn) 58 } 59 60 func satisfiesAxiom(fn *funcNode, axioms map[string]*funcNode) bool { 61 fname := fn.name 62 if axioms == nil { 63 return false 64 } 65 if lookup, ok := axioms[fname]; ok { 66 fn.dangerousParams = lookup.dangerousParams 67 fn.sensitiveReturn = lookup.sensitiveReturn 68 fn.reasonParams = lookup.reasonParams 69 return true 70 } 71 return false 72 } 73 74 func (da *dataflowAnalyzer) analyzeFunction(fn *funcNode) error { 75 fname := fn.name 76 f := da.graph.lookup[fname] 77 if f == nil { 78 return fmt.Errorf("showing control flow, function %q not found", fname) 79 } 80 params := f.params 81 82 controlFlow, err := newControlFlowFromFunc(f) 83 if err != nil { 84 return err 85 } 86 87 env := newEnvironment() 88 env.markParams(params) 89 90 if err := da.analyzeSequence(0, len(controlFlow.blocks), controlFlow, env, fn); err != nil { 91 return err 92 } 93 94 dangerousParams, reasonParams := env.getHighSensitive(params) 95 fn.dangerousParams = dangerousParams 96 fn.reasonParams = reasonParams 97 return nil 98 } 99 100 // analyze the sequence of blocks beginning at start, until finish 101 func (da *dataflowAnalyzer) analyzeSequence(start, finish int, cf *controlFlow, env *environment, fn *funcNode) error { 102 103 index := start 104 for index >= 0 && index < finish { 105 block := cf.blocks[index] 106 107 if block.isLinear() { 108 // linear flow simply analyzes the block, then follows the edge 109 if err := da.analyzeBlock(block, env, fn); err != nil { 110 return err 111 } 112 113 if len(block.edges) == 1 { 114 index = block.edges[0] 115 } else { 116 break 117 } 118 119 } else if block.isIfCondition() { 120 // if statements will analyze both true and false branches, then union 121 // the environments from each 122 trueIdx := block.edges[0] 123 falseIdx := block.edges[1] 124 joinIdx := block.join 125 126 trueEnv := env.clone() 127 falseEnv := env.clone() 128 129 // the data sources for the if condition will influence any assignments 130 // that happen in either branch 131 da.pushControlBindings(block.units[0].DataSources()) 132 133 if err := da.analyzeSequence(trueIdx, joinIdx, cf, trueEnv, fn); err != nil { 134 return err 135 } 136 if err := da.analyzeSequence(falseIdx, joinIdx, cf, falseEnv, fn); err != nil { 137 return err 138 } 139 140 da.popControlBindings() 141 142 env.copyFrom(trueEnv.union(falseEnv)) 143 index = joinIdx 144 145 } else { 146 // TODO(dustmop): Handle loops also - run the loop repeatedly 147 // until the environment reaches a fixed-point 148 return fmt.Errorf("TODO: block type %v not implemented", block) 149 } 150 } 151 152 return nil 153 } 154 155 func (da *dataflowAnalyzer) analyzeBlock(block *codeBlock, env *environment, fn *funcNode) error { 156 fname := fn.name 157 // iterate each unit. Could be an assignment, or function call, etc 158 for _, unit := range block.units { 159 160 // get data sources that are not function calls 161 sources := []string{} 162 for _, src := range unit.DataSources() { 163 if _, ok := da.graph.lookup[src]; !ok { 164 sources = append(sources, src) 165 } 166 } 167 168 invokes := unit.Invocations() 169 if dest := unit.AssignsTo(); dest != "" { 170 // if this variable is being assigned the output of a function 171 // that returns secret data, mark it as secret itself 172 for _, inv := range invokes { 173 if fn, ok := da.graph.lookup[inv.Name]; ok && fn.sensitiveReturn { 174 sources = append(sources, sensitiveVarName) 175 } 176 } 177 // assign the data sources to this variable 178 sources = append(sources, da.controlSources()...) 179 env.assign(dest, sources) 180 } 181 182 if unit.IsReturn() { 183 // if the unit is a return statement, which is returning secret 184 // data, mark the return of this function as being sensitive 185 for _, src := range unit.DataSources() { 186 _, ok := da.graph.lookup[src] 187 if !ok { 188 if env.isSecret(src) { 189 fn.sensitiveReturn = true 190 } 191 } 192 } 193 } 194 195 // check if any secret data is being passed to sensitive function 196 // arguments 197 for _, inv := range invokes { 198 // skip built-in functions and control structures 199 if da.builtinOrControlStructure(inv.Name) { 200 continue 201 } 202 203 fn, ok := da.graph.lookup[inv.Name] 204 if !ok { 205 return fmt.Errorf("invoked function %s not found", inv.Name) 206 } 207 208 for i, arg := range inv.Args { 209 danger := fn.dangerousParams 210 if danger != nil && i < len(danger) && danger[i] { 211 // receiving param can potentially be dangerous 212 if env.isSecret(arg) { 213 // secret being sent to dangerous param 214 prev := fn.reasonParams[i] 215 msg := fmt.Sprintf("secrets may leak, variable %s is secret\n%s", arg, prev.String()) 216 d := Diagnostic{ 217 Pos: unit.where, 218 Category: "leak", 219 Message: msg, 220 } 221 da.diags = append(da.diags, d) 222 } 223 // taint vars so that the sources become dangerous 224 prev := fn.reasonParams[i] 225 reason := makeReason(unit.where, fname, arg, inv.Name, fn.params[i], prev) 226 env.taint(arg, reason) 227 } 228 } 229 } 230 } 231 232 return nil 233 } 234 235 func (da *dataflowAnalyzer) builtinOrControlStructure(name string) bool { 236 return name == "if" 237 } 238 239 func (da *dataflowAnalyzer) pushControlBindings(sources []string) { 240 da.controlSrcStack = append(da.controlSrcStack, sources) 241 } 242 243 func (da *dataflowAnalyzer) popControlBindings() { 244 lastIndex := len(da.controlSrcStack) - 1 245 da.controlSrcStack = da.controlSrcStack[:lastIndex] 246 } 247 248 func (da *dataflowAnalyzer) controlSources() []string { 249 result := make([]string, 0) 250 for _, row := range da.controlSrcStack { 251 result = append(result, row...) 252 } 253 return result 254 }