github.com/google/capslock@v0.2.3-0.20240517042941-dac19fc347c0/interesting/interesting.go (about)

     1  // Copyright 2023 Google LLC
     2  //
     3  // Use of this source code is governed by a BSD-style
     4  // license that can be found in the LICENSE file or at
     5  // https://developers.google.com/open-source/licenses/bsd
     6  
     7  // Package interesting contains tools for our static analysis to determine
     8  // whether a leaf function is interesting.
     9  package interesting
    10  
    11  import (
    12  	"bufio"
    13  	_ "embed"
    14  	"fmt"
    15  	"io"
    16  	"sort"
    17  	"strings"
    18  
    19  	cpb "github.com/google/capslock/proto"
    20  )
    21  
// interestingData holds the contents of interesting.cm, embedded at build
// time by the go:embed directive below.  parseInternalMapOrDie parses it as
// the built-in capability map.
//
//go:embed interesting.cm
var interestingData string
    24  
// Classifier contains information used to map code features to
// concrete capabilities.
//
// Its fields are filled in from the directives of a capability map:
//
//   - functionCategory maps a function name to the capability given by a
//     "func" directive.
//   - unanalyzedCategory holds the functions named by "unanalyzed"
//     directives; every value is CAPABILITY_UNANALYZED.
//   - packageCategory maps a package name to the capability given by a
//     "package" directive.
//   - ignoredEdges is the set of {caller, callee} pairs named by
//     "ignore_edge" directives.
//   - cgoSuffixes lists the function-name suffixes given by "cgo_suffix"
//     directives; a function whose name ends in one of them is classified
//     as CAPABILITY_CGO.
type Classifier struct {
	functionCategory   map[string]cpb.Capability
	unanalyzedCategory map[string]cpb.Capability
	packageCategory    map[string]cpb.Capability
	ignoredEdges       map[[2]string]struct{}
	cgoSuffixes        []string
}
    34  
// internalMap is the built-in Classifier.  It is parsed from the embedded
// capability map when the package is initialized; parseInternalMapOrDie
// panics if that data is invalid or contains no function entries.
var internalMap = parseInternalMapOrDie()
    36  
    37  func newClassifier() *Classifier {
    38  	return &Classifier{
    39  		functionCategory:   map[string]cpb.Capability{},
    40  		unanalyzedCategory: map[string]cpb.Capability{},
    41  		packageCategory:    map[string]cpb.Capability{},
    42  		ignoredEdges:       map[[2]string]struct{}{},
    43  	}
    44  }
    45  
    46  func parseCapabilityMap(source string, r io.Reader) (*Classifier, error) {
    47  	ret := newClassifier()
    48  	scanner := bufio.NewScanner(r)
    49  	line := 0
    50  	for scanner.Scan() {
    51  		line++
    52  		// Ignore comments and empty lines.
    53  		t := strings.Split(scanner.Text(), "#")
    54  		if len(t) == 0 {
    55  			continue
    56  		}
    57  		args := strings.Fields(t[0])
    58  		if len(args) == 0 {
    59  			continue
    60  		}
    61  		if len(args) < 2 {
    62  			return nil, fmt.Errorf("%v:%v: invalid format", source, line)
    63  		}
    64  		// Keyword is first argument.
    65  		switch args[0] {
    66  		case "cgo_suffix":
    67  			// Format: cgo_suffix suffix.
    68  			ret.cgoSuffixes = append(ret.cgoSuffixes, args[1])
    69  		case "func":
    70  			// Format: func package/function capability
    71  			if len(args) < 3 {
    72  				return nil, fmt.Errorf("%v:%v: invalid %v format", source, line, args[0])
    73  			}
    74  			if _, ok := ret.functionCategory[args[1]]; ok {
    75  				return nil, fmt.Errorf("%v:%v: duplicate %v key", source, line, args[0])
    76  			}
    77  			c, ok := cpb.Capability_value[args[2]]
    78  			if !ok {
    79  				return nil, fmt.Errorf("%v:%v: unsupported capability %q", source, line, args[2])
    80  			}
    81  			ret.functionCategory[args[1]] = cpb.Capability(c)
    82  		case "ignore_edge":
    83  			// Format: ignore_edge function function
    84  			if len(args) < 3 {
    85  				return nil, fmt.Errorf("%v:%v: invalid %v format", source, line, args[0])
    86  			}
    87  			k := [2]string{args[1], args[2]}
    88  			if _, ok := ret.ignoredEdges[k]; ok {
    89  				return nil, fmt.Errorf("%v:%v: duplicate %v key", source, line, args[0])
    90  			}
    91  			ret.ignoredEdges[k] = struct{}{}
    92  		case "package":
    93  			// Format: package package_name capability
    94  			if len(args) < 3 {
    95  				return nil, fmt.Errorf("%v:%v: invalid %v format", source, line, args[0])
    96  			}
    97  			if _, ok := ret.packageCategory[args[1]]; ok {
    98  				return nil, fmt.Errorf("%v:%v: duplicate %v key", source, line, args[0])
    99  			}
   100  			c, ok := cpb.Capability_value[args[2]]
   101  			if !ok {
   102  				return nil, fmt.Errorf("%v:%v: unsupported capability %q", source, line, args[2])
   103  			}
   104  			ret.packageCategory[args[1]] = cpb.Capability(c)
   105  		case "unanalyzed":
   106  			// Format: unanalyzed function
   107  			if _, ok := ret.unanalyzedCategory[args[1]]; ok {
   108  				return nil, fmt.Errorf("%v:%v: duplicate %v key", source, line, args[0])
   109  			}
   110  			ret.unanalyzedCategory[args[1]] = cpb.Capability_CAPABILITY_UNANALYZED
   111  		default:
   112  			return nil, fmt.Errorf("%v:%v: unsupported keyword %q", source, line, args[0])
   113  		}
   114  	}
   115  	return ret, nil
   116  }
   117  
   118  // parseInternalMapOrDie parses the internal embedded capability map data
   119  // or panic()s if this fails.  It returns the embedded classifier.
   120  func parseInternalMapOrDie() *Classifier {
   121  	classifier, err := parseCapabilityMap("internal", strings.NewReader(interestingData))
   122  	if err != nil {
   123  		panic("internal error: " + err.Error())
   124  	}
   125  	if len(classifier.functionCategory) == 0 {
   126  		panic("internal error: no capabilities loaded")
   127  	}
   128  	return classifier
   129  }
   130  
// DefaultClassifier returns the default internal Classifier, which is built
// from the capability map embedded in this package.  Every call returns the
// same pointer (the package-level internalMap), not a copy.
func DefaultClassifier() *Classifier {
	return internalMap
}
   135  
   136  // ClassifierExcludingUnanalyzed returns a copy of the supplied Classifier
   137  // that is modified to never classify capabilities as CAPABILITY_UNANALYZED.
   138  func ClassifierExcludingUnanalyzed(classifier *Classifier) *Classifier {
   139  	withoutUnanalyzed := *classifier
   140  	withoutUnanalyzed.unanalyzedCategory = nil
   141  	return &withoutUnanalyzed
   142  }
   143  
   144  func mergeCapabilityMap(dst, s1, s2 map[string]cpb.Capability) {
   145  	for k, v := range s1 {
   146  		dst[k] = v
   147  	}
   148  	for k, v := range s2 {
   149  		dst[k] = v
   150  	}
   151  }
   152  
   153  // LoadClassifier returns a capability classifier loaded from the specified
   154  // io.Reader. The filename argument is used only for providing context to
   155  // error messages. The classifier will also include the default Capslock
   156  // classifications unless the excludeBuiltin argument is set.
   157  //
   158  // Refer to the interesting/interesting.cm file in the source code for an
   159  // example of the capability map format. Classifications loaded from a
   160  // caller-specified file always override builtin classifications.
   161  func LoadClassifier(source string, r io.Reader, excludeBuiltin bool) (*Classifier, error) {
   162  	userClassifier, err := parseCapabilityMap(source, r)
   163  	if err != nil {
   164  		return nil, err
   165  	}
   166  	if excludeBuiltin {
   167  		return userClassifier, nil
   168  	}
   169  	ret := newClassifier()
   170  	// Merge.
   171  	// TODO(djm): use `maps.Copy` once it graduates from x/exp.
   172  	mergeCapabilityMap(ret.functionCategory, internalMap.functionCategory, userClassifier.functionCategory)
   173  	mergeCapabilityMap(ret.unanalyzedCategory, internalMap.unanalyzedCategory, userClassifier.unanalyzedCategory)
   174  	mergeCapabilityMap(ret.packageCategory, internalMap.packageCategory, userClassifier.packageCategory)
   175  	for k, v := range internalMap.ignoredEdges {
   176  		ret.ignoredEdges[k] = v
   177  	}
   178  	for k, v := range userClassifier.ignoredEdges {
   179  		ret.ignoredEdges[k] = v
   180  	}
   181  	cgoSuffixes := map[string]bool{}
   182  	for _, v := range internalMap.cgoSuffixes {
   183  		cgoSuffixes[v] = true
   184  	}
   185  	for _, v := range userClassifier.cgoSuffixes {
   186  		cgoSuffixes[v] = true
   187  	}
   188  	// TODO(djm) use `maps.Keys` once it graduates from x/exp.
   189  	for k := range cgoSuffixes {
   190  		ret.cgoSuffixes = append(ret.cgoSuffixes, k)
   191  	}
   192  	sort.Strings(ret.cgoSuffixes)
   193  	return ret, nil
   194  }
   195  
   196  // IncludeCall returns true if a call from one function to another should be
   197  // considered when searching for transitive capabilities.  We return false for
   198  // some internal calls in the standard library where we know a potential
   199  // transitive capability does not arise in practice.
   200  func (c *Classifier) IncludeCall(caller, callee string) bool {
   201  	_, ok := internalMap.ignoredEdges[[2]string{caller, callee}]
   202  	return !ok
   203  }
   204  
   205  // FunctionCategory returns a Category for the given function specified by
   206  // a package name and function name.  Examples of function names include
   207  // "math.Cos", "(time.Time).Clock", and "(*sync.Cond).Signal".
   208  //
   209  // If the return value is Unspecified, then we have not declared it to be
   210  // either safe or unsafe, so its descendants will have to be considered by the
   211  // static analysis.
   212  func (c *Classifier) FunctionCategory(pkg, name string) cpb.Capability {
   213  	for _, s := range c.cgoSuffixes {
   214  		// Calls to C functions produce a call to a function
   215  		// named "_cgo_runtime_cgocall" in the current package.
   216  		// Calls to the various type conversion functions in the
   217  		// "C" pseudo-package (see See https://pkg.go.dev/cmd/cgo)
   218  		// produce calls to other functions listed in cgoSuffixes.
   219  		if strings.HasSuffix(name, s) {
   220  			return cpb.Capability_CAPABILITY_CGO
   221  		}
   222  	}
   223  	if cat, ok := c.functionCategory[name]; ok {
   224  		// If the function has a category, that takes precedence over its
   225  		// package's category.  This includes the possibility that the function
   226  		// is categorized as "unspecified", which indicates that the analyzer
   227  		// should analyze the function's code as normal.
   228  		return cat
   229  	}
   230  	if cat, ok := c.unanalyzedCategory[name]; ok {
   231  		return cat
   232  	}
   233  	return c.packageCategory[pkg]
   234  }