github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/seccomp/seccomp_optimizer.go (about)

     1  // Copyright 2023 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package seccomp
    16  
    17  import (
    18  	"fmt"
    19  	"strings"
    20  )
    21  
// ruleOptimizerFunc is the signature shared by the optimization passes in
// this file: a function that can optimize a SyscallRule.
// It returns the updated SyscallRule, along with whether any modification
// was made. The passes defined in this file return a rule equivalent to
// their input whenever the boolean is false.
type ruleOptimizerFunc func(SyscallRule) (SyscallRule, bool)
    26  
    27  // convertSingleCompoundRuleToThatRule replaces `Or` or `And` rules with a
    28  // single branch to just that branch.
    29  func convertSingleCompoundRuleToThatRule[T Or | And](rule SyscallRule) (SyscallRule, bool) {
    30  	if tRule, isT := rule.(T); isT && len(tRule) == 1 {
    31  		return tRule[0], true
    32  	}
    33  	return rule, false
    34  }
    35  
    36  // flattenCompoundRules turns compound rules (Or or And) embedded inside
    37  // compound rules of the same type into a flat rule of that type.
    38  func flattenCompoundRules[T Or | And](rule SyscallRule) (SyscallRule, bool) {
    39  	tRule, isT := rule.(T)
    40  	if !isT {
    41  		return rule, false
    42  	}
    43  	anySubT := false
    44  	for _, subRule := range tRule {
    45  		if _, subIsT := subRule.(T); subIsT {
    46  			anySubT = true
    47  			break
    48  		}
    49  	}
    50  	if !anySubT {
    51  		return rule, false
    52  	}
    53  	var newRules []SyscallRule
    54  	for _, subRule := range tRule {
    55  		if subT, subIsT := subRule.(T); subIsT {
    56  			newRules = append(newRules, subT...)
    57  		} else {
    58  			newRules = append(newRules, subRule)
    59  		}
    60  	}
    61  	return SyscallRule(T(newRules)), true
    62  }
    63  
    64  // convertMatchAllOrXToMatchAll an Or rule that contains MatchAll to MatchAll.
    65  func convertMatchAllOrXToMatchAll(rule SyscallRule) (SyscallRule, bool) {
    66  	orRule, isOr := rule.(Or)
    67  	if !isOr {
    68  		return rule, false
    69  	}
    70  	for _, subRule := range orRule {
    71  		if _, subIsMatchAll := subRule.(MatchAll); subIsMatchAll {
    72  			return MatchAll{}, true
    73  		}
    74  	}
    75  	return orRule, false
    76  }
    77  
    78  // convertMatchAllAndXToX removes MatchAll clauses from And rules.
    79  func convertMatchAllAndXToX(rule SyscallRule) (SyscallRule, bool) {
    80  	andRule, isAnd := rule.(And)
    81  	if !isAnd {
    82  		return rule, false
    83  	}
    84  	hasMatchAll := false
    85  	for _, subRule := range andRule {
    86  		if _, subIsMatchAll := subRule.(MatchAll); subIsMatchAll {
    87  			hasMatchAll = true
    88  			break
    89  		}
    90  	}
    91  	if !hasMatchAll {
    92  		return rule, false
    93  	}
    94  	var newRules []SyscallRule
    95  	for _, subRule := range andRule {
    96  		if _, subIsAny := subRule.(MatchAll); !subIsAny {
    97  			newRules = append(newRules, subRule)
    98  		}
    99  	}
   100  	if len(newRules) == 0 {
   101  		// An `And` rule with zero rules inside is invalid.
   102  		return MatchAll{}, true
   103  	}
   104  	return And(newRules), true
   105  }
   106  
   107  // nilInPerArgToAnyValue replaces `nil` values in `PerArg` rules with
   108  // `AnyValue`. This isn't really an optimization, but it simplifies the
   109  // logic of other `PerArg` optimizers to not have to handle the `nil` case
   110  // separately from the `AnyValue` case.
   111  func nilInPerArgToAnyValue(rule SyscallRule) (SyscallRule, bool) {
   112  	perArg, isPerArg := rule.(PerArg)
   113  	if !isPerArg {
   114  		return rule, false
   115  	}
   116  	changed := false
   117  	for argNum, valueMatcher := range perArg {
   118  		if valueMatcher == nil {
   119  			perArg[argNum] = AnyValue{}
   120  			changed = true
   121  		}
   122  	}
   123  	return perArg, changed
   124  }
   125  
   126  // convertUselessPerArgToMatchAll looks for `PerArg` rules that match
   127  // anything and replaces them with `MatchAll`.
   128  func convertUselessPerArgToMatchAll(rule SyscallRule) (SyscallRule, bool) {
   129  	perArg, isPerArg := rule.(PerArg)
   130  	if !isPerArg {
   131  		return rule, false
   132  	}
   133  	for _, valueMatcher := range perArg {
   134  		if _, isAnyValue := valueMatcher.(AnyValue); !isAnyValue {
   135  			return rule, false
   136  		}
   137  	}
   138  	return MatchAll{}, true
   139  }
   140  
   141  // signature returns a string signature of this `PerArg`.
   142  // This string can be used to identify the behavior of this `PerArg` rule.
   143  func (pa PerArg) signature() string {
   144  	var sb strings.Builder
   145  	for _, valueMatcher := range pa {
   146  		repr := valueMatcher.Repr()
   147  		if strings.ContainsRune(repr, ';') {
   148  			panic(fmt.Sprintf("ValueMatcher %v (type %T) returned representation %q containing illegal character ';'", valueMatcher, valueMatcher, repr))
   149  		}
   150  		sb.WriteString(repr)
   151  		sb.WriteRune(';')
   152  	}
   153  	return sb.String()
   154  }
   155  
   156  // deduplicatePerArgs deduplicates PerArg rules with identical matchers.
   157  // This can happen during filter construction, when rules are added across
   158  // multiple files.
   159  func deduplicatePerArgs[T Or | And](rule SyscallRule) (SyscallRule, bool) {
   160  	tRule, isT := rule.(T)
   161  	if !isT || len(tRule) < 2 {
   162  		return rule, false
   163  	}
   164  	knownPerArgs := make(map[string]struct{}, len(tRule))
   165  	newRules := make([]SyscallRule, 0, len(tRule))
   166  	changed := false
   167  	for _, subRule := range tRule {
   168  		subPerArg, subIsPerArg := subRule.(PerArg)
   169  		if !subIsPerArg {
   170  			newRules = append(newRules, subRule)
   171  			continue
   172  		}
   173  		sig := subPerArg.signature()
   174  		if _, isDupe := knownPerArgs[sig]; isDupe {
   175  			changed = true
   176  			continue
   177  		}
   178  		knownPerArgs[sig] = struct{}{}
   179  		newRules = append(newRules, subPerArg)
   180  	}
   181  	if !changed {
   182  		return rule, false
   183  	}
   184  	return SyscallRule(T(newRules)), true
   185  }
   186  
   187  // splitMatchers replaces every `splittableValueMatcher` with a
   188  // `splitMatcher` value matcher instead.
   189  // This enables optimizations that are split-aware to run without
   190  // the need to have logic handling this conversion.
   191  func splitMatchers(rule SyscallRule) (SyscallRule, bool) {
   192  	perArg, isPerArg := rule.(PerArg)
   193  	if !isPerArg {
   194  		return rule, false
   195  	}
   196  	changed := false
   197  	for argNum, valueMatcher := range perArg {
   198  		if _, isAlreadySplit := valueMatcher.(splitMatcher); isAlreadySplit {
   199  			continue
   200  		}
   201  		splittableMatcher, isSplittableMatcher := valueMatcher.(splittableValueMatcher)
   202  		if !isSplittableMatcher {
   203  			continue
   204  		}
   205  		perArg[argNum] = splittableMatcher.split()
   206  		changed = true
   207  	}
   208  	return perArg, changed
   209  }
   210  
   211  // simplifyHalfValueMatcher may convert a `halfValueMatcher` to a simpler
   212  // (and potentially faster) representation.
   213  func simplifyHalfValueMatcher(hvm halfValueMatcher) halfValueMatcher {
   214  	switch v := hvm.(type) {
   215  	case halfNotSet:
   216  		if v == 0 {
   217  			return halfAnyValue{}
   218  		}
   219  	case halfMaskedEqual:
   220  		switch {
   221  		case v.mask == 0 && v.value == 0:
   222  			return halfAnyValue{}
   223  		case v.mask == 0xffffffff:
   224  			return halfEqualTo(v.value)
   225  		case v.value == 0:
   226  			return halfNotSet(v.mask)
   227  		}
   228  	}
   229  	return hvm
   230  }
   231  
   232  // simplifyHalfValueMatchers replace `halfValueMatcher`s with their simplified
   233  // version.
   234  func simplifyHalfValueMatchers(rule SyscallRule) (SyscallRule, bool) {
   235  	perArg, isPerArg := rule.(PerArg)
   236  	if !isPerArg {
   237  		return rule, false
   238  	}
   239  	changed := false
   240  	for i, valueMatcher := range perArg {
   241  		sm, isSplitMatcher := valueMatcher.(splitMatcher)
   242  		if !isSplitMatcher {
   243  			continue
   244  		}
   245  		if newHigh := simplifyHalfValueMatcher(sm.highMatcher); newHigh.Repr() != sm.highMatcher.Repr() {
   246  			sm.highMatcher = newHigh
   247  			perArg[i] = sm
   248  			changed = true
   249  		}
   250  		if newLow := simplifyHalfValueMatcher(sm.lowMatcher); newLow.Repr() != sm.lowMatcher.Repr() {
   251  			sm.lowMatcher = newLow
   252  			perArg[i] = sm
   253  			changed = true
   254  		}
   255  	}
   256  	return perArg, changed
   257  }
   258  
   259  // anySplitMatchersToAnyValue converts `splitMatcher`s where both halves
   260  // match any value to a single AnyValue{} rule.
   261  func anySplitMatchersToAnyValue(rule SyscallRule) (SyscallRule, bool) {
   262  	perArg, isPerArg := rule.(PerArg)
   263  	if !isPerArg {
   264  		return rule, false
   265  	}
   266  	changed := false
   267  	for argNum, valueMatcher := range perArg {
   268  		sm, isSplitMatcher := valueMatcher.(splitMatcher)
   269  		if !isSplitMatcher {
   270  			continue
   271  		}
   272  		_, highIsAny := sm.highMatcher.(halfAnyValue)
   273  		_, lowIsAny := sm.lowMatcher.(halfAnyValue)
   274  		if highIsAny && lowIsAny {
   275  			perArg[argNum] = AnyValue{}
   276  			changed = true
   277  		}
   278  	}
   279  	return perArg, changed
   280  }
   281  
// invalidValueMatcher is a stand-in `ValueMatcher` with a unique
// representation that doesn't look like any legitimate `ValueMatcher`.
// It embeds the `ValueMatcher` interface only to satisfy it; the optimizers
// in this file construct it as `invalidValueMatcher{}`, leaving the embedded
// interface nil, so calling any method other than `Repr` will panic.
// It is used as an intermediate step for some optimizers: it acts as a
// placeholder token when computing `PerArg` signatures.
type invalidValueMatcher struct {
	ValueMatcher
}

// Repr implements `ValueMatcher.Repr`.
// It returns a fixed token, so every `invalidValueMatcher` has the same
// representation.
func (invalidValueMatcher) Repr() string {
	return "invalidValueMatcher"
}
   294  
// invalidHalfValueMatcher is a stand-in `halfValueMatcher` with a unique
// representation that doesn't look like any legitimate `halfValueMatcher`.
// It embeds the `halfValueMatcher` interface only to satisfy it; the
// optimizers in this file construct it as `invalidHalfValueMatcher{}`,
// leaving the embedded interface nil, so calling any method other than
// `Repr` will panic.
// It is used as an intermediate step for some optimizers: it acts as a
// placeholder token for one half of a `splitMatcher` when computing
// `PerArg` signatures.
type invalidHalfValueMatcher struct {
	halfValueMatcher
}

// Repr implements `halfValueMatcher.Repr`.
// It returns a fixed token, so every `invalidHalfValueMatcher` has the same
// representation.
func (invalidHalfValueMatcher) Repr() string {
	return "invalidHalfValueMatcher"
}
   307  
   308  // sameStringSet returns whether the given string sets are equal.
   309  func sameStringSet(m1, m2 map[string]struct{}) bool {
   310  	if len(m1) != len(m2) {
   311  		return false
   312  	}
   313  	for k := range m1 {
   314  		if _, found := m2[k]; !found {
   315  			return false
   316  		}
   317  	}
   318  	return true
   319  }
   320  
   321  // extractRepeatedMatchers looks for common argument matchers that are
   322  // repeated across all combinations of *other* argument matchers in branches
   323  // of an `Or` rule that contains only `PerArg` rules.
   324  // It removes them from these `PerArg` rules, creates an `Or` of the
   325  // matchers that are repeated across all combinations, and `And`s that
   326  // rule to the rewritten `Or` rule.
   327  // In other words (simplifying `PerArg` to 4 items for simplicity):
   328  //
   329  //	Or{
   330  //		PerArg{A1, B1, C1, D},
   331  //		PerArg{A2, B1, C1, D},
   332  //		PerArg{A1, B2, C2, D},
   333  //		PerArg{A2, B2, C2, D},
   334  //		PerArg{A1, B3, C3, D},
   335  //		PerArg{A2, B3, C3, D},
   336  //	}
   337  //
   338  // becomes (after one pass):
   339  //
   340  //	And{
   341  //		Or{
   342  //			# Note: These will get deduplicated by deduplicatePerArgs
   343  //			PerArg{A1, AnyValue{}, AnyValue{}, AnyValue{}},
   344  //			PerArg{A2, AnyValue{}, AnyValue{}, AnyValue{}},
   345  //			PerArg{A1, AnyValue{}, AnyValue{}, AnyValue{}},
   346  //			PerArg{A2, AnyValue{}, AnyValue{}, AnyValue{}},
   347  //			PerArg{A1, AnyValue{}, AnyValue{}, AnyValue{}},
   348  //			PerArg{A2, AnyValue{}, AnyValue{}, AnyValue{}},
   349  //		},
   350  //		Or{
   351  //			# Note: These will also get deduplicated by deduplicatePerArgs
   352  //			PerArg{AnyValue{}, B1, C1, D},
   353  //			PerArg{AnyValue{}, B1, C1, D},
   354  //			PerArg{AnyValue{}, B2, C2, D},
   355  //			PerArg{AnyValue{}, B2, C2, D},
   356  //			PerArg{AnyValue{}, B3, C3, D},
   357  //			PerArg{AnyValue{}, B3, C3, D},
   358  //		},
   359  //	}
   360  //
   361  // ... then, on the second pass (after deduplication),
   362  // the second inner `Or` rule gets recursively optimized to:
   363  //
   364  //	And{
   365  //		Or{
   366  //			PerArg{A1, AnyValue{}, AnyValue{}, AnyValue{}},
   367  //			PerArg{A2, AnyValue{}, AnyValue{}, AnyValue{}},
   368  //		},
   369  //		And{
   370  //			Or{
   371  //				PerArg{AnyValue{}, AnyValue{}, AnyValue{}, D},
   372  //				PerArg{AnyValue{}, AnyValue{}, AnyValue{}, D},
   373  //				PerArg{AnyValue{}, AnyValue{}, AnyValue{}, D},
   374  //			},
   375  //			Or{
   376  //				PerArg{AnyValue{}, B1, C1, AnyValue{}},
   377  //				PerArg{AnyValue{}, B2, C2, AnyValue{}},
   378  //				PerArg{AnyValue{}, B3, C3, AnyValue{}},
   379  //			},
   380  //		},
   381  //	}
   382  //
   383  // ... which (after other optimizers clean this all up), finally becomes:
   384  //
   385  //	And{
   386  //		Or{
   387  //			PerArg{A1, AnyValue{}, AnyValue{}, AnyValue{}},
   388  //			PerArg{A2, AnyValue{}, AnyValue{}, AnyValue{}},
   389  //		},
   390  //		PerArg{AnyValue{}, AnyValue{}, AnyValue{}, D},
   391  //		Or{
   392  //			PerArg{AnyValue{}, B1, C1, AnyValue{}},
   393  //			PerArg{AnyValue{}, B2, C2, AnyValue{}},
   394  //			PerArg{AnyValue{}, B3, C3, AnyValue{}},
   395  //		},
   396  //	}
   397  //
   398  // ... Turning 24 comparisons into just 9.
   399  func extractRepeatedMatchers(rule SyscallRule) (SyscallRule, bool) {
   400  	orRule, isOr := rule.(Or)
   401  	if !isOr || len(orRule) < 2 {
   402  		return rule, false
   403  	}
   404  	for _, subRule := range orRule {
   405  		if _, subIsPerArg := subRule.(PerArg); !subIsPerArg {
   406  			return rule, false
   407  		}
   408  	}
   409  
   410  	// extractData is the result of extracting a matcher at `argNum`.
   411  	type extractData struct {
   412  		// extractedMatcher is the extracted matcher that should be AND'd
   413  		// with the rest.
   414  		extractedMatcher ValueMatcher
   415  
   416  		// otherMatchers represents the rest of the matchers after
   417  		// `extractedMatcher` is extracted from a `PerArg`.
   418  		// The matcher that was extracted should be replaced with something
   419  		// that matches any value (i.e. either `AnyValue` or `halfAnyValue`).
   420  		otherMatchers PerArg
   421  
   422  		// otherMatchersSig represents the signature of other matchers, with
   423  		// the extracted matcher being replaced with an "invalid" matcher.
   424  		// The "invalid" matcher acts as a token that is equal across all
   425  		// instances of `otherMatchersSig` for the other `PerArg` rules of the
   426  		// `Or` expression.
   427  		// `otherMatchersSig` isn't the same as `otherMatchers.Signature()`,
   428  		// as `otherMatchers` does not contain this "invalid" matcher (it
   429  		// contains a matcher that matches any value instead).
   430  		otherMatchersSig string
   431  
   432  		// extractedMatcherIsAnyValue is true iff `extractedMatcher` would
   433  		// match any value thrown at it.
   434  		// If this is the case across all branches of the `Or` expression,
   435  		// the optimization is skipped.
   436  		extractedMatcherIsAnyValue bool
   437  
   438  		// otherMatchersAreAllAnyValue is true iff all matchers in
   439  		// `otherMatchers` would match any value thrown at them.
   440  		// If this is the case across all branches of the `Or` expression,
   441  		// the optimization is skipped.
   442  		otherMatchersAreAllAnyValue bool
   443  	}
   444  
   445  	allOtherMatchersSigs := make(map[string]struct{}, len(orRule))
   446  	argExprToOtherMatchersSigs := make(map[string]map[string]struct{}, len(orRule))
   447  	for argNum := 0; argNum < len(orRule[0].(PerArg)); argNum++ {
   448  		// Check if `argNum` takes on a set of matchers common for all
   449  		// combinations of all other matchers.
   450  		// We try to extract a common matcher by three ways, which we
   451  		// iterate over here.
   452  		// Each of them returns the result of their extraction attempt,
   453  		// along with a boolean representing whether extraction was
   454  		// possible at all.
   455  		// To "extract" a matcher means to replace it with an "invalid"
   456  		// matcher in the PerArg expression, and checking if their set of
   457  		// signatures is identical for each unique `Repr()` of the extracted
   458  		// matcher. For splittable matcher, we try each half as well.
   459  		// Conceptually (simplify PerArg to 3 arguments for simplicity),
   460  		// if we have:
   461  		//
   462  		//   Or{
   463  		//     PerArg{A, B, C},
   464  		//     PerArg{D, E, F},
   465  		//   }
   466  		//
   467  		// ... then first, we will try:
   468  		//
   469  		//   Or{
   470  		//     PerArg{invalid, B, C}
   471  		//     PerArg{invalid, E, F}
   472  		//   }
   473  		//
   474  		// ... then, assuming both A and D are `splitMatcher`s:
   475  		// we will try:
   476  		//
   477  		//   Or{
   478  		//     PerArg{splitMatcher{invalid, A.lowMatcher}, B, C}
   479  		//     PerArg{splitMatcher{invalid, D.lowMatcher}, E, F}
   480  		//   }
   481  		//
   482  		// ... and finally we will try:
   483  		//
   484  		//   Or{
   485  		//     PerArg{splitMatcher{A.highMatcher, invalid}, B, C}
   486  		//     PerArg{splitMatcher{D.highMatcher, invalid}, E, F}
   487  		//   }
   488  		for _, extractFn := range []func(PerArg) (extractData, bool){
   489  			// Return whole ValueMatcher at a time:
   490  			func(pa PerArg) (extractData, bool) {
   491  				extractedMatcher := pa[argNum]
   492  				_, extractedMatcherIsAnyValue := extractedMatcher.(AnyValue)
   493  				otherMatchers := pa.clone()
   494  				otherMatchers[argNum] = invalidValueMatcher{}
   495  				otherMatchersSig := otherMatchers.signature()
   496  				otherMatchers[argNum] = AnyValue{}
   497  				otherMatchersAreAllAnyValue := true
   498  				for _, valueMatcher := range otherMatchers {
   499  					if _, isAnyValue := valueMatcher.(AnyValue); !isAnyValue {
   500  						otherMatchersAreAllAnyValue = false
   501  						break
   502  					}
   503  				}
   504  				return extractData{
   505  					extractedMatcher:            extractedMatcher,
   506  					otherMatchers:               otherMatchers,
   507  					otherMatchersSig:            otherMatchersSig,
   508  					extractedMatcherIsAnyValue:  extractedMatcherIsAnyValue,
   509  					otherMatchersAreAllAnyValue: otherMatchersAreAllAnyValue,
   510  				}, true
   511  			},
   512  			// Extract a matcher for the high bits only:
   513  			func(pa PerArg) (extractData, bool) {
   514  				split, isSplit := pa[argNum].(splitMatcher)
   515  				if !isSplit {
   516  					return extractData{}, false
   517  				}
   518  				_, extractedMatcherIsAnyValue := split.highMatcher.(halfAnyValue)
   519  				_, lowMatcherIsAnyValue := split.lowMatcher.(halfAnyValue)
   520  				extractedMatcher := high32BitsMatch(split.highMatcher)
   521  				otherMatchers := pa.clone()
   522  				otherMatchers[argNum] = splitMatcher{
   523  					highMatcher: invalidHalfValueMatcher{},
   524  					lowMatcher:  split.lowMatcher,
   525  				}
   526  				otherMatchersSig := otherMatchers.signature()
   527  				otherMatchers[argNum] = low32BitsMatch(split.lowMatcher)
   528  				otherMatchersAreAllAnyValue := lowMatcherIsAnyValue
   529  				for i, valueMatcher := range otherMatchers {
   530  					if i == argNum {
   531  						continue
   532  					}
   533  					if _, isAnyValue := valueMatcher.(AnyValue); !isAnyValue {
   534  						otherMatchersAreAllAnyValue = false
   535  						break
   536  					}
   537  				}
   538  				return extractData{
   539  					extractedMatcher:            extractedMatcher,
   540  					otherMatchers:               otherMatchers,
   541  					otherMatchersSig:            otherMatchersSig,
   542  					extractedMatcherIsAnyValue:  extractedMatcherIsAnyValue,
   543  					otherMatchersAreAllAnyValue: otherMatchersAreAllAnyValue,
   544  				}, true
   545  			},
   546  			// Extract a matcher for the low bits only:
   547  			func(pa PerArg) (extractData, bool) {
   548  				split, isSplit := pa[argNum].(splitMatcher)
   549  				if !isSplit {
   550  					return extractData{}, false
   551  				}
   552  				_, extractedMatcherIsAnyValue := split.lowMatcher.(halfAnyValue)
   553  				_, highMatcherIsAnyValue := split.highMatcher.(halfAnyValue)
   554  				extractedMatcher := low32BitsMatch(split.lowMatcher)
   555  				otherMatchers := pa.clone()
   556  				otherMatchers[argNum] = splitMatcher{
   557  					highMatcher: split.highMatcher,
   558  					lowMatcher:  invalidHalfValueMatcher{},
   559  				}
   560  				otherMatchersSig := otherMatchers.signature()
   561  				otherMatchers[argNum] = high32BitsMatch(split.highMatcher)
   562  				otherMatchersAreAllAnyValue := highMatcherIsAnyValue
   563  				for i, valueMatcher := range otherMatchers {
   564  					if i == argNum {
   565  						continue
   566  					}
   567  					if _, isAnyValue := valueMatcher.(AnyValue); !isAnyValue {
   568  						otherMatchersAreAllAnyValue = false
   569  						break
   570  					}
   571  				}
   572  				return extractData{
   573  					extractedMatcher:            extractedMatcher,
   574  					otherMatchers:               otherMatchers,
   575  					otherMatchersSig:            otherMatchersSig,
   576  					extractedMatcherIsAnyValue:  extractedMatcherIsAnyValue,
   577  					otherMatchersAreAllAnyValue: otherMatchersAreAllAnyValue,
   578  				}, true
   579  			},
   580  		} {
   581  			clear(allOtherMatchersSigs)
   582  			clear(argExprToOtherMatchersSigs)
   583  			allExtractable := true
   584  			allArgNumMatchersAreAnyValue := true
   585  			allOtherMatchersAreAnyValue := true
   586  			for _, subRule := range orRule {
   587  				ed, extractable := extractFn(subRule.(PerArg))
   588  				if allExtractable = allExtractable && extractable; !allExtractable {
   589  					break
   590  				}
   591  				allArgNumMatchersAreAnyValue = allArgNumMatchersAreAnyValue && ed.extractedMatcherIsAnyValue
   592  				allOtherMatchersAreAnyValue = allOtherMatchersAreAnyValue && ed.otherMatchersAreAllAnyValue
   593  				repr := ed.extractedMatcher.Repr()
   594  				allOtherMatchersSigs[ed.otherMatchersSig] = struct{}{}
   595  				if _, reprSeen := argExprToOtherMatchersSigs[repr]; !reprSeen {
   596  					argExprToOtherMatchersSigs[repr] = make(map[string]struct{}, len(orRule))
   597  				}
   598  				argExprToOtherMatchersSigs[repr][ed.otherMatchersSig] = struct{}{}
   599  			}
   600  			if !allExtractable || allArgNumMatchersAreAnyValue || allOtherMatchersAreAnyValue {
   601  				// Cannot optimize.
   602  				continue
   603  			}
   604  			// Now check if each possible repr of `argNum` got the same set of
   605  			// signatures for other matchers as `allOtherMatchersSigs`.
   606  			sameOtherMatchers := true
   607  			for _, omsigs := range argExprToOtherMatchersSigs {
   608  				if !sameStringSet(omsigs, allOtherMatchersSigs) {
   609  					sameOtherMatchers = false
   610  					break
   611  				}
   612  			}
   613  			if !sameOtherMatchers {
   614  				continue
   615  			}
   616  			// We can simplify the rule by extracting `argNum` out.
   617  			// Create two copies of `orRule`: One with only `argNum`,
   618  			// and the other one with all arguments except `argNum`.
   619  			// This will likely contain many duplicates but that's OK,
   620  			// they'll be optimized out by `deduplicatePerArgs`.
   621  			argNumMatch := Or(make([]SyscallRule, len(orRule)))
   622  			otherArgsMatch := Or(make([]SyscallRule, len(orRule)))
   623  			for i, subRule := range orRule {
   624  				ed, _ := extractFn(subRule.(PerArg))
   625  				onlyArg := PerArg{AnyValue{}, AnyValue{}, AnyValue{}, AnyValue{}, AnyValue{}, AnyValue{}, AnyValue{}}
   626  				onlyArg[argNum] = ed.extractedMatcher
   627  				argNumMatch[i] = onlyArg
   628  				otherArgsMatch[i] = ed.otherMatchers
   629  			}
   630  			// Attempt to optimize the "other" arguments:
   631  			otherArgsMatchOpt, _ := extractRepeatedMatchers(otherArgsMatch)
   632  			return And{argNumMatch, otherArgsMatchOpt}, true
   633  		}
   634  	}
   635  	return rule, false
   636  }
   637  
// optimizationRun is a stateful struct tracking the state of an optimization
// over a rule. It may not be used concurrently.
type optimizationRun struct {
	// funcs is the list of optimizer functions to run on the rules.
	// Optimizers should be ranked in order of importance, with the most
	// important first.
	// An optimizer will be exhausted before the next one is ever run.
	// Earlier optimizers are re-exhausted if later optimizers cause change:
	// `optimize` goes back to the first function whenever any function
	// reports a modification.
	funcs []ruleOptimizerFunc

	// recurseFuncs is a list of closures that correspond one-to-one to `funcs`
	// and are suitable for passing to `SyscallRule.Recurse`. They are stored
	// here in order to be allocated once, as opposed to escaping if they were
	// specified directly as argument to `SyscallRule.Recurse`.
	// It is populated by `optimize`.
	recurseFuncs []func(subRule SyscallRule) SyscallRule

	// changed tracks whether any change has been made in the current pass.
	// It is updated as the optimizer runs: `apply` sets it from the result
	// of the optimizer function it calls, and `optimize` resets it as it
	// iterates over `funcs`.
	changed bool
}
   658  
   659  // apply recursively applies `opt.funcs[funcIndex]` to the given `rule`.
   660  // It sets `opt.changed` to true if there has been any change.
   661  func (opt *optimizationRun) apply(rule SyscallRule, funcIndex int) SyscallRule {
   662  	rule.Recurse(opt.recurseFuncs[funcIndex])
   663  	if opt.changed {
   664  		return rule
   665  	}
   666  	rule, opt.changed = opt.funcs[funcIndex](rule)
   667  	return rule
   668  }
   669  
   670  // optimize losslessly optimizes a SyscallRule using the `optimizationRun`'s
   671  // optimizer functions.
   672  // It may not be called concurrently.
   673  func (opt *optimizationRun) optimize(rule SyscallRule) SyscallRule {
   674  	opt.recurseFuncs = make([]func(SyscallRule) SyscallRule, len(opt.funcs))
   675  	for i := range opt.funcs {
   676  		funcIndex := i
   677  		opt.recurseFuncs[funcIndex] = func(subRule SyscallRule) SyscallRule {
   678  			return opt.apply(subRule, funcIndex)
   679  		}
   680  	}
   681  	for opt.changed = true; opt.changed; {
   682  		for i := range opt.funcs {
   683  			opt.changed = false
   684  			rule = opt.apply(rule, i)
   685  			if opt.changed {
   686  				break
   687  			}
   688  		}
   689  	}
   690  	return rule
   691  }
   692  
   693  // optimizeSyscallRule losslessly optimizes a `SyscallRule`.
   694  func optimizeSyscallRule(rule SyscallRule) SyscallRule {
   695  	return (&optimizationRun{
   696  		funcs: []ruleOptimizerFunc{
   697  			// Convert Or / And rules with a single rule into that single rule.
   698  			convertSingleCompoundRuleToThatRule[Or],
   699  			convertSingleCompoundRuleToThatRule[And],
   700  
   701  			// Flatten Or/And rules.
   702  			flattenCompoundRules[Or],
   703  			flattenCompoundRules[And],
   704  
   705  			// Handle MatchAll. This is best done after flattening so that we
   706  			// effectively traverse the whole tree to find a MatchAll by just
   707  			// linearly scanning through the first (and only) level of rules.
   708  			convertMatchAllOrXToMatchAll,
   709  			convertMatchAllAndXToX,
   710  
   711  			// Replace all `nil` values in `PerArg` to `AnyValue`, to simplify
   712  			// the `PerArg` matchers below.
   713  			nilInPerArgToAnyValue,
   714  
   715  			// Deduplicate redundant `PerArg`s in Or and And.
   716  			// This must come after `nilInPerArgToAnyValue` because it does not
   717  			// handle the nil case.
   718  			deduplicatePerArgs[Or],
   719  			deduplicatePerArgs[And],
   720  
   721  			// Remove useless `PerArg` matchers.
   722  			// This must come after `nilInPerArgToAnyValue` because it does not
   723  			// handle the nil case.
   724  			convertUselessPerArgToMatchAll,
   725  
   726  			// Replace `ValueMatcher`s that are splittable into their split version.
   727  			// Like `nilInPerArgToAnyValue`, this isn't so much an optimization,
   728  			// but allows the matchers below (which are `splitMatcher`-aware) to not
   729  			// have to carry logic to split the matchers they encounter.
   730  			splitMatchers,
   731  
   732  			// Replace `halfValueMatcher`s with their simplified version.
   733  			simplifyHalfValueMatchers,
   734  
   735  			// Replace `splitMatchers` that match any value with `AnyValue`.
   736  			anySplitMatchersToAnyValue,
   737  
   738  			// Extract repeated argument matchers out of `Or` expressions.
   739  			// This must come after `nilInPerArgToAnyValue` because it does not
   740  			// handle the nil case.
   741  			// This should ideally run late in the list because it does a bunch
   742  			// of memory allocations (even in the non-optimizable case), which
   743  			// should be avoided unless there is nothing else left to optimize.
   744  			extractRepeatedMatchers,
   745  		},
   746  	}).optimize(rule)
   747  }