github.com/psiphon-Labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/common/transforms/transforms.go (about)

     1  /*
     2   * Copyright (c) 2022, Psiphon Inc.
     3   * All rights reserved.
     4   *
     5   * This program is free software: you can redistribute it and/or modify
     6   * it under the terms of the GNU General Public License as published by
     7   * the Free Software Foundation, either version 3 of the License, or
     8   * (at your option) any later version.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  // Package transforms provides a mechanism to define and apply string data
    21  // transformations, with the transformations defined by regular expressions
    22  // to match data to be transformed, and regular expression generators to
    23  // specify additional or replacement data.
    24  package transforms
    25  
    26  import (
    27  	"regexp"
    28  	"regexp/syntax"
    29  
    30  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
    31  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
    32  	regen "github.com/zach-klippenstein/goregen"
    33  )
    34  
const (
	// SCOPE_ANY is the empty scope name. In ScopedSpecNames, the entry
	// under this name is the one Select uses when there is no entry for
	// the specific input scope name.
	SCOPE_ANY = ""
)
    38  
// Spec is a transform spec. A spec is a list of individual transforms to be
// applied in order. Each transform is defined by two elements: a regular
// expression to be matched against the input; and a regular expression
// generator which generates new data. Subgroups from the regular expression
// may be specified in the regular expression generator, and are populated
// with the subgroup match, and in this way parts of the original matching
// data may be retained in the transformed data.
//
// For example, with the transform [2]string{"([a-b])", "\\$\\{1\\}c"}, each
// occurrence of the character 'a' or 'b' will be transformed into that same
// character with a single character 'c' appended.
type Spec [][2]string
    51  
// Specs is a set of named Specs. The map key is the Spec name, which is the
// name referenced by the entries in ScopedSpecNames.
type Specs map[string]Spec
    54  
    55  // Validate checks that all entries in a set of Specs is well-formed, with
    56  // valid regular expressions.
    57  func (specs Specs) Validate() error {
    58  	seed, err := prng.NewSeed()
    59  	if err != nil {
    60  		return errors.Trace(err)
    61  	}
    62  	for _, spec := range specs {
    63  		// Call Apply to compile/validate the regular expressions and generators.
    64  		_, err := spec.Apply(seed, "")
    65  		if err != nil {
    66  			return errors.Trace(err)
    67  		}
    68  	}
    69  
    70  	return nil
    71  }
    72  
// ScopedSpecNames groups a list of Specs, referenced by their Spec name, with
// the group defined by a scope. The map key is the scope name and the map
// value is the list of Spec names in that scope. The meaning of scope depends
// on the context in which the transforms are to be used.
//
// For example, in the context of DNS request transforms, the scope is the DNS
// server for which a specific group of transforms is known to be effective.
//
// The scope name "" is SCOPE_ANY, and matches any input scope name when there
// is no specific entry for that scope name in ScopedSpecNames.
type ScopedSpecNames map[string][]string
    83  
    84  // Validate checks that the ScopedSpecNames is well-formed and referenced Spec
    85  // names are defined in the corresponding input specs.
    86  func (scopedSpecs ScopedSpecNames) Validate(specs Specs) error {
    87  
    88  	for _, scoped := range scopedSpecs {
    89  		for _, specName := range scoped {
    90  			_, ok := specs[specName]
    91  			if !ok {
    92  				return errors.Tracef("undefined spec name: %s", specName)
    93  			}
    94  		}
    95  	}
    96  
    97  	return nil
    98  }
    99  
   100  // Select picks a Spec from Specs based on the input scope and scoping rules.
   101  // If the input scope name is defined in scopedSpecs, that match takes
   102  // precedence. Otherwise SCOPE_ANY is selected, when present.
   103  //
   104  // After the scope is resolved, Select randomly selects from the matching Spec
   105  // list.
   106  //
   107  // Select will return "", nil when no selection can be made.
   108  func (specs Specs) Select(scope string, scopedSpecs ScopedSpecNames) (string, Spec) {
   109  
   110  	if scope != SCOPE_ANY {
   111  		scoped, ok := scopedSpecs[scope]
   112  		if ok {
   113  			// If the specific scope is defined but empty, this means select
   114  			// nothing -- don't fall through to SCOPE_ANY.
   115  			if len(scoped) == 0 {
   116  				return "", nil
   117  			}
   118  
   119  			specName := scoped[prng.Intn(len(scoped))]
   120  			spec, ok := specs[specName]
   121  			if !ok {
   122  				// specName is not found in specs, which should not happen if
   123  				// Validate passes; select nothing in this case.
   124  				return "", nil
   125  			}
   126  			return specName, spec
   127  		}
   128  		// Fall through to SCOPE_ANY.
   129  	}
   130  
   131  	anyScope, ok := scopedSpecs[SCOPE_ANY]
   132  	if !ok || len(anyScope) == 0 {
   133  		// No SCOPE_ANY, or SCOPE_ANY is an empty list.
   134  		return "", nil
   135  	}
   136  
   137  	specName := anyScope[prng.Intn(len(anyScope))]
   138  	spec, ok := specs[specName]
   139  	if !ok {
   140  		return "", nil
   141  	}
   142  	return specName, spec
   143  }
   144  
   145  // Apply applies the Spec to the input string, producing the output string.
   146  //
   147  // The input seed is used for all random generation. The same seed can be
   148  // supplied to produce the same output, for replay.
   149  func (spec Spec) Apply(seed *prng.Seed, input string) (string, error) {
   150  
   151  	// TODO: the compiled regexp and regen could be cached, but the seed is an
   152  	// issue with caching the regen.
   153  
   154  	value := input
   155  	for _, transform := range spec {
   156  
   157  		args := &regen.GeneratorArgs{
   158  			RngSource: prng.NewPRNGWithSeed(seed),
   159  			Flags:     syntax.OneLine | syntax.NonGreedy,
   160  		}
   161  		rg, err := regen.NewGenerator(transform[1], args)
   162  		if err != nil {
   163  			panic(err.Error())
   164  		}
   165  		replacement := rg.Generate()
   166  
   167  		re := regexp.MustCompile(transform[0])
   168  		value = re.ReplaceAllString(value, replacement)
   169  	}
   170  	return value, nil
   171  }