// github.com/Psiphon-Labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/common/transforms/transforms.go

/*
 * Copyright (c) 2022, Psiphon Inc.
 * All rights reserved.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */

// Package transforms provides a mechanism to define and apply string data
// transformations, with the transformations defined by regular expressions
// to match data to be transformed, and regular expression generators to
// specify additional or replacement data.
package transforms

import (
	"regexp"
	"regexp/syntax"

	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
	regen "github.com/zach-klippenstein/goregen"
)

const (
	// SCOPE_ANY is the empty scope name. In a ScopedSpecNames it is the
	// entry consulted when there is no entry for a specific scope name.
	SCOPE_ANY = ""
)

// Spec is a transform spec. A spec is a list of individual transforms to be
// applied in order. Each transform is defined by two elements: a regular
// expression to be matched against the input; and a regular expression
// generator which generates new data. Subgroups from the regular expression
// may be specified in the regular expression generator, and are populated
// with the subgroup match, and in this way parts of the original matching
// data may be retained in the transformed data.
46 // 47 // For example, with the transform [2]string{"([a-b])", "\\$\\ 48 // {1\\}"c}, substrings consisting of the characters 'a' and 'b' will be 49 // transformed into the same substring with a single character 'c' appended. 50 type Spec [][2]string 51 52 // Specs is a set of named Specs. 53 type Specs map[string]Spec 54 55 // Validate checks that all entries in a set of Specs is well-formed, with 56 // valid regular expressions. 57 func (specs Specs) Validate() error { 58 seed, err := prng.NewSeed() 59 if err != nil { 60 return errors.Trace(err) 61 } 62 for _, spec := range specs { 63 // Call Apply to compile/validate the regular expressions and generators. 64 _, err := spec.Apply(seed, "") 65 if err != nil { 66 return errors.Trace(err) 67 } 68 } 69 70 return nil 71 } 72 73 // ScopedSpecNames groups a list of Specs, referenced by their Spec name, with 74 // the group defined by a scope. The meaning of scope depends on the context 75 // in which the transforms are to be used. 76 // 77 // For example, in the context of DNS request transforms, the scope is the DNS 78 // server for which a specific group of transforms is known to be effective. 79 // 80 // The scope name "" is SCOPE_ANY, and matches any input scope name when there 81 // is no specific entry for that scope name in ScopedSpecNames. 82 type ScopedSpecNames map[string][]string 83 84 // Validate checks that the ScopedSpecNames is well-formed and referenced Spec 85 // names are defined in the corresponding input specs. 86 func (scopedSpecs ScopedSpecNames) Validate(specs Specs) error { 87 88 for _, scoped := range scopedSpecs { 89 for _, specName := range scoped { 90 _, ok := specs[specName] 91 if !ok { 92 return errors.Tracef("undefined spec name: %s", specName) 93 } 94 } 95 } 96 97 return nil 98 } 99 100 // Select picks a Spec from Specs based on the input scope and scoping rules. 101 // If the input scope name is defined in scopedSpecs, that match takes 102 // precedence. 
Otherwise SCOPE_ANY is selected, when present. 103 // 104 // After the scope is resolved, Select randomly selects from the matching Spec 105 // list. 106 // 107 // Select will return "", nil when no selection can be made. 108 func (specs Specs) Select(scope string, scopedSpecs ScopedSpecNames) (string, Spec) { 109 110 if scope != SCOPE_ANY { 111 scoped, ok := scopedSpecs[scope] 112 if ok { 113 // If the specific scope is defined but empty, this means select 114 // nothing -- don't fall through to SCOPE_ANY. 115 if len(scoped) == 0 { 116 return "", nil 117 } 118 119 specName := scoped[prng.Intn(len(scoped))] 120 spec, ok := specs[specName] 121 if !ok { 122 // specName is not found in specs, which should not happen if 123 // Validate passes; select nothing in this case. 124 return "", nil 125 } 126 return specName, spec 127 } 128 // Fall through to SCOPE_ANY. 129 } 130 131 anyScope, ok := scopedSpecs[SCOPE_ANY] 132 if !ok || len(anyScope) == 0 { 133 // No SCOPE_ANY, or SCOPE_ANY is an empty list. 134 return "", nil 135 } 136 137 specName := anyScope[prng.Intn(len(anyScope))] 138 spec, ok := specs[specName] 139 if !ok { 140 return "", nil 141 } 142 return specName, spec 143 } 144 145 // Apply applies the Spec to the input string, producing the output string. 146 // 147 // The input seed is used for all random generation. The same seed can be 148 // supplied to produce the same output, for replay. 149 func (spec Spec) Apply(seed *prng.Seed, input string) (string, error) { 150 151 // TODO: the compiled regexp and regen could be cached, but the seed is an 152 // issue with caching the regen. 
153 154 value := input 155 for _, transform := range spec { 156 157 args := ®en.GeneratorArgs{ 158 RngSource: prng.NewPRNGWithSeed(seed), 159 Flags: syntax.OneLine | syntax.NonGreedy, 160 } 161 rg, err := regen.NewGenerator(transform[1], args) 162 if err != nil { 163 panic(err.Error()) 164 } 165 replacement := rg.Generate() 166 167 re := regexp.MustCompile(transform[0]) 168 value = re.ReplaceAllString(value, replacement) 169 } 170 return value, nil 171 }