github.com/psiphon-labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/common/obfuscator/history.go (about)

     1  /*
     2   * Copyright (c) 2020, Psiphon Inc.
     3   * All rights reserved.
     4   *
     5   * This program is free software: you can redistribute it and/or modify
     6   * it under the terms of the GNU General Public License as published by
     7   * the Free Software Foundation, either version 3 of the License, or
     8   * (at your option) any later version.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  package obfuscator
    21  
    22  import (
    23  	"encoding/hex"
    24  	"time"
    25  
    26  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
    27  	lrucache "github.com/cognusion/go-cache-lru"
    28  )
    29  
// Default limits used by NewSeedHistory for any SeedHistoryConfig field that
// is left at its zero value, or for all fields when no config is supplied.
const (
	// HISTORY_SEED_TTL is the default expiration for entries in the seed
	// history cache.
	HISTORY_SEED_TTL              = 24 * time.Hour
	// HISTORY_SEED_MAX_ENTRIES is the default maximum number of entries in the
	// seed history cache.
	HISTORY_SEED_MAX_ENTRIES      = 1000000
	// HISTORY_CLIENT_IP_TTL is the default expiration for entries in the
	// seed-to-client-IP cache.
	HISTORY_CLIENT_IP_TTL         = 2 * time.Minute
	// HISTORY_CLIENT_IP_MAX_ENTRIES is the default maximum number of entries in
	// the seed-to-client-IP cache.
	HISTORY_CLIENT_IP_MAX_ENTRIES = 10000
)
    36  
// SeedHistory maintains a history of recently observed obfuscation seed values.
// This history is used to identify duplicate seed messages.
//
// As a heuristic to exclude expected duplicates, due to, for example, meek
// retries, the source client IP is retained for comparison for a short
// duration -- long enough to cover meek retries without retaining client
// IPs in memory long past a client connection lifetime.
type SeedHistory struct {
	// seedTTL is the seed TTL selected in NewSeedHistory. AddNewWithTTL uses it
	// to estimate the previous observation time when a duplicate seed's entry
	// can no longer be read back from seedToTime.
	seedTTL        time.Duration
	// seedToTime maps a seed (keyed by its raw bytes as a string) to the
	// time.Time at which it was added.
	seedToTime     *lrucache.Cache
	// seedToClientIP maps a seed (same key) to the client IP string that was
	// supplied when the seed was added.
	seedToClientIP *lrucache.Cache
}
    49  
// SeedHistoryConfig specifies optional overrides for the SeedHistory created
// by NewSeedHistory. Any field left at its zero value selects the
// corresponding HISTORY_* default.
type SeedHistoryConfig struct {
	// SeedTTL is the expiration for seed history entries.
	SeedTTL            time.Duration
	// SeedMaxEntries is the maximum number of seed history entries.
	SeedMaxEntries     int
	// ClientIPTTL is the expiration for retained client IP entries.
	ClientIPTTL        time.Duration
	// ClientIPMaxEntries is the maximum number of retained client IP entries.
	ClientIPMaxEntries int
}
    56  
    57  // NewSeedHistory creates a new SeedHistory. Config is optional.
    58  func NewSeedHistory(config *SeedHistoryConfig) *SeedHistory {
    59  
    60  	// Default TTL and MAX_ENTRIES are tuned to provide an effective history size
    61  	// while bounding the amount of memory that will be used. While a
    62  	// probabilistic data structure such as a Bloom filter would provide a
    63  	// smaller memory footprint, we wish to avoid the associated risk of false
    64  	// positives.
    65  	//
    66  	// Limitation: As go-cache-lru does not currently support iterating over all
    67  	// items (without making a full copy of the enture cache), the client IP with
    68  	// shorter TTL is stored in a second, smaller cache instead of the same cache
    69  	// with a a pruner. This incurs some additional overhead, as the seed key is
    70  	// stored twice, once in each cache.
    71  
    72  	useConfig := SeedHistoryConfig{
    73  		SeedTTL:            HISTORY_SEED_TTL,
    74  		SeedMaxEntries:     HISTORY_SEED_MAX_ENTRIES,
    75  		ClientIPTTL:        HISTORY_CLIENT_IP_TTL,
    76  		ClientIPMaxEntries: HISTORY_CLIENT_IP_MAX_ENTRIES,
    77  	}
    78  
    79  	if config != nil {
    80  		if config.SeedTTL != 0 {
    81  			useConfig.SeedTTL = config.SeedTTL
    82  		}
    83  		if config.SeedMaxEntries != 0 {
    84  			useConfig.SeedMaxEntries = config.SeedMaxEntries
    85  		}
    86  		if config.ClientIPTTL != 0 {
    87  			useConfig.ClientIPTTL = config.ClientIPTTL
    88  		}
    89  		if config.ClientIPMaxEntries != 0 {
    90  			useConfig.ClientIPMaxEntries = config.ClientIPMaxEntries
    91  		}
    92  	}
    93  
    94  	return &SeedHistory{
    95  		seedTTL: useConfig.SeedTTL,
    96  
    97  		seedToTime: lrucache.NewWithLRU(
    98  			useConfig.SeedTTL,
    99  			1*time.Minute,
   100  			useConfig.SeedMaxEntries),
   101  
   102  		seedToClientIP: lrucache.NewWithLRU(
   103  			useConfig.ClientIPTTL,
   104  			30*time.Second,
   105  			useConfig.ClientIPMaxEntries),
   106  	}
   107  }
   108  
   109  // AddNew calls AddNewWithTTL using the SeedTTL that was specified in the
   110  // SeedHistoryConfig.
   111  func (h *SeedHistory) AddNew(
   112  	strictMode bool,
   113  	clientIP string,
   114  	seedType string,
   115  	seed []byte) (bool, *common.LogFields) {
   116  
   117  	return h.AddNewWithTTL(
   118  		strictMode, clientIP, seedType, seed, lrucache.DefaultExpiration)
   119  }
   120  
   121  // AddNewWithTTL adds a new seed value to the history, set to expire with the
   122  // specified TTL. If the seed value is already in the history, and an expected
   123  // case such as a meek retry is ruled out (or strictMode is on), AddNew
   124  // returns false.
   125  //
   126  // When a duplicate seed is found, a common.LogFields instance is returned,
   127  // populated with event data. Log fields may be returned in either the false
   128  // or true case.
   129  func (h *SeedHistory) AddNewWithTTL(
   130  	strictMode bool,
   131  	clientIP string,
   132  	seedType string,
   133  	seed []byte,
   134  	TTL time.Duration) (bool, *common.LogFields) {
   135  
   136  	key := string(seed)
   137  
   138  	// Limitation: go-cache-lru does not currently support atomically setting if
   139  	// a key is unset and otherwise _returning the corresponding value_. There is
   140  	// an unlikely possibility that this Add and the following Get don't see the
   141  	// same existing key/value state.
   142  
   143  	now := time.Now()
   144  
   145  	if h.seedToTime.Add(key, now, TTL) == nil {
   146  		// Seed was not already in cache
   147  		// TODO: if TTL < SeedHistory.ClientIPTTL, use the shorter TTL here
   148  		h.seedToClientIP.Set(key, clientIP, lrucache.DefaultExpiration)
   149  		return true, nil
   150  	}
   151  
   152  	previousTime, ok := h.seedToTime.Get(key)
   153  	if !ok {
   154  		// Inconsistent Add/Get state: assume cache item just expired.
   155  		previousTime = now.Add(-h.seedTTL)
   156  	}
   157  
   158  	logFields := common.LogFields{
   159  		"duplicate_seed":            hex.EncodeToString(seed),
   160  		"duplicate_seed_type":       seedType,
   161  		"duplicate_elapsed_time_ms": int64(time.Since(previousTime.(time.Time)) / time.Millisecond),
   162  	}
   163  
   164  	previousClientIP, ok := h.seedToClientIP.Get(key)
   165  	if ok {
   166  		if clientIP == previousClientIP.(string) {
   167  			logFields["duplicate_client_ip"] = "equal"
   168  			return !strictMode, &logFields
   169  		} else {
   170  			logFields["duplicate_client_ip"] = "unequal"
   171  			return false, &logFields
   172  		}
   173  	}
   174  
   175  	logFields["duplicate_client_ip"] = "unknown"
   176  	return false, &logFields
   177  }