github.com/psiphon-labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/server/psinet/psinet.go (about)

     1  /*
     2   * Copyright (c) 2016, Psiphon Inc.
     3   * All rights reserved.
     4   *
     5   * This program is free software: you can redistribute it and/or modify
     6   * it under the terms of the GNU General Public License as published by
     7   * the Free Software Foundation, either version 3 of the License, or
     8   * (at your option) any later version.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  // Package psinet implements psinet database services. The psinet database is a
    21  // JSON-format file containing information about the Psiphon network, including
    22  // sponsors, home pages, stats regexes, available upgrades, and other servers for
    23  // discovery. This package also implements the Psiphon discovery algorithm.
    24  package psinet
    25  
    26  import (
    27  	"crypto/md5"
    28  	"encoding/json"
    29  	"math"
    30  	"math/rand"
    31  	"strconv"
    32  	"strings"
    33  	"time"
    34  
    35  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
    36  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
    37  )
    38  
    39  const (
    40  	MAX_DATABASE_AGE_FOR_SERVER_ENTRY_VALIDITY = 48 * time.Hour
    41  )
    42  
    43  // Database serves Psiphon API data requests. It's safe for
    44  // concurrent usage. The Reload function supports hot reloading
    45  // of Psiphon network data while the server is running.
    46  type Database struct {
    47  	common.ReloadableFile
    48  
    49  	Sponsors               map[string]*Sponsor        `json:"sponsors"`
    50  	Versions               map[string][]ClientVersion `json:"client_versions"`
    51  	DefaultSponsorID       string                     `json:"default_sponsor_id"`
    52  	DefaultAlertActionURLs map[string][]string        `json:"default_alert_action_urls"`
    53  	ValidServerEntryTags   map[string]bool            `json:"valid_server_entry_tags"`
    54  	DiscoveryServers       []*DiscoveryServer         `json:"discovery_servers"`
    55  
    56  	fileModTime time.Time
    57  }
    58  
    59  type DiscoveryServer struct {
    60  	DiscoveryDateRange []time.Time `json:"discovery_date_range"`
    61  	EncodedServerEntry string      `json:"encoded_server_entry"`
    62  }
    63  
    64  type Sponsor struct {
    65  	ID                  string                `json:"id"`
    66  	HomePages           map[string][]HomePage `json:"home_pages"`
    67  	MobileHomePages     map[string][]HomePage `json:"mobile_home_pages"`
    68  	AlertActionURLs     map[string][]string   `json:"alert_action_urls"`
    69  	HttpsRequestRegexes []HttpsRequestRegex   `json:"https_request_regexes"`
    70  
    71  	domainBytesChecksum []byte `json:"-"`
    72  }
    73  
    74  type ClientVersion struct {
    75  	Version string `json:"version"`
    76  }
    77  
    78  type HomePage struct {
    79  	Region string `json:"region"`
    80  	URL    string `json:"url"`
    81  }
    82  
    83  type HttpsRequestRegex struct {
    84  	Regex   string `json:"regex"`
    85  	Replace string `json:"replace"`
    86  }
    87  
    88  // NewDatabase initializes a Database, calling Reload on the specified
    89  // filename.
    90  func NewDatabase(filename string) (*Database, error) {
    91  
    92  	database := &Database{}
    93  
    94  	database.ReloadableFile = common.NewReloadableFile(
    95  		filename,
    96  		true,
    97  		func(fileContent []byte, fileModTime time.Time) error {
    98  			var newDatabase *Database
    99  			err := json.Unmarshal(fileContent, &newDatabase)
   100  			if err != nil {
   101  				return errors.Trace(err)
   102  			}
   103  			// Note: an unmarshal directly into &database would fail
   104  			// to reset to zero value fields not present in the JSON.
   105  			database.Sponsors = newDatabase.Sponsors
   106  			database.Versions = newDatabase.Versions
   107  			database.DefaultSponsorID = newDatabase.DefaultSponsorID
   108  			database.DefaultAlertActionURLs = newDatabase.DefaultAlertActionURLs
   109  			database.ValidServerEntryTags = newDatabase.ValidServerEntryTags
   110  			database.DiscoveryServers = newDatabase.DiscoveryServers
   111  			database.fileModTime = fileModTime
   112  
   113  			for _, sponsor := range database.Sponsors {
   114  
   115  				value, err := json.Marshal(sponsor.HttpsRequestRegexes)
   116  				if err != nil {
   117  					return errors.Trace(err)
   118  				}
   119  
   120  				// MD5 hash is used solely as a data checksum and not for any
   121  				// security purpose.
   122  				checksum := md5.Sum(value)
   123  				sponsor.domainBytesChecksum = checksum[:]
   124  			}
   125  
   126  			return nil
   127  		})
   128  
   129  	_, err := database.Reload()
   130  	if err != nil {
   131  		return nil, errors.Trace(err)
   132  	}
   133  
   134  	return database, nil
   135  }
   136  
   137  // GetRandomizedHomepages returns a randomly ordered list of home pages
   138  // for the specified sponsor, region, and platform.
   139  func (db *Database) GetRandomizedHomepages(
   140  	sponsorID, clientRegion, clientASN string, isMobilePlatform bool) []string {
   141  
   142  	homepages := db.GetHomepages(sponsorID, clientRegion, clientASN, isMobilePlatform)
   143  	if len(homepages) > 1 {
   144  		shuffledHomepages := make([]string, len(homepages))
   145  		perm := rand.Perm(len(homepages))
   146  		for i, v := range perm {
   147  			shuffledHomepages[v] = homepages[i]
   148  		}
   149  		return shuffledHomepages
   150  	}
   151  	return homepages
   152  }
   153  
   154  // GetHomepages returns a list of home pages for the specified sponsor,
   155  // region, and platform.
   156  func (db *Database) GetHomepages(
   157  	sponsorID, clientRegion, clientASN string, isMobilePlatform bool) []string {
   158  
   159  	db.ReloadableFile.RLock()
   160  	defer db.ReloadableFile.RUnlock()
   161  
   162  	sponsorHomePages := make([]string, 0)
   163  
   164  	// Sponsor id does not exist: fail gracefully
   165  	sponsor, ok := db.Sponsors[sponsorID]
   166  	if !ok {
   167  		sponsor, ok = db.Sponsors[db.DefaultSponsorID]
   168  		if !ok {
   169  			return sponsorHomePages
   170  		}
   171  	}
   172  
   173  	if sponsor == nil {
   174  		return sponsorHomePages
   175  	}
   176  
   177  	homePages := sponsor.HomePages
   178  
   179  	if isMobilePlatform {
   180  		if len(sponsor.MobileHomePages) > 0 {
   181  			homePages = sponsor.MobileHomePages
   182  		}
   183  	}
   184  
   185  	// Case: lookup succeeded and corresponding homepages found for region
   186  	homePagesByRegion, ok := homePages[clientRegion]
   187  	if ok {
   188  		for _, homePage := range homePagesByRegion {
   189  			sponsorHomePages = append(
   190  				sponsorHomePages, homepageQueryParameterSubstitution(homePage.URL, clientRegion, clientASN))
   191  		}
   192  	}
   193  
   194  	// Case: lookup failed or no corresponding homepages found for region --> use default
   195  	if len(sponsorHomePages) == 0 {
   196  		defaultHomePages, ok := homePages["None"]
   197  		if ok {
   198  			for _, homePage := range defaultHomePages {
   199  				// client_region query parameter substitution
   200  				sponsorHomePages = append(
   201  					sponsorHomePages, homepageQueryParameterSubstitution(homePage.URL, clientRegion, clientASN))
   202  			}
   203  		}
   204  	}
   205  
   206  	return sponsorHomePages
   207  }
   208  
   209  func homepageQueryParameterSubstitution(
   210  	url, clientRegion, clientASN string) string {
   211  
   212  	return strings.Replace(
   213  		strings.Replace(url, "client_region=XX", "client_region="+clientRegion, 1),
   214  		"client_asn=XX", "client_asn="+clientASN, 1)
   215  }
   216  
   217  // GetAlertActionURLs returns a list of alert action URLs for the specified
   218  // alert reason and sponsor.
   219  func (db *Database) GetAlertActionURLs(
   220  	alertReason, sponsorID, clientRegion, clientASN string) []string {
   221  
   222  	db.ReloadableFile.RLock()
   223  	defer db.ReloadableFile.RUnlock()
   224  
   225  	// Prefer URLs from the Sponsor.AlertActionURLs. When there are no sponsor
   226  	// URLs, then select from Database.DefaultAlertActionURLs.
   227  
   228  	actionURLs := []string{}
   229  
   230  	sponsor := db.Sponsors[sponsorID]
   231  	if sponsor != nil {
   232  		for _, URL := range sponsor.AlertActionURLs[alertReason] {
   233  			actionURLs = append(
   234  				actionURLs, homepageQueryParameterSubstitution(URL, clientRegion, clientASN))
   235  		}
   236  	}
   237  
   238  	if len(actionURLs) == 0 {
   239  		for _, URL := range db.DefaultAlertActionURLs[alertReason] {
   240  			actionURLs = append(
   241  				actionURLs, homepageQueryParameterSubstitution(URL, clientRegion, clientASN))
   242  		}
   243  	}
   244  
   245  	return actionURLs
   246  }
   247  
   248  // GetUpgradeClientVersion returns a new client version when an upgrade is
   249  // indicated for the specified client current version. The result is "" when
   250  // no upgrade is available. Caller should normalize clientPlatform.
   251  func (db *Database) GetUpgradeClientVersion(clientVersion, clientPlatform string) string {
   252  	db.ReloadableFile.RLock()
   253  	defer db.ReloadableFile.RUnlock()
   254  
   255  	// Check lastest version number against client version number
   256  
   257  	clientVersions, ok := db.Versions[clientPlatform]
   258  	if !ok {
   259  		return ""
   260  	}
   261  
   262  	if len(clientVersions) == 0 {
   263  		return ""
   264  	}
   265  
   266  	// NOTE: Assumes versions list is in ascending version order
   267  	lastVersion := clientVersions[len(clientVersions)-1].Version
   268  
   269  	lastVersionInt, err := strconv.Atoi(lastVersion)
   270  	if err != nil {
   271  		return ""
   272  	}
   273  	clientVersionInt, err := strconv.Atoi(clientVersion)
   274  	if err != nil {
   275  		return ""
   276  	}
   277  
   278  	// Return latest version if upgrade needed
   279  	if lastVersionInt > clientVersionInt {
   280  		return lastVersion
   281  	}
   282  
   283  	return ""
   284  }
   285  
   286  // GetHttpsRequestRegexes returns bytes transferred stats regexes and the
   287  // associated checksum for the specified sponsor. The checksum may be nil.
   288  func (db *Database) GetHttpsRequestRegexes(sponsorID string) ([]map[string]string, []byte) {
   289  	db.ReloadableFile.RLock()
   290  	defer db.ReloadableFile.RUnlock()
   291  
   292  	regexes := make([]map[string]string, 0)
   293  
   294  	sponsor, ok := db.Sponsors[sponsorID]
   295  	if !ok {
   296  		sponsor = db.Sponsors[db.DefaultSponsorID]
   297  	}
   298  
   299  	if sponsor == nil {
   300  		return regexes, nil
   301  	}
   302  
   303  	// If neither sponsorID or DefaultSponsorID were found, sponsor will be the
   304  	// zero value of the map, an empty Sponsor struct.
   305  	for _, sponsorRegex := range sponsor.HttpsRequestRegexes {
   306  		regex := make(map[string]string)
   307  		regex["replace"] = sponsorRegex.Replace
   308  		regex["regex"] = sponsorRegex.Regex
   309  		regexes = append(regexes, regex)
   310  	}
   311  
   312  	return regexes, sponsor.domainBytesChecksum
   313  }
   314  
   315  // GetDomainBytesChecksum returns the bytes transferred stats regexes
   316  // checksum for the specified sponsor. The checksum may be nil.
   317  func (db *Database) GetDomainBytesChecksum(sponsorID string) []byte {
   318  	db.ReloadableFile.RLock()
   319  	defer db.ReloadableFile.RUnlock()
   320  
   321  	sponsor, ok := db.Sponsors[sponsorID]
   322  	if !ok {
   323  		sponsor = db.Sponsors[db.DefaultSponsorID]
   324  	}
   325  
   326  	if sponsor == nil {
   327  		return nil
   328  	}
   329  
   330  	return sponsor.domainBytesChecksum
   331  }
   332  
   333  // DiscoverServers selects new encoded server entries to be "discovered" by
   334  // the client, using the discoveryValue -- a function of the client's IP
   335  // address -- as the input into the discovery algorithm.
   336  func (db *Database) DiscoverServers(discoveryValue int) []string {
   337  	db.ReloadableFile.RLock()
   338  	defer db.ReloadableFile.RUnlock()
   339  
   340  	var servers []*DiscoveryServer
   341  
   342  	discoveryDate := time.Now().UTC()
   343  	candidateServers := make([]*DiscoveryServer, 0)
   344  
   345  	for _, server := range db.DiscoveryServers {
   346  		// All servers that are discoverable on this day are eligible for discovery
   347  		if len(server.DiscoveryDateRange) == 2 &&
   348  			discoveryDate.After(server.DiscoveryDateRange[0]) &&
   349  			discoveryDate.Before(server.DiscoveryDateRange[1]) {
   350  
   351  			candidateServers = append(candidateServers, server)
   352  		}
   353  	}
   354  
   355  	timeInSeconds := int(discoveryDate.Unix())
   356  	servers = selectServers(candidateServers, timeInSeconds, discoveryValue)
   357  
   358  	encodedServerEntries := make([]string, 0)
   359  
   360  	for _, server := range servers {
   361  		encodedServerEntries = append(encodedServerEntries, server.EncodedServerEntry)
   362  	}
   363  
   364  	return encodedServerEntries
   365  }
   366  
   367  // Combine client IP address and time-of-day strategies to give out different
   368  // discovery servers to different clients. The aim is to achieve defense against
   369  // enumerability. We also want to achieve a degree of load balancing clients
   370  // and these strategies are expected to have reasonably random distribution,
   371  // even for a cluster of users coming from the same network.
   372  //
   373  // We only select one server: multiple results makes enumeration easier; the
   374  // strategies have a built-in load balancing effect; and date range discoverability
   375  // means a client will actually learn more servers later even if they happen to
   376  // always pick the same result at this point.
   377  //
   378  // This is a blended strategy: as long as there are enough servers to pick from,
   379  // both aspects determine which server is selected. IP address is given the
   380  // priority: if there are only a couple of servers, for example, IP address alone
   381  // determines the outcome.
   382  func selectServers(
   383  	servers []*DiscoveryServer, timeInSeconds, discoveryValue int) []*DiscoveryServer {
   384  
   385  	TIME_GRANULARITY := 3600
   386  
   387  	if len(servers) == 0 {
   388  		return nil
   389  	}
   390  
   391  	// Time truncated to an hour
   392  	timeStrategyValue := timeInSeconds / TIME_GRANULARITY
   393  
   394  	// Divide servers into buckets. The bucket count is chosen such that the number
   395  	// of buckets and the number of items in each bucket are close (using sqrt).
   396  	// IP address selects the bucket, time selects the item in the bucket.
   397  
   398  	// NOTE: this code assumes that the range of possible timeStrategyValues
   399  	// and discoveryValues are sufficient to index to all bucket items.
   400  
   401  	bucketCount := calculateBucketCount(len(servers))
   402  
   403  	buckets := bucketizeServerList(servers, bucketCount)
   404  
   405  	if len(buckets) == 0 {
   406  		return nil
   407  	}
   408  
   409  	bucket := buckets[discoveryValue%len(buckets)]
   410  
   411  	if len(bucket) == 0 {
   412  		return nil
   413  	}
   414  
   415  	server := bucket[timeStrategyValue%len(bucket)]
   416  
   417  	serverList := make([]*DiscoveryServer, 1)
   418  	serverList[0] = server
   419  
   420  	return serverList
   421  }
   422  
   423  // Number of buckets such that first strategy picks among about the same number
   424  // of choices as the second strategy. Gives an edge to the "outer" strategy.
   425  func calculateBucketCount(length int) int {
   426  	return int(math.Ceil(math.Sqrt(float64(length))))
   427  }
   428  
   429  // bucketizeServerList creates nearly equal sized slices of the input list.
   430  func bucketizeServerList(servers []*DiscoveryServer, bucketCount int) [][]*DiscoveryServer {
   431  
   432  	// This code creates the same partitions as legacy servers:
   433  	// https://github.com/Psiphon-Inc/psiphon-automation/blob/685f91a85bcdb33a75a200d936eadcb0686eadd7/Automation/psi_ops_discovery.py
   434  	//
   435  	// Both use the same algorithm from:
   436  	// http://stackoverflow.com/questions/2659900/python-slicing-a-list-into-n-nearly-equal-length-partitions
   437  
   438  	// TODO: this partition is constant for fixed Database content, so it could
   439  	// be done once and cached in the Database ReloadableFile reloadAction.
   440  
   441  	buckets := make([][]*DiscoveryServer, bucketCount)
   442  
   443  	division := float64(len(servers)) / float64(bucketCount)
   444  
   445  	for i := 0; i < bucketCount; i++ {
   446  		start := int((division * float64(i)) + 0.5)
   447  		end := int((division * (float64(i) + 1)) + 0.5)
   448  		buckets[i] = servers[start:end]
   449  	}
   450  
   451  	return buckets
   452  }
   453  
   454  // IsValidServerEntryTag checks if the specified server entry tag is valid.
   455  func (db *Database) IsValidServerEntryTag(serverEntryTag string) bool {
   456  	db.ReloadableFile.RLock()
   457  	defer db.ReloadableFile.RUnlock()
   458  
   459  	// Default to "valid" if the valid list is unexpectedly empty or stale. This
   460  	// helps prevent premature client-side server-entry pruning when there is an
   461  	// issue with updating the database.
   462  
   463  	if len(db.ValidServerEntryTags) == 0 ||
   464  		db.fileModTime.Add(MAX_DATABASE_AGE_FOR_SERVER_ENTRY_VALIDITY).Before(time.Now()) {
   465  		return true
   466  	}
   467  
   468  	// The tag must be in the map and have the value "true".
   469  	return db.ValidServerEntryTags[serverEntryTag]
   470  }