/*
 * Copyright (c) 2016, Psiphon Inc.
 * All rights reserved.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

// Package psinet implements psinet database services. The psinet database is a
// JSON-format file containing information about the Psiphon network, including
// sponsors, home pages, stats regexes, available upgrades, and other servers for
// discovery. This package also implements the Psiphon discovery algorithm.
package psinet

import (
	"crypto/md5"
	"encoding/json"
	"math"
	"math/rand"
	"strconv"
	"strings"
	"time"

	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
)

const (
	// MAX_DATABASE_AGE_FOR_SERVER_ENTRY_VALIDITY is the maximum age of the
	// database file, measured from its modification time, for which
	// IsValidServerEntryTag still consults the valid server entry tag list.
	// Once the file is older than this, every tag is reported as valid.
	MAX_DATABASE_AGE_FOR_SERVER_ENTRY_VALIDITY = 48 * time.Hour
)

// Database serves Psiphon API data requests. It's safe for
// concurrent usage. The Reload function supports hot reloading
// of Psiphon network data while the server is running.
46 type Database struct { 47 common.ReloadableFile 48 49 Sponsors map[string]*Sponsor `json:"sponsors"` 50 Versions map[string][]ClientVersion `json:"client_versions"` 51 DefaultSponsorID string `json:"default_sponsor_id"` 52 DefaultAlertActionURLs map[string][]string `json:"default_alert_action_urls"` 53 ValidServerEntryTags map[string]bool `json:"valid_server_entry_tags"` 54 DiscoveryServers []*DiscoveryServer `json:"discovery_servers"` 55 56 fileModTime time.Time 57 } 58 59 type DiscoveryServer struct { 60 DiscoveryDateRange []time.Time `json:"discovery_date_range"` 61 EncodedServerEntry string `json:"encoded_server_entry"` 62 } 63 64 type Sponsor struct { 65 ID string `json:"id"` 66 HomePages map[string][]HomePage `json:"home_pages"` 67 MobileHomePages map[string][]HomePage `json:"mobile_home_pages"` 68 AlertActionURLs map[string][]string `json:"alert_action_urls"` 69 HttpsRequestRegexes []HttpsRequestRegex `json:"https_request_regexes"` 70 71 domainBytesChecksum []byte `json:"-"` 72 } 73 74 type ClientVersion struct { 75 Version string `json:"version"` 76 } 77 78 type HomePage struct { 79 Region string `json:"region"` 80 URL string `json:"url"` 81 } 82 83 type HttpsRequestRegex struct { 84 Regex string `json:"regex"` 85 Replace string `json:"replace"` 86 } 87 88 // NewDatabase initializes a Database, calling Reload on the specified 89 // filename. 90 func NewDatabase(filename string) (*Database, error) { 91 92 database := &Database{} 93 94 database.ReloadableFile = common.NewReloadableFile( 95 filename, 96 true, 97 func(fileContent []byte, fileModTime time.Time) error { 98 var newDatabase *Database 99 err := json.Unmarshal(fileContent, &newDatabase) 100 if err != nil { 101 return errors.Trace(err) 102 } 103 // Note: an unmarshal directly into &database would fail 104 // to reset to zero value fields not present in the JSON. 
105 database.Sponsors = newDatabase.Sponsors 106 database.Versions = newDatabase.Versions 107 database.DefaultSponsorID = newDatabase.DefaultSponsorID 108 database.DefaultAlertActionURLs = newDatabase.DefaultAlertActionURLs 109 database.ValidServerEntryTags = newDatabase.ValidServerEntryTags 110 database.DiscoveryServers = newDatabase.DiscoveryServers 111 database.fileModTime = fileModTime 112 113 for _, sponsor := range database.Sponsors { 114 115 value, err := json.Marshal(sponsor.HttpsRequestRegexes) 116 if err != nil { 117 return errors.Trace(err) 118 } 119 120 // MD5 hash is used solely as a data checksum and not for any 121 // security purpose. 122 checksum := md5.Sum(value) 123 sponsor.domainBytesChecksum = checksum[:] 124 } 125 126 return nil 127 }) 128 129 _, err := database.Reload() 130 if err != nil { 131 return nil, errors.Trace(err) 132 } 133 134 return database, nil 135 } 136 137 // GetRandomizedHomepages returns a randomly ordered list of home pages 138 // for the specified sponsor, region, and platform. 139 func (db *Database) GetRandomizedHomepages( 140 sponsorID, clientRegion, clientASN string, isMobilePlatform bool) []string { 141 142 homepages := db.GetHomepages(sponsorID, clientRegion, clientASN, isMobilePlatform) 143 if len(homepages) > 1 { 144 shuffledHomepages := make([]string, len(homepages)) 145 perm := rand.Perm(len(homepages)) 146 for i, v := range perm { 147 shuffledHomepages[v] = homepages[i] 148 } 149 return shuffledHomepages 150 } 151 return homepages 152 } 153 154 // GetHomepages returns a list of home pages for the specified sponsor, 155 // region, and platform. 
156 func (db *Database) GetHomepages( 157 sponsorID, clientRegion, clientASN string, isMobilePlatform bool) []string { 158 159 db.ReloadableFile.RLock() 160 defer db.ReloadableFile.RUnlock() 161 162 sponsorHomePages := make([]string, 0) 163 164 // Sponsor id does not exist: fail gracefully 165 sponsor, ok := db.Sponsors[sponsorID] 166 if !ok { 167 sponsor, ok = db.Sponsors[db.DefaultSponsorID] 168 if !ok { 169 return sponsorHomePages 170 } 171 } 172 173 if sponsor == nil { 174 return sponsorHomePages 175 } 176 177 homePages := sponsor.HomePages 178 179 if isMobilePlatform { 180 if len(sponsor.MobileHomePages) > 0 { 181 homePages = sponsor.MobileHomePages 182 } 183 } 184 185 // Case: lookup succeeded and corresponding homepages found for region 186 homePagesByRegion, ok := homePages[clientRegion] 187 if ok { 188 for _, homePage := range homePagesByRegion { 189 sponsorHomePages = append( 190 sponsorHomePages, homepageQueryParameterSubstitution(homePage.URL, clientRegion, clientASN)) 191 } 192 } 193 194 // Case: lookup failed or no corresponding homepages found for region --> use default 195 if len(sponsorHomePages) == 0 { 196 defaultHomePages, ok := homePages["None"] 197 if ok { 198 for _, homePage := range defaultHomePages { 199 // client_region query parameter substitution 200 sponsorHomePages = append( 201 sponsorHomePages, homepageQueryParameterSubstitution(homePage.URL, clientRegion, clientASN)) 202 } 203 } 204 } 205 206 return sponsorHomePages 207 } 208 209 func homepageQueryParameterSubstitution( 210 url, clientRegion, clientASN string) string { 211 212 return strings.Replace( 213 strings.Replace(url, "client_region=XX", "client_region="+clientRegion, 1), 214 "client_asn=XX", "client_asn="+clientASN, 1) 215 } 216 217 // GetAlertActionURLs returns a list of alert action URLs for the specified 218 // alert reason and sponsor. 
219 func (db *Database) GetAlertActionURLs( 220 alertReason, sponsorID, clientRegion, clientASN string) []string { 221 222 db.ReloadableFile.RLock() 223 defer db.ReloadableFile.RUnlock() 224 225 // Prefer URLs from the Sponsor.AlertActionURLs. When there are no sponsor 226 // URLs, then select from Database.DefaultAlertActionURLs. 227 228 actionURLs := []string{} 229 230 sponsor := db.Sponsors[sponsorID] 231 if sponsor != nil { 232 for _, URL := range sponsor.AlertActionURLs[alertReason] { 233 actionURLs = append( 234 actionURLs, homepageQueryParameterSubstitution(URL, clientRegion, clientASN)) 235 } 236 } 237 238 if len(actionURLs) == 0 { 239 for _, URL := range db.DefaultAlertActionURLs[alertReason] { 240 actionURLs = append( 241 actionURLs, homepageQueryParameterSubstitution(URL, clientRegion, clientASN)) 242 } 243 } 244 245 return actionURLs 246 } 247 248 // GetUpgradeClientVersion returns a new client version when an upgrade is 249 // indicated for the specified client current version. The result is "" when 250 // no upgrade is available. Caller should normalize clientPlatform. 
251 func (db *Database) GetUpgradeClientVersion(clientVersion, clientPlatform string) string { 252 db.ReloadableFile.RLock() 253 defer db.ReloadableFile.RUnlock() 254 255 // Check lastest version number against client version number 256 257 clientVersions, ok := db.Versions[clientPlatform] 258 if !ok { 259 return "" 260 } 261 262 if len(clientVersions) == 0 { 263 return "" 264 } 265 266 // NOTE: Assumes versions list is in ascending version order 267 lastVersion := clientVersions[len(clientVersions)-1].Version 268 269 lastVersionInt, err := strconv.Atoi(lastVersion) 270 if err != nil { 271 return "" 272 } 273 clientVersionInt, err := strconv.Atoi(clientVersion) 274 if err != nil { 275 return "" 276 } 277 278 // Return latest version if upgrade needed 279 if lastVersionInt > clientVersionInt { 280 return lastVersion 281 } 282 283 return "" 284 } 285 286 // GetHttpsRequestRegexes returns bytes transferred stats regexes and the 287 // associated checksum for the specified sponsor. The checksum may be nil. 288 func (db *Database) GetHttpsRequestRegexes(sponsorID string) ([]map[string]string, []byte) { 289 db.ReloadableFile.RLock() 290 defer db.ReloadableFile.RUnlock() 291 292 regexes := make([]map[string]string, 0) 293 294 sponsor, ok := db.Sponsors[sponsorID] 295 if !ok { 296 sponsor = db.Sponsors[db.DefaultSponsorID] 297 } 298 299 if sponsor == nil { 300 return regexes, nil 301 } 302 303 // If neither sponsorID or DefaultSponsorID were found, sponsor will be the 304 // zero value of the map, an empty Sponsor struct. 305 for _, sponsorRegex := range sponsor.HttpsRequestRegexes { 306 regex := make(map[string]string) 307 regex["replace"] = sponsorRegex.Replace 308 regex["regex"] = sponsorRegex.Regex 309 regexes = append(regexes, regex) 310 } 311 312 return regexes, sponsor.domainBytesChecksum 313 } 314 315 // GetDomainBytesChecksum returns the bytes transferred stats regexes 316 // checksum for the specified sponsor. The checksum may be nil. 
317 func (db *Database) GetDomainBytesChecksum(sponsorID string) []byte { 318 db.ReloadableFile.RLock() 319 defer db.ReloadableFile.RUnlock() 320 321 sponsor, ok := db.Sponsors[sponsorID] 322 if !ok { 323 sponsor = db.Sponsors[db.DefaultSponsorID] 324 } 325 326 if sponsor == nil { 327 return nil 328 } 329 330 return sponsor.domainBytesChecksum 331 } 332 333 // DiscoverServers selects new encoded server entries to be "discovered" by 334 // the client, using the discoveryValue -- a function of the client's IP 335 // address -- as the input into the discovery algorithm. 336 func (db *Database) DiscoverServers(discoveryValue int) []string { 337 db.ReloadableFile.RLock() 338 defer db.ReloadableFile.RUnlock() 339 340 var servers []*DiscoveryServer 341 342 discoveryDate := time.Now().UTC() 343 candidateServers := make([]*DiscoveryServer, 0) 344 345 for _, server := range db.DiscoveryServers { 346 // All servers that are discoverable on this day are eligible for discovery 347 if len(server.DiscoveryDateRange) == 2 && 348 discoveryDate.After(server.DiscoveryDateRange[0]) && 349 discoveryDate.Before(server.DiscoveryDateRange[1]) { 350 351 candidateServers = append(candidateServers, server) 352 } 353 } 354 355 timeInSeconds := int(discoveryDate.Unix()) 356 servers = selectServers(candidateServers, timeInSeconds, discoveryValue) 357 358 encodedServerEntries := make([]string, 0) 359 360 for _, server := range servers { 361 encodedServerEntries = append(encodedServerEntries, server.EncodedServerEntry) 362 } 363 364 return encodedServerEntries 365 } 366 367 // Combine client IP address and time-of-day strategies to give out different 368 // discovery servers to different clients. The aim is to achieve defense against 369 // enumerability. We also want to achieve a degree of load balancing clients 370 // and these strategies are expected to have reasonably random distribution, 371 // even for a cluster of users coming from the same network. 
372 // 373 // We only select one server: multiple results makes enumeration easier; the 374 // strategies have a built-in load balancing effect; and date range discoverability 375 // means a client will actually learn more servers later even if they happen to 376 // always pick the same result at this point. 377 // 378 // This is a blended strategy: as long as there are enough servers to pick from, 379 // both aspects determine which server is selected. IP address is given the 380 // priority: if there are only a couple of servers, for example, IP address alone 381 // determines the outcome. 382 func selectServers( 383 servers []*DiscoveryServer, timeInSeconds, discoveryValue int) []*DiscoveryServer { 384 385 TIME_GRANULARITY := 3600 386 387 if len(servers) == 0 { 388 return nil 389 } 390 391 // Time truncated to an hour 392 timeStrategyValue := timeInSeconds / TIME_GRANULARITY 393 394 // Divide servers into buckets. The bucket count is chosen such that the number 395 // of buckets and the number of items in each bucket are close (using sqrt). 396 // IP address selects the bucket, time selects the item in the bucket. 397 398 // NOTE: this code assumes that the range of possible timeStrategyValues 399 // and discoveryValues are sufficient to index to all bucket items. 400 401 bucketCount := calculateBucketCount(len(servers)) 402 403 buckets := bucketizeServerList(servers, bucketCount) 404 405 if len(buckets) == 0 { 406 return nil 407 } 408 409 bucket := buckets[discoveryValue%len(buckets)] 410 411 if len(bucket) == 0 { 412 return nil 413 } 414 415 server := bucket[timeStrategyValue%len(bucket)] 416 417 serverList := make([]*DiscoveryServer, 1) 418 serverList[0] = server 419 420 return serverList 421 } 422 423 // Number of buckets such that first strategy picks among about the same number 424 // of choices as the second strategy. Gives an edge to the "outer" strategy. 
425 func calculateBucketCount(length int) int { 426 return int(math.Ceil(math.Sqrt(float64(length)))) 427 } 428 429 // bucketizeServerList creates nearly equal sized slices of the input list. 430 func bucketizeServerList(servers []*DiscoveryServer, bucketCount int) [][]*DiscoveryServer { 431 432 // This code creates the same partitions as legacy servers: 433 // https://github.com/Psiphon-Inc/psiphon-automation/blob/685f91a85bcdb33a75a200d936eadcb0686eadd7/Automation/psi_ops_discovery.py 434 // 435 // Both use the same algorithm from: 436 // http://stackoverflow.com/questions/2659900/python-slicing-a-list-into-n-nearly-equal-length-partitions 437 438 // TODO: this partition is constant for fixed Database content, so it could 439 // be done once and cached in the Database ReloadableFile reloadAction. 440 441 buckets := make([][]*DiscoveryServer, bucketCount) 442 443 division := float64(len(servers)) / float64(bucketCount) 444 445 for i := 0; i < bucketCount; i++ { 446 start := int((division * float64(i)) + 0.5) 447 end := int((division * (float64(i) + 1)) + 0.5) 448 buckets[i] = servers[start:end] 449 } 450 451 return buckets 452 } 453 454 // IsValidServerEntryTag checks if the specified server entry tag is valid. 455 func (db *Database) IsValidServerEntryTag(serverEntryTag string) bool { 456 db.ReloadableFile.RLock() 457 defer db.ReloadableFile.RUnlock() 458 459 // Default to "valid" if the valid list is unexpectedly empty or stale. This 460 // helps prevent premature client-side server-entry pruning when there is an 461 // issue with updating the database. 462 463 if len(db.ValidServerEntryTags) == 0 || 464 db.fileModTime.Add(MAX_DATABASE_AGE_FOR_SERVER_ENTRY_VALIDITY).Before(time.Now()) { 465 return true 466 } 467 468 // The tag must be in the map and have the value "true". 469 return db.ValidServerEntryTags[serverEntryTag] 470 }