github.com/psiphon-labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/server/geoip.go (about)

     1  /*
     2   * Copyright (c) 2016, Psiphon Inc.
     3   * All rights reserved.
     4   *
     5   * This program is free software: you can redistribute it and/or modify
     6   * it under the terms of the GNU General Public License as published by
     7   * the Free Software Foundation, either version 3 of the License, or
     8   * (at your option) any later version.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  package server
    21  
    22  import (
    23  	"fmt"
    24  	"io"
    25  	"net"
    26  	"os"
    27  	"path/filepath"
    28  	"strconv"
    29  	"strings"
    30  	"time"
    31  
    32  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
    33  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
    34  	maxminddb "github.com/oschwald/maxminddb-golang"
    35  	cache "github.com/patrickmn/go-cache"
    36  )
    37  
const (
	// GEOIP_SESSION_CACHE_TTL is the default expiration configured for the
	// session cache; it applies to entries stored with the cache's default
	// expiration (see MarkSessionCacheToExpire).
	GEOIP_SESSION_CACHE_TTL = 60 * time.Minute
	// GEOIP_UNKNOWN_VALUE is the placeholder held by a GeoIPData field for
	// which no value was resolved.
	GEOIP_UNKNOWN_VALUE     = "None"
	// GEOIP_DATABASE_TYPE_ISP is the MaxMind metadata database type that
	// marks a database as an ISP database.
	GEOIP_DATABASE_TYPE_ISP = "GeoIP2-ISP"
)
    43  
// GeoIPData is GeoIP data for a client session. Individual client
// IP addresses are neither logged nor explicitly referenced during a session.
// The GeoIP country, city, ISP, ASN, and ASO corresponding to a client IP
// address are resolved and then logged along with usage stats.
//
// Values produced by NewGeoIPData or by a GeoIPService lookup hold
// GEOIP_UNKNOWN_VALUE in every field that could not be resolved.
type GeoIPData struct {
	// Country is the country ISO code.
	Country string
	// City is the English ("en") city name.
	City    string
	// ISP is the ISP name.
	ISP     string
	// ASN is the autonomous system number, formatted as a decimal string.
	ASN     string
	// ASO is the autonomous system organization name.
	ASO     string
}
    55  
    56  // NewGeoIPData returns a GeoIPData initialized with the expected
    57  // GEOIP_UNKNOWN_VALUE values to be used when GeoIP lookup fails.
    58  func NewGeoIPData() GeoIPData {
    59  	return GeoIPData{
    60  		Country: GEOIP_UNKNOWN_VALUE,
    61  		City:    GEOIP_UNKNOWN_VALUE,
    62  		ISP:     GEOIP_UNKNOWN_VALUE,
    63  		ASN:     GEOIP_UNKNOWN_VALUE,
    64  		ASO:     GEOIP_UNKNOWN_VALUE,
    65  	}
    66  }
    67  
    68  // SetLogFields adds the GeoIPData fields to LogFields, following Psiphon
    69  // metric field name and format conventions.
    70  func (g GeoIPData) SetLogFields(logFields LogFields) {
    71  	g.SetLogFieldsWithPrefix("", logFields)
    72  }
    73  
    74  func (g GeoIPData) SetLogFieldsWithPrefix(prefix string, logFields LogFields) {
    75  
    76  	// In psi_web, the space replacement was done to accommodate space
    77  	// delimited logging, which is no longer required; we retain the
    78  	// transformation so that stats aggregation isn't impacted.
    79  	logFields[prefix+"client_region"] = strings.Replace(g.Country, " ", "_", -1)
    80  	logFields[prefix+"client_city"] = strings.Replace(g.City, " ", "_", -1)
    81  	logFields[prefix+"client_isp"] = strings.Replace(g.ISP, " ", "_", -1)
    82  	logFields[prefix+"client_asn"] = strings.Replace(g.ASN, " ", "_", -1)
    83  	logFields[prefix+"client_aso"] = strings.Replace(g.ASO, " ", "_", -1)
    84  }
    85  
// GeoIPService implements GeoIP lookup and session/GeoIP caching.
// Lookup is via one or more MaxMind databases; the Reloaders function
// exposes the databases so that MaxMind data can be hot reloaded while
// the server is running.
type GeoIPService struct {
	// databases holds one entry per database filename passed to
	// NewGeoIPService, in the same order.
	databases    []*geoIPDatabase
	// sessionCache maps session IDs to GeoIPData values.
	sessionCache *cache.Cache
}
    94  
// geoIPDatabase is a single reloadable MaxMind database. Lookups read
// isISPType and maxMindReader while holding the embedded ReloadableFile's
// read lock.
type geoIPDatabase struct {
	common.ReloadableFile
	// filename is the path of the original database file.
	filename       string
	// tempFilename is the path of the temporary copy of the database that
	// maxMindReader currently has open.
	tempFilename   string
	// tempFileSuffix is the numeric suffix of tempFilename; it is advanced
	// by one on each successful reload.
	tempFileSuffix int64
	// isISPType records whether the loaded database's metadata type is
	// GEOIP_DATABASE_TYPE_ISP.
	isISPType      bool
	// maxMindReader is the open reader for the temporary database copy.
	maxMindReader  *maxminddb.Reader
}
   103  
   104  // NewGeoIPService initializes a new GeoIPService.
   105  func NewGeoIPService(databaseFilenames []string) (*GeoIPService, error) {
   106  
   107  	geoIP := &GeoIPService{
   108  		databases:    make([]*geoIPDatabase, len(databaseFilenames)),
   109  		sessionCache: cache.New(GEOIP_SESSION_CACHE_TTL, 1*time.Minute),
   110  	}
   111  
   112  	for i, filename := range databaseFilenames {
   113  
   114  		database := &geoIPDatabase{
   115  			filename: filename,
   116  		}
   117  
   118  		database.ReloadableFile = common.NewReloadableFile(
   119  			filename,
   120  			false,
   121  			func(_ []byte, _ time.Time) error {
   122  
   123  				// In order to safely mmap the database file, a temporary copy
   124  				// is made and that copy is mmapped. The original file may be
   125  				// repaved without affecting the mmap; upon hot reload, a new
   126  				// temporary copy is made and once it is successful, the old
   127  				// mmap is closed and previous temporary file deleted.
   128  				//
   129  				// On any reload error, database state remains the same.
   130  
   131  				src, err := os.Open(database.filename)
   132  				if err != nil {
   133  					return errors.Trace(err)
   134  				}
   135  
   136  				tempFileSuffix := database.tempFileSuffix + 1
   137  
   138  				tempFilename := fmt.Sprintf(
   139  					"%s.%d",
   140  					filepath.Join(os.TempDir(), filepath.Base(database.filename)),
   141  					tempFileSuffix)
   142  
   143  				dst, err := os.Create(tempFilename)
   144  				if err != nil {
   145  					src.Close()
   146  					return errors.Trace(err)
   147  				}
   148  
   149  				_, err = io.Copy(dst, src)
   150  				src.Close()
   151  				dst.Close()
   152  				if err != nil {
   153  					_ = os.Remove(tempFilename)
   154  					return errors.Trace(err)
   155  				}
   156  
   157  				maxMindReader, err := maxminddb.Open(tempFilename)
   158  				if err != nil {
   159  					_ = os.Remove(tempFilename)
   160  					return errors.Trace(err)
   161  				}
   162  
   163  				if database.maxMindReader != nil {
   164  					database.maxMindReader.Close()
   165  					_ = os.Remove(database.tempFilename)
   166  				}
   167  
   168  				isISPType := (maxMindReader.Metadata.DatabaseType == GEOIP_DATABASE_TYPE_ISP)
   169  
   170  				database.maxMindReader = maxMindReader
   171  				database.isISPType = isISPType
   172  				database.tempFilename = tempFilename
   173  				database.tempFileSuffix = tempFileSuffix
   174  
   175  				return nil
   176  			})
   177  
   178  		_, err := database.Reload()
   179  		if err != nil {
   180  			return nil, errors.Trace(err)
   181  		}
   182  
   183  		geoIP.databases[i] = database
   184  	}
   185  
   186  	return geoIP, nil
   187  }
   188  
   189  // Reloaders gets the list of reloadable databases in use
   190  // by the GeoIPService. This list is used to hot reload
   191  // these databases.
   192  func (geoIP *GeoIPService) Reloaders() []common.Reloader {
   193  	reloaders := make([]common.Reloader, len(geoIP.databases))
   194  	for i, database := range geoIP.databases {
   195  		reloaders[i] = database
   196  	}
   197  	return reloaders
   198  }
   199  
   200  // Lookup determines a GeoIPData for a given string client IP address.
   201  func (geoIP *GeoIPService) Lookup(strIP string) GeoIPData {
   202  	return geoIP.LookupIP(net.ParseIP(strIP))
   203  }
   204  
   205  // LookupIP determines a GeoIPData for a given client IP address.
   206  func (geoIP *GeoIPService) LookupIP(IP net.IP) GeoIPData {
   207  	return geoIP.lookupIP(IP, false)
   208  }
   209  
   210  // LookupISPForIP determines a GeoIPData for a given client IP address. Only
   211  // ISP, ASN, and ASO fields will be populated. This lookup is faster than a
   212  // full lookup.
   213  func (geoIP *GeoIPService) LookupISPForIP(IP net.IP) GeoIPData {
   214  	return geoIP.lookupIP(IP, true)
   215  }
   216  
   217  func (geoIP *GeoIPService) lookupIP(IP net.IP, ISPOnly bool) GeoIPData {
   218  
   219  	result := NewGeoIPData()
   220  
   221  	if IP == nil {
   222  		return result
   223  	}
   224  
   225  	// Populate GeoIP fields.
   226  
   227  	var geoIPFields struct {
   228  		Country struct {
   229  			ISOCode string `maxminddb:"iso_code"`
   230  		} `maxminddb:"country"`
   231  		City struct {
   232  			Names map[string]string `maxminddb:"names"`
   233  		} `maxminddb:"city"`
   234  		ISP string `maxminddb:"isp"`
   235  		ASN int    `maxminddb:"autonomous_system_number"`
   236  		ASO string `maxminddb:"autonomous_system_organization"`
   237  	}
   238  
   239  	geoIPFields.ASN = -1
   240  
   241  	// Each database will populate geoIPFields with the values it contains. In the
   242  	// current MaxMind deployment, the City database populates Country and City and
   243  	// the separate ISP database populates ISP.
   244  	for _, database := range geoIP.databases {
   245  		database.ReloadableFile.RLock()
   246  		var err error
   247  		// Don't lookup the City database when only ISP fields are required;
   248  		// skipping the City lookup is 5-10x faster.
   249  		if !ISPOnly || database.isISPType {
   250  			err = database.maxMindReader.Lookup(IP, &geoIPFields)
   251  		}
   252  		database.ReloadableFile.RUnlock()
   253  		if err != nil {
   254  			log.WithTraceFields(LogFields{"error": err}).Warning("GeoIP lookup failed")
   255  		}
   256  	}
   257  
   258  	if geoIPFields.Country.ISOCode != "" {
   259  		result.Country = geoIPFields.Country.ISOCode
   260  	}
   261  
   262  	name, ok := geoIPFields.City.Names["en"]
   263  	if ok && name != "" {
   264  		result.City = name
   265  	}
   266  
   267  	if geoIPFields.ISP != "" {
   268  		result.ISP = geoIPFields.ISP
   269  	}
   270  
   271  	if geoIPFields.ASN != -1 {
   272  		result.ASN = strconv.Itoa(geoIPFields.ASN)
   273  	}
   274  
   275  	if geoIPFields.ASO != "" {
   276  		result.ASO = geoIPFields.ASO
   277  	}
   278  
   279  	return result
   280  }
   281  
   282  // SetSessionCache adds the sessionID/geoIPData pair to the
   283  // session cache. This value will not expire; the caller must
   284  // call MarkSessionCacheToExpire to initiate expiry.
   285  // Calling SetSessionCache for an existing sessionID will
   286  // replace the previous value and reset any expiry.
   287  func (geoIP *GeoIPService) SetSessionCache(sessionID string, geoIPData GeoIPData) {
   288  	geoIP.sessionCache.Set(sessionID, geoIPData, cache.NoExpiration)
   289  }
   290  
   291  // MarkSessionCacheToExpire initiates expiry for an existing
   292  // session cache entry, if the session ID is found in the cache.
   293  // Concurrency note: SetSessionCache and MarkSessionCacheToExpire
   294  // should not be called concurrently for a single session ID.
   295  func (geoIP *GeoIPService) MarkSessionCacheToExpire(sessionID string) {
   296  	geoIPData, found := geoIP.sessionCache.Get(sessionID)
   297  	// Note: potential race condition between Get and Set. In practice,
   298  	// the tunnel server won't clobber a SetSessionCache value by calling
   299  	// MarkSessionCacheToExpire concurrently.
   300  	if found {
   301  		geoIP.sessionCache.Set(sessionID, geoIPData, cache.DefaultExpiration)
   302  	}
   303  }
   304  
   305  // GetSessionCache returns the cached GeoIPData for the
   306  // specified session ID; a blank GeoIPData is returned
   307  // if the session ID is not found in the cache.
   308  func (geoIP *GeoIPService) GetSessionCache(sessionID string) GeoIPData {
   309  	geoIPData, found := geoIP.sessionCache.Get(sessionID)
   310  	if !found {
   311  		return NewGeoIPData()
   312  	}
   313  	return geoIPData.(GeoIPData)
   314  }
   315  
   316  // InSessionCache returns whether the session ID is present
   317  // in the session cache.
   318  func (geoIP *GeoIPService) InSessionCache(sessionID string) bool {
   319  	_, found := geoIP.sessionCache.Get(sessionID)
   320  	return found
   321  }