github.com/Psiphon-Labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/server/blocklist.go (about)

     1  /*
     2   * Copyright (c) 2019, Psiphon Inc.
     3   * All rights reserved.
     4   *
     5   * This program is free software: you can redistribute it and/or modify
     6   * it under the terms of the GNU General Public License as published by
     7   * the Free Software Foundation, either version 3 of the License, or
     8   * (at your option) any later version.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  package server
    21  
    22  import (
    23  	"encoding/csv"
    24  	"io"
    25  	"net"
    26  	"os"
    27  	"sync/atomic"
    28  	"time"
    29  
    30  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
    31  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
    32  	"github.com/miekg/dns"
    33  )
    34  
// Blocklist provides a fast lookup of IP addresses and domains that are
// candidates for egress blocking. This is intended to be used to block
// malware and other malicious traffic.
//
// The Reload function supports hot reloading of rules data while the server
// is running.
//
// Limitations: the blocklist is implemented with in-memory Go maps, which
// limits the practical size of the blocklist.
type Blocklist struct {
	// ReloadableFile supplies Reload; its reload action is installed by
	// NewBlocklist.
	common.ReloadableFile
	// loaded is set to 1, with atomic.StoreInt32, once a load has succeeded.
	// The lookup functions return nil while it is not 1.
	loaded int32
	// data holds the current *blocklistData. Each successful load stores a
	// newly built value; the lookup functions read it with Load.
	data   atomic.Value
}
    49  
    50  // BlocklistTag indicates the source containing an IP address and the subject,
    51  // or name of the suspected malicious traffic.
    52  type BlocklistTag struct {
    53  	Source  string
    54  	Subject string
    55  }
    56  
// blocklistData is the set of lookup tables built from one load of the
// blocklist file. A new value is built on every load and stored in
// Blocklist.data; the lookup functions only read from it.
type blocklistData struct {
	// lookupIP maps an IP address, in its 16-byte form (net.IP.To16), to the
	// tags recorded for that address.
	lookupIP        map[[net.IPv6len]byte][]BlocklistTag
	// lookupDomain maps a domain name, exactly as written in the blocklist
	// file, to the tags recorded for that domain.
	lookupDomain    map[string][]BlocklistTag
	// internedStrings maps each distinct source/subject string to the single
	// copy that is shared by all tags; see internString.
	internedStrings map[string]string
}
    62  
    63  // NewBlocklist creates a new block list.
    64  //
    65  // The input file must be a 3 field comma-delimited and optional quote-escaped
    66  // CSV. Fields: <IPv4 address>,<source>,<subject>.
    67  //
    68  // IP addresses may appear multiple times in the input file; each distinct
    69  // source/subject is associated with the IP address and returned in the Lookup
    70  // tag list.
    71  func NewBlocklist(filename string) (*Blocklist, error) {
    72  
    73  	blocklist := &Blocklist{}
    74  
    75  	blocklist.ReloadableFile = common.NewReloadableFile(
    76  		filename,
    77  		false,
    78  		func(_ []byte, _ time.Time) error {
    79  
    80  			newData, err := loadBlocklistFromFile(filename)
    81  			if err != nil {
    82  				return errors.Trace(err)
    83  			}
    84  
    85  			blocklist.data.Store(newData)
    86  			atomic.StoreInt32(&blocklist.loaded, 1)
    87  
    88  			return nil
    89  		})
    90  
    91  	_, err := blocklist.Reload()
    92  	if err != nil {
    93  		return nil, errors.Trace(err)
    94  	}
    95  
    96  	return blocklist, nil
    97  }
    98  
    99  // LookupIP returns the blocklist tags for any IP address that is on the
   100  // blocklist, or returns nil for any IP address not on the blocklist. Lookup
   101  // may be called concurrently. The caller must not modify the return value.
   102  func (b *Blocklist) LookupIP(IPAddress net.IP) []BlocklistTag {
   103  
   104  	// When not configured, no blocklist is loaded/initialized.
   105  	if atomic.LoadInt32(&b.loaded) != 1 {
   106  		return nil
   107  	}
   108  
   109  	// IPAddress may be an IPv4 or IPv6 address. To16 will return the 16-byte
   110  	// representation of an IPv4 address, with the net.v4InV6Prefix prefix.
   111  
   112  	var key [net.IPv6len]byte
   113  	IPAddress16 := IPAddress.To16()
   114  	if IPAddress16 == nil {
   115  		return nil
   116  	}
   117  	copy(key[:], IPAddress16)
   118  
   119  	// As data is an atomic.Value, it's not necessary to call
   120  	// ReloadableFile.RLock/ReloadableFile.RUnlock in this case.
   121  
   122  	tags, ok := b.data.Load().(*blocklistData).lookupIP[key]
   123  	if !ok {
   124  		return nil
   125  	}
   126  	return tags
   127  }
   128  
   129  // LookupDomain returns the blocklist tags for any domain that is on the
   130  // blocklist, or returns nil for any domain not on the blocklist. Lookup may
   131  // be called concurrently. The caller must not modify the return value.
   132  func (b *Blocklist) LookupDomain(domain string) []BlocklistTag {
   133  
   134  	if atomic.LoadInt32(&b.loaded) != 1 {
   135  		return nil
   136  	}
   137  
   138  	// Domains parsed out of DNS queries will be fully-qualified domain names,
   139  	// while list entries do not end in a dot.
   140  	if len(domain) > 0 && domain[len(domain)-1] == '.' {
   141  		domain = domain[:len(domain)-1]
   142  	}
   143  
   144  	tags, ok := b.data.Load().(*blocklistData).lookupDomain[domain]
   145  	if !ok {
   146  		return nil
   147  	}
   148  	return tags
   149  }
   150  
   151  func loadBlocklistFromFile(filename string) (*blocklistData, error) {
   152  
   153  	data := newBlocklistData()
   154  
   155  	file, err := os.Open(filename)
   156  	if err != nil {
   157  		return nil, errors.Trace(err)
   158  	}
   159  	defer file.Close()
   160  
   161  	reader := csv.NewReader(file)
   162  
   163  	reader.FieldsPerRecord = 3
   164  	reader.Comment = '#'
   165  	reader.ReuseRecord = true
   166  
   167  	for {
   168  		record, err := reader.Read()
   169  
   170  		if err == io.EOF {
   171  			break
   172  		} else if err != nil {
   173  			return nil, errors.Trace(err)
   174  		}
   175  
   176  		// Intern the source and subject strings so we only store one copy of
   177  		// each in memory. These values are expected to repeat often.
   178  		source := data.internString(record[1])
   179  		subject := data.internString(record[2])
   180  
   181  		tag := BlocklistTag{
   182  			Source:  source,
   183  			Subject: subject,
   184  		}
   185  
   186  		IPAddress := net.ParseIP(record[0])
   187  		if IPAddress != nil {
   188  
   189  			IPAddress16 := IPAddress.To16()
   190  			if IPAddress16 == nil {
   191  				return nil, errors.Tracef("invalid IP address: %s", record[0])
   192  			}
   193  
   194  			var key [net.IPv6len]byte
   195  			copy(key[:], IPAddress16)
   196  
   197  			tags := data.lookupIP[key]
   198  
   199  			found := false
   200  			for _, existingTag := range tags {
   201  				if tag == existingTag {
   202  					found = true
   203  					break
   204  				}
   205  			}
   206  
   207  			if !found {
   208  				data.lookupIP[key] = append(tags, tag)
   209  			}
   210  
   211  		} else {
   212  
   213  			if _, ok := dns.IsDomainName(record[0]); !ok {
   214  				return nil, errors.Tracef("invalid domain name: %s", record[0])
   215  			}
   216  
   217  			key := record[0]
   218  
   219  			tags := data.lookupDomain[key]
   220  
   221  			found := false
   222  			for _, existingTag := range tags {
   223  				if tag == existingTag {
   224  					found = true
   225  					break
   226  				}
   227  			}
   228  
   229  			if !found {
   230  				data.lookupDomain[key] = append(tags, tag)
   231  			}
   232  		}
   233  	}
   234  
   235  	return data, nil
   236  }
   237  
   238  func newBlocklistData() *blocklistData {
   239  	return &blocklistData{
   240  		lookupIP:        make(map[[net.IPv6len]byte][]BlocklistTag),
   241  		lookupDomain:    make(map[string][]BlocklistTag),
   242  		internedStrings: make(map[string]string),
   243  	}
   244  }
   245  
   246  func (data *blocklistData) internString(str string) string {
   247  	if internedStr, ok := data.internedStrings[str]; ok {
   248  		return internedStr
   249  	}
   250  	data.internedStrings[str] = str
   251  	return str
   252  }