github.com/psiphon-labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/server/blocklist.go (about) 1 /* 2 * Copyright (c) 2019, Psiphon Inc. 3 * All rights reserved. 4 * 5 * This program is free software: you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation, either version 3 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package server 21 22 import ( 23 "encoding/csv" 24 "io" 25 "net" 26 "os" 27 "sync/atomic" 28 "time" 29 30 "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common" 31 "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors" 32 "github.com/miekg/dns" 33 ) 34 35 // Blocklist provides a fast lookup of IP addresses and domains that are 36 // candidates for egress blocking. This is intended to be used to block 37 // malware and other malicious traffic. 38 // 39 // The Reload function supports hot reloading of rules data while the server 40 // is running. 41 // 42 // Limitations: the blocklist is implemented with in-memory Go maps, which 43 // limits the practical size of the blocklist. 44 type Blocklist struct { 45 common.ReloadableFile 46 loaded int32 47 data atomic.Value 48 } 49 50 // BlocklistTag indicates the source containing an IP address and the subject, 51 // or name of the suspected malicious traffic. 52 type BlocklistTag struct { 53 Source string 54 Subject string 55 } 56 57 type blocklistData struct { 58 lookupIP map[[net.IPv6len]byte][]BlocklistTag 59 lookupDomain map[string][]BlocklistTag 60 internedStrings map[string]string 61 } 62 63 // NewBlocklist creates a new block list. 64 // 65 // The input file must be a 3 field comma-delimited and optional quote-escaped 66 // CSV. Fields: <IPv4 address>,<source>,<subject>. 67 // 68 // IP addresses may appear multiple times in the input file; each distinct 69 // source/subject is associated with the IP address and returned in the Lookup 70 // tag list. 71 func NewBlocklist(filename string) (*Blocklist, error) { 72 73 blocklist := &Blocklist{} 74 75 blocklist.ReloadableFile = common.NewReloadableFile( 76 filename, 77 false, 78 func(_ []byte, _ time.Time) error { 79 80 newData, err := loadBlocklistFromFile(filename) 81 if err != nil { 82 return errors.Trace(err) 83 } 84 85 blocklist.data.Store(newData) 86 atomic.StoreInt32(&blocklist.loaded, 1) 87 88 return nil 89 }) 90 91 _, err := blocklist.Reload() 92 if err != nil { 93 return nil, errors.Trace(err) 94 } 95 96 return blocklist, nil 97 } 98 99 // LookupIP returns the blocklist tags for any IP address that is on the 100 // blocklist, or returns nil for any IP address not on the blocklist. Lookup 101 // may be called concurrently. The caller must not modify the return value. 102 func (b *Blocklist) LookupIP(IPAddress net.IP) []BlocklistTag { 103 104 // When not configured, no blocklist is loaded/initialized. 105 if atomic.LoadInt32(&b.loaded) != 1 { 106 return nil 107 } 108 109 // IPAddress may be an IPv4 or IPv6 address. To16 will return the 16-byte 110 // representation of an IPv4 address, with the net.v4InV6Prefix prefix. 111 112 var key [net.IPv6len]byte 113 IPAddress16 := IPAddress.To16() 114 if IPAddress16 == nil { 115 return nil 116 } 117 copy(key[:], IPAddress16) 118 119 // As data is an atomic.Value, it's not necessary to call 120 // ReloadableFile.RLock/ReloadableFile.RUnlock in this case. 121 122 tags, ok := b.data.Load().(*blocklistData).lookupIP[key] 123 if !ok { 124 return nil 125 } 126 return tags 127 } 128 129 // LookupDomain returns the blocklist tags for any domain that is on the 130 // blocklist, or returns nil for any domain not on the blocklist. Lookup may 131 // be called concurrently. The caller must not modify the return value. 132 func (b *Blocklist) LookupDomain(domain string) []BlocklistTag { 133 134 if atomic.LoadInt32(&b.loaded) != 1 { 135 return nil 136 } 137 138 // Domains parsed out of DNS queries will be fully-qualified domain names, 139 // while list entries do not end in a dot. 140 if len(domain) > 0 && domain[len(domain)-1] == '.' { 141 domain = domain[:len(domain)-1] 142 } 143 144 tags, ok := b.data.Load().(*blocklistData).lookupDomain[domain] 145 if !ok { 146 return nil 147 } 148 return tags 149 } 150 151 func loadBlocklistFromFile(filename string) (*blocklistData, error) { 152 153 data := newBlocklistData() 154 155 file, err := os.Open(filename) 156 if err != nil { 157 return nil, errors.Trace(err) 158 } 159 defer file.Close() 160 161 reader := csv.NewReader(file) 162 163 reader.FieldsPerRecord = 3 164 reader.Comment = '#' 165 reader.ReuseRecord = true 166 167 for { 168 record, err := reader.Read() 169 170 if err == io.EOF { 171 break 172 } else if err != nil { 173 return nil, errors.Trace(err) 174 } 175 176 // Intern the source and subject strings so we only store one copy of 177 // each in memory. These values are expected to repeat often. 178 source := data.internString(record[1]) 179 subject := data.internString(record[2]) 180 181 tag := BlocklistTag{ 182 Source: source, 183 Subject: subject, 184 } 185 186 IPAddress := net.ParseIP(record[0]) 187 if IPAddress != nil { 188 189 IPAddress16 := IPAddress.To16() 190 if IPAddress16 == nil { 191 return nil, errors.Tracef("invalid IP address: %s", record[0]) 192 } 193 194 var key [net.IPv6len]byte 195 copy(key[:], IPAddress16) 196 197 tags := data.lookupIP[key] 198 199 found := false 200 for _, existingTag := range tags { 201 if tag == existingTag { 202 found = true 203 break 204 } 205 } 206 207 if !found { 208 data.lookupIP[key] = append(tags, tag) 209 } 210 211 } else { 212 213 if _, ok := dns.IsDomainName(record[0]); !ok { 214 return nil, errors.Tracef("invalid domain name: %s", record[0]) 215 } 216 217 key := record[0] 218 219 tags := data.lookupDomain[key] 220 221 found := false 222 for _, existingTag := range tags { 223 if tag == existingTag { 224 found = true 225 break 226 } 227 } 228 229 if !found { 230 data.lookupDomain[key] = append(tags, tag) 231 } 232 } 233 } 234 235 return data, nil 236 } 237 238 func newBlocklistData() *blocklistData { 239 return &blocklistData{ 240 lookupIP: make(map[[net.IPv6len]byte][]BlocklistTag), 241 lookupDomain: make(map[string][]BlocklistTag), 242 internedStrings: make(map[string]string), 243 } 244 } 245 246 func (data *blocklistData) internString(str string) string { 247 if internedStr, ok := data.internedStrings[str]; ok { 248 return internedStr 249 } 250 data.internedStrings[str] = str 251 return str 252 }