github.com/database64128/shadowsocks-go@v1.10.2-0.20240315062903-143a773533f1/domainset/domainset.go (about) 1 package domainset 2 3 import ( 4 "bufio" 5 "encoding/gob" 6 "errors" 7 "fmt" 8 "io" 9 "strconv" 10 "strings" 11 12 "github.com/database64128/shadowsocks-go/bytestrings" 13 "github.com/database64128/shadowsocks-go/mmap" 14 ) 15 16 const ( 17 capacityHintPrefix = "# shadowsocks-go domain set capacity hint " 18 capacityHintPrefixLen = len(capacityHintPrefix) 19 capacityHintSuffix = "DSKR" 20 ) 21 22 const ( 23 domainPrefix = "domain:" 24 suffixPrefix = "suffix:" 25 keywordPrefix = "keyword:" 26 regexpPrefix = "regexp:" 27 domainPrefixLen = len(domainPrefix) 28 suffixPrefixLen = len(suffixPrefix) 29 keywordPrefixLen = len(keywordPrefix) 30 regexpPrefixLen = len(regexpPrefix) 31 ) 32 33 var errEmptySet = errors.New("empty domain set") 34 35 // Config is the configuration for a DomainSet. 36 type Config struct { 37 Name string `json:"name"` 38 Type string `json:"type"` 39 Path string `json:"path"` 40 } 41 42 // DomainSet creates a DomainSet from the configuration. 43 func (dsc Config) DomainSet() (DomainSet, error) { 44 data, err := mmap.ReadFile[string](dsc.Path) 45 if err != nil { 46 return nil, fmt.Errorf("failed to load domain set %s: %w", dsc.Name, err) 47 } 48 defer mmap.Unmap(data) 49 50 var dsb Builder 51 52 switch dsc.Type { 53 case "text", "": 54 dsb, err = BuilderFromTextFast(data) 55 case "gob": 56 r := strings.NewReader(data) 57 dsb, err = BuilderFromGob(r) 58 default: 59 err = fmt.Errorf("invalid domain set type: %s", dsc.Type) 60 } 61 62 if err != nil { 63 return nil, fmt.Errorf("failed to load domain set %s: %w", dsc.Name, err) 64 } 65 66 return dsb.DomainSet() 67 } 68 69 // Builder stores the content of a domain set and 70 // provides methods for writing in different formats. 71 type Builder [4]MatcherBuilder 72 73 func (dsb Builder) DomainSet() (DomainSet, error) { 74 var capacity int 75 for _, mb := range dsb { 76 capacity += mb.MatcherCount() 77 } 78 ds := make(DomainSet, 0, capacity) 79 var err error 80 for _, mb := range dsb { 81 ds, err = mb.AppendTo(ds) 82 if err != nil { 83 return nil, err 84 } 85 } 86 return ds, nil 87 } 88 89 func (dsb Builder) WriteGob(w io.Writer) error { 90 return BuilderGobFromBuilder(dsb).WriteGob(w) 91 } 92 93 func (dsb Builder) WriteText(w io.Writer) error { 94 bw := bufio.NewWriter(w) 95 domains := dsb[0].Rules() 96 suffixes := dsb[1].Rules() 97 keywords := dsb[2].Rules() 98 regexps := dsb[3].Rules() 99 capacityHint := fmt.Sprintf("%s%d %d %d %d %s\n", capacityHintPrefix, len(domains), len(suffixes), len(keywords), len(regexps), capacityHintSuffix) 100 101 bw.WriteString(capacityHint) 102 103 for _, d := range domains { 104 bw.WriteString(domainPrefix) 105 bw.WriteString(d) 106 bw.WriteByte('\n') 107 } 108 109 for _, s := range suffixes { 110 bw.WriteString(suffixPrefix) 111 bw.WriteString(s) 112 bw.WriteByte('\n') 113 } 114 115 for _, k := range keywords { 116 bw.WriteString(keywordPrefix) 117 bw.WriteString(k) 118 bw.WriteByte('\n') 119 } 120 121 for _, r := range regexps { 122 bw.WriteString(regexpPrefix) 123 bw.WriteString(r) 124 bw.WriteByte('\n') 125 } 126 127 return bw.Flush() 128 } 129 130 func BuilderFromGob(r io.Reader) (Builder, error) { 131 bg, err := BuilderGobFromReader(r) 132 if err != nil { 133 return Builder{}, err 134 } 135 return bg.Builder(), nil 136 } 137 138 func BuilderFromText(text string) (Builder, error) { 139 return BuilderFromTextFunc(text, NewDomainMapMatcher, NewDomainSuffixTrie, NewKeywordLinearMatcher, NewRegexpMatcherBuilder) 140 } 141 142 func BuilderFromTextFast(text string) (Builder, error) { 143 return BuilderFromTextFunc(text, NewDomainMapMatcher, NewSuffixMapMatcher, NewKeywordLinearMatcher, NewRegexpMatcherBuilder) 144 } 145 146 func BuilderFromTextFunc( 147 text string, 148 newDomainMatcherBuilderFunc, 149 newSuffixMatcherBuilderFunc, 150 newKeywordMatcherBuilderFunc, 151 newRegexpMatcherBuilderFunc func(int) MatcherBuilder, 152 ) (Builder, error) { 153 line, text := bytestrings.NextNonEmptyLine(text) 154 if len(line) == 0 { 155 return Builder{}, errEmptySet 156 } 157 158 dskr, found, err := ParseCapacityHint(line) 159 if err != nil { 160 return Builder{}, err 161 } 162 if found { 163 line, text = bytestrings.NextNonEmptyLine(text) 164 if len(line) == 0 { 165 return Builder{}, errEmptySet 166 } 167 } 168 169 dsb := Builder{ 170 newDomainMatcherBuilderFunc(dskr[0]), 171 newSuffixMatcherBuilderFunc(dskr[1]), 172 newKeywordMatcherBuilderFunc(dskr[2]), 173 newRegexpMatcherBuilderFunc(dskr[3]), 174 } 175 176 for { 177 // domainPrefixLen == suffixPrefixLen == regexpPrefixLen == 7 178 if len(line) <= 7 { 179 if line[0] != '#' { 180 return dsb, fmt.Errorf("invalid line: %s", line) 181 } 182 goto next 183 } 184 185 switch line[:7] { 186 case suffixPrefix: 187 dsb[1].Insert(strings.Clone(line[7:])) 188 case domainPrefix: 189 dsb[0].Insert(strings.Clone(line[7:])) 190 case regexpPrefix: 191 dsb[3].Insert(strings.Clone(line[7:])) 192 default: 193 switch { 194 case len(line) > keywordPrefixLen && string(line[:keywordPrefixLen]) == keywordPrefix: 195 dsb[2].Insert(strings.Clone(line[keywordPrefixLen:])) 196 case line[0] != '#': 197 return dsb, fmt.Errorf("invalid line: %s", line) 198 } 199 } 200 201 next: 202 line, text = bytestrings.NextNonEmptyLine(text) 203 if len(line) == 0 { 204 break 205 } 206 } 207 208 return dsb, nil 209 } 210 211 func ParseCapacityHint(line string) ([4]int, bool, error) { 212 var dskr [4]int 213 214 found := len(line) > capacityHintPrefixLen && line[:capacityHintPrefixLen] == capacityHintPrefix 215 if found { 216 h := line[capacityHintPrefixLen:] 217 218 for i := range dskr { 219 delimiterIndex := strings.IndexByte(h, ' ') 220 if delimiterIndex == -1 { 221 return dskr, found, fmt.Errorf("bad capacity hint: %s", line) 222 } 223 224 c, err := strconv.Atoi(h[:delimiterIndex]) 225 if err != nil { 226 return dskr, found, fmt.Errorf("bad capacity hint: %s: %w", line, err) 227 } 228 if c < 0 { 229 return dskr, found, fmt.Errorf("bad capacity hint: %s: capacity cannot be negative", line) 230 } 231 dskr[i] = c 232 h = h[delimiterIndex+1:] 233 } 234 235 if h != capacityHintSuffix { 236 return dskr, found, fmt.Errorf("bad capacity hint: %s: expected suffix '%s'", line, capacityHintSuffix) 237 } 238 } 239 240 return dskr, found, nil 241 } 242 243 // BuilderGob is the builder's gob serialization structure. 244 type BuilderGob struct { 245 Domains DomainMapMatcher 246 Suffixes *DomainSuffixTrie 247 Keywords KeywordLinearMatcher 248 Regexps RegexpMatcherBuilder 249 } 250 251 func (bg BuilderGob) Builder() Builder { 252 return Builder{&bg.Domains, bg.Suffixes, &bg.Keywords, &bg.Regexps} 253 } 254 255 func (bg BuilderGob) WriteGob(w io.Writer) error { 256 return gob.NewEncoder(w).Encode(bg) 257 } 258 259 func BuilderGobFromBuilder(dsb Builder) (bg BuilderGob) { 260 switch d := dsb[0].(type) { 261 case *DomainMapMatcher: 262 bg.Domains = *d 263 default: 264 bg.Domains = DomainMapMatcherFromSlice(d.Rules()) 265 } 266 267 switch s := dsb[1].(type) { 268 case *DomainSuffixTrie: 269 bg.Suffixes = s 270 default: 271 bg.Suffixes = DomainSuffixTrieFromSlice(s.Rules()) 272 } 273 274 switch k := dsb[2].(type) { 275 case *KeywordLinearMatcher: 276 bg.Keywords = *k 277 default: 278 bg.Keywords = KeywordLinearMatcher(k.Rules()) 279 } 280 281 switch r := dsb[3].(type) { 282 case *RegexpMatcherBuilder: 283 bg.Regexps = *r 284 default: 285 bg.Regexps = RegexpMatcherBuilder(r.Rules()) 286 } 287 288 return bg 289 } 290 291 func BuilderGobFromReader(r io.Reader) (bg BuilderGob, err error) { 292 err = gob.NewDecoder(r).Decode(&bg) 293 return 294 } 295 296 // DomainSet is a set of domain matchers built from matching rules. 297 type DomainSet []Matcher 298 299 // Match returns whether the domain set contains the domain. 300 func (ds DomainSet) Match(domain string) bool { 301 for _, m := range ds { 302 if m.Match(domain) { 303 return true 304 } 305 } 306 return false 307 }