github.com/database64128/shadowsocks-go@v1.10.2-0.20240315062903-143a773533f1/domainset/domainset.go (about)

     1  package domainset
     2  
     3  import (
     4  	"bufio"
     5  	"encoding/gob"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"strconv"
    10  	"strings"
    11  
    12  	"github.com/database64128/shadowsocks-go/bytestrings"
    13  	"github.com/database64128/shadowsocks-go/mmap"
    14  )
    15  
const (
	// capacityHintPrefix starts the capacity hint line that WriteText emits
	// as the first line of a text domain set and that ParseCapacityHint
	// recognizes. It begins with '#', the same byte the text parser uses
	// to recognize comment lines.
	capacityHintPrefix    = "# shadowsocks-go domain set capacity hint "
	capacityHintPrefixLen = len(capacityHintPrefix)
	// capacityHintSuffix terminates the capacity hint line, after the four counts.
	// NOTE(review): presumably the initials of Domain, Suffix, Keyword, Regexp,
	// matching the order of the counts — confirm.
	capacityHintSuffix    = "DSKR"
)
    21  
// Line prefixes that select the rule type in the text format.
//
// BuilderFromTextFunc relies on domainPrefix, suffixPrefix and regexpPrefix
// all being 7 bytes long; keywordPrefix is one byte longer and is handled
// separately there.
const (
	domainPrefix     = "domain:"
	suffixPrefix     = "suffix:"
	keywordPrefix    = "keyword:"
	regexpPrefix     = "regexp:"
	domainPrefixLen  = len(domainPrefix)
	suffixPrefixLen  = len(suffixPrefix)
	keywordPrefixLen = len(keywordPrefix)
	regexpPrefixLen  = len(regexpPrefix)
)
    32  
// errEmptySet is returned by BuilderFromTextFunc when the text contains no
// non-empty line, or nothing after the capacity hint line.
var errEmptySet = errors.New("empty domain set")
    34  
// Config is the configuration for a DomainSet.
type Config struct {
	// Name identifies the domain set in error messages.
	Name string `json:"name"`
	// Type selects the file format: "text" or the empty string for the
	// text format, "gob" for the gob format. Any other value is rejected
	// by the DomainSet method.
	Type string `json:"type"`
	// Path is the path of the file the domain set is loaded from.
	Path string `json:"path"`
}
    41  
    42  // DomainSet creates a DomainSet from the configuration.
    43  func (dsc Config) DomainSet() (DomainSet, error) {
    44  	data, err := mmap.ReadFile[string](dsc.Path)
    45  	if err != nil {
    46  		return nil, fmt.Errorf("failed to load domain set %s: %w", dsc.Name, err)
    47  	}
    48  	defer mmap.Unmap(data)
    49  
    50  	var dsb Builder
    51  
    52  	switch dsc.Type {
    53  	case "text", "":
    54  		dsb, err = BuilderFromTextFast(data)
    55  	case "gob":
    56  		r := strings.NewReader(data)
    57  		dsb, err = BuilderFromGob(r)
    58  	default:
    59  		err = fmt.Errorf("invalid domain set type: %s", dsc.Type)
    60  	}
    61  
    62  	if err != nil {
    63  		return nil, fmt.Errorf("failed to load domain set %s: %w", dsc.Name, err)
    64  	}
    65  
    66  	return dsb.DomainSet()
    67  }
    68  
    69  // Builder stores the content of a domain set and
    70  // provides methods for writing in different formats.
    71  type Builder [4]MatcherBuilder
    72  
    73  func (dsb Builder) DomainSet() (DomainSet, error) {
    74  	var capacity int
    75  	for _, mb := range dsb {
    76  		capacity += mb.MatcherCount()
    77  	}
    78  	ds := make(DomainSet, 0, capacity)
    79  	var err error
    80  	for _, mb := range dsb {
    81  		ds, err = mb.AppendTo(ds)
    82  		if err != nil {
    83  			return nil, err
    84  		}
    85  	}
    86  	return ds, nil
    87  }
    88  
    89  func (dsb Builder) WriteGob(w io.Writer) error {
    90  	return BuilderGobFromBuilder(dsb).WriteGob(w)
    91  }
    92  
    93  func (dsb Builder) WriteText(w io.Writer) error {
    94  	bw := bufio.NewWriter(w)
    95  	domains := dsb[0].Rules()
    96  	suffixes := dsb[1].Rules()
    97  	keywords := dsb[2].Rules()
    98  	regexps := dsb[3].Rules()
    99  	capacityHint := fmt.Sprintf("%s%d %d %d %d %s\n", capacityHintPrefix, len(domains), len(suffixes), len(keywords), len(regexps), capacityHintSuffix)
   100  
   101  	bw.WriteString(capacityHint)
   102  
   103  	for _, d := range domains {
   104  		bw.WriteString(domainPrefix)
   105  		bw.WriteString(d)
   106  		bw.WriteByte('\n')
   107  	}
   108  
   109  	for _, s := range suffixes {
   110  		bw.WriteString(suffixPrefix)
   111  		bw.WriteString(s)
   112  		bw.WriteByte('\n')
   113  	}
   114  
   115  	for _, k := range keywords {
   116  		bw.WriteString(keywordPrefix)
   117  		bw.WriteString(k)
   118  		bw.WriteByte('\n')
   119  	}
   120  
   121  	for _, r := range regexps {
   122  		bw.WriteString(regexpPrefix)
   123  		bw.WriteString(r)
   124  		bw.WriteByte('\n')
   125  	}
   126  
   127  	return bw.Flush()
   128  }
   129  
   130  func BuilderFromGob(r io.Reader) (Builder, error) {
   131  	bg, err := BuilderGobFromReader(r)
   132  	if err != nil {
   133  		return Builder{}, err
   134  	}
   135  	return bg.Builder(), nil
   136  }
   137  
   138  func BuilderFromText(text string) (Builder, error) {
   139  	return BuilderFromTextFunc(text, NewDomainMapMatcher, NewDomainSuffixTrie, NewKeywordLinearMatcher, NewRegexpMatcherBuilder)
   140  }
   141  
   142  func BuilderFromTextFast(text string) (Builder, error) {
   143  	return BuilderFromTextFunc(text, NewDomainMapMatcher, NewSuffixMapMatcher, NewKeywordLinearMatcher, NewRegexpMatcherBuilder)
   144  }
   145  
   146  func BuilderFromTextFunc(
   147  	text string,
   148  	newDomainMatcherBuilderFunc,
   149  	newSuffixMatcherBuilderFunc,
   150  	newKeywordMatcherBuilderFunc,
   151  	newRegexpMatcherBuilderFunc func(int) MatcherBuilder,
   152  ) (Builder, error) {
   153  	line, text := bytestrings.NextNonEmptyLine(text)
   154  	if len(line) == 0 {
   155  		return Builder{}, errEmptySet
   156  	}
   157  
   158  	dskr, found, err := ParseCapacityHint(line)
   159  	if err != nil {
   160  		return Builder{}, err
   161  	}
   162  	if found {
   163  		line, text = bytestrings.NextNonEmptyLine(text)
   164  		if len(line) == 0 {
   165  			return Builder{}, errEmptySet
   166  		}
   167  	}
   168  
   169  	dsb := Builder{
   170  		newDomainMatcherBuilderFunc(dskr[0]),
   171  		newSuffixMatcherBuilderFunc(dskr[1]),
   172  		newKeywordMatcherBuilderFunc(dskr[2]),
   173  		newRegexpMatcherBuilderFunc(dskr[3]),
   174  	}
   175  
   176  	for {
   177  		// domainPrefixLen == suffixPrefixLen == regexpPrefixLen == 7
   178  		if len(line) <= 7 {
   179  			if line[0] != '#' {
   180  				return dsb, fmt.Errorf("invalid line: %s", line)
   181  			}
   182  			goto next
   183  		}
   184  
   185  		switch line[:7] {
   186  		case suffixPrefix:
   187  			dsb[1].Insert(strings.Clone(line[7:]))
   188  		case domainPrefix:
   189  			dsb[0].Insert(strings.Clone(line[7:]))
   190  		case regexpPrefix:
   191  			dsb[3].Insert(strings.Clone(line[7:]))
   192  		default:
   193  			switch {
   194  			case len(line) > keywordPrefixLen && string(line[:keywordPrefixLen]) == keywordPrefix:
   195  				dsb[2].Insert(strings.Clone(line[keywordPrefixLen:]))
   196  			case line[0] != '#':
   197  				return dsb, fmt.Errorf("invalid line: %s", line)
   198  			}
   199  		}
   200  
   201  	next:
   202  		line, text = bytestrings.NextNonEmptyLine(text)
   203  		if len(line) == 0 {
   204  			break
   205  		}
   206  	}
   207  
   208  	return dsb, nil
   209  }
   210  
   211  func ParseCapacityHint(line string) ([4]int, bool, error) {
   212  	var dskr [4]int
   213  
   214  	found := len(line) > capacityHintPrefixLen && line[:capacityHintPrefixLen] == capacityHintPrefix
   215  	if found {
   216  		h := line[capacityHintPrefixLen:]
   217  
   218  		for i := range dskr {
   219  			delimiterIndex := strings.IndexByte(h, ' ')
   220  			if delimiterIndex == -1 {
   221  				return dskr, found, fmt.Errorf("bad capacity hint: %s", line)
   222  			}
   223  
   224  			c, err := strconv.Atoi(h[:delimiterIndex])
   225  			if err != nil {
   226  				return dskr, found, fmt.Errorf("bad capacity hint: %s: %w", line, err)
   227  			}
   228  			if c < 0 {
   229  				return dskr, found, fmt.Errorf("bad capacity hint: %s: capacity cannot be negative", line)
   230  			}
   231  			dskr[i] = c
   232  			h = h[delimiterIndex+1:]
   233  		}
   234  
   235  		if h != capacityHintSuffix {
   236  			return dskr, found, fmt.Errorf("bad capacity hint: %s: expected suffix '%s'", line, capacityHintSuffix)
   237  		}
   238  	}
   239  
   240  	return dskr, found, nil
   241  }
   242  
   243  // BuilderGob is the builder's gob serialization structure.
   244  type BuilderGob struct {
   245  	Domains  DomainMapMatcher
   246  	Suffixes *DomainSuffixTrie
   247  	Keywords KeywordLinearMatcher
   248  	Regexps  RegexpMatcherBuilder
   249  }
   250  
   251  func (bg BuilderGob) Builder() Builder {
   252  	return Builder{&bg.Domains, bg.Suffixes, &bg.Keywords, &bg.Regexps}
   253  }
   254  
   255  func (bg BuilderGob) WriteGob(w io.Writer) error {
   256  	return gob.NewEncoder(w).Encode(bg)
   257  }
   258  
   259  func BuilderGobFromBuilder(dsb Builder) (bg BuilderGob) {
   260  	switch d := dsb[0].(type) {
   261  	case *DomainMapMatcher:
   262  		bg.Domains = *d
   263  	default:
   264  		bg.Domains = DomainMapMatcherFromSlice(d.Rules())
   265  	}
   266  
   267  	switch s := dsb[1].(type) {
   268  	case *DomainSuffixTrie:
   269  		bg.Suffixes = s
   270  	default:
   271  		bg.Suffixes = DomainSuffixTrieFromSlice(s.Rules())
   272  	}
   273  
   274  	switch k := dsb[2].(type) {
   275  	case *KeywordLinearMatcher:
   276  		bg.Keywords = *k
   277  	default:
   278  		bg.Keywords = KeywordLinearMatcher(k.Rules())
   279  	}
   280  
   281  	switch r := dsb[3].(type) {
   282  	case *RegexpMatcherBuilder:
   283  		bg.Regexps = *r
   284  	default:
   285  		bg.Regexps = RegexpMatcherBuilder(r.Rules())
   286  	}
   287  
   288  	return bg
   289  }
   290  
   291  func BuilderGobFromReader(r io.Reader) (bg BuilderGob, err error) {
   292  	err = gob.NewDecoder(r).Decode(&bg)
   293  	return
   294  }
   295  
   296  // DomainSet is a set of domain matchers built from matching rules.
   297  type DomainSet []Matcher
   298  
   299  // Match returns whether the domain set contains the domain.
   300  func (ds DomainSet) Match(domain string) bool {
   301  	for _, m := range ds {
   302  		if m.Match(domain) {
   303  			return true
   304  		}
   305  	}
   306  	return false
   307  }