github.com/keybase/client/go@v0.0.0-20240309051027-028f7c731f8b/chat/unfurl/extractor.go (about)

     1  package unfurl
     2  
     3  import (
     4  	"context"
     5  	"regexp"
     6  	"sync"
     7  
     8  	"mvdan.cc/xurls/v2"
     9  
    10  	"github.com/keybase/client/go/chat/globals"
    11  	"github.com/keybase/client/go/chat/types"
    12  	"github.com/keybase/client/go/chat/utils"
    13  	"github.com/keybase/client/go/protocol/chat1"
    14  	"github.com/keybase/client/go/protocol/gregor1"
    15  )
    16  
    17  type ExtractorHitTyp int
    18  
    19  const (
    20  	ExtractorHitUnfurl ExtractorHitTyp = iota
    21  	ExtractorHitPrompt
    22  )
    23  
// ExtractorHit is one URL found by Extractor.Extract together with the
// classification Extract assigned to it.
type ExtractorHit struct {
	// URL is the matched text exactly as the URL regexp returned it.
	URL string
	// Typ is the classification of the hit (ExtractorHitUnfurl or
	// ExtractorHitPrompt).
	Typ ExtractorHitTyp
}
    28  
// Extractor finds URLs in message bodies and classifies each one against a
// user's unfurl settings (see Extract). It also holds the per-user whitelist
// exemption lists consulted while classifying.
type Extractor struct {
	utils.DebugLabeler

	// urlRegexp locates URLs in a body; NewExtractor sets it to
	// xurls.Strict().
	urlRegexp *regexp.Regexp
	// quoteRegexp matches backtick-delimited spans; Extract deletes every
	// match from the body before searching for URLs.
	quoteRegexp *regexp.Regexp
	// maxHits is the number of collected hits at which Extract stops.
	maxHits int
	// exemptionsLock is held by getExemptionList while it reads or writes
	// the exemptions map.
	exemptionsLock sync.Mutex
	// exemptions maps uid.String() to that user's exemption list; entries
	// are created on first use by getExemptionList.
	exemptions map[string]*WhitelistExemptionList
}
    38  
    39  func NewExtractor(g *globals.Context) *Extractor {
    40  	return &Extractor{
    41  		DebugLabeler: utils.NewDebugLabeler(g.ExternalG(), "Extractor", false),
    42  		urlRegexp:    xurls.Strict(),
    43  		quoteRegexp:  regexp.MustCompile("`[^`]*`"),
    44  		exemptions:   make(map[string]*WhitelistExemptionList),
    45  		maxHits:      5,
    46  	}
    47  }
    48  
    49  func (e *Extractor) getExemptionList(uid gregor1.UID) (res *WhitelistExemptionList) {
    50  	e.exemptionsLock.Lock()
    51  	defer e.exemptionsLock.Unlock()
    52  	var ok bool
    53  	res, ok = e.exemptions[uid.String()]
    54  	if !ok {
    55  		res = NewWhitelistExemptionList()
    56  		e.exemptions[uid.String()] = res
    57  	}
    58  	return res
    59  }
    60  
    61  func (e *Extractor) isAutoWhitelist(domain string) bool {
    62  	switch domain {
    63  	case "giphy.com", types.MapsDomain:
    64  		return true
    65  	}
    66  	return false
    67  }
    68  
    69  func (e *Extractor) isAutoWhitelistFromHit(ctx context.Context, hit string) bool {
    70  	domain, err := GetDomain(hit)
    71  	if err != nil {
    72  		e.Debug(ctx, "isAutoWhitelistFromHit: failed to get domain: %s", err)
    73  		return false
    74  	}
    75  	return e.isAutoWhitelist(domain)
    76  }
    77  
    78  func (e *Extractor) isWhitelistHit(ctx context.Context, convID chat1.ConversationID, msgID chat1.MessageID,
    79  	hit string, whitelist map[string]bool, exemptions *WhitelistExemptionList) bool {
    80  	domain, err := GetDomain(hit)
    81  	if err != nil {
    82  		e.Debug(ctx, "isWhitelistHit: failed to get domain: %s", err)
    83  		return false
    84  	}
    85  	if e.isAutoWhitelist(domain) || whitelist[domain] {
    86  		return true
    87  	}
    88  	// Check exemptions
    89  	if exemptions.Use(convID, msgID, domain) {
    90  		e.Debug(ctx, "isWhitelistHit: hit exemption for domain, letting through")
    91  		return true
    92  	}
    93  	return false
    94  }
    95  
    96  func (e *Extractor) Extract(ctx context.Context, uid gregor1.UID, convID chat1.ConversationID,
    97  	msgID chat1.MessageID, body string, userSettings *Settings) (res []ExtractorHit, err error) {
    98  	defer e.Trace(ctx, &err, "Extract")()
    99  	body = e.quoteRegexp.ReplaceAllString(body, "")
   100  	hits := e.urlRegexp.FindAllString(body, -1)
   101  	if len(hits) == 0 {
   102  		return res, nil
   103  	}
   104  	settings, err := userSettings.Get(ctx, uid)
   105  	if err != nil {
   106  		return res, err
   107  	}
   108  	for _, h := range hits {
   109  		ehit := ExtractorHit{
   110  			URL: h,
   111  			Typ: ExtractorHitPrompt,
   112  		}
   113  		switch settings.Mode {
   114  		case chat1.UnfurlMode_ALWAYS:
   115  			ehit.Typ = ExtractorHitUnfurl
   116  		case chat1.UnfurlMode_WHITELISTED:
   117  			if e.isWhitelistHit(ctx, convID, msgID, h, settings.Whitelist, e.getExemptionList(uid)) {
   118  				ehit.Typ = ExtractorHitUnfurl
   119  			}
   120  		case chat1.UnfurlMode_NEVER:
   121  			if e.isAutoWhitelistFromHit(ctx, h) {
   122  				ehit.Typ = ExtractorHitUnfurl
   123  			} else {
   124  				continue
   125  			}
   126  		}
   127  		res = append(res, ehit)
   128  		if len(res) >= e.maxHits {
   129  			e.Debug(ctx, "Extract: max hits reached, aborting")
   130  			break
   131  		}
   132  	}
   133  	return res, nil
   134  }
   135  
   136  func (e *Extractor) AddWhitelistExemption(ctx context.Context, uid gregor1.UID,
   137  	exemption types.WhitelistExemption) {
   138  	defer e.Trace(ctx, nil, "AddWhitelistExemption")()
   139  	e.getExemptionList(uid).Add(exemption)
   140  }