github.com/prebid/prebid-server@v0.275.0/gdpr/vendorlist-fetching.go (about)

     1  package gdpr
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"net/http"
     8  	"strconv"
     9  	"sync"
    10  	"sync/atomic"
    11  	"time"
    12  
    13  	"github.com/golang/glog"
    14  	"github.com/prebid/go-gdpr/api"
    15  	"github.com/prebid/go-gdpr/vendorlist"
    16  	"github.com/prebid/go-gdpr/vendorlist2"
    17  	"github.com/prebid/prebid-server/config"
    18  	"golang.org/x/net/context/ctxhttp"
    19  )
    20  
    21  type saveVendors func(uint16, uint16, api.VendorList)
    22  type VendorListFetcher func(ctx context.Context, specVersion uint16, listVersion uint16) (vendorlist.VendorList, error)
    23  
// This file provides the vendorlist-fetching function for Prebid Server.
//
// For more info, see https://github.com/prebid/prebid-server/issues/504
//
// Only NewVendorListFetcher, VendorListFetcher and VendorListURLMaker are exported from this file.
// Other public APIs can be found in gdpr.go
    29  
    30  func NewVendorListFetcher(initCtx context.Context, cfg config.GDPR, client *http.Client, urlMaker func(uint16, uint16) string) VendorListFetcher {
    31  	cacheSave, cacheLoad := newVendorListCache()
    32  
    33  	preloadContext, cancel := context.WithTimeout(initCtx, cfg.Timeouts.InitTimeout())
    34  	defer cancel()
    35  	preloadCache(preloadContext, client, urlMaker, cacheSave)
    36  
    37  	saveOneRateLimited := newOccasionalSaver(cfg.Timeouts.ActiveTimeout())
    38  	return func(ctx context.Context, specVersion, listVersion uint16) (vendorlist.VendorList, error) {
    39  		// Attempt To Load From Cache
    40  		if list := cacheLoad(specVersion, listVersion); list != nil {
    41  			return list, nil
    42  		}
    43  
    44  		// Attempt To Download
    45  		// - May not add to cache immediately.
    46  		saveOneRateLimited(ctx, client, urlMaker(specVersion, listVersion), cacheSave)
    47  
    48  		// Attempt To Load From Cache Again
    49  		// - May have been added by the call to saveOneRateLimited.
    50  		if list := cacheLoad(specVersion, listVersion); list != nil {
    51  			return list, nil
    52  		}
    53  
    54  		// Give Up
    55  		return nil, makeVendorListNotFoundError(specVersion, listVersion)
    56  	}
    57  }
    58  
    59  func makeVendorListNotFoundError(specVersion, listVersion uint16) error {
    60  	return fmt.Errorf("gdpr vendor list spec version %d list version %d does not exist, or has not been loaded yet. Try again in a few minutes", specVersion, listVersion)
    61  }
    62  
    63  // preloadCache saves all the known versions of the vendor list for future use.
    64  func preloadCache(ctx context.Context, client *http.Client, urlMaker func(uint16, uint16) string, saver saveVendors) {
    65  	versions := [2]struct {
    66  		specVersion      uint16
    67  		firstListVersion uint16
    68  	}{
    69  		{
    70  			specVersion:      2,
    71  			firstListVersion: 2, // The GVL for TCF2 has no vendors defined in its first version. It's very unlikely to be used, so don't preload it.
    72  		},
    73  		{
    74  			specVersion:      3,
    75  			firstListVersion: 1,
    76  		},
    77  	}
    78  	for _, v := range versions {
    79  		latestVersion := saveOne(ctx, client, urlMaker(v.specVersion, 0), saver)
    80  
    81  		for i := v.firstListVersion; i < latestVersion; i++ {
    82  			saveOne(ctx, client, urlMaker(v.specVersion, i), saver)
    83  		}
    84  	}
    85  }
    86  
    87  // Make a URL which can be used to fetch a given version of the Global Vendor List. If the version is 0,
    88  // this will fetch the latest version.
    89  func VendorListURLMaker(specVersion, listVersion uint16) string {
    90  	if listVersion == 0 {
    91  		return "https://vendor-list.consensu.org/v" + strconv.Itoa(int(specVersion)) + "/vendor-list.json"
    92  	}
    93  	return "https://vendor-list.consensu.org/v" + strconv.Itoa(int(specVersion)) + "/archives/vendor-list-v" + strconv.Itoa(int(listVersion)) + ".json"
    94  }
    95  
    96  // newOccasionalSaver returns a wrapped version of saveOne() which only activates every few minutes.
    97  //
    98  // The goal here is to update quickly when new versions of the VendorList are released, but not wreck
    99  // server performance if a bad CMP starts sending us malformed consent strings that advertize a version
   100  // that doesn't exist yet.
   101  func newOccasionalSaver(timeout time.Duration) func(ctx context.Context, client *http.Client, url string, saver saveVendors) {
   102  	lastSaved := &atomic.Value{}
   103  	lastSaved.Store(time.Time{})
   104  
   105  	return func(ctx context.Context, client *http.Client, url string, saver saveVendors) {
   106  		now := time.Now()
   107  		timeSinceLastSave := now.Sub(lastSaved.Load().(time.Time))
   108  
   109  		if timeSinceLastSave.Minutes() > 10 {
   110  			withTimeout, cancel := context.WithTimeout(ctx, timeout)
   111  			defer cancel()
   112  			saveOne(withTimeout, client, url, saver)
   113  			lastSaved.Store(now)
   114  		}
   115  	}
   116  }
   117  
   118  func saveOne(ctx context.Context, client *http.Client, url string, saver saveVendors) uint16 {
   119  	req, err := http.NewRequest("GET", url, nil)
   120  	if err != nil {
   121  		glog.Errorf("Failed to build GET %s request. Cookie syncs may be affected: %v", url, err)
   122  		return 0
   123  	}
   124  
   125  	resp, err := ctxhttp.Do(ctx, client, req)
   126  	if err != nil {
   127  		glog.Errorf("Error calling GET %s. Cookie syncs may be affected: %v", url, err)
   128  		return 0
   129  	}
   130  	defer resp.Body.Close()
   131  
   132  	respBody, err := io.ReadAll(resp.Body)
   133  	if err != nil {
   134  		glog.Errorf("Error reading response body from GET %s. Cookie syncs may be affected: %v", url, err)
   135  		return 0
   136  	}
   137  	if resp.StatusCode != http.StatusOK {
   138  		glog.Errorf("GET %s returned %d. Cookie syncs may be affected.", url, resp.StatusCode)
   139  		return 0
   140  	}
   141  	var newList api.VendorList
   142  	newList, err = vendorlist2.ParseEagerly(respBody)
   143  	if err != nil {
   144  		glog.Errorf("GET %s returned malformed JSON. Cookie syncs may be affected. Error was %v. Body was %s", url, err, string(respBody))
   145  		return 0
   146  	}
   147  
   148  	saver(newList.SpecVersion(), newList.Version(), newList)
   149  	return newList.Version()
   150  }
   151  
   152  func newVendorListCache() (save func(specVersion, listVersion uint16, list api.VendorList), load func(specVersion, listVersion uint16) api.VendorList) {
   153  	cache := &sync.Map{}
   154  
   155  	save = func(specVersion uint16, listVersion uint16, list api.VendorList) {
   156  		key := fmt.Sprint(specVersion) + "-" + fmt.Sprint(listVersion)
   157  		cache.Store(key, list)
   158  	}
   159  
   160  	load = func(specVersion, listVersion uint16) api.VendorList {
   161  		key := fmt.Sprint(specVersion) + "-" + fmt.Sprint(listVersion)
   162  		list, ok := cache.Load(key)
   163  		if ok {
   164  			return list.(vendorlist.VendorList)
   165  		}
   166  		return nil
   167  	}
   168  	return
   169  }