github.com/prebid/prebid-server/v2@v2.18.0/privacysandbox/topics.go (about)

     1  package privacysandbox
     2  
     3  import (
     4  	"encoding/json"
     5  	"errors"
     6  	"fmt"
     7  	"strconv"
     8  	"strings"
     9  
    10  	"github.com/prebid/openrtb/v20/openrtb2"
    11  	"github.com/prebid/prebid-server/v2/errortypes"
    12  	"github.com/prebid/prebid-server/v2/util/jsonutil"
    13  )
    14  
// Topic is one group of topic segment IDs sharing the same taxonomy (segtax)
// and classifier (segclass). It is produced by parsing a Sec-Browsing-Topics
// header field and is also the shape used to decode/encode the ext of an
// openrtb2 user.data entry in UpdateUserDataWithTopics.
type Topic struct {
	// SegTax is the segment taxonomy id; when parsed from the header it is
	// derived as 600 + (taxonomy version - 1).
	SegTax   int    `json:"segtax,omitempty"`
	// SegClass is the classifier string taken from the last ':'-separated
	// part of the header field's version section.
	SegClass string `json:"segclass,omitempty"`
	// SegIDs holds the positive integer segment ids listed between the
	// parentheses of the header field.
	SegIDs   []int  `json:"segids,omitempty"`
}
    20  
    21  // ParseTopicsFromHeader parses the Sec-Browsing-Topics header data into Topics object
    22  func ParseTopicsFromHeader(secBrowsingTopics string) ([]Topic, []error) {
    23  	topics := make([]Topic, 0, 10)
    24  	var warnings []error
    25  
    26  	for _, field := range strings.Split(secBrowsingTopics, ",") {
    27  		field = strings.TrimSpace(field)
    28  		if field == "" || strings.HasPrefix(field, "();p=") {
    29  			continue
    30  		}
    31  
    32  		if len(topics) < 10 {
    33  			if topic, ok := parseTopicSegment(field); ok {
    34  				topics = append(topics, topic)
    35  			} else {
    36  				warnings = append(warnings, formatWarning(field))
    37  			}
    38  		} else {
    39  			warnings = append(warnings, formatWarning(field+" discarded due to limit reached."))
    40  		}
    41  	}
    42  
    43  	return topics, warnings
    44  }
    45  
    46  // parseTopicSegment parses a single topic segment from the header into Topics object
    47  func parseTopicSegment(field string) (Topic, bool) {
    48  	segment := strings.Split(field, ";")
    49  	if len(segment) != 2 {
    50  		return Topic{}, false
    51  	}
    52  
    53  	segmentsIDs := strings.TrimSpace(segment[0])
    54  	if len(segmentsIDs) < 3 || segmentsIDs[0] != '(' || segmentsIDs[len(segmentsIDs)-1] != ')' {
    55  		return Topic{}, false
    56  	}
    57  
    58  	segtax, segclass := parseSegTaxSegClass(segment[1])
    59  	if segtax == 0 || segclass == "" {
    60  		return Topic{}, false
    61  	}
    62  
    63  	segIDs, err := parseSegmentIDs(segmentsIDs[1 : len(segmentsIDs)-1])
    64  	if err != nil {
    65  		return Topic{}, false
    66  	}
    67  
    68  	return Topic{
    69  		SegTax:   segtax,
    70  		SegClass: segclass,
    71  		SegIDs:   segIDs,
    72  	}, true
    73  }
    74  
    75  func parseSegTaxSegClass(seg string) (int, string) {
    76  	taxanomyModel := strings.Split(seg, ":")
    77  	if len(taxanomyModel) != 3 {
    78  		return 0, ""
    79  	}
    80  
    81  	// taxanomyModel[0] is v=browser_version, we don't need it
    82  	taxanomyVer := strings.TrimSpace(taxanomyModel[1])
    83  	taxanomy, err := strconv.Atoi(taxanomyVer)
    84  	if err != nil || taxanomy < 1 || taxanomy > 10 {
    85  		return 0, ""
    86  	}
    87  
    88  	segtax := 600 + (taxanomy - 1)
    89  	segclass := strings.TrimSpace(taxanomyModel[2])
    90  	return segtax, segclass
    91  }
    92  
    93  // parseSegmentIDs parses the segment ids from the header string into int array
    94  func parseSegmentIDs(segmentsIDs string) ([]int, error) {
    95  	var selectedSegmentIDs []int
    96  	for _, segmentID := range strings.Fields(segmentsIDs) {
    97  		segmentID = strings.TrimSpace(segmentID)
    98  		selectedSegmentID, err := strconv.Atoi(segmentID)
    99  		if err != nil || selectedSegmentID <= 0 {
   100  			return selectedSegmentIDs, errors.New("invalid segment id")
   101  		}
   102  		selectedSegmentIDs = append(selectedSegmentIDs, selectedSegmentID)
   103  	}
   104  
   105  	return selectedSegmentIDs, nil
   106  }
   107  
   108  func UpdateUserDataWithTopics(userData []openrtb2.Data, headerData []Topic, topicsDomain string) []openrtb2.Data {
   109  	if topicsDomain == "" {
   110  		return userData
   111  	}
   112  
   113  	// headerDataMap groups segIDs by segtax and segclass for faster lookup and tracking of new segIDs yet to be added to user.data
   114  	// tracking is done by removing segIDs from segIDsMap once they are added to user.data, ensuring that headerDataMap will always have unique segtax-segclass-segIDs
   115  	// the only drawback of tracking via deleting segtax-segclass from headerDataMap is that this would not track duplicate entries within user.data which is fine because we are only merging header data with the provided user.data
   116  	headerDataMap := createHeaderDataMap(headerData)
   117  
   118  	for i, data := range userData {
   119  		ext := &Topic{}
   120  		err := json.Unmarshal(data.Ext, ext)
   121  		if err != nil {
   122  			continue
   123  		}
   124  
   125  		if ext.SegTax == 0 || ext.SegClass == "" {
   126  			continue
   127  		}
   128  
   129  		if newSegIDs := findNewSegIDs(data.Name, topicsDomain, *ext, data.Segment, headerDataMap); newSegIDs != nil {
   130  			for _, segID := range newSegIDs {
   131  				userData[i].Segment = append(userData[i].Segment, openrtb2.Segment{ID: strconv.Itoa(segID)})
   132  			}
   133  
   134  			delete(headerDataMap[ext.SegTax], ext.SegClass)
   135  		}
   136  	}
   137  
   138  	for segTax, segClassMap := range headerDataMap {
   139  		for segClass, segIDs := range segClassMap {
   140  			if len(segIDs) != 0 {
   141  				data := openrtb2.Data{
   142  					Name: topicsDomain,
   143  				}
   144  
   145  				var err error
   146  				data.Ext, err = jsonutil.Marshal(Topic{SegTax: segTax, SegClass: segClass})
   147  				if err != nil {
   148  					continue
   149  				}
   150  
   151  				for segID := range segIDs {
   152  					data.Segment = append(data.Segment, openrtb2.Segment{
   153  						ID: strconv.Itoa(segID),
   154  					})
   155  				}
   156  
   157  				userData = append(userData, data)
   158  			}
   159  		}
   160  	}
   161  
   162  	return userData
   163  }
   164  
   165  // createHeaderDataMap creates a map of header data (segtax-segclass-segIDs) for faster lookup
   166  // topicsdomain is not needed as we are only interested data from one domain configured in host config
   167  func createHeaderDataMap(headerData []Topic) map[int]map[string]map[int]struct{} {
   168  	headerDataMap := make(map[int]map[string]map[int]struct{})
   169  
   170  	for _, topic := range headerData {
   171  		segClassMap, ok := headerDataMap[topic.SegTax]
   172  		if !ok {
   173  			segClassMap = make(map[string]map[int]struct{})
   174  			headerDataMap[topic.SegTax] = segClassMap
   175  		}
   176  
   177  		segIDsMap, ok := segClassMap[topic.SegClass]
   178  		if !ok {
   179  			segIDsMap = make(map[int]struct{})
   180  			segClassMap[topic.SegClass] = segIDsMap
   181  		}
   182  
   183  		for _, segID := range topic.SegIDs {
   184  			segIDsMap[segID] = struct{}{}
   185  		}
   186  	}
   187  
   188  	return headerDataMap
   189  }
   190  
   191  // findNewSegIDs merge unique segIDs in single user.data if request.user.data and header data match. i.e. segclass, segtax and topicsdomain match
   192  func findNewSegIDs(dataName, topicsDomain string, userData Topic, userDataSegments []openrtb2.Segment, headerDataMap map[int]map[string]map[int]struct{}) []int {
   193  	if dataName != topicsDomain {
   194  		return nil
   195  	}
   196  
   197  	segClassMap, exists := headerDataMap[userData.SegTax]
   198  	if !exists {
   199  		return nil
   200  	}
   201  
   202  	segIDsMap, exists := segClassMap[userData.SegClass]
   203  	if !exists {
   204  		return nil
   205  	}
   206  
   207  	// remove existing segIDs entries
   208  	for _, segID := range userDataSegments {
   209  		if id, err := strconv.Atoi(segID.ID); err == nil {
   210  			delete(segIDsMap, id)
   211  		}
   212  	}
   213  
   214  	// collect remaining segIDs
   215  	segIDs := make([]int, 0, len(segIDsMap))
   216  	for segID := range segIDsMap {
   217  		segIDs = append(segIDs, segID)
   218  	}
   219  
   220  	return segIDs
   221  }
   222  
   223  func formatWarning(msg string) error {
   224  	return &errortypes.DebugWarning{
   225  		WarningCode: errortypes.SecBrowsingTopicsWarningCode,
   226  		Message:     fmt.Sprintf("Invalid field in Sec-Browsing-Topics header: %s", msg),
   227  	}
   228  }