git.sr.ht/~pingoo/stdx@v0.0.0-20240218134121-094174641f6e/useragent/ua.go (about)

     1  package useragent
     2  
     3  import (
     4  	"bytes"
     5  	"regexp"
     6  	"strings"
     7  )
     8  
     9  // UserAgent struct containing all data extracted from parsed user-agent string
    10  type UserAgent struct {
    11  	VersionNo   VersionNo
    12  	OSVersionNo VersionNo
    13  	URL         string
    14  	String      string
    15  	Name        string
    16  	Version     string
    17  	OS          string
    18  	OSVersion   string
    19  	Device      string
    20  	Mobile      bool
    21  	Tablet      bool
    22  	Desktop     bool
    23  	Bot         bool
    24  }
    25  
    26  // Constants for browsers and operating systems for easier comparison
    27  const (
    28  	Windows      = "Windows"
    29  	WindowsPhone = "Windows Phone"
    30  	Android      = "Android"
    31  	MacOS        = "macOS"
    32  	IOS          = "iOS"
    33  	Linux        = "Linux"
    34  	ChromeOS     = "ChromeOS"
    35  
    36  	Opera            = "Opera"
    37  	OperaMini        = "Opera Mini"
    38  	OperaTouch       = "Opera Touch"
    39  	Chrome           = "Chrome"
    40  	HeadlessChrome   = "Headless Chrome"
    41  	Firefox          = "Firefox"
    42  	InternetExplorer = "Internet Explorer"
    43  	Safari           = "Safari"
    44  	Edge             = "Edge"
    45  	Vivaldi          = "Vivaldi"
    46  
    47  	GoogleAdsBot        = "Google Ads Bot"
    48  	Googlebot           = "Googlebot"
    49  	Twitterbot          = "Twitterbot"
    50  	FacebookExternalHit = "facebookexternalhit"
    51  	Applebot            = "Applebot"
    52  	Bingbot             = "Bingbot"
    53  
    54  	FacebookApp  = "Facebook App"
    55  	InstagramApp = "Instagram App"
    56  	TiktokApp    = "TikTok App"
    57  )
    58  
// Parse tokenizes the given user-agent string and returns a UserAgent
// describing the operating system, device, browser/app/bot name and version
// it could recognize.
//
// Detection runs in fixed stages: URL extraction, OS lookup, browser/bot
// lookup, then a few post-processing rules for the Mobile/Tablet/Bot flags.
// The order of the cases inside each switch matters, because the first
// matching case wins.
func Parse(userAgent string) UserAgent {
	ua := UserAgent{
		String: userAgent,
	}

	tokens := parse(userAgent)

	// Look for a URL token; the first one found is stored in ua.URL and
	// removed from the token list so it is not mistaken for a product name.
	for i, token := range tokens.list {
		if strings.HasPrefix(token.Key, "http://") || strings.HasPrefix(token.Key, "https://") {
			ua.URL = token.Key
			tokens.list = append(tokens.list[:i], tokens.list[i+1:]...)
			break
		}
	}

	// OS lookup
	switch {
	case tokens.exists("Android"):
		ua.OS = Android
		var osIndex int
		osIndex, ua.OSVersion = tokens.getIndexValue(Android)
		ua.Tablet = strings.Contains(strings.ToLower(ua.String), "tablet")
		// findAndroidDevice may modify the token list (it removes or renames
		// the device token).
		ua.Device = tokens.findAndroidDevice(osIndex)

	case tokens.exists("iPhone"):
		ua.OS = IOS
		ua.OSVersion = tokens.findMacOSVersion()
		ua.Device = "iPhone"
		ua.Mobile = true

	case tokens.exists("iPad"):
		ua.OS = IOS
		ua.OSVersion = tokens.findMacOSVersion()
		ua.Device = "iPad"
		ua.Tablet = true

	case tokens.exists("Windows NT"):
		ua.OS = Windows
		ua.OSVersion = tokens.get("Windows NT")
		ua.Desktop = true

	case tokens.exists("Windows Phone OS"):
		ua.OS = WindowsPhone
		ua.OSVersion = tokens.get("Windows Phone OS")
		ua.Mobile = true

	case tokens.exists("Macintosh"):
		ua.OS = MacOS
		ua.OSVersion = tokens.findMacOSVersion()
		ua.Desktop = true

	case tokens.exists("Linux"):
		ua.OS = Linux
		ua.OSVersion = tokens.get(Linux)
		ua.Desktop = true

	case tokens.exists("CrOS"):
		ua.OS = ChromeOS
		ua.OSVersion = tokens.get("CrOS")
		ua.Desktop = true
	}

	// Browser, app and bot lookup. More specific tokens are checked before
	// the generic Chrome/Safari ones.
	switch {
	case tokens.exists("Googlebot"):
		ua.Name = Googlebot
		ua.Version = tokens.get(Googlebot)
		ua.Bot = true
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")

	case tokens.exists("Applebot"):
		ua.Name = Applebot
		ua.Version = tokens.get(Applebot)
		ua.Bot = true
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")
		// Discard the OS name detected above (only OS is cleared here).
		ua.OS = ""

	case tokens.get("Opera Mini") != "":
		ua.Name = OperaMini
		ua.Version = tokens.get(OperaMini)
		ua.Mobile = true

	case tokens.get("OPR") != "":
		ua.Name = Opera
		ua.Version = tokens.get("OPR")
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")

	case tokens.get("OPT") != "":
		ua.Name = OperaTouch
		ua.Version = tokens.get("OPT")
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")

	// Opera on iOS
	case tokens.get("OPiOS") != "":
		ua.Name = Opera
		ua.Version = tokens.get("OPiOS")
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")

	// Chrome on iOS
	case tokens.get("CriOS") != "":
		ua.Name = Chrome
		ua.Version = tokens.get("CriOS")
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")

	// Firefox on iOS
	case tokens.get("FxiOS") != "":
		ua.Name = Firefox
		ua.Version = tokens.get("FxiOS")
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")

	case tokens.get("Firefox") != "":
		ua.Name = Firefox
		ua.Version = tokens.get(Firefox)
		ua.Mobile = tokens.exists("Mobile")
		// Overwrites any Tablet value set during the OS lookup.
		ua.Tablet = tokens.exists("Tablet")

	case tokens.get("Vivaldi") != "":
		ua.Name = Vivaldi
		ua.Version = tokens.get(Vivaldi)

	case tokens.exists("MSIE"):
		ua.Name = InternetExplorer
		ua.Version = tokens.get("MSIE")

	case tokens.get("EdgiOS") != "":
		ua.Name = Edge
		ua.Version = tokens.get("EdgiOS")
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")

	case tokens.get("Edge") != "":
		ua.Name = Edge
		ua.Version = tokens.get("Edge")
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")

	case tokens.get("Edg") != "":
		ua.Name = Edge
		ua.Version = tokens.get("Edg")
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")

	case tokens.get("EdgA") != "":
		ua.Name = Edge
		ua.Version = tokens.get("EdgA")
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")

	case tokens.get("bingbot") != "":
		ua.Name = Bingbot
		ua.Version = tokens.get("bingbot")
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")

	case tokens.get("YandexBot") != "":
		ua.Name = "YandexBot"
		ua.Version = tokens.get("YandexBot")
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")

	case tokens.get("SamsungBrowser") != "":
		ua.Name = "Samsung Browser"
		ua.Version = tokens.get("SamsungBrowser")
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")

	case tokens.get("HeadlessChrome") != "":
		ua.Name = HeadlessChrome
		ua.Version = tokens.get("HeadlessChrome")
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")
		ua.Bot = true

	case tokens.existsAny("AdsBot-Google-Mobile", "Mediapartners-Google", "AdsBot-Google"):
		ua.Name = GoogleAdsBot
		ua.Bot = true
		ua.Mobile = ua.IsAndroid() || ua.IsIOS()

	case tokens.exists("Yahoo Ad monitoring"):
		ua.Name = "Yahoo Ad monitoring"
		ua.Bot = true
		ua.Mobile = ua.IsAndroid() || ua.IsIOS()

	case tokens.exists("XiaoMi"):
		// Only recognized when the XiaoMi token value starts with
		// "MiuiBrowser"; otherwise Name and Version stay empty.
		miui := tokens.get("XiaoMi")
		if strings.HasPrefix(miui, "MiuiBrowser") {
			ua.Name = "Miui Browser"
			ua.Version = strings.TrimPrefix(miui, "MiuiBrowser/")
			ua.Mobile = true
		}

	case tokens.exists("FBAN"):
		ua.Name = FacebookApp
		ua.Version = tokens.get("FBAN")
	case tokens.exists("FB_IAB"):
		// The version is read from the separate FBAV token.
		ua.Name = FacebookApp
		ua.Version = tokens.get("FBAV")

	case tokens.startsWith("Instagram"):
		ua.Name = InstagramApp
		ua.Version = tokens.findInstagramVersion()

	case tokens.exists("BytedanceWebview"):
		ua.Name = TiktokApp
		ua.Version = tokens.get("app_version")

	case tokens.get("HuaweiBrowser") != "":
		ua.Name = "Huawei Browser"
		ua.Version = tokens.get("HuaweiBrowser")
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")

	// If both Chrome and Safari tokens are present, prefer any other token
	// that carries a version value; otherwise fall through to plain Chrome.
	case tokens.exists(Chrome) && tokens.exists(Safari):
		name := tokens.findBestMatch(true)
		if name != "" {
			ua.Name = name
			ua.Version = tokens.get(name)
			break
		}
		fallthrough

	case tokens.exists("Chrome"):
		ua.Name = Chrome
		ua.Version = tokens.get("Chrome")
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")

	case tokens.exists("Brave Chrome"):
		ua.Name = Chrome
		ua.Version = tokens.get("Brave Chrome")
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")

	case tokens.exists("Safari"):
		ua.Name = Safari
		// Prefer the "Version" token; use the Safari token value only when
		// no Version token value is available.
		v := tokens.get("Version")
		if v != "" {
			ua.Version = v
		} else {
			ua.Version = tokens.get("Safari")
		}
		ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")

	default:
		if ua.OS == "Android" && tokens.get("Version") != "" {
			ua.Name = "Android browser"
			ua.Version = tokens.get("Version")
			ua.Mobile = true
		} else {
			// Nothing recognized: take the best remaining token, or use the
			// whole user-agent string as the name when no token qualifies.
			if name := tokens.findBestMatch(false); name != "" {
				ua.Name = name
				ua.Version = tokens.get(name)
			} else {
				ua.Name = ua.String
			}
			ua.Bot = strings.Contains(strings.ToLower(ua.Name), "bot")
			ua.Mobile = tokens.existsAny("Mobile", "Mobile Safari")
		}
	}

	// Android is always reported as mobile unless the tablet rule below
	// applies.
	if ua.IsAndroid() {
		ua.Mobile = true
	}

	// Tablet and Mobile are mutually exclusive: a tablet is never mobile.
	if ua.Tablet {
		ua.Mobile = false
	}

	// If not already flagged as a bot, treat the presence of a URL in the
	// user-agent string as a sign of a bot.
	if !ua.Bot {
		ua.Bot = ua.URL != ""
	}

	// Finally, flag a couple of well-known bots by name.
	if !ua.Bot {
		switch ua.Name {
		case Twitterbot, FacebookExternalHit:
			ua.Bot = true
		}
	}

	// Fill the structured version fields from the version strings.
	parseVersion(ua.Version, &ua.VersionNo)
	parseVersion(ua.OSVersion, &ua.OSVersionNo)

	return ua
}
   336  
   337  func parse(userAgent string) properties {
   338  	clients := properties{
   339  		list: make([]property, 0, 8),
   340  	}
   341  	slash := false
   342  	isURL := false
   343  	var buff, val bytes.Buffer
   344  	addToken := func() {
   345  		if buff.Len() != 0 {
   346  			s := strings.TrimSpace(buff.String())
   347  			if !ignore(s) {
   348  				if isURL {
   349  					s = strings.TrimPrefix(s, "+")
   350  				}
   351  
   352  				if val.Len() == 0 { // only if value don't exists
   353  					var ver string
   354  					s, ver = checkVer(s) // determin version string and split
   355  					clients.add(s, ver)
   356  				} else {
   357  					clients.add(s, strings.TrimSpace(val.String()))
   358  				}
   359  			}
   360  		}
   361  		buff.Reset()
   362  		val.Reset()
   363  		slash = false
   364  		isURL = false
   365  	}
   366  
   367  	parOpen := false
   368  	braOpen := false
   369  
   370  	bua := []byte(userAgent)
   371  	for i, c := range bua {
   372  
   373  		//fmt.Println(string(c), c)
   374  		switch {
   375  		case c == 41: // )
   376  			addToken()
   377  			parOpen = false
   378  
   379  		case (parOpen || braOpen) && c == 59: // ;
   380  			addToken()
   381  
   382  		case c == 40: // (
   383  			addToken()
   384  			parOpen = true
   385  
   386  		case c == 91: // [
   387  			addToken()
   388  			braOpen = true
   389  		case c == 93: // ]
   390  			addToken()
   391  			braOpen = false
   392  
   393  		case slash && c == 32:
   394  			addToken()
   395  
   396  		case slash:
   397  			val.WriteByte(c)
   398  
   399  		case c == 47 && !isURL: //   /
   400  			if i != len(bua)-1 && bua[i+1] == 47 && (bytes.HasSuffix(buff.Bytes(), []byte("http:")) || bytes.HasSuffix(buff.Bytes(), []byte("https:"))) {
   401  				buff.WriteByte(c)
   402  				isURL = true
   403  			} else {
   404  				slash = true
   405  			}
   406  
   407  		default:
   408  			buff.WriteByte(c)
   409  		}
   410  	}
   411  	addToken()
   412  
   413  	return clients
   414  }
   415  
   416  func checkVer(s string) (name, v string) {
   417  	i := strings.LastIndex(s, " ")
   418  	if i == -1 {
   419  		return s, ""
   420  	}
   421  
   422  	//v = s[i+1:]
   423  
   424  	switch s[:i] {
   425  	case "Linux", "Windows NT", "Windows Phone OS", "MSIE", "Android":
   426  		return s[:i], s[i+1:]
   427  	case "CrOS x86_64", "CrOS aarch64":
   428  		j := strings.LastIndex(s[:i], " ")
   429  		return s[:j], s[j+1 : i]
   430  	default:
   431  		return s, ""
   432  	}
   433  
   434  	// for _, c := range v {
   435  	// 	if (c >= 48 && c <= 57) || c == 46 {
   436  	// 	} else {
   437  	// 		return s, ""
   438  	// 	}
   439  	// }
   440  	// return s[:i], s[i+1:]
   441  }
   442  
   443  // ignore retursn true if token should be ignored
   444  func ignore(s string) bool {
   445  	switch s {
   446  	case "KHTML, like Gecko", "U", "compatible", "Mozilla", "WOW64", "en", "en-us", "en-gb", "ru-ru":
   447  		return true
   448  	default:
   449  		return false
   450  	}
   451  }
   452  
   453  type property struct {
   454  	Key   string
   455  	Value string
   456  }
   457  type properties struct {
   458  	list []property
   459  }
   460  
   461  func (p *properties) add(key, value string) {
   462  	p.list = append(p.list, property{Key: key, Value: value})
   463  }
   464  
   465  func (p properties) get(key string) string {
   466  	for _, prop := range p.list {
   467  		if prop.Key == key {
   468  			return prop.Value
   469  		}
   470  	}
   471  	return ""
   472  }
   473  
   474  func (p properties) getIndexValue(key string) (int, string) {
   475  	for i, prop := range p.list {
   476  		if prop.Key == key {
   477  			return i, prop.Value
   478  		}
   479  	}
   480  	return -1, ""
   481  }
   482  
   483  func (p properties) exists(key string) bool {
   484  	for _, prop := range p.list {
   485  		if prop.Key == key {
   486  			return true
   487  		}
   488  	}
   489  	return false
   490  }
   491  
   492  // func (p properties) existsIgnoreCase(key string) bool {
   493  // 	for _, prop := range p.list {
   494  // 		if strings.EqualFold(prop.Key, key) {
   495  // 			return true
   496  // 		}
   497  // 	}
   498  // 	return false
   499  // }
   500  
   501  func (p properties) existsAny(keys ...string) bool {
   502  	for _, k := range keys {
   503  		for _, prop := range p.list {
   504  			if prop.Key == k {
   505  				return true
   506  			}
   507  		}
   508  	}
   509  	return false
   510  }
   511  
   512  func (p properties) findMacOSVersion() string {
   513  	for _, token := range p.list {
   514  		if strings.Contains(token.Key, "OS") {
   515  			if ver := findVersion(token.Value); ver != "" {
   516  				return ver
   517  			} else if ver = findVersion(token.Key); ver != "" {
   518  				return ver
   519  			}
   520  		}
   521  
   522  	}
   523  	return ""
   524  }
   525  
   526  func (p properties) startsWith(value string) bool {
   527  	for _, prop := range p.list {
   528  		if strings.HasPrefix(prop.Key, value) {
   529  			return true
   530  		}
   531  	}
   532  	return false
   533  }
   534  
   535  func (p properties) findInstagramVersion() string {
   536  	for _, token := range p.list {
   537  		if strings.HasPrefix(token.Key, "Instagram") {
   538  			if ver := findVersion(token.Value); ver != "" {
   539  				return ver
   540  			} else if ver = findVersion(token.Key); ver != "" {
   541  				return ver
   542  			}
   543  		}
   544  
   545  	}
   546  	return ""
   547  }
   548  
   549  // findBestMatch from the rest of the bunch
   550  // in first cycle only return key with version value
   551  // if withVerValue is false, do another cycle and return any token
   552  func (p properties) findBestMatch(withVerOnly bool) string {
   553  	n := 2
   554  	if withVerOnly {
   555  		n = 1
   556  	}
   557  	for i := 0; i < n; i++ {
   558  		for _, prop := range p.list {
   559  			switch prop.Key {
   560  			case Chrome, Firefox, Safari, "Version", "Mobile", "Mobile Safari", "Mozilla", "AppleWebKit", "Windows NT", "Windows Phone OS", Android, "Macintosh", Linux, "GSA", "CrOS", "Tablet":
   561  			default:
   562  				// don' pick if starts with number
   563  				if len(prop.Key) != 0 && prop.Key[0] >= 48 && prop.Key[0] <= 57 {
   564  					break
   565  				}
   566  				if i == 0 {
   567  					if prop.Value != "" { // in first check, only return keys with value
   568  						return prop.Key
   569  					}
   570  				} else {
   571  					return prop.Key
   572  				}
   573  			}
   574  		}
   575  	}
   576  	return ""
   577  }
   578  
   579  var rxMacOSVer = regexp.MustCompile(`[_\d\.]+`)
   580  
   581  func findVersion(s string) string {
   582  	if ver := rxMacOSVer.FindString(s); ver != "" {
   583  		return strings.Replace(ver, "_", ".", -1)
   584  	}
   585  	return ""
   586  }
   587  
   588  // findAndroidDevice in tokens
   589  func (p *properties) findAndroidDevice(startIndex int) string {
   590  	for i := startIndex; i < startIndex+1; i++ {
   591  		if len(p.list) > i+1 {
   592  			dev := p.list[i+1].Key
   593  			if len(dev) == 2 || (len(dev) == 5 && dev[2] == '-') {
   594  				// probably langage tag (en-us etc..), ignore and continue loop
   595  				continue
   596  			}
   597  			switch dev {
   598  			case Chrome, Firefox, Safari, "Opera Mini", "Presto", "Version", "Mobile", "Mobile Safari", "Mozilla", "AppleWebKit", "Windows NT", "Windows Phone OS", Android, "Macintosh", Linux, "CrOS":
   599  				// ignore this tokens, not device names
   600  			default:
   601  				if strings.Contains(strings.ToLower(dev), "tablet") {
   602  					p.list[i+1].Key = "Tablet" // leave Tablet tag for later table detection
   603  				} else {
   604  					p.list = append(p.list[:i+1], p.list[i+2:]...)
   605  				}
   606  				return strings.TrimSpace(strings.TrimSuffix(dev, "Build"))
   607  			}
   608  		}
   609  	}
   610  	return ""
   611  }