github.com/ooni/oohttp@v0.7.2/cookiejar/jar.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package cookiejar implements an in-memory RFC 6265-compliant http.CookieJar. 6 package cookiejar 7 8 import ( 9 "errors" 10 "fmt" 11 "net" 12 "net/url" 13 "sort" 14 "strings" 15 "sync" 16 "time" 17 18 http "github.com/ooni/oohttp" 19 ascii "github.com/ooni/oohttp/internal/ascii" 20 ) 21 22 // PublicSuffixList provides the public suffix of a domain. For example: 23 // - the public suffix of "example.com" is "com", 24 // - the public suffix of "foo1.foo2.foo3.co.uk" is "co.uk", and 25 // - the public suffix of "bar.pvt.k12.ma.us" is "pvt.k12.ma.us". 26 // 27 // Implementations of PublicSuffixList must be safe for concurrent use by 28 // multiple goroutines. 29 // 30 // An implementation that always returns "" is valid and may be useful for 31 // testing but it is not secure: it means that the HTTP server for foo.com can 32 // set a cookie for bar.com. 33 // 34 // A public suffix list implementation is in the package 35 // golang.org/x/net/publicsuffix. 36 type PublicSuffixList interface { 37 // PublicSuffix returns the public suffix of domain. 38 // 39 // TODO: specify which of the caller and callee is responsible for IP 40 // addresses, for leading and trailing dots, for case sensitivity, and 41 // for IDN/Punycode. 42 PublicSuffix(domain string) string 43 44 // String returns a description of the source of this public suffix 45 // list. The description will typically contain something like a time 46 // stamp or version number. 47 String() string 48 } 49 50 // Options are the options for creating a new Jar. 51 type Options struct { 52 // PublicSuffixList is the public suffix list that determines whether 53 // an HTTP server can set a cookie for a domain. 54 // 55 // A nil value is valid and may be useful for testing but it is not 56 // secure: it means that the HTTP server for foo.co.uk can set a cookie 57 // for bar.co.uk. 58 PublicSuffixList PublicSuffixList 59 } 60 61 // Jar implements the http.CookieJar interface from the net/http package. 62 type Jar struct { 63 psList PublicSuffixList 64 65 // mu locks the remaining fields. 66 mu sync.Mutex 67 68 // entries is a set of entries, keyed by their eTLD+1 and subkeyed by 69 // their name/domain/path. 70 entries map[string]map[string]entry 71 72 // nextSeqNum is the next sequence number assigned to a new cookie 73 // created SetCookies. 74 nextSeqNum uint64 75 } 76 77 // New returns a new cookie jar. A nil *Options is equivalent to a zero 78 // Options. 79 func New(o *Options) (*Jar, error) { 80 jar := &Jar{ 81 entries: make(map[string]map[string]entry), 82 } 83 if o != nil { 84 jar.psList = o.PublicSuffixList 85 } 86 return jar, nil 87 } 88 89 // entry is the internal representation of a cookie. 90 // 91 // This struct type is not used outside of this package per se, but the exported 92 // fields are those of RFC 6265. 93 type entry struct { 94 Name string 95 Value string 96 Domain string 97 Path string 98 SameSite string 99 Secure bool 100 HttpOnly bool 101 Persistent bool 102 HostOnly bool 103 Expires time.Time 104 Creation time.Time 105 LastAccess time.Time 106 107 // seqNum is a sequence number so that Cookies returns cookies in a 108 // deterministic order, even for cookies that have equal Path length and 109 // equal Creation time. This simplifies testing. 110 seqNum uint64 111 } 112 113 // id returns the domain;path;name triple of e as an id. 114 func (e *entry) id() string { 115 return fmt.Sprintf("%s;%s;%s", e.Domain, e.Path, e.Name) 116 } 117 118 // shouldSend determines whether e's cookie qualifies to be included in a 119 // request to host/path. It is the caller's responsibility to check if the 120 // cookie is expired. 121 func (e *entry) shouldSend(https bool, host, path string) bool { 122 return e.domainMatch(host) && e.pathMatch(path) && (https || !e.Secure) 123 } 124 125 // domainMatch checks whether e's Domain allows sending e back to host. 126 // It differs from "domain-match" of RFC 6265 section 5.1.3 because we treat 127 // a cookie with an IP address in the Domain always as a host cookie. 128 func (e *entry) domainMatch(host string) bool { 129 if e.Domain == host { 130 return true 131 } 132 return !e.HostOnly && hasDotSuffix(host, e.Domain) 133 } 134 135 // pathMatch implements "path-match" according to RFC 6265 section 5.1.4. 136 func (e *entry) pathMatch(requestPath string) bool { 137 if requestPath == e.Path { 138 return true 139 } 140 if strings.HasPrefix(requestPath, e.Path) { 141 if e.Path[len(e.Path)-1] == '/' { 142 return true // The "/any/" matches "/any/path" case. 143 } else if requestPath[len(e.Path)] == '/' { 144 return true // The "/any" matches "/any/path" case. 145 } 146 } 147 return false 148 } 149 150 // hasDotSuffix reports whether s ends in "."+suffix. 151 func hasDotSuffix(s, suffix string) bool { 152 return len(s) > len(suffix) && s[len(s)-len(suffix)-1] == '.' && s[len(s)-len(suffix):] == suffix 153 } 154 155 // Cookies implements the Cookies method of the http.CookieJar interface. 156 // 157 // It returns an empty slice if the URL's scheme is not HTTP or HTTPS. 158 func (j *Jar) Cookies(u *url.URL) (cookies []*http.Cookie) { 159 return j.cookies(u, time.Now()) 160 } 161 162 // cookies is like Cookies but takes the current time as a parameter. 163 func (j *Jar) cookies(u *url.URL, now time.Time) (cookies []*http.Cookie) { 164 if u.Scheme != "http" && u.Scheme != "https" { 165 return cookies 166 } 167 host, err := canonicalHost(u.Host) 168 if err != nil { 169 return cookies 170 } 171 key := jarKey(host, j.psList) 172 173 j.mu.Lock() 174 defer j.mu.Unlock() 175 176 submap := j.entries[key] 177 if submap == nil { 178 return cookies 179 } 180 181 https := u.Scheme == "https" 182 path := u.Path 183 if path == "" { 184 path = "/" 185 } 186 187 modified := false 188 var selected []entry 189 for id, e := range submap { 190 if e.Persistent && !e.Expires.After(now) { 191 delete(submap, id) 192 modified = true 193 continue 194 } 195 if !e.shouldSend(https, host, path) { 196 continue 197 } 198 e.LastAccess = now 199 submap[id] = e 200 selected = append(selected, e) 201 modified = true 202 } 203 if modified { 204 if len(submap) == 0 { 205 delete(j.entries, key) 206 } else { 207 j.entries[key] = submap 208 } 209 } 210 211 // sort according to RFC 6265 section 5.4 point 2: by longest 212 // path and then by earliest creation time. 213 sort.Slice(selected, func(i, j int) bool { 214 s := selected 215 if len(s[i].Path) != len(s[j].Path) { 216 return len(s[i].Path) > len(s[j].Path) 217 } 218 if ret := s[i].Creation.Compare(s[j].Creation); ret != 0 { 219 return ret < 0 220 } 221 return s[i].seqNum < s[j].seqNum 222 }) 223 for _, e := range selected { 224 cookies = append(cookies, &http.Cookie{Name: e.Name, Value: e.Value}) 225 } 226 227 return cookies 228 } 229 230 // SetCookies implements the SetCookies method of the http.CookieJar interface. 231 // 232 // It does nothing if the URL's scheme is not HTTP or HTTPS. 233 func (j *Jar) SetCookies(u *url.URL, cookies []*http.Cookie) { 234 j.setCookies(u, cookies, time.Now()) 235 } 236 237 // setCookies is like SetCookies but takes the current time as parameter. 238 func (j *Jar) setCookies(u *url.URL, cookies []*http.Cookie, now time.Time) { 239 if len(cookies) == 0 { 240 return 241 } 242 if u.Scheme != "http" && u.Scheme != "https" { 243 return 244 } 245 host, err := canonicalHost(u.Host) 246 if err != nil { 247 return 248 } 249 key := jarKey(host, j.psList) 250 defPath := defaultPath(u.Path) 251 252 j.mu.Lock() 253 defer j.mu.Unlock() 254 255 submap := j.entries[key] 256 257 modified := false 258 for _, cookie := range cookies { 259 e, remove, err := j.newEntry(cookie, now, defPath, host) 260 if err != nil { 261 continue 262 } 263 id := e.id() 264 if remove { 265 if submap != nil { 266 if _, ok := submap[id]; ok { 267 delete(submap, id) 268 modified = true 269 } 270 } 271 continue 272 } 273 if submap == nil { 274 submap = make(map[string]entry) 275 } 276 277 if old, ok := submap[id]; ok { 278 e.Creation = old.Creation 279 e.seqNum = old.seqNum 280 } else { 281 e.Creation = now 282 e.seqNum = j.nextSeqNum 283 j.nextSeqNum++ 284 } 285 e.LastAccess = now 286 submap[id] = e 287 modified = true 288 } 289 290 if modified { 291 if len(submap) == 0 { 292 delete(j.entries, key) 293 } else { 294 j.entries[key] = submap 295 } 296 } 297 } 298 299 // canonicalHost strips port from host if present and returns the canonicalized 300 // host name. 301 func canonicalHost(host string) (string, error) { 302 var err error 303 if hasPort(host) { 304 host, _, err = net.SplitHostPort(host) 305 if err != nil { 306 return "", err 307 } 308 } 309 // Strip trailing dot from fully qualified domain names. 310 host = strings.TrimSuffix(host, ".") 311 encoded, err := toASCII(host) 312 if err != nil { 313 return "", err 314 } 315 // We know this is ascii, no need to check. 316 lower, _ := ascii.ToLower(encoded) 317 return lower, nil 318 } 319 320 // hasPort reports whether host contains a port number. host may be a host 321 // name, an IPv4 or an IPv6 address. 322 func hasPort(host string) bool { 323 colons := strings.Count(host, ":") 324 if colons == 0 { 325 return false 326 } 327 if colons == 1 { 328 return true 329 } 330 return host[0] == '[' && strings.Contains(host, "]:") 331 } 332 333 // jarKey returns the key to use for a jar. 334 func jarKey(host string, psl PublicSuffixList) string { 335 if isIP(host) { 336 return host 337 } 338 339 var i int 340 if psl == nil { 341 i = strings.LastIndex(host, ".") 342 if i <= 0 { 343 return host 344 } 345 } else { 346 suffix := psl.PublicSuffix(host) 347 if suffix == host { 348 return host 349 } 350 i = len(host) - len(suffix) 351 if i <= 0 || host[i-1] != '.' { 352 // The provided public suffix list psl is broken. 353 // Storing cookies under host is a safe stopgap. 354 return host 355 } 356 // Only len(suffix) is used to determine the jar key from 357 // here on, so it is okay if psl.PublicSuffix("www.buggy.psl") 358 // returns "com" as the jar key is generated from host. 359 } 360 prevDot := strings.LastIndex(host[:i-1], ".") 361 return host[prevDot+1:] 362 } 363 364 // isIP reports whether host is an IP address. 365 func isIP(host string) bool { 366 if strings.ContainsAny(host, ":%") { 367 // Probable IPv6 address. 368 // Hostnames can't contain : or %, so this is definitely not a valid host. 369 // Treating it as an IP is the more conservative option, and avoids the risk 370 // of interpeting ::1%.www.example.com as a subtomain of www.example.com. 371 return true 372 } 373 return net.ParseIP(host) != nil 374 } 375 376 // defaultPath returns the directory part of a URL's path according to 377 // RFC 6265 section 5.1.4. 378 func defaultPath(path string) string { 379 if len(path) == 0 || path[0] != '/' { 380 return "/" // Path is empty or malformed. 381 } 382 383 i := strings.LastIndex(path, "/") // Path starts with "/", so i != -1. 384 if i == 0 { 385 return "/" // Path has the form "/abc". 386 } 387 return path[:i] // Path is either of form "/abc/xyz" or "/abc/xyz/". 388 } 389 390 // newEntry creates an entry from an http.Cookie c. now is the current time and 391 // is compared to c.Expires to determine deletion of c. defPath and host are the 392 // default-path and the canonical host name of the URL c was received from. 393 // 394 // remove records whether the jar should delete this cookie, as it has already 395 // expired with respect to now. In this case, e may be incomplete, but it will 396 // be valid to call e.id (which depends on e's Name, Domain and Path). 397 // 398 // A malformed c.Domain will result in an error. 399 func (j *Jar) newEntry(c *http.Cookie, now time.Time, defPath, host string) (e entry, remove bool, err error) { 400 e.Name = c.Name 401 402 if c.Path == "" || c.Path[0] != '/' { 403 e.Path = defPath 404 } else { 405 e.Path = c.Path 406 } 407 408 e.Domain, e.HostOnly, err = j.domainAndType(host, c.Domain) 409 if err != nil { 410 return e, false, err 411 } 412 413 // MaxAge takes precedence over Expires. 414 if c.MaxAge < 0 { 415 return e, true, nil 416 } else if c.MaxAge > 0 { 417 e.Expires = now.Add(time.Duration(c.MaxAge) * time.Second) 418 e.Persistent = true 419 } else { 420 if c.Expires.IsZero() { 421 e.Expires = endOfTime 422 e.Persistent = false 423 } else { 424 if !c.Expires.After(now) { 425 return e, true, nil 426 } 427 e.Expires = c.Expires 428 e.Persistent = true 429 } 430 } 431 432 e.Value = c.Value 433 e.Secure = c.Secure 434 e.HttpOnly = c.HttpOnly 435 436 switch c.SameSite { 437 case http.SameSiteDefaultMode: 438 e.SameSite = "SameSite" 439 case http.SameSiteStrictMode: 440 e.SameSite = "SameSite=Strict" 441 case http.SameSiteLaxMode: 442 e.SameSite = "SameSite=Lax" 443 } 444 445 return e, false, nil 446 } 447 448 var ( 449 errIllegalDomain = errors.New("cookiejar: illegal cookie domain attribute") 450 errMalformedDomain = errors.New("cookiejar: malformed cookie domain attribute") 451 errNoHostname = errors.New("cookiejar: no host name available (IP only)") 452 ) 453 454 // endOfTime is the time when session (non-persistent) cookies expire. 455 // This instant is representable in most date/time formats (not just 456 // Go's time.Time) and should be far enough in the future. 457 var endOfTime = time.Date(9999, 12, 31, 23, 59, 59, 0, time.UTC) 458 459 // domainAndType determines the cookie's domain and hostOnly attribute. 460 func (j *Jar) domainAndType(host, domain string) (string, bool, error) { 461 if domain == "" { 462 // No domain attribute in the SetCookie header indicates a 463 // host cookie. 464 return host, true, nil 465 } 466 467 if isIP(host) { 468 // RFC 6265 is not super clear here, a sensible interpretation 469 // is that cookies with an IP address in the domain-attribute 470 // are allowed. 471 472 // RFC 6265 section 5.2.3 mandates to strip an optional leading 473 // dot in the domain-attribute before processing the cookie. 474 // 475 // Most browsers don't do that for IP addresses, only curl 476 // (version 7.54) and IE (version 11) do not reject a 477 // Set-Cookie: a=1; domain=.127.0.0.1 478 // This leading dot is optional and serves only as hint for 479 // humans to indicate that a cookie with "domain=.bbc.co.uk" 480 // would be sent to every subdomain of bbc.co.uk. 481 // It just doesn't make sense on IP addresses. 482 // The other processing and validation steps in RFC 6265 just 483 // collapse to: 484 if host != domain { 485 return "", false, errIllegalDomain 486 } 487 488 // According to RFC 6265 such cookies should be treated as 489 // domain cookies. 490 // As there are no subdomains of an IP address the treatment 491 // according to RFC 6265 would be exactly the same as that of 492 // a host-only cookie. Contemporary browsers (and curl) do 493 // allows such cookies but treat them as host-only cookies. 494 // So do we as it just doesn't make sense to label them as 495 // domain cookies when there is no domain; the whole notion of 496 // domain cookies requires a domain name to be well defined. 497 return host, true, nil 498 } 499 500 // From here on: If the cookie is valid, it is a domain cookie (with 501 // the one exception of a public suffix below). 502 // See RFC 6265 section 5.2.3. 503 if domain[0] == '.' { 504 domain = domain[1:] 505 } 506 507 if len(domain) == 0 || domain[0] == '.' { 508 // Received either "Domain=." or "Domain=..some.thing", 509 // both are illegal. 510 return "", false, errMalformedDomain 511 } 512 513 domain, isASCII := ascii.ToLower(domain) 514 if !isASCII { 515 // Received non-ASCII domain, e.g. "perché.com" instead of "xn--perch-fsa.com" 516 return "", false, errMalformedDomain 517 } 518 519 if domain[len(domain)-1] == '.' { 520 // We received stuff like "Domain=www.example.com.". 521 // Browsers do handle such stuff (actually differently) but 522 // RFC 6265 seems to be clear here (e.g. section 4.1.2.3) in 523 // requiring a reject. 4.1.2.3 is not normative, but 524 // "Domain Matching" (5.1.3) and "Canonicalized Host Names" 525 // (5.1.2) are. 526 return "", false, errMalformedDomain 527 } 528 529 // See RFC 6265 section 5.3 #5. 530 if j.psList != nil { 531 if ps := j.psList.PublicSuffix(domain); ps != "" && !hasDotSuffix(domain, ps) { 532 if host == domain { 533 // This is the one exception in which a cookie 534 // with a domain attribute is a host cookie. 535 return host, true, nil 536 } 537 return "", false, errIllegalDomain 538 } 539 } 540 541 // The domain must domain-match host: www.mycompany.com cannot 542 // set cookies for .ourcompetitors.com. 543 if host != domain && !hasDotSuffix(host, domain) { 544 return "", false, errIllegalDomain 545 } 546 547 return domain, false, nil 548 }