github.com/Andyfoo/golang/x/net@v0.0.0-20190901054642-57c1bf301704/publicsuffix/list_test.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package publicsuffix 6 7 import ( 8 "sort" 9 "strings" 10 "testing" 11 ) 12 13 func TestNodeLabel(t *testing.T) { 14 for i, want := range nodeLabels { 15 got := nodeLabel(uint32(i)) 16 if got != want { 17 t.Errorf("%d: got %q, want %q", i, got, want) 18 } 19 } 20 } 21 22 func TestFind(t *testing.T) { 23 testCases := []string{ 24 "", 25 "a", 26 "a0", 27 "aaaa", 28 "ao", 29 "ap", 30 "ar", 31 "aro", 32 "arp", 33 "arpa", 34 "arpaa", 35 "arpb", 36 "az", 37 "b", 38 "b0", 39 "ba", 40 "z", 41 "zu", 42 "zv", 43 "zw", 44 "zx", 45 "zy", 46 "zz", 47 "zzzz", 48 } 49 for _, tc := range testCases { 50 got := find(tc, 0, numTLD) 51 want := notFound 52 for i := uint32(0); i < numTLD; i++ { 53 if tc == nodeLabel(i) { 54 want = i 55 break 56 } 57 } 58 if got != want { 59 t.Errorf("%q: got %d, want %d", tc, got, want) 60 } 61 } 62 } 63 64 func TestICANN(t *testing.T) { 65 testCases := map[string]bool{ 66 "foo.org": true, 67 "foo.co.uk": true, 68 "foo.dyndns.org": false, 69 "foo.go.dyndns.org": false, 70 "foo.blogspot.co.uk": false, 71 "foo.intranet": false, 72 } 73 for domain, want := range testCases { 74 _, got := PublicSuffix(domain) 75 if got != want { 76 t.Errorf("%q: got %v, want %v", domain, got, want) 77 } 78 } 79 } 80 81 var publicSuffixTestCases = []struct { 82 domain string 83 wantPS string 84 wantICANN bool 85 }{ 86 // Empty string. 87 {"", "", false}, 88 89 // The .ao rules are: 90 // ao 91 // ed.ao 92 // gv.ao 93 // og.ao 94 // co.ao 95 // pb.ao 96 // it.ao 97 {"ao", "ao", true}, 98 {"www.ao", "ao", true}, 99 {"pb.ao", "pb.ao", true}, 100 {"www.pb.ao", "pb.ao", true}, 101 {"www.xxx.yyy.zzz.pb.ao", "pb.ao", true}, 102 103 // The .ar rules are: 104 // ar 105 // com.ar 106 // edu.ar 107 // gob.ar 108 // gov.ar 109 // int.ar 110 // mil.ar 111 // net.ar 112 // org.ar 113 // tur.ar 114 // blogspot.com.ar (in the PRIVATE DOMAIN section). 115 {"ar", "ar", true}, 116 {"www.ar", "ar", true}, 117 {"nic.ar", "ar", true}, 118 {"www.nic.ar", "ar", true}, 119 {"com.ar", "com.ar", true}, 120 {"www.com.ar", "com.ar", true}, 121 {"blogspot.com.ar", "blogspot.com.ar", false}, // PRIVATE DOMAIN. 122 {"www.blogspot.com.ar", "blogspot.com.ar", false}, // PRIVATE DOMAIN. 123 {"www.xxx.yyy.zzz.blogspot.com.ar", "blogspot.com.ar", false}, // PRIVATE DOMAIN. 124 {"logspot.com.ar", "com.ar", true}, 125 {"zlogspot.com.ar", "com.ar", true}, 126 {"zblogspot.com.ar", "com.ar", true}, 127 128 // The .arpa rules are: 129 // arpa 130 // e164.arpa 131 // in-addr.arpa 132 // ip6.arpa 133 // iris.arpa 134 // uri.arpa 135 // urn.arpa 136 {"arpa", "arpa", true}, 137 {"www.arpa", "arpa", true}, 138 {"urn.arpa", "urn.arpa", true}, 139 {"www.urn.arpa", "urn.arpa", true}, 140 {"www.xxx.yyy.zzz.urn.arpa", "urn.arpa", true}, 141 142 // The relevant {kobe,kyoto}.jp rules are: 143 // jp 144 // *.kobe.jp 145 // !city.kobe.jp 146 // kyoto.jp 147 // ide.kyoto.jp 148 {"jp", "jp", true}, 149 {"kobe.jp", "jp", true}, 150 {"c.kobe.jp", "c.kobe.jp", true}, 151 {"b.c.kobe.jp", "c.kobe.jp", true}, 152 {"a.b.c.kobe.jp", "c.kobe.jp", true}, 153 {"city.kobe.jp", "kobe.jp", true}, 154 {"www.city.kobe.jp", "kobe.jp", true}, 155 {"kyoto.jp", "kyoto.jp", true}, 156 {"test.kyoto.jp", "kyoto.jp", true}, 157 {"ide.kyoto.jp", "ide.kyoto.jp", true}, 158 {"b.ide.kyoto.jp", "ide.kyoto.jp", true}, 159 {"a.b.ide.kyoto.jp", "ide.kyoto.jp", true}, 160 161 // The .tw rules are: 162 // tw 163 // edu.tw 164 // gov.tw 165 // mil.tw 166 // com.tw 167 // net.tw 168 // org.tw 169 // idv.tw 170 // game.tw 171 // ebiz.tw 172 // club.tw 173 // 網路.tw (xn--zf0ao64a.tw) 174 // 組織.tw (xn--uc0atv.tw) 175 // 商業.tw (xn--czrw28b.tw) 176 // blogspot.tw 177 {"tw", "tw", true}, 178 {"aaa.tw", "tw", true}, 179 {"www.aaa.tw", "tw", true}, 180 {"xn--czrw28b.aaa.tw", "tw", true}, 181 {"edu.tw", "edu.tw", true}, 182 {"www.edu.tw", "edu.tw", true}, 183 {"xn--czrw28b.edu.tw", "edu.tw", true}, 184 {"xn--czrw28b.tw", "xn--czrw28b.tw", true}, 185 {"www.xn--czrw28b.tw", "xn--czrw28b.tw", true}, 186 {"xn--uc0atv.xn--czrw28b.tw", "xn--czrw28b.tw", true}, 187 {"xn--kpry57d.tw", "tw", true}, 188 189 // The .uk rules are: 190 // uk 191 // ac.uk 192 // co.uk 193 // gov.uk 194 // ltd.uk 195 // me.uk 196 // net.uk 197 // nhs.uk 198 // org.uk 199 // plc.uk 200 // police.uk 201 // *.sch.uk 202 // blogspot.co.uk (in the PRIVATE DOMAIN section). 203 {"uk", "uk", true}, 204 {"aaa.uk", "uk", true}, 205 {"www.aaa.uk", "uk", true}, 206 {"mod.uk", "uk", true}, 207 {"www.mod.uk", "uk", true}, 208 {"sch.uk", "uk", true}, 209 {"mod.sch.uk", "mod.sch.uk", true}, 210 {"www.sch.uk", "www.sch.uk", true}, 211 {"co.uk", "co.uk", true}, 212 {"www.co.uk", "co.uk", true}, 213 {"blogspot.co.uk", "blogspot.co.uk", false}, // PRIVATE DOMAIN. 214 {"blogspot.nic.uk", "uk", true}, 215 {"blogspot.sch.uk", "blogspot.sch.uk", true}, 216 217 // The .рф rules are 218 // рф (xn--p1ai) 219 {"xn--p1ai", "xn--p1ai", true}, 220 {"aaa.xn--p1ai", "xn--p1ai", true}, 221 {"www.xxx.yyy.xn--p1ai", "xn--p1ai", true}, 222 223 // The .bd rules are: 224 // *.bd 225 {"bd", "bd", false}, // The catch-all "*" rule is not in the ICANN DOMAIN section. See footnote (†). 226 {"www.bd", "www.bd", true}, 227 {"xxx.www.bd", "www.bd", true}, 228 {"zzz.bd", "zzz.bd", true}, 229 {"www.zzz.bd", "zzz.bd", true}, 230 {"www.xxx.yyy.zzz.bd", "zzz.bd", true}, 231 232 // The .ck rules are: 233 // *.ck 234 // !www.ck 235 {"ck", "ck", false}, // The catch-all "*" rule is not in the ICANN DOMAIN section. See footnote (†). 236 {"www.ck", "ck", true}, 237 {"xxx.www.ck", "ck", true}, 238 {"zzz.ck", "zzz.ck", true}, 239 {"www.zzz.ck", "zzz.ck", true}, 240 {"www.xxx.yyy.zzz.ck", "zzz.ck", true}, 241 242 // The .myjino.ru rules (in the PRIVATE DOMAIN section) are: 243 // myjino.ru 244 // *.hosting.myjino.ru 245 // *.landing.myjino.ru 246 // *.spectrum.myjino.ru 247 // *.vps.myjino.ru 248 {"myjino.ru", "myjino.ru", false}, 249 {"aaa.myjino.ru", "myjino.ru", false}, 250 {"bbb.ccc.myjino.ru", "myjino.ru", false}, 251 {"hosting.ddd.myjino.ru", "myjino.ru", false}, 252 {"landing.myjino.ru", "myjino.ru", false}, 253 {"www.landing.myjino.ru", "www.landing.myjino.ru", false}, 254 {"spectrum.vps.myjino.ru", "spectrum.vps.myjino.ru", false}, 255 256 // The .uberspace.de rules (in the PRIVATE DOMAIN section) are: 257 // *.uberspace.de 258 {"uberspace.de", "de", true}, // "de" is in the ICANN DOMAIN section. See footnote (†). 259 {"aaa.uberspace.de", "aaa.uberspace.de", false}, 260 {"bbb.ccc.uberspace.de", "ccc.uberspace.de", false}, 261 262 // There are no .nosuchtld rules. 263 {"nosuchtld", "nosuchtld", false}, 264 {"foo.nosuchtld", "nosuchtld", false}, 265 {"bar.foo.nosuchtld", "nosuchtld", false}, 266 267 // (†) There is some disagreement on how wildcards behave: what should the 268 // public suffix of "platform.sh" be when both "*.platform.sh" and "sh" is 269 // in the PSL, but "platform.sh" is not? Two possible answers are 270 // "platform.sh" and "sh", there are valid arguments for either behavior, 271 // and different browsers have implemented different behaviors. 272 // 273 // This implementation, Go's github.com/Andyfoo/golang/x/net/publicsuffix, returns "sh", 274 // the same as a literal interpretation of the "Formal Algorithm" section 275 // of https://publicsuffix.org/list/ 276 // 277 // Together, the TestPublicSuffix and TestSlowPublicSuffix tests check that 278 // the Go implementation (func PublicSuffix in list.go) and the literal 279 // interpretation (func slowPublicSuffix in list_test.go) produce the same 280 // (golden) results on every test case in this publicSuffixTestCases slice, 281 // including some "platform.sh" style cases. 282 // 283 // More discussion of "the platform.sh problem" is at: 284 // - https://github.com/publicsuffix/list/issues/694 285 // - https://bugzilla.mozilla.org/show_bug.cgi?id=1124625#c6 286 // - https://wiki.mozilla.org/Public_Suffix_List/platform.sh_Problem 287 } 288 289 func BenchmarkPublicSuffix(b *testing.B) { 290 for i := 0; i < b.N; i++ { 291 for _, tc := range publicSuffixTestCases { 292 List.PublicSuffix(tc.domain) 293 } 294 } 295 } 296 297 func TestPublicSuffix(t *testing.T) { 298 for _, tc := range publicSuffixTestCases { 299 gotPS, gotICANN := PublicSuffix(tc.domain) 300 if gotPS != tc.wantPS || gotICANN != tc.wantICANN { 301 t.Errorf("%q: got (%q, %t), want (%q, %t)", tc.domain, gotPS, gotICANN, tc.wantPS, tc.wantICANN) 302 } 303 } 304 } 305 306 func TestSlowPublicSuffix(t *testing.T) { 307 for _, tc := range publicSuffixTestCases { 308 gotPS, gotICANN := slowPublicSuffix(tc.domain) 309 if gotPS != tc.wantPS || gotICANN != tc.wantICANN { 310 t.Errorf("%q: got (%q, %t), want (%q, %t)", tc.domain, gotPS, gotICANN, tc.wantPS, tc.wantICANN) 311 } 312 } 313 } 314 315 func TestNumICANNRules(t *testing.T) { 316 if numICANNRules <= 0 { 317 t.Fatal("no ICANN rules") 318 } 319 if numICANNRules >= len(rules) { 320 t.Fatal("no Private rules") 321 } 322 // Check the last ICANN and first Private rules. If the underlying public 323 // suffix list changes, we may need to update these hard-coded checks. 324 if got, want := rules[numICANNRules-1], "zuerich"; got != want { 325 t.Errorf("last ICANN rule: got %q, wawnt %q", got, want) 326 } 327 if got, want := rules[numICANNRules], "cc.ua"; got != want { 328 t.Errorf("first Private rule: got %q, wawnt %q", got, want) 329 } 330 } 331 332 type slowPublicSuffixRule struct { 333 ruleParts []string 334 icann bool 335 } 336 337 // slowPublicSuffix implements the canonical (but O(number of rules)) public 338 // suffix algorithm described at http://publicsuffix.org/list/. 339 // 340 // 1. Match domain against all rules and take note of the matching ones. 341 // 2. If no rules match, the prevailing rule is "*". 342 // 3. If more than one rule matches, the prevailing rule is the one which is an exception rule. 343 // 4. If there is no matching exception rule, the prevailing rule is the one with the most labels. 344 // 5. If the prevailing rule is a exception rule, modify it by removing the leftmost label. 345 // 6. The public suffix is the set of labels from the domain which directly match the labels of the prevailing rule (joined by dots). 346 // 7. The registered or registrable domain is the public suffix plus one additional label. 347 // 348 // This function returns the public suffix, not the registrable domain, and so 349 // it stops after step 6. 350 func slowPublicSuffix(domain string) (string, bool) { 351 match := func(rulePart, domainPart string) bool { 352 switch rulePart[0] { 353 case '*': 354 return true 355 case '!': 356 return rulePart[1:] == domainPart 357 } 358 return rulePart == domainPart 359 } 360 361 domainParts := strings.Split(domain, ".") 362 var matchingRules []slowPublicSuffixRule 363 364 loop: 365 for i, rule := range rules { 366 ruleParts := strings.Split(rule, ".") 367 if len(domainParts) < len(ruleParts) { 368 continue 369 } 370 for i := range ruleParts { 371 rulePart := ruleParts[len(ruleParts)-1-i] 372 domainPart := domainParts[len(domainParts)-1-i] 373 if !match(rulePart, domainPart) { 374 continue loop 375 } 376 } 377 matchingRules = append(matchingRules, slowPublicSuffixRule{ 378 ruleParts: ruleParts, 379 icann: i < numICANNRules, 380 }) 381 } 382 if len(matchingRules) == 0 { 383 matchingRules = append(matchingRules, slowPublicSuffixRule{ 384 ruleParts: []string{"*"}, 385 icann: false, 386 }) 387 } else { 388 sort.Sort(byPriority(matchingRules)) 389 } 390 391 prevailing := matchingRules[0] 392 if prevailing.ruleParts[0][0] == '!' { 393 prevailing.ruleParts = prevailing.ruleParts[1:] 394 } 395 if prevailing.ruleParts[0][0] == '*' { 396 replaced := domainParts[len(domainParts)-len(prevailing.ruleParts)] 397 prevailing.ruleParts = append([]string{replaced}, prevailing.ruleParts[1:]...) 398 } 399 return strings.Join(prevailing.ruleParts, "."), prevailing.icann 400 } 401 402 type byPriority []slowPublicSuffixRule 403 404 func (b byPriority) Len() int { return len(b) } 405 func (b byPriority) Swap(i, j int) { b[i], b[j] = b[j], b[i] } 406 func (b byPriority) Less(i, j int) bool { 407 if b[i].ruleParts[0][0] == '!' { 408 return true 409 } 410 if b[j].ruleParts[0][0] == '!' { 411 return false 412 } 413 return len(b[i].ruleParts) > len(b[j].ruleParts) 414 } 415 416 // eTLDPlusOneTestCases come from 417 // https://github.com/publicsuffix/list/blob/master/tests/test_psl.txt 418 var eTLDPlusOneTestCases = []struct { 419 domain, want string 420 }{ 421 // Empty input. 422 {"", ""}, 423 // Unlisted TLD. 424 {"example", ""}, 425 {"example.example", "example.example"}, 426 {"b.example.example", "example.example"}, 427 {"a.b.example.example", "example.example"}, 428 // TLD with only 1 rule. 429 {"biz", ""}, 430 {"domain.biz", "domain.biz"}, 431 {"b.domain.biz", "domain.biz"}, 432 {"a.b.domain.biz", "domain.biz"}, 433 // TLD with some 2-level rules. 434 {"com", ""}, 435 {"example.com", "example.com"}, 436 {"b.example.com", "example.com"}, 437 {"a.b.example.com", "example.com"}, 438 {"uk.com", ""}, 439 {"example.uk.com", "example.uk.com"}, 440 {"b.example.uk.com", "example.uk.com"}, 441 {"a.b.example.uk.com", "example.uk.com"}, 442 {"test.ac", "test.ac"}, 443 // TLD with only 1 (wildcard) rule. 444 {"mm", ""}, 445 {"c.mm", ""}, 446 {"b.c.mm", "b.c.mm"}, 447 {"a.b.c.mm", "b.c.mm"}, 448 // More complex TLD. 449 {"jp", ""}, 450 {"test.jp", "test.jp"}, 451 {"www.test.jp", "test.jp"}, 452 {"ac.jp", ""}, 453 {"test.ac.jp", "test.ac.jp"}, 454 {"www.test.ac.jp", "test.ac.jp"}, 455 {"kyoto.jp", ""}, 456 {"test.kyoto.jp", "test.kyoto.jp"}, 457 {"ide.kyoto.jp", ""}, 458 {"b.ide.kyoto.jp", "b.ide.kyoto.jp"}, 459 {"a.b.ide.kyoto.jp", "b.ide.kyoto.jp"}, 460 {"c.kobe.jp", ""}, 461 {"b.c.kobe.jp", "b.c.kobe.jp"}, 462 {"a.b.c.kobe.jp", "b.c.kobe.jp"}, 463 {"city.kobe.jp", "city.kobe.jp"}, 464 {"www.city.kobe.jp", "city.kobe.jp"}, 465 // TLD with a wildcard rule and exceptions. 466 {"ck", ""}, 467 {"test.ck", ""}, 468 {"b.test.ck", "b.test.ck"}, 469 {"a.b.test.ck", "b.test.ck"}, 470 {"www.ck", "www.ck"}, 471 {"www.www.ck", "www.ck"}, 472 // US K12. 473 {"us", ""}, 474 {"test.us", "test.us"}, 475 {"www.test.us", "test.us"}, 476 {"ak.us", ""}, 477 {"test.ak.us", "test.ak.us"}, 478 {"www.test.ak.us", "test.ak.us"}, 479 {"k12.ak.us", ""}, 480 {"test.k12.ak.us", "test.k12.ak.us"}, 481 {"www.test.k12.ak.us", "test.k12.ak.us"}, 482 // Punycoded IDN labels 483 {"xn--85x722f.com.cn", "xn--85x722f.com.cn"}, 484 {"xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn"}, 485 {"www.xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn"}, 486 {"shishi.xn--55qx5d.cn", "shishi.xn--55qx5d.cn"}, 487 {"xn--55qx5d.cn", ""}, 488 {"xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s"}, 489 {"www.xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s"}, 490 {"shishi.xn--fiqs8s", "shishi.xn--fiqs8s"}, 491 {"xn--fiqs8s", ""}, 492 493 // Invalid input 494 {".", ""}, 495 {"de.", ""}, 496 {".de", ""}, 497 {".com.au", ""}, 498 {"com.au.", ""}, 499 {"com..au", ""}, 500 } 501 502 func TestEffectiveTLDPlusOne(t *testing.T) { 503 for _, tc := range eTLDPlusOneTestCases { 504 got, _ := EffectiveTLDPlusOne(tc.domain) 505 if got != tc.want { 506 t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want) 507 } 508 } 509 }