github.com/ooni/oohttp@v0.7.2/cookiejar/punycode.go (about)

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package cookiejar
     6  
     7  // This file implements the Punycode algorithm from RFC 3492.
     8  
     9  import (
    10  	"fmt"
    11  	"strings"
    12  	"unicode/utf8"
    13  
    14  	ascii "github.com/ooni/oohttp/internal/ascii"
    15  )
    16  
// These parameter values are specified in section 5 of RFC 3492.
//
// How the code in this file uses them:
//   - base is the number of digit values; encodeDigit accepts 0 through 35.
//   - damp divides delta on the first call to adapt for a label.
//   - initialBias is the bias encode starts with, before any adaptation.
//   - initialN is the value encode starts n at; code points below
//     utf8.RuneSelf are copied to the output literally instead.
//   - skew is added to the denominator of adapt's final bias term.
//   - tmax and tmin are the upper and lower clamps applied to the per-digit
//     threshold t in encode; adapt also uses them to bound its scaling loop.
//
// All computation is done with int32s, so that overflow behavior is identical
// regardless of whether int is 32-bit or 64-bit.
const (
	base        int32 = 36
	damp        int32 = 700
	initialBias int32 = 72
	initialN    int32 = 128
	skew        int32 = 38
	tmax        int32 = 26
	tmin        int32 = 1
)
    30  
    31  // encode encodes a string as specified in section 6.3 and prepends prefix to
    32  // the result.
    33  //
    34  // The "while h < length(input)" line in the specification becomes "for
    35  // remaining != 0" in the Go code, because len(s) in Go is in bytes, not runes.
    36  func encode(prefix, s string) (string, error) {
    37  	output := make([]byte, len(prefix), len(prefix)+1+2*len(s))
    38  	copy(output, prefix)
    39  	delta, n, bias := int32(0), initialN, initialBias
    40  	b, remaining := int32(0), int32(0)
    41  	for _, r := range s {
    42  		if r < utf8.RuneSelf {
    43  			b++
    44  			output = append(output, byte(r))
    45  		} else {
    46  			remaining++
    47  		}
    48  	}
    49  	h := b
    50  	if b > 0 {
    51  		output = append(output, '-')
    52  	}
    53  	for remaining != 0 {
    54  		m := int32(0x7fffffff)
    55  		for _, r := range s {
    56  			if m > r && r >= n {
    57  				m = r
    58  			}
    59  		}
    60  		delta += (m - n) * (h + 1)
    61  		if delta < 0 {
    62  			return "", fmt.Errorf("cookiejar: invalid label %q", s)
    63  		}
    64  		n = m
    65  		for _, r := range s {
    66  			if r < n {
    67  				delta++
    68  				if delta < 0 {
    69  					return "", fmt.Errorf("cookiejar: invalid label %q", s)
    70  				}
    71  				continue
    72  			}
    73  			if r > n {
    74  				continue
    75  			}
    76  			q := delta
    77  			for k := base; ; k += base {
    78  				t := k - bias
    79  				if t < tmin {
    80  					t = tmin
    81  				} else if t > tmax {
    82  					t = tmax
    83  				}
    84  				if q < t {
    85  					break
    86  				}
    87  				output = append(output, encodeDigit(t+(q-t)%(base-t)))
    88  				q = (q - t) / (base - t)
    89  			}
    90  			output = append(output, encodeDigit(q))
    91  			bias = adapt(delta, h+1, h == b)
    92  			delta = 0
    93  			h++
    94  			remaining--
    95  		}
    96  		delta++
    97  		n++
    98  	}
    99  	return string(output), nil
   100  }
   101  
   102  func encodeDigit(digit int32) byte {
   103  	switch {
   104  	case 0 <= digit && digit < 26:
   105  		return byte(digit + 'a')
   106  	case 26 <= digit && digit < 36:
   107  		return byte(digit + ('0' - 26))
   108  	}
   109  	panic("cookiejar: internal error in punycode encoding")
   110  }
   111  
   112  // adapt is the bias adaptation function specified in section 6.1.
   113  func adapt(delta, numPoints int32, firstTime bool) int32 {
   114  	if firstTime {
   115  		delta /= damp
   116  	} else {
   117  		delta /= 2
   118  	}
   119  	delta += delta / numPoints
   120  	k := int32(0)
   121  	for delta > ((base-tmin)*tmax)/2 {
   122  		delta /= base - tmin
   123  		k += base
   124  	}
   125  	return k + (base-tmin+1)*delta/(delta+skew)
   126  }
   127  
   128  // Strictly speaking, the remaining code below deals with IDNA (RFC 5890 and
   129  // friends) and not Punycode (RFC 3492) per se.
   130  
// acePrefix is the ASCII Compatible Encoding prefix. toASCII passes it to
// encode so that it is prepended to every label that had to be
// Punycode-encoded.
const acePrefix = "xn--"
   133  
   134  // toASCII converts a domain or domain label to its ASCII form. For example,
   135  // toASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
   136  // toASCII("golang") is "golang".
   137  func toASCII(s string) (string, error) {
   138  	if ascii.Is(s) {
   139  		return s, nil
   140  	}
   141  	labels := strings.Split(s, ".")
   142  	for i, label := range labels {
   143  		if !ascii.Is(label) {
   144  			a, err := encode(acePrefix, label)
   145  			if err != nil {
   146  				return "", err
   147  			}
   148  			labels[i] = a
   149  		}
   150  	}
   151  	return strings.Join(labels, "."), nil
   152  }