github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/pkg/net/http/cookiejar/punycode.go (about)

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package cookiejar
     6  
     7  // This file implements the Punycode algorithm from RFC 3492.
     8  
     9  import (
    10  	"fmt"
    11  	"strings"
    12  	"unicode/utf8"
    13  )
    14  
    15  // These parameter values are specified in section 5.
    16  //
    17  // All computation is done with int32s, so that overflow behavior is identical
    18  // regardless of whether int is 32-bit or 64-bit.
    19  const (
    20  	base        int32 = 36
    21  	damp        int32 = 700
    22  	initialBias int32 = 72
    23  	initialN    int32 = 128
    24  	skew        int32 = 38
    25  	tmax        int32 = 26
    26  	tmin        int32 = 1
    27  )
    28  
    29  // encode encodes a string as specified in section 6.3 and prepends prefix to
    30  // the result.
    31  //
    32  // The "while h < length(input)" line in the specification becomes "for
    33  // remaining != 0" in the Go code, because len(s) in Go is in bytes, not runes.
    34  func encode(prefix, s string) (string, error) {
    35  	output := make([]byte, len(prefix), len(prefix)+1+2*len(s))
    36  	copy(output, prefix)
    37  	delta, n, bias := int32(0), initialN, initialBias
    38  	b, remaining := int32(0), int32(0)
    39  	for _, r := range s {
    40  		if r < 0x80 {
    41  			b++
    42  			output = append(output, byte(r))
    43  		} else {
    44  			remaining++
    45  		}
    46  	}
    47  	h := b
    48  	if b > 0 {
    49  		output = append(output, '-')
    50  	}
    51  	for remaining != 0 {
    52  		m := int32(0x7fffffff)
    53  		for _, r := range s {
    54  			if m > r && r >= n {
    55  				m = r
    56  			}
    57  		}
    58  		delta += (m - n) * (h + 1)
    59  		if delta < 0 {
    60  			return "", fmt.Errorf("cookiejar: invalid label %q", s)
    61  		}
    62  		n = m
    63  		for _, r := range s {
    64  			if r < n {
    65  				delta++
    66  				if delta < 0 {
    67  					return "", fmt.Errorf("cookiejar: invalid label %q", s)
    68  				}
    69  				continue
    70  			}
    71  			if r > n {
    72  				continue
    73  			}
    74  			q := delta
    75  			for k := base; ; k += base {
    76  				t := k - bias
    77  				if t < tmin {
    78  					t = tmin
    79  				} else if t > tmax {
    80  					t = tmax
    81  				}
    82  				if q < t {
    83  					break
    84  				}
    85  				output = append(output, encodeDigit(t+(q-t)%(base-t)))
    86  				q = (q - t) / (base - t)
    87  			}
    88  			output = append(output, encodeDigit(q))
    89  			bias = adapt(delta, h+1, h == b)
    90  			delta = 0
    91  			h++
    92  			remaining--
    93  		}
    94  		delta++
    95  		n++
    96  	}
    97  	return string(output), nil
    98  }
    99  
   100  func encodeDigit(digit int32) byte {
   101  	switch {
   102  	case 0 <= digit && digit < 26:
   103  		return byte(digit + 'a')
   104  	case 26 <= digit && digit < 36:
   105  		return byte(digit + ('0' - 26))
   106  	}
   107  	panic("cookiejar: internal error in punycode encoding")
   108  }
   109  
   110  // adapt is the bias adaptation function specified in section 6.1.
   111  func adapt(delta, numPoints int32, firstTime bool) int32 {
   112  	if firstTime {
   113  		delta /= damp
   114  	} else {
   115  		delta /= 2
   116  	}
   117  	delta += delta / numPoints
   118  	k := int32(0)
   119  	for delta > ((base-tmin)*tmax)/2 {
   120  		delta /= base - tmin
   121  		k += base
   122  	}
   123  	return k + (base-tmin+1)*delta/(delta+skew)
   124  }
   125  
   126  // Strictly speaking, the remaining code below deals with IDNA (RFC 5890 and
   127  // friends) and not Punycode (RFC 3492) per se.
   128  
   129  // acePrefix is the ASCII Compatible Encoding prefix.
   130  const acePrefix = "xn--"
   131  
   132  // toASCII converts a domain or domain label to its ASCII form. For example,
   133  // toASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
   134  // toASCII("golang") is "golang".
   135  func toASCII(s string) (string, error) {
   136  	if ascii(s) {
   137  		return s, nil
   138  	}
   139  	labels := strings.Split(s, ".")
   140  	for i, label := range labels {
   141  		if !ascii(label) {
   142  			a, err := encode(acePrefix, label)
   143  			if err != nil {
   144  				return "", err
   145  			}
   146  			labels[i] = a
   147  		}
   148  	}
   149  	return strings.Join(labels, "."), nil
   150  }
   151  
   152  func ascii(s string) bool {
   153  	for i := 0; i < len(s); i++ {
   154  		if s[i] >= utf8.RuneSelf {
   155  			return false
   156  		}
   157  	}
   158  	return true
   159  }