github.com/kidsbmilk/gofrontend_all@v0.0.0-20220701224323-6479d5976c5d/go/go-encode-id.cc (about)

     1  // go-encode-id.cc -- Go identifier and packagepath encoding/decoding hooks
     2  
     3  // Copyright 2016 The Go Authors. All rights reserved.
     4  // Use of this source code is governed by a BSD-style
     5  // license that can be found in the LICENSE file.
     6  
     7  #include "go-system.h"
     8  
     9  #include "gogo.h"
    10  #include "go-location.h"
    11  #include "go-linemap.h"
    12  #include "go-encode-id.h"
    13  #include "lex.h"
    14  
    15  // Return whether the character c can appear in a name that we are
    16  // encoding.  We only permit ASCII alphanumeric characters.
    17  
    18  static bool
    19  char_needs_encoding(char c)
    20  {
    21    switch (c)
    22      {
    23      case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    24      case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
    25      case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
    26      case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
    27      case 'Y': case 'Z':
    28      case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    29      case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    30      case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    31      case 's': case 't': case 'u': case 'v': case 'w': case 'x':
    32      case 'y': case 'z':
    33      case '0': case '1': case '2': case '3': case '4':
    34      case '5': case '6': case '7': case '8': case '9':
    35        return false;
    36      default:
    37        return true;
    38      }
    39  }
    40  
    41  // Return whether the identifier needs to be translated because it
    42  // contains non-ASCII characters.
    43  
    44  bool
    45  go_id_needs_encoding(const std::string& str)
    46  {
    47    for (std::string::const_iterator p = str.begin();
    48         p != str.end();
    49         ++p)
    50      if (char_needs_encoding(*p))
    51        return true;
    52    return false;
    53  }
    54  
    55  // Map from characters to the underscore encoding for them.
    56  
    57  class Special_char_code
    58  {
    59   public:
    60    Special_char_code();
    61  
    62    // Return the simple underscore encoding for C, or 0 if none.
    63    char
    64    code_for(unsigned int c) const
    65    {
    66      if (c <= 127)
    67        return this->codes_[c];
    68      return 0;
    69    }
    70  
    71   private:
    72    // Encodings for characters.
    73    char codes_[128];
    74  };
    75  
    76  // Construct the underscore encoding map.
    77  
    78  Special_char_code::Special_char_code()
    79  {
    80    memset(this->codes_, 0, sizeof this->codes_);
    81    this->codes_['_'] = '_';
    82    this->codes_['.'] = '0';
    83    this->codes_['/'] = '1';
    84    this->codes_['*'] = '2';
    85    this->codes_[','] = '3';
    86    this->codes_['{'] = '4';
    87    this->codes_['}'] = '5';
    88    this->codes_['['] = '6';
    89    this->codes_[']'] = '7';
    90    this->codes_['('] = '8';
    91    this->codes_[')'] = '9';
    92    this->codes_['"'] = 'a';
    93    this->codes_[' '] = 'b';
    94    this->codes_[';'] = 'c';
    95  }
    96  
// The singleton Special_char_code.  It is local to this file and is
// consulted by go_encode_id to find the short escape, if any, for a
// single-byte character that cannot appear unchanged.

static const Special_char_code special_char_code;
   100  
   101  // Pull the next UTF-8 character out of P and store it in *PC.  Return
   102  // the number of bytes read.
   103  
   104  static size_t
   105  fetch_utf8_char(const char* p, unsigned int* pc)
   106  {
   107    unsigned char c = *p;
   108    if ((c & 0x80) == 0)
   109      {
   110        *pc = c;
   111        return 1;
   112      }
   113    size_t len = 0;
   114    while ((c & 0x80) != 0)
   115      {
   116        ++len;
   117        c <<= 1;
   118      }
   119    unsigned int rc = *p & ((1 << (7 - len)) - 1);
   120    for (size_t i = 1; i < len; i++)
   121      {
   122        unsigned int u = p[i];
   123        rc <<= 6;
   124        rc |= u & 0x3f;
   125      }
   126    *pc = rc;
   127    return len;
   128  }
   129  
   130  // Encode an identifier using assembler-friendly characters.  The
   131  // encoding is described in detail near the end of the long comment at
   132  // the start of names.cc.
   133  
   134  std::string
   135  go_encode_id(const std::string &id)
   136  {
   137    if (Lex::is_invalid_identifier(id))
   138      {
   139        go_assert(saw_errors());
   140        return id;
   141      }
   142  
   143    std::string ret;
   144    const char* p = id.c_str();
   145    const char* pend = p + id.length();
   146  
   147    // We encode a leading digit, to ensure that no identifier starts
   148    // with a digit.
   149    if (pend > p && p[0] >= '0' && p[0] <= '9')
   150      {
   151        char buf[8];
   152        snprintf(buf, sizeof buf, "_x%02x", p[0]);
   153        ret.append(buf);
   154        ++p;
   155      }
   156  
   157    while (p < pend)
   158      {
   159        unsigned int c;
   160        size_t len = fetch_utf8_char(p, &c);
   161        if (len == 1)
   162  	{
   163  	  if (!char_needs_encoding(c))
   164  	    ret.push_back(c);
   165  	  else
   166  	    {
   167  	      char code = special_char_code.code_for(c);
   168  	      if (code != 0)
   169  		{
   170  		  ret.push_back('_');
   171  		  ret.push_back(code);
   172  		}
   173  	      else
   174  		{
   175  		  char buf[8];
   176  		  snprintf(buf, sizeof buf, "_x%02x", c);
   177  		  ret.append(buf);
   178  		}
   179  	    }
   180  	}
   181        else
   182  	{
   183  	  char buf[16];
   184  	  if (c < 0x10000)
   185  	    snprintf(buf, sizeof buf, "_u%04x", c);
   186  	  else
   187  	    snprintf(buf, sizeof buf, "_U%08x", c);
   188  	  ret.append(buf);
   189  	}
   190  
   191        p += len;
   192      }
   193  
   194    return ret;
   195  }
   196  
   197  // Convert a hex digit string to a unicode codepoint. No checking
   198  // to insure that the hex digit is meaningful.
   199  
   200  static unsigned
   201  hex_digits_to_unicode_codepoint(const char *digits, unsigned ndig)
   202  {
   203    unsigned result = 0;
   204    for (unsigned i = 0; i < ndig; ++i) {
   205      result <<= 4;
   206      result |= Lex::hex_val(digits[i]);
   207    }
   208    return result;
   209  }
   210  
   211  // Decode/demangle a mangled string produced by go_encode_id(). Returns
   212  // empty string if demangling process fails in some way.  At the moment
   213  // this routine is unused; there is an equivalent routine in the runtime
   214  // used for demangling symbols appearing in stack traces.
   215  
   216  std::string
   217  go_decode_id(const std::string &encoded)
   218  {
   219    std::string ret;
   220    const char* p = encoded.c_str();
   221    const char* pend = p + encoded.length();
   222    const Location loc = Linemap::predeclared_location();
   223  
   224    while (p < pend)
   225      {
   226        if (*p != '_' || p + 1 == pend)
   227  	{
   228  	  ret.push_back(*p);
   229  	  p++;
   230  	  continue;
   231  	}
   232  
   233        switch (p[1])
   234  	{
   235  	case '_':
   236  	  ret.push_back('_');
   237  	  p += 2;
   238  	  break;
   239  	case '0':
   240  	  ret.push_back('.');
   241  	  p += 2;
   242  	  break;
   243  	case '1':
   244  	  ret.push_back('/');
   245  	  p += 2;
   246  	  break;
   247  	case '2':
   248  	  ret.push_back('*');
   249  	  p += 2;
   250  	  break;
   251  	case '3':
   252  	  ret.push_back(',');
   253  	  p += 2;
   254  	  break;
   255  	case '4':
   256  	  ret.push_back('{');
   257  	  p += 2;
   258  	  break;
   259  	case '5':
   260  	  ret.push_back('}');
   261  	  p += 2;
   262  	  break;
   263  	case '6':
   264  	  ret.push_back('[');
   265  	  p += 2;
   266  	  break;
   267  	case '7':
   268  	  ret.push_back(']');
   269  	  p += 2;
   270  	  break;
   271  	case '8':
   272  	  ret.push_back('(');
   273  	  p += 2;
   274  	  break;
   275  	case '9':
   276  	  ret.push_back(')');
   277  	  p += 2;
   278  	  break;
   279  	case 'a':
   280  	  ret.push_back('"');
   281  	  p += 2;
   282  	  break;
   283  	case 'b':
   284  	  ret.push_back(' ');
   285  	  p += 2;
   286  	  break;
   287  	case 'c':
   288  	  ret.push_back(';');
   289  	  p += 2;
   290  	  break;
   291          case 'x':
   292  	  {
   293  	    const char* digits = p + 2;
   294  	    if (strlen(digits) < 2)
   295  	      return "";
   296  	    unsigned int rune = hex_digits_to_unicode_codepoint(digits, 2);
   297  	    Lex::append_char(rune, true, &ret, loc);
   298  	    p += 4;
   299  	  }
   300  	  break;
   301  	case 'u':
   302  	  {
   303  	    const char* digits = p + 2;
   304  	    if (strlen(digits) < 4)
   305  	      return "";
   306  	    unsigned int rune = hex_digits_to_unicode_codepoint(digits, 4);
   307  	    Lex::append_char(rune, true, &ret, loc);
   308  	    p += 6;
   309  	  }
   310  	  break;
   311  	case 'U':
   312  	  {
   313  	    const char* digits = p + 2;
   314  	    if (strlen(digits) < 8)
   315  	      return "";
   316  	    unsigned int rune = hex_digits_to_unicode_codepoint(digits, 8);
   317  	    Lex::append_char(rune, true, &ret, loc);
   318  	    p += 10;
   319  	  }
   320  	  break;
   321  	default:
   322  	  return "";
   323  	}
   324      }
   325  
   326    return ret;
   327  }
   328  
   329  // Encode a struct field tag.  This is only used when we need to
   330  // create a type descriptor for an anonymous struct type with field
   331  // tags.  Underscore encoding will be applied to the returned string.
   332  // The tag will appear between curly braces, so that is all we have to
   333  // avoid.
   334  
   335  std::string
   336  go_mangle_struct_tag(const std::string& tag)
   337  {
   338    std::string ret;
   339    const char* p = tag.c_str();
   340    const char* pend = p + tag.length();
   341    while (p < pend)
   342      {
   343        unsigned int c;
   344        size_t len = fetch_utf8_char(p, &c);
   345        if (len > 1)
   346  	ret.append(p, len);
   347        else if (c != '{' && c != '}' && c != '\\')
   348  	ret.push_back(c);
   349        else
   350  	{
   351  	  ret.push_back('\\');
   352  	  ret.push_back(c);
   353  	}
   354        p += len;
   355      }
   356    return ret;
   357  }