github.com/myhau/pulumi/pkg/v3@v3.70.2-0.20221116134521-f2775972e587/codegen/python/python.go (about)

     1  // Copyright 2016-2020, Pulumi Corporation.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package python
    16  
    17  import (
    18  	"strings"
    19  	"unicode"
    20  	"unicode/utf8"
    21  
    22  	"github.com/pulumi/pulumi/pkg/v3/codegen"
    23  )
    24  
    25  // useLegacyName are names that should return a legacy result from PyName, for compatibility.
    26  var useLegacyName = codegen.NewStringSet(
    27  	// The following property name of a nested type is a case where the newer algorithm produces an incorrect name
    28  	// (`open_xjson_ser_de`). It should be the legacy name of `open_x_json_ser_de`.
    29  	// TODO[pulumi/pulumi#5199]: We should see if we can fix this in the algorithm of PyName so it doesn't need to
    30  	// be special-cased in this set.
    31  	"openXJsonSerDe", // AWS
    32  
    33  	// The following function name has already shipped with the legacy name (`get_public_i_ps`).
    34  	// TODO[pulumi/pulumi#5200]: Consider emitting two functions: one with the correct name (`get_public_ips`)
    35  	// and another function with the legacy name (`get_public_i_ps`) marked as deprecated.
    36  	"GetPublicIPs", // Azure
    37  
    38  	// The following function name has already shipped with the legacy name (`get_uptime_check_i_ps`).
    39  	// TODO[pulumi/pulumi#5200]: Consider emitting two functions: one with the correct name (`get_uptime_check_ips`)
    40  	// and another function with the legacy name (`get_uptime_check_i_ps`) marked as deprecated.
    41  	"GetUptimeCheckIPs", // GCP
    42  )
    43  
    44  // PyName turns a variable or function name, normally using camelCase, to an underscore_case name.
    45  func PyName(name string) string {
    46  	return pyName(name, useLegacyName.Has(name))
    47  }
    48  
    49  func pyName(name string, legacy bool) string {
    50  	// This method is a state machine with four states:
    51  	//   stateFirst - the initial state.
    52  	//   stateUpper - The last character we saw was an uppercase letter and the character before it
    53  	//                was either a number or a lowercase letter.
    54  	//   stateAcronym - The last character we saw was an uppercase letter and the character before it
    55  	//                  was an uppercase letter.
    56  	//   stateLowerOrNumber - The last character we saw was a lowercase letter or a number.
    57  	//
    58  	// The following are the state transitions of this state machine:
    59  	//   stateFirst -> (uppercase letter) -> stateUpper
    60  	//   stateFirst -> (lowercase letter or number) -> stateLowerOrNumber
    61  	//      Append the lower-case form of the character to currentComponent.
    62  	//
    63  	//   stateUpper -> (uppercase letter) -> stateAcronym
    64  	//   stateUpper -> (lowercase letter or number) -> stateLowerOrNumber
    65  	//      Append the lower-case form of the character to currentComponent.
    66  	//
    67  	//   stateAcronym -> (uppercase letter) -> stateAcronym
    68  	//		Append the lower-case form of the character to currentComponent.
    69  	//   stateAcronym -> (number) -> stateLowerOrNumber
    70  	//      Append the character to currentComponent.
    71  	//   stateAcronym -> (lowercase letter) -> stateLowerOrNumber
    72  	//      Take all but the last character in currentComponent, turn that into
    73  	//      a string, and append that to components. Set currentComponent to the
    74  	//      last two characters seen.
    75  	//
    76  	//   stateLowerOrNumber -> (uppercase letter) -> stateUpper
    77  	//      Take all characters in currentComponent, turn that into a string,
    78  	//      and append that to components. Set currentComponent to the last
    79  	//      character seen.
    80  	//	 stateLowerOrNumber -> (lowercase letter) -> stateLowerOrNumber
    81  	//      Append the character to currentComponent.
    82  	//
    83  	// The Go libraries that convert camelCase to snake_case deviate subtly from
    84  	// the semantics we're going for in this method, namely that they separate
    85  	// numbers and lowercase letters. We don't want this in all cases (we want e.g. Sha256Hash to
    86  	// be converted as sha256_hash). We also want SHA256Hash to be converted as sha256_hash, so
    87  	// we must at least be aware of digits when in the stateAcronym state.
    88  	//
    89  	// As for why this is a state machine, the libraries that do this all pretty much use
    90  	// either regular expressions or state machines, which I suppose are ultimately the same thing.
    91  	const (
    92  		stateFirst = iota
    93  		stateUpper
    94  		stateAcronym
    95  		stateLowerOrNumber
    96  	)
    97  
    98  	var result strings.Builder           // The components of the name, joined together with underscores.
    99  	var currentComponent strings.Builder // The characters composing the current component being built
   100  
   101  	// Preallocate enough space for the name + 5 underscores. '5' is based on a wild guess that most names will consist
   102  	// of 5 or fewer words.
   103  	result.Grow(len(name) + 5)
   104  	currentComponent.Grow(len(name) + 5)
   105  
   106  	state := stateFirst
   107  	for _, char := range name {
   108  		// If this is an illegal character for a Python identifier, replace it.
   109  		if !isLegalIdentifierPart(char) {
   110  			char = '_'
   111  		}
   112  
   113  		switch state {
   114  		case stateFirst:
   115  			if !isLegalIdentifierStart(char) {
   116  				currentComponent.WriteRune('_')
   117  			}
   118  
   119  			if unicode.IsUpper(char) {
   120  				// stateFirst -> stateUpper
   121  				state = stateUpper
   122  				currentComponent.WriteRune(unicode.ToLower(char))
   123  				continue
   124  			}
   125  
   126  			// stateFirst -> stateLowerOrNumber
   127  			state = stateLowerOrNumber
   128  			currentComponent.WriteRune(char)
   129  			continue
   130  
   131  		case stateUpper:
   132  			if unicode.IsUpper(char) {
   133  				// stateUpper -> stateAcronym
   134  				state = stateAcronym
   135  				currentComponent.WriteRune(unicode.ToLower(char))
   136  				continue
   137  			}
   138  
   139  			// stateUpper -> stateLowerOrNumber
   140  			state = stateLowerOrNumber
   141  			currentComponent.WriteRune(char)
   142  			continue
   143  
   144  		case stateAcronym:
   145  			if unicode.IsUpper(char) {
   146  				// stateAcronym -> stateAcronym
   147  				currentComponent.WriteRune(unicode.ToLower(char))
   148  				continue
   149  			}
   150  
   151  			// We want to fold digits (or the lowercase letter 's' if not the legacy algo) immediately following
   152  			// an acronym into the same component as the acronym.
   153  			if unicode.IsDigit(char) || (char == 's' && !legacy) {
   154  				// stateAcronym -> stateLowerOrNumber
   155  				state = stateLowerOrNumber
   156  				currentComponent.WriteRune(char)
   157  				continue
   158  			}
   159  
   160  			// stateAcronym -> stateLowerOrNumber
   161  			component := currentComponent.String()
   162  			last, size := utf8.DecodeLastRuneInString(component)
   163  			if result.Len() != 0 {
   164  				result.WriteRune('_')
   165  			}
   166  			result.WriteString(component[:len(component)-size])
   167  
   168  			currentComponent.Reset()
   169  			currentComponent.WriteRune(last)
   170  			currentComponent.WriteRune(char)
   171  			state = stateLowerOrNumber
   172  			continue
   173  
   174  		case stateLowerOrNumber:
   175  			if unicode.IsUpper(char) {
   176  				// stateLowerOrNumber -> stateUpper
   177  				if result.Len() != 0 {
   178  					result.WriteRune('_')
   179  				}
   180  				result.WriteString(currentComponent.String())
   181  
   182  				currentComponent.Reset()
   183  				currentComponent.WriteRune(unicode.ToLower(char))
   184  				state = stateUpper
   185  				continue
   186  			}
   187  
   188  			// stateLowerOrNumber -> stateLowerOrNumber
   189  			currentComponent.WriteRune(char)
   190  			continue
   191  		}
   192  	}
   193  
   194  	if currentComponent.Len() != 0 {
   195  		if result.Len() != 0 {
   196  			result.WriteRune('_')
   197  		}
   198  		result.WriteString(currentComponent.String())
   199  	}
   200  	return EnsureKeywordSafe(result.String())
   201  }
   202  
   203  // Keywords is a map of reserved keywords used by Python 2 and 3.  We use this to avoid generating unspeakable
   204  // names in the resulting code.  This map was sourced by merging the following reference material:
   205  //
   206  //   - Python 2: https://docs.python.org/2.5/ref/keywords.html
   207  //   - Python 3: https://docs.python.org/3/reference/lexical_analysis.html#keywords
   208  var Keywords = codegen.NewStringSet(
   209  	"False",
   210  	"None",
   211  	"True",
   212  	"and",
   213  	"as",
   214  	"assert",
   215  	"async",
   216  	"await",
   217  	"break",
   218  	"class",
   219  	"continue",
   220  	"def",
   221  	"del",
   222  	"elif",
   223  	"else",
   224  	"except",
   225  	"exec",
   226  	"finally",
   227  	"for",
   228  	"from",
   229  	"global",
   230  	"if",
   231  	"import",
   232  	"in",
   233  	"is",
   234  	"lambda",
   235  	"nonlocal",
   236  	"not",
   237  	"or",
   238  	"pass",
   239  	"print",
   240  	"raise",
   241  	"return",
   242  	"try",
   243  	"while",
   244  	"with",
   245  	"yield")
   246  
   247  // EnsureKeywordSafe adds a trailing underscore if the generated name clashes with a Python 2 or 3 keyword, per
   248  // PEP 8: https://www.python.org/dev/peps/pep-0008/?#function-and-method-arguments
   249  func EnsureKeywordSafe(name string) string {
   250  	if Keywords.Has(name) {
   251  		return name + "_"
   252  	}
   253  	return name
   254  }