github.com/myhau/pulumi/pkg/v3@v3.70.2-0.20221116134521-f2775972e587/codegen/python/python.go (about) 1 // Copyright 2016-2020, Pulumi Corporation. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package python 16 17 import ( 18 "strings" 19 "unicode" 20 "unicode/utf8" 21 22 "github.com/pulumi/pulumi/pkg/v3/codegen" 23 ) 24 25 // useLegacyName are names that should return a legacy result from PyName, for compatibility. 26 var useLegacyName = codegen.NewStringSet( 27 // The following property name of a nested type is a case where the newer algorithm produces an incorrect name 28 // (`open_xjson_ser_de`). It should be the legacy name of `open_x_json_ser_de`. 29 // TODO[pulumi/pulumi#5199]: We should see if we can fix this in the algorithm of PyName so it doesn't need to 30 // be special-cased in this set. 31 "openXJsonSerDe", // AWS 32 33 // The following function name has already shipped with the legacy name (`get_public_i_ps`). 34 // TODO[pulumi/pulumi#5200]: Consider emitting two functions: one with the correct name (`get_public_ips`) 35 // and another function with the legacy name (`get_public_i_ps`) marked as deprecated. 36 "GetPublicIPs", // Azure 37 38 // The following function name has already shipped with the legacy name (`get_uptime_check_i_ps`). 39 // TODO[pulumi/pulumi#5200]: Consider emitting two functions: one with the correct name (`get_uptime_check_ips`) 40 // and another function with the legacy name (`get_uptime_check_i_ps`) marked as deprecated. 41 "GetUptimeCheckIPs", // GCP 42 ) 43 44 // PyName turns a variable or function name, normally using camelCase, to an underscore_case name. 45 func PyName(name string) string { 46 return pyName(name, useLegacyName.Has(name)) 47 } 48 49 func pyName(name string, legacy bool) string { 50 // This method is a state machine with four states: 51 // stateFirst - the initial state. 52 // stateUpper - The last character we saw was an uppercase letter and the character before it 53 // was either a number or a lowercase letter. 54 // stateAcronym - The last character we saw was an uppercase letter and the character before it 55 // was an uppercase letter. 56 // stateLowerOrNumber - The last character we saw was a lowercase letter or a number. 57 // 58 // The following are the state transitions of this state machine: 59 // stateFirst -> (uppercase letter) -> stateUpper 60 // stateFirst -> (lowercase letter or number) -> stateLowerOrNumber 61 // Append the lower-case form of the character to currentComponent. 62 // 63 // stateUpper -> (uppercase letter) -> stateAcronym 64 // stateUpper -> (lowercase letter or number) -> stateLowerOrNumber 65 // Append the lower-case form of the character to currentComponent. 66 // 67 // stateAcronym -> (uppercase letter) -> stateAcronym 68 // Append the lower-case form of the character to currentComponent. 69 // stateAcronym -> (number) -> stateLowerOrNumber 70 // Append the character to currentComponent. 71 // stateAcronym -> (lowercase letter) -> stateLowerOrNumber 72 // Take all but the last character in currentComponent, turn that into 73 // a string, and append that to components. Set currentComponent to the 74 // last two characters seen. 75 // 76 // stateLowerOrNumber -> (uppercase letter) -> stateUpper 77 // Take all characters in currentComponent, turn that into a string, 78 // and append that to components. Set currentComponent to the last 79 // character seen. 80 // stateLowerOrNumber -> (lowercase letter) -> stateLowerOrNumber 81 // Append the character to currentComponent. 82 // 83 // The Go libraries that convert camelCase to snake_case deviate subtly from 84 // the semantics we're going for in this method, namely that they separate 85 // numbers and lowercase letters. We don't want this in all cases (we want e.g. Sha256Hash to 86 // be converted as sha256_hash). We also want SHA256Hash to be converted as sha256_hash, so 87 // we must at least be aware of digits when in the stateAcronym state. 88 // 89 // As for why this is a state machine, the libraries that do this all pretty much use 90 // either regular expressions or state machines, which I suppose are ultimately the same thing. 91 const ( 92 stateFirst = iota 93 stateUpper 94 stateAcronym 95 stateLowerOrNumber 96 ) 97 98 var result strings.Builder // The components of the name, joined together with underscores. 99 var currentComponent strings.Builder // The characters composing the current component being built 100 101 // Preallocate enough space for the name + 5 underscores. '5' is based on a wild guess that most names will consist 102 // of 5 or fewer words. 103 result.Grow(len(name) + 5) 104 currentComponent.Grow(len(name) + 5) 105 106 state := stateFirst 107 for _, char := range name { 108 // If this is an illegal character for a Python identifier, replace it. 109 if !isLegalIdentifierPart(char) { 110 char = '_' 111 } 112 113 switch state { 114 case stateFirst: 115 if !isLegalIdentifierStart(char) { 116 currentComponent.WriteRune('_') 117 } 118 119 if unicode.IsUpper(char) { 120 // stateFirst -> stateUpper 121 state = stateUpper 122 currentComponent.WriteRune(unicode.ToLower(char)) 123 continue 124 } 125 126 // stateFirst -> stateLowerOrNumber 127 state = stateLowerOrNumber 128 currentComponent.WriteRune(char) 129 continue 130 131 case stateUpper: 132 if unicode.IsUpper(char) { 133 // stateUpper -> stateAcronym 134 state = stateAcronym 135 currentComponent.WriteRune(unicode.ToLower(char)) 136 continue 137 } 138 139 // stateUpper -> stateLowerOrNumber 140 state = stateLowerOrNumber 141 currentComponent.WriteRune(char) 142 continue 143 144 case stateAcronym: 145 if unicode.IsUpper(char) { 146 // stateAcronym -> stateAcronym 147 currentComponent.WriteRune(unicode.ToLower(char)) 148 continue 149 } 150 151 // We want to fold digits (or the lowercase letter 's' if not the legacy algo) immediately following 152 // an acronym into the same component as the acronym. 153 if unicode.IsDigit(char) || (char == 's' && !legacy) { 154 // stateAcronym -> stateLowerOrNumber 155 state = stateLowerOrNumber 156 currentComponent.WriteRune(char) 157 continue 158 } 159 160 // stateAcronym -> stateLowerOrNumber 161 component := currentComponent.String() 162 last, size := utf8.DecodeLastRuneInString(component) 163 if result.Len() != 0 { 164 result.WriteRune('_') 165 } 166 result.WriteString(component[:len(component)-size]) 167 168 currentComponent.Reset() 169 currentComponent.WriteRune(last) 170 currentComponent.WriteRune(char) 171 state = stateLowerOrNumber 172 continue 173 174 case stateLowerOrNumber: 175 if unicode.IsUpper(char) { 176 // stateLowerOrNumber -> stateUpper 177 if result.Len() != 0 { 178 result.WriteRune('_') 179 } 180 result.WriteString(currentComponent.String()) 181 182 currentComponent.Reset() 183 currentComponent.WriteRune(unicode.ToLower(char)) 184 state = stateUpper 185 continue 186 } 187 188 // stateLowerOrNumber -> stateLowerOrNumber 189 currentComponent.WriteRune(char) 190 continue 191 } 192 } 193 194 if currentComponent.Len() != 0 { 195 if result.Len() != 0 { 196 result.WriteRune('_') 197 } 198 result.WriteString(currentComponent.String()) 199 } 200 return EnsureKeywordSafe(result.String()) 201 } 202 203 // Keywords is a map of reserved keywords used by Python 2 and 3. We use this to avoid generating unspeakable 204 // names in the resulting code. This map was sourced by merging the following reference material: 205 // 206 // - Python 2: https://docs.python.org/2.5/ref/keywords.html 207 // - Python 3: https://docs.python.org/3/reference/lexical_analysis.html#keywords 208 var Keywords = codegen.NewStringSet( 209 "False", 210 "None", 211 "True", 212 "and", 213 "as", 214 "assert", 215 "async", 216 "await", 217 "break", 218 "class", 219 "continue", 220 "def", 221 "del", 222 "elif", 223 "else", 224 "except", 225 "exec", 226 "finally", 227 "for", 228 "from", 229 "global", 230 "if", 231 "import", 232 "in", 233 "is", 234 "lambda", 235 "nonlocal", 236 "not", 237 "or", 238 "pass", 239 "print", 240 "raise", 241 "return", 242 "try", 243 "while", 244 "with", 245 "yield") 246 247 // EnsureKeywordSafe adds a trailing underscore if the generated name clashes with a Python 2 or 3 keyword, per 248 // PEP 8: https://www.python.org/dev/peps/pep-0008/?#function-and-method-arguments 249 func EnsureKeywordSafe(name string) string { 250 if Keywords.Has(name) { 251 return name + "_" 252 } 253 return name 254 }