github.com/blend/go-sdk@v1.20220411.3/names/parser.go (about) 1 /* 2 3 Copyright (c) 2022 - Present. Blend Labs, Inc. All rights reserved 4 Use of this source code is governed by a MIT license that can be found in the LICENSE file. 5 6 */ 7 8 package names 9 10 import ( 11 "strings" 12 "unicode" 13 ) 14 15 var validSuffixes = []string{ 16 "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X", "XI", "XII", "XIII", "XIV", "XV", "XVI", "XVII", "XVIII", "XIX", "XX", 17 "Senior", "Junior", "Jr", "Sr", 18 "PhD", "APR", "RPh", "PE", "MD", "MA", "DMD", "CME", 19 } 20 21 var compoundLastNames = []string{ 22 "vere", "von", "van", "de", "del", "della", "di", "da", "pietro", 23 "vanden", "du", "st.", "st", "la", "lo", "ter", "bin", "ibn", 24 } 25 26 // Parse parses a string into a name. 27 func Parse(input string) (name Name) { 28 fullName := strings.TrimSpace(input) 29 30 rawNameParts := strings.Split(fullName, " ") 31 32 nameParts := []string{} 33 34 lastName := "" 35 firstName := "" 36 initials := "" 37 for _, part := range rawNameParts { 38 if !strings.Contains(part, "(") { 39 nameParts = append(nameParts, part) 40 } 41 } 42 43 numWords := len(nameParts) 44 salutation := processSalutation(nameParts[0]) 45 suffix := processSuffix(nameParts[len(nameParts)-1]) 46 47 start := 0 48 if salutation != "" { 49 start = 1 50 } 51 52 end := numWords 53 if suffix != "" { 54 end = numWords - 1 55 } 56 57 i := 0 58 for i = start; i < (end - 1); i++ { 59 word := nameParts[i] 60 if isCompoundLastName(word) && i != start { 61 break 62 } 63 if isMiddleName(word) { 64 if i == start { 65 if isMiddleName(nameParts[i+1]) { 66 firstName = firstName + " " + strings.ToUpper(word) 67 } else { 68 initials = initials + " " + strings.ToUpper(word) 69 } 70 } else { 71 initials = initials + " " + strings.ToUpper(word) 72 } 73 } else { 74 firstName = firstName + " " + fixCase(word) 75 } 76 } 77 78 if (end - start) > 1 { 79 for j := i; j < end; j++ { 80 lastName = lastName + " " + fixCase(nameParts[j]) 81 } 82 } else if i < len(nameParts) { 83 firstName = fixCase(nameParts[i]) 84 } 85 86 name.Salutation = salutation 87 name.FirstName = strings.TrimSpace(firstName) 88 name.MiddleName = strings.TrimSpace(initials) 89 name.LastName = strings.TrimSpace(lastName) 90 name.Suffix = suffix 91 92 return name 93 } 94 95 func processSalutation(input string) string { 96 word := cleanString(input) 97 98 switch word { 99 case "mr", "master", "mister": 100 return "Mr." 101 case "mrs", "misses": 102 return "Mrs." 103 case "ms", "miss": 104 return "Ms." 105 case "dr": 106 return "Dr." 107 case "rev": 108 return "Rev." 109 case "fr": 110 return "Fr." 111 } 112 113 return "" 114 } 115 116 func processSuffix(input string) string { 117 word := cleanString(input) 118 return getByLower(validSuffixes, word) 119 } 120 121 func isCompoundLastName(input string) bool { 122 word := cleanString(input) 123 exists := containsLower(compoundLastNames, word) 124 return exists 125 } 126 127 func isMiddleName(input string) bool { 128 word := cleanString(input) 129 return len(word) == 1 130 } 131 132 func uppercaseFirstAll(input string, separator string) string { 133 words := []string{} 134 parts := strings.Split(input, separator) 135 for _, thisWord := range parts { 136 toAppend := "" 137 switch { 138 case isCompoundLastName(strings.ToLower(thisWord)): 139 // preserve first letter case, but to lower the rest for compound last names 140 if unicode.IsUpper([]rune(thisWord)[0]) { 141 toAppend = strings.Title(strings.ToLower(thisWord)) 142 } else { 143 toAppend = strings.ToLower(thisWord) 144 } 145 case isCamelCase(thisWord): 146 // Preserve case for Camel-cased strings 147 toAppend = thisWord 148 default: 149 // For everything else, force to title case 150 toAppend = upperCaseFirst(strings.ToLower(thisWord)) 151 } 152 words = append(words, toAppend) 153 } 154 return strings.Join(words, separator) 155 } 156 157 func upperCaseFirst(input string) string { 158 return strings.Title(strings.ToLower(input)) 159 } 160 161 func fixCase(input string) string { 162 word := uppercaseFirstAll(input, "-") 163 word = uppercaseFirstAll(word, ".") 164 return word 165 } 166 167 func cleanString(input string) string { 168 return strings.ToLower(strings.Replace(input, ".", "", -1)) 169 } 170 171 // isCamelCase returns if a string is CamelCased. 172 // CamelCased in this sense is if a string has both inner-upper and lower characters. 173 func isCamelCase(input string) bool { 174 hasLowers := false 175 hasInnerUppers := false 176 177 for i, c := range input { 178 if i != 0 && unicode.IsUpper(c) { 179 hasInnerUppers = true 180 } 181 if unicode.IsLower(c) { 182 hasLowers = true 183 } 184 } 185 186 return hasLowers && hasInnerUppers 187 } 188 189 // containsLower returns true if the `elem` is in the StringArray, false otherwise. 190 func containsLower(values []string, elem string) bool { 191 for _, arrayElem := range values { 192 if strings.ToLower(arrayElem) == elem { 193 return true 194 } 195 } 196 return false 197 } 198 199 // getByLower returns an element from the array that matches the input. 200 func getByLower(values []string, elem string) string { 201 for _, arrayElem := range values { 202 if strings.ToLower(arrayElem) == elem { 203 return arrayElem 204 } 205 } 206 return "" 207 }