github.com/blend/go-sdk@v1.20220411.3/names/parser.go (about)

     1  /*
     2  
     3  Copyright (c) 2022 - Present. Blend Labs, Inc. All rights reserved
     4  Use of this source code is governed by a MIT license that can be found in the LICENSE file.
     5  
     6  */
     7  
     8  package names
     9  
    10  import (
    11  	"strings"
    12  	"unicode"
    13  )
    14  
    15  var validSuffixes = []string{
    16  	"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X", "XI", "XII", "XIII", "XIV", "XV", "XVI", "XVII", "XVIII", "XIX", "XX",
    17  	"Senior", "Junior", "Jr", "Sr",
    18  	"PhD", "APR", "RPh", "PE", "MD", "MA", "DMD", "CME",
    19  }
    20  
    21  var compoundLastNames = []string{
    22  	"vere", "von", "van", "de", "del", "della", "di", "da", "pietro",
    23  	"vanden", "du", "st.", "st", "la", "lo", "ter", "bin", "ibn",
    24  }
    25  
    26  // Parse parses a string into a name.
    27  func Parse(input string) (name Name) {
    28  	fullName := strings.TrimSpace(input)
    29  
    30  	rawNameParts := strings.Split(fullName, " ")
    31  
    32  	nameParts := []string{}
    33  
    34  	lastName := ""
    35  	firstName := ""
    36  	initials := ""
    37  	for _, part := range rawNameParts {
    38  		if !strings.Contains(part, "(") {
    39  			nameParts = append(nameParts, part)
    40  		}
    41  	}
    42  
    43  	numWords := len(nameParts)
    44  	salutation := processSalutation(nameParts[0])
    45  	suffix := processSuffix(nameParts[len(nameParts)-1])
    46  
    47  	start := 0
    48  	if salutation != "" {
    49  		start = 1
    50  	}
    51  
    52  	end := numWords
    53  	if suffix != "" {
    54  		end = numWords - 1
    55  	}
    56  
    57  	i := 0
    58  	for i = start; i < (end - 1); i++ {
    59  		word := nameParts[i]
    60  		if isCompoundLastName(word) && i != start {
    61  			break
    62  		}
    63  		if isMiddleName(word) {
    64  			if i == start {
    65  				if isMiddleName(nameParts[i+1]) {
    66  					firstName = firstName + " " + strings.ToUpper(word)
    67  				} else {
    68  					initials = initials + " " + strings.ToUpper(word)
    69  				}
    70  			} else {
    71  				initials = initials + " " + strings.ToUpper(word)
    72  			}
    73  		} else {
    74  			firstName = firstName + " " + fixCase(word)
    75  		}
    76  	}
    77  
    78  	if (end - start) > 1 {
    79  		for j := i; j < end; j++ {
    80  			lastName = lastName + " " + fixCase(nameParts[j])
    81  		}
    82  	} else if i < len(nameParts) {
    83  		firstName = fixCase(nameParts[i])
    84  	}
    85  
    86  	name.Salutation = salutation
    87  	name.FirstName = strings.TrimSpace(firstName)
    88  	name.MiddleName = strings.TrimSpace(initials)
    89  	name.LastName = strings.TrimSpace(lastName)
    90  	name.Suffix = suffix
    91  
    92  	return name
    93  }
    94  
    95  func processSalutation(input string) string {
    96  	word := cleanString(input)
    97  
    98  	switch word {
    99  	case "mr", "master", "mister":
   100  		return "Mr."
   101  	case "mrs", "misses":
   102  		return "Mrs."
   103  	case "ms", "miss":
   104  		return "Ms."
   105  	case "dr":
   106  		return "Dr."
   107  	case "rev":
   108  		return "Rev."
   109  	case "fr":
   110  		return "Fr."
   111  	}
   112  
   113  	return ""
   114  }
   115  
   116  func processSuffix(input string) string {
   117  	word := cleanString(input)
   118  	return getByLower(validSuffixes, word)
   119  }
   120  
   121  func isCompoundLastName(input string) bool {
   122  	word := cleanString(input)
   123  	exists := containsLower(compoundLastNames, word)
   124  	return exists
   125  }
   126  
   127  func isMiddleName(input string) bool {
   128  	word := cleanString(input)
   129  	return len(word) == 1
   130  }
   131  
   132  func uppercaseFirstAll(input string, separator string) string {
   133  	words := []string{}
   134  	parts := strings.Split(input, separator)
   135  	for _, thisWord := range parts {
   136  		toAppend := ""
   137  		switch {
   138  		case isCompoundLastName(strings.ToLower(thisWord)):
   139  			// preserve first letter case, but to lower the rest for compound last names
   140  			if unicode.IsUpper([]rune(thisWord)[0]) {
   141  				toAppend = strings.Title(strings.ToLower(thisWord))
   142  			} else {
   143  				toAppend = strings.ToLower(thisWord)
   144  			}
   145  		case isCamelCase(thisWord):
   146  			// Preserve case for Camel-cased strings
   147  			toAppend = thisWord
   148  		default:
   149  			// For everything else, force to title case
   150  			toAppend = upperCaseFirst(strings.ToLower(thisWord))
   151  		}
   152  		words = append(words, toAppend)
   153  	}
   154  	return strings.Join(words, separator)
   155  }
   156  
   157  func upperCaseFirst(input string) string {
   158  	return strings.Title(strings.ToLower(input))
   159  }
   160  
   161  func fixCase(input string) string {
   162  	word := uppercaseFirstAll(input, "-")
   163  	word = uppercaseFirstAll(word, ".")
   164  	return word
   165  }
   166  
   167  func cleanString(input string) string {
   168  	return strings.ToLower(strings.Replace(input, ".", "", -1))
   169  }
   170  
   171  // isCamelCase returns if a string is CamelCased.
   172  // CamelCased in this sense is if a string has both inner-upper and lower characters.
   173  func isCamelCase(input string) bool {
   174  	hasLowers := false
   175  	hasInnerUppers := false
   176  
   177  	for i, c := range input {
   178  		if i != 0 && unicode.IsUpper(c) {
   179  			hasInnerUppers = true
   180  		}
   181  		if unicode.IsLower(c) {
   182  			hasLowers = true
   183  		}
   184  	}
   185  
   186  	return hasLowers && hasInnerUppers
   187  }
   188  
   189  // containsLower returns true if the `elem` is in the StringArray, false otherwise.
   190  func containsLower(values []string, elem string) bool {
   191  	for _, arrayElem := range values {
   192  		if strings.ToLower(arrayElem) == elem {
   193  			return true
   194  		}
   195  	}
   196  	return false
   197  }
   198  
   199  // getByLower returns an element from the array that matches the input.
   200  func getByLower(values []string, elem string) string {
   201  	for _, arrayElem := range values {
   202  		if strings.ToLower(arrayElem) == elem {
   203  			return arrayElem
   204  		}
   205  	}
   206  	return ""
   207  }