github.com/LanderTome/numerologyCalculator@v1.0.2/numerology/database.go (about) 1 // Copyright 2021 Robert D. Wukmir 2 // This file is subject to the terms and conditions defined in 3 // the LICENSE file, which is part of this source code package. 4 // 5 // Unless required by applicable law or agreed to in writing, 6 // software distributed under the License is distributed on an 7 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 8 // either express or implied. See the License for the specific 9 // language governing permissions and limitations under the 10 // License. 11 12 package numerology 13 14 import ( 15 "bufio" 16 "errors" 17 "fmt" 18 "github.com/cheggaaa/pb/v3" 19 "github.com/spkg/bom" 20 "github.com/xo/dburl" 21 "gorm.io/driver/mysql" 22 "gorm.io/driver/postgres" 23 "gorm.io/driver/sqlite" // Requires Cgo when compiling and therefore doesn't work well for some target architectures. 24 // "github.com/cloudquery/sqlite" // Cgo-free version of sqlite. Doesn't yet support all features of native sqlite but works fine here. 25 "gorm.io/gorm" 26 "gorm.io/gorm/logger" 27 "io/fs" 28 "log" 29 "math" 30 "os" 31 "path/filepath" 32 "reflect" 33 "sort" 34 "strconv" 35 "strings" 36 ) 37 38 // DB holds the database connection used of name searches. Gorm is used which means that only SQLite, MySQL, 39 // and PostgreSQL are supported out-of-the-box. Variable is exposed in case someone wanted to hack on other 40 // database solutions for a project. 41 var DB *gorm.DB 42 var dbLogger = logger.Default.LogMode(logger.Silent) 43 44 // uint8 (1-byte [0 to 255]) is used because it should be large enough for almost any situation and can save 45 // space in most databases (not sqlite, however). 46 // 47 // The longest English word 'pneumonoultramicroscopicsilicovolcanoconiosis' 48 // maxes out at only 218. 49 type precalculatedNumerology struct { 50 Id int64 `gorm:"primaryKey"` 51 Name string 52 Gender string `gorm:"index;type:varchar(1)"` 53 PythagoreanFull uint8 `gorm:"index"` 54 PythagoreanVowels uint8 `gorm:"index"` 55 PythagoreanConsonants uint8 `gorm:"index"` 56 ChaldeanFull uint8 `gorm:"index"` 57 ChaldeanVowels uint8 `gorm:"index"` 58 ChaldeanConsonants uint8 `gorm:"index"` 59 P1 uint8 // Pythagorean count for number 1 60 P2 uint8 // Pythagorean count for number 2 61 P3 uint8 // Pythagorean count for number 3 62 P4 uint8 // Pythagorean count for number 4 63 P5 uint8 // Pythagorean count for number 5 64 P6 uint8 // Pythagorean count for number 6 65 P7 uint8 // Pythagorean count for number 7 66 P8 uint8 // Pythagorean count for number 8 67 P9 uint8 // Pythagorean count for number 9 68 C1 uint8 // Chaldean count for number 1 69 C2 uint8 // Chaldean count for number 2 70 C3 uint8 // Chaldean count for number 3 71 C4 uint8 // Chaldean count for number 4 72 C5 uint8 // Chaldean count for number 5 73 C6 uint8 // Chaldean count for number 6 74 C7 uint8 // Chaldean count for number 7 75 C8 uint8 // Chaldean count for number 8 76 } 77 78 // connectToDatabase parses a given DSN and establishes a connection to the database using Gorm. Only SQLite, 79 // PostgreSQL, and MySQL are currently supported. 80 func connectToDatabase(dsn string) error { 81 if DB == nil { 82 u, err := dburl.Parse(dsn) 83 if err != nil { 84 return errors.New("unable to connect to parse database connection string. dns=" + dsn) 85 } 86 switch u.OriginalScheme { 87 case "sqlite": 88 DB, err = gorm.Open(sqlite.Open(u.DSN), &gorm.Config{Logger: dbLogger}) 89 case "postgres": 90 DB, err = gorm.Open(postgres.Open(u.DSN), &gorm.Config{Logger: dbLogger}) 91 case "mysql": 92 DB, err = gorm.Open(mysql.Open(u.DSN), &gorm.Config{Logger: dbLogger}) 93 default: 94 return errors.New("unsupported database. " + u.OriginalScheme) 95 } 96 return err 97 } 98 return nil 99 } 100 101 // setStructField allows us to add values to a struct by using a constructed string name for the field. 102 // This is used for the P1, P2, C1, C2, etc. columns of the database. 103 func setStructField(pcn *precalculatedNumerology, field string, value uint8) { 104 v := reflect.ValueOf(pcn).Elem().FieldByName(field) 105 if v.IsValid() { 106 v.SetUint(uint64(value)) 107 } 108 } 109 110 // namePopularity is a sortable slice used for ordering names before putting in the database. 111 type namePopularity []nameEntry 112 113 func (a namePopularity) Len() int { return len(a) } 114 func (a namePopularity) Less(i, j int) bool { return a[i].Popularity < a[j].Popularity } 115 func (a namePopularity) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 116 117 type nameEntry struct { 118 Name string 119 Gender uint8 120 Popularity int 121 } 122 123 // Extract all the names from the CSV files in the directory, merge the names, and create a sorted slice of results. 124 func extractNamesFromFiles(directory string) (namePopularity, error) { 125 log.Printf("Extracting names from %v", directory) 126 namePopularityMap := map[uint8]map[string]int{ 127 'M': {}, 128 'F': {}, 129 } 130 files, err := fs.Glob(os.DirFS(directory), "*.csv") 131 if err != nil { 132 return namePopularity{}, errors.New("unable to scan directory") 133 } 134 totalFiles := len(files) 135 bar := pb.Full.Start(totalFiles) 136 for i, fn := range files { 137 bar.Increment() 138 // Weighting comes into play when there are multiple files imported together. Older names are weighted less than modern names. 139 // The assumption is that each file is a year and earlier years are sorted in ascending order. 140 weight := float64(i+1) / float64(totalFiles) 141 142 file, err := os.Open(filepath.Join(directory, fn)) 143 if err != nil { 144 log.Printf("Error opening file %v. --Skipping--", filepath.Join(directory, fn)) 145 continue 146 } 147 // Look through files and extract the data 148 // bom.NewReader gets rid of UTF-8 byte order marks that can cause problems. 149 fscanner := bufio.NewScanner(bom.NewReader(file)) 150 for fscanner.Scan() { 151 cols := strings.Split(fscanner.Text(), ",") 152 lname, gender, c := strings.TrimSpace(cols[0]), strings.TrimSpace(strings.ToUpper(cols[1]))[0], strings.TrimSpace(cols[2]) 153 /* 154 ----- This section is taken out because it only applies to some datasets. ----- 155 // USA Census file names are truncated at 15 characters. There are only a few dozen and they are all 156 // combination names like ChristopherJohn and MariaDelRosario. Just ignore them. 157 if len(lname) >= 15 { 158 continue 159 } 160 */ 161 origCount, err := strconv.Atoi(c) 162 if err != nil { 163 // If name count cannot be converted to a number then just skip the name. 164 log.Printf("Unable to convert popularity from string to number. %v,%v,%v", lname, gender, c) 165 continue 166 } 167 // Weight the popularity of the name. 168 count := int(math.Ceil(float64(origCount) * weight)) 169 if c, ok := namePopularityMap[gender][lname]; ok { 170 namePopularityMap[gender][lname] = c + count 171 } else { 172 namePopularityMap[gender][lname] = count 173 } 174 } 175 } 176 bar.Finish() 177 178 // Put names in a slice of structs so we can sort it using the standard library. 179 var Names namePopularity 180 for gender, v := range namePopularityMap { 181 for keyName, count := range v { 182 Names = append(Names, nameEntry{ 183 Name: keyName, 184 Gender: gender, 185 Popularity: count, 186 }) 187 // After name is put in struct, delete it from the map to conserve resources. 188 delete(namePopularityMap[gender], keyName) 189 } 190 } 191 // Sort in descending order of popularity. 192 sort.Sort(sort.Reverse(Names)) 193 return Names, nil 194 } 195 196 // getAllDirectories gets all the directories in the baseDir folder. 197 func getAllDirectories(baseDir string) []string { 198 directories := []string{} 199 items, _ := os.ReadDir(baseDir) 200 for _, item := range items { 201 if item.IsDir() { 202 directories = append(directories, item.Name()) 203 } 204 } 205 return directories 206 } 207 208 // Create the table if it is not already created. 209 func setupDatabaseTable(table string) error { 210 if err := DB.Table(table).AutoMigrate(&precalculatedNumerology{}); err != nil { 211 return err 212 } 213 214 // Because we are using the same struct for all our tables, Gorm uses the same name for 215 // all the indexes. This causes and error since you can only have one index of each name. 216 // Manually rename the indexes after GORM creates them. 217 idxPrefix := "idx_precalculated_numerologies_" 218 indexes := []string{ 219 "chaldean_consonants", 220 "chaldean_full", 221 "chaldean_vowels", 222 "pythagorean_consonants", 223 "pythagorean_full", 224 "pythagorean_vowels", 225 "gender", 226 } 227 for _, idx := range indexes { 228 if err := DB.Table(table).Migrator().RenameIndex( 229 &precalculatedNumerology{}, 230 idxPrefix+idx, table+"_idx_"+idx, 231 ); err != nil { 232 return err 233 } 234 if err := DB.Table(table).Migrator().DropIndex(&precalculatedNumerology{}, idxPrefix+idx); err != nil { 235 return err 236 } 237 } 238 return nil 239 } 240 241 // CreateDatabase function creates and populates the database table with the pre-populated numerological 242 // calculations. The argument dsn is the connection string for the database that will utilized. The argument 243 // baseDir is the directory where the CSV files are stored that contain the names that will populate the 244 // database. Each folder in the baseDir becomes a table in the database. This allows for multiple name 245 // sources. 246 // 247 // The format for the CSV files is name, gender, popularity with no header. Gender is just a letter 'M' for 248 // male or 'F' female. Popularity is used to determine the sort order of the names. Each reoccurrence of the 249 // same name aggregates the popularity. 250 // 251 // john,M,10000 252 // sara,F,9000 253 // jack,M,8000 254 func CreateDatabase(dsn string, baseDir string) error { 255 directories := getAllDirectories(baseDir) 256 257 log.Println("Connecting to database...") 258 if err := connectToDatabase(dsn); err != nil { 259 return errors.New("unable to connect to database. " + err.Error()) 260 } 261 // Iterate over each of the folders and make a separate db table for each. 262 for _, dir := range directories { 263 // Create the table if it is not already created. 264 if err := setupDatabaseTable(dir); err != nil { 265 return err 266 } 267 268 // Make sure the table is empty. If it is not, then adding entries could mess it up. 269 var count int64 270 DB.Table(dir).Count(&count) 271 if count > 0 { 272 log.Printf("Table %v is not empty. Skipping.", dir) 273 continue 274 } 275 276 names, err := extractNamesFromFiles(filepath.Join(baseDir, dir)) 277 if err != nil { 278 return err 279 } 280 281 log.Printf("Populating database table %v", dir) 282 bar := pb.Full.Start(len(names)) 283 for _, entry := range names { 284 bar.Increment() 285 // Precalculate the numerological numbers we want to put in the database. 286 pythagorean := Name(entry.Name, Pythagorean, []int{}, false) 287 chaldean := Name(entry.Name, Chaldean, []int{}, false) 288 289 // Check for unacceptable characters 290 if len(pythagorean.UnknownCharacters()) > 0 || len(chaldean.UnknownCharacters()) > 0 { 291 log.Println(fmt.Sprintf("Skipping name with unacceptable characters: %v", entry.Name)) 292 continue 293 } 294 295 dbEntry := precalculatedNumerology{ 296 Name: entry.Name, 297 Gender: string(entry.Gender), 298 PythagoreanFull: uint8(pythagorean.Full().Breakdown[0].ReduceSteps[0]), 299 PythagoreanVowels: uint8(pythagorean.Vowels().Breakdown[0].ReduceSteps[0]), 300 PythagoreanConsonants: uint8(pythagorean.Consonants().Breakdown[0].ReduceSteps[0]), 301 ChaldeanFull: uint8(chaldean.Full().Breakdown[0].ReduceSteps[0]), 302 ChaldeanVowels: uint8(chaldean.Vowels().Breakdown[0].ReduceSteps[0]), 303 ChaldeanConsonants: uint8(chaldean.Consonants().Breakdown[0].ReduceSteps[0]), 304 } 305 306 // Use reflection to populate these fields in the struct. There may be a better way to do this. 307 pCounts := pythagorean.Counts() 308 for k, v := range pCounts { 309 field := fmt.Sprintf("P%v", k) 310 setStructField(&dbEntry, field, uint8(v)) 311 } 312 cCounts := chaldean.Counts() 313 for k, v := range cCounts { 314 field := fmt.Sprintf("C%v", k) 315 setStructField(&dbEntry, field, uint8(v)) 316 } 317 318 // Insert the record into the database. 319 if err := DB.Table(dir).Create(&dbEntry).Error; err != nil { 320 log.Println(fmt.Sprintf("unable to insert record into database: %v", dbEntry)) 321 continue 322 } 323 } 324 bar.Finish() 325 } 326 log.Println("Vacuuming database to complete process.") 327 // Vacuum the database to make sure any extra space is reclaimed. 328 DB.Raw("VACUUM;") 329 return nil 330 }