github.com/LanderTome/numerologyCalculator@v1.0.2/numerology/database.go (about)

     1  // Copyright 2021 Robert D. Wukmir
     2  // This file is subject to the terms and conditions defined in
     3  // the LICENSE file, which is part of this source code package.
     4  //
     5  // Unless required by applicable law or agreed to in writing,
     6  // software distributed under the License is distributed on an
     7  // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     8  // either express or implied. See the License for the specific
     9  // language governing permissions and limitations under the
    10  // License.
    11  
    12  package numerology
    13  
    14  import (
    15  	"bufio"
    16  	"errors"
    17  	"fmt"
    18  	"github.com/cheggaaa/pb/v3"
    19  	"github.com/spkg/bom"
    20  	"github.com/xo/dburl"
    21  	"gorm.io/driver/mysql"
    22  	"gorm.io/driver/postgres"
    23  	"gorm.io/driver/sqlite" // Requires Cgo when compiling and therefore doesn't work well for some target architectures.
    24  	// "github.com/cloudquery/sqlite" // Cgo-free version of sqlite. Doesn't yet support all features of native sqlite but works fine here.
    25  	"gorm.io/gorm"
    26  	"gorm.io/gorm/logger"
    27  	"io/fs"
    28  	"log"
    29  	"math"
    30  	"os"
    31  	"path/filepath"
    32  	"reflect"
    33  	"sort"
    34  	"strconv"
    35  	"strings"
    36  )
    37  
    38  // DB holds the database connection used of name searches. Gorm is used which means that only SQLite, MySQL,
    39  // and PostgreSQL are supported out-of-the-box. Variable is exposed in case someone wanted to hack on other
    40  // database solutions for a project.
    41  var DB *gorm.DB
    42  var dbLogger = logger.Default.LogMode(logger.Silent)
    43  
    44  // uint8 (1-byte [0 to 255]) is used because it should be large enough for almost any situation and can save
    45  // space in most databases (not sqlite, however).
    46  //
    47  // The longest English word 'pneumonoultramicroscopicsilicovolcanoconiosis'
    48  // maxes out at only 218.
    49  type precalculatedNumerology struct {
    50  	Id                    int64 `gorm:"primaryKey"`
    51  	Name                  string
    52  	Gender                string `gorm:"index;type:varchar(1)"`
    53  	PythagoreanFull       uint8  `gorm:"index"`
    54  	PythagoreanVowels     uint8  `gorm:"index"`
    55  	PythagoreanConsonants uint8  `gorm:"index"`
    56  	ChaldeanFull          uint8  `gorm:"index"`
    57  	ChaldeanVowels        uint8  `gorm:"index"`
    58  	ChaldeanConsonants    uint8  `gorm:"index"`
    59  	P1                    uint8  // Pythagorean count for number 1
    60  	P2                    uint8  // Pythagorean count for number 2
    61  	P3                    uint8  // Pythagorean count for number 3
    62  	P4                    uint8  // Pythagorean count for number 4
    63  	P5                    uint8  // Pythagorean count for number 5
    64  	P6                    uint8  // Pythagorean count for number 6
    65  	P7                    uint8  // Pythagorean count for number 7
    66  	P8                    uint8  // Pythagorean count for number 8
    67  	P9                    uint8  // Pythagorean count for number 9
    68  	C1                    uint8  // Chaldean count for number 1
    69  	C2                    uint8  // Chaldean count for number 2
    70  	C3                    uint8  // Chaldean count for number 3
    71  	C4                    uint8  // Chaldean count for number 4
    72  	C5                    uint8  // Chaldean count for number 5
    73  	C6                    uint8  // Chaldean count for number 6
    74  	C7                    uint8  // Chaldean count for number 7
    75  	C8                    uint8  // Chaldean count for number 8
    76  }
    77  
    78  // connectToDatabase parses a given DSN and establishes a connection to the database using Gorm. Only SQLite,
    79  // PostgreSQL, and MySQL are currently supported.
    80  func connectToDatabase(dsn string) error {
    81  	if DB == nil {
    82  		u, err := dburl.Parse(dsn)
    83  		if err != nil {
    84  			return errors.New("unable to connect to parse database connection string. dns=" + dsn)
    85  		}
    86  		switch u.OriginalScheme {
    87  		case "sqlite":
    88  			DB, err = gorm.Open(sqlite.Open(u.DSN), &gorm.Config{Logger: dbLogger})
    89  		case "postgres":
    90  			DB, err = gorm.Open(postgres.Open(u.DSN), &gorm.Config{Logger: dbLogger})
    91  		case "mysql":
    92  			DB, err = gorm.Open(mysql.Open(u.DSN), &gorm.Config{Logger: dbLogger})
    93  		default:
    94  			return errors.New("unsupported database. " + u.OriginalScheme)
    95  		}
    96  		return err
    97  	}
    98  	return nil
    99  }
   100  
   101  // setStructField allows us to add values to a struct by using a constructed string name for the field.
   102  // This is used for the P1, P2, C1, C2, etc. columns of the database.
   103  func setStructField(pcn *precalculatedNumerology, field string, value uint8) {
   104  	v := reflect.ValueOf(pcn).Elem().FieldByName(field)
   105  	if v.IsValid() {
   106  		v.SetUint(uint64(value))
   107  	}
   108  }
   109  
   110  // namePopularity is a sortable slice used for ordering names before putting in the database.
   111  type namePopularity []nameEntry
   112  
   113  func (a namePopularity) Len() int           { return len(a) }
   114  func (a namePopularity) Less(i, j int) bool { return a[i].Popularity < a[j].Popularity }
   115  func (a namePopularity) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
   116  
   117  type nameEntry struct {
   118  	Name       string
   119  	Gender     uint8
   120  	Popularity int
   121  }
   122  
   123  // Extract all the names from the CSV files in the directory, merge the names, and create a sorted slice of results.
   124  func extractNamesFromFiles(directory string) (namePopularity, error) {
   125  	log.Printf("Extracting names from %v", directory)
   126  	namePopularityMap := map[uint8]map[string]int{
   127  		'M': {},
   128  		'F': {},
   129  	}
   130  	files, err := fs.Glob(os.DirFS(directory), "*.csv")
   131  	if err != nil {
   132  		return namePopularity{}, errors.New("unable to scan directory")
   133  	}
   134  	totalFiles := len(files)
   135  	bar := pb.Full.Start(totalFiles)
   136  	for i, fn := range files {
   137  		bar.Increment()
   138  		// Weighting comes into play when there are multiple files imported together. Older names are weighted less than modern names.
   139  		// The assumption is that each file is a year and earlier years are sorted in ascending order.
   140  		weight := float64(i+1) / float64(totalFiles)
   141  
   142  		file, err := os.Open(filepath.Join(directory, fn))
   143  		if err != nil {
   144  			log.Printf("Error opening file %v. --Skipping--", filepath.Join(directory, fn))
   145  			continue
   146  		}
   147  		// Look through files and extract the data
   148  		// bom.NewReader gets rid of UTF-8 byte order marks that can cause problems.
   149  		fscanner := bufio.NewScanner(bom.NewReader(file))
   150  		for fscanner.Scan() {
   151  			cols := strings.Split(fscanner.Text(), ",")
   152  			lname, gender, c := strings.TrimSpace(cols[0]), strings.TrimSpace(strings.ToUpper(cols[1]))[0], strings.TrimSpace(cols[2])
   153  			/*
   154  				----- This section is taken out because it only applies to some datasets. -----
   155  				// USA Census file names are truncated at 15 characters. There are only a few dozen and they are all
   156  				// combination names like ChristopherJohn and MariaDelRosario. Just ignore them.
   157  				if len(lname) >= 15 {
   158  					continue
   159  				}
   160  			*/
   161  			origCount, err := strconv.Atoi(c)
   162  			if err != nil {
   163  				// If name count cannot be converted to a number then just skip the name.
   164  				log.Printf("Unable to convert popularity from string to number. %v,%v,%v", lname, gender, c)
   165  				continue
   166  			}
   167  			// Weight the popularity of the name.
   168  			count := int(math.Ceil(float64(origCount) * weight))
   169  			if c, ok := namePopularityMap[gender][lname]; ok {
   170  				namePopularityMap[gender][lname] = c + count
   171  			} else {
   172  				namePopularityMap[gender][lname] = count
   173  			}
   174  		}
   175  	}
   176  	bar.Finish()
   177  
   178  	// Put names in a slice of structs so we can sort it using the standard library.
   179  	var Names namePopularity
   180  	for gender, v := range namePopularityMap {
   181  		for keyName, count := range v {
   182  			Names = append(Names, nameEntry{
   183  				Name:       keyName,
   184  				Gender:     gender,
   185  				Popularity: count,
   186  			})
   187  			// After name is put in struct, delete it from the map to conserve resources.
   188  			delete(namePopularityMap[gender], keyName)
   189  		}
   190  	}
   191  	// Sort in descending order of popularity.
   192  	sort.Sort(sort.Reverse(Names))
   193  	return Names, nil
   194  }
   195  
   196  // getAllDirectories gets all the directories in the baseDir folder.
   197  func getAllDirectories(baseDir string) []string {
   198  	directories := []string{}
   199  	items, _ := os.ReadDir(baseDir)
   200  	for _, item := range items {
   201  		if item.IsDir() {
   202  			directories = append(directories, item.Name())
   203  		}
   204  	}
   205  	return directories
   206  }
   207  
   208  // Create the table if it is not already created.
   209  func setupDatabaseTable(table string) error {
   210  	if err := DB.Table(table).AutoMigrate(&precalculatedNumerology{}); err != nil {
   211  		return err
   212  	}
   213  
   214  	// Because we are using the same struct for all our tables, Gorm uses the same name for
   215  	// all the indexes. This causes and error since you can only have one index of each name.
   216  	// Manually rename the indexes after GORM creates them.
   217  	idxPrefix := "idx_precalculated_numerologies_"
   218  	indexes := []string{
   219  		"chaldean_consonants",
   220  		"chaldean_full",
   221  		"chaldean_vowels",
   222  		"pythagorean_consonants",
   223  		"pythagorean_full",
   224  		"pythagorean_vowels",
   225  		"gender",
   226  	}
   227  	for _, idx := range indexes {
   228  		if err := DB.Table(table).Migrator().RenameIndex(
   229  			&precalculatedNumerology{},
   230  			idxPrefix+idx, table+"_idx_"+idx,
   231  		); err != nil {
   232  			return err
   233  		}
   234  		if err := DB.Table(table).Migrator().DropIndex(&precalculatedNumerology{}, idxPrefix+idx); err != nil {
   235  			return err
   236  		}
   237  	}
   238  	return nil
   239  }
   240  
   241  // CreateDatabase function creates and populates the database table with the pre-populated numerological
   242  // calculations. The argument dsn is the connection string for the database that will utilized. The argument
   243  // baseDir is the directory where the CSV files are stored that contain the names that will populate the
   244  // database. Each folder in the baseDir becomes a table in the database. This allows for multiple name
   245  // sources.
   246  //
   247  // The format for the CSV files is name, gender, popularity with no header. Gender is just a letter 'M' for
   248  // male or 'F' female. Popularity is used to determine the sort order of the names. Each reoccurrence of the
   249  // same name aggregates the popularity.
   250  //
   251  //  john,M,10000
   252  //  sara,F,9000
   253  //  jack,M,8000
   254  func CreateDatabase(dsn string, baseDir string) error {
   255  	directories := getAllDirectories(baseDir)
   256  
   257  	log.Println("Connecting to database...")
   258  	if err := connectToDatabase(dsn); err != nil {
   259  		return errors.New("unable to connect to database. " + err.Error())
   260  	}
   261  	// Iterate over each of the folders and make a separate db table for each.
   262  	for _, dir := range directories {
   263  		// Create the table if it is not already created.
   264  		if err := setupDatabaseTable(dir); err != nil {
   265  			return err
   266  		}
   267  
   268  		// Make sure the table is empty. If it is not, then adding entries could mess it up.
   269  		var count int64
   270  		DB.Table(dir).Count(&count)
   271  		if count > 0 {
   272  			log.Printf("Table %v is not empty. Skipping.", dir)
   273  			continue
   274  		}
   275  
   276  		names, err := extractNamesFromFiles(filepath.Join(baseDir, dir))
   277  		if err != nil {
   278  			return err
   279  		}
   280  
   281  		log.Printf("Populating database table %v", dir)
   282  		bar := pb.Full.Start(len(names))
   283  		for _, entry := range names {
   284  			bar.Increment()
   285  			// Precalculate the numerological numbers we want to put in the database.
   286  			pythagorean := Name(entry.Name, Pythagorean, []int{}, false)
   287  			chaldean := Name(entry.Name, Chaldean, []int{}, false)
   288  
   289  			// Check for unacceptable characters
   290  			if len(pythagorean.UnknownCharacters()) > 0 || len(chaldean.UnknownCharacters()) > 0 {
   291  				log.Println(fmt.Sprintf("Skipping name with unacceptable characters: %v", entry.Name))
   292  				continue
   293  			}
   294  
   295  			dbEntry := precalculatedNumerology{
   296  				Name:                  entry.Name,
   297  				Gender:                string(entry.Gender),
   298  				PythagoreanFull:       uint8(pythagorean.Full().Breakdown[0].ReduceSteps[0]),
   299  				PythagoreanVowels:     uint8(pythagorean.Vowels().Breakdown[0].ReduceSteps[0]),
   300  				PythagoreanConsonants: uint8(pythagorean.Consonants().Breakdown[0].ReduceSteps[0]),
   301  				ChaldeanFull:          uint8(chaldean.Full().Breakdown[0].ReduceSteps[0]),
   302  				ChaldeanVowels:        uint8(chaldean.Vowels().Breakdown[0].ReduceSteps[0]),
   303  				ChaldeanConsonants:    uint8(chaldean.Consonants().Breakdown[0].ReduceSteps[0]),
   304  			}
   305  
   306  			// Use reflection to populate these fields in the struct. There may be a better way to do this.
   307  			pCounts := pythagorean.Counts()
   308  			for k, v := range pCounts {
   309  				field := fmt.Sprintf("P%v", k)
   310  				setStructField(&dbEntry, field, uint8(v))
   311  			}
   312  			cCounts := chaldean.Counts()
   313  			for k, v := range cCounts {
   314  				field := fmt.Sprintf("C%v", k)
   315  				setStructField(&dbEntry, field, uint8(v))
   316  			}
   317  
   318  			// Insert the record into the database.
   319  			if err := DB.Table(dir).Create(&dbEntry).Error; err != nil {
   320  				log.Println(fmt.Sprintf("unable to insert record into database: %v", dbEntry))
   321  				continue
   322  			}
   323  		}
   324  		bar.Finish()
   325  	}
   326  	log.Println("Vacuuming database to complete process.")
   327  	// Vacuum the database to make sure any extra space is reclaimed.
   328  	DB.Raw("VACUUM;")
   329  	return nil
   330  }