code.gitea.io/gitea@v1.22.3/models/db/collation.go (about)

     1  // Copyright 2023 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package db
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"strings"
    10  
    11  	"code.gitea.io/gitea/modules/container"
    12  	"code.gitea.io/gitea/modules/log"
    13  	"code.gitea.io/gitea/modules/setting"
    14  
    15  	"xorm.io/xorm"
    16  	"xorm.io/xorm/schemas"
    17  )
    18  
// CheckCollationsResult is the report produced by CheckCollations: the collation the
// database uses, the collation it is expected to use, and the table columns that disagree.
type CheckCollationsResult struct {
	// ExpectedCollation is setting.Database.CharsetCollation when that is set,
	// otherwise the first candidate collation found in AvailableCollation.
	ExpectedCollation        string
	// AvailableCollation is the set of collation names returned by the
	// database-specific "find available collations" query.
	AvailableCollation       container.Set[string]
	// DatabaseCollation is the collation the server reports for the current database.
	DatabaseCollation        string
	// IsCollationCaseSensitive reports whether the given collation name is treated
	// as case-sensitive for the current database type.
	IsCollationCaseSensitive func(s string) bool
	// CollationEquals compares two collation names. It is plain string equality by
	// default; for MySQL it also ignores a leading "utf8mb4_" prefix.
	CollationEquals          func(a, b string) bool
	// ExistingTableNumber is the number of tables returned by the engine's DBMetas call.
	ExistingTableNumber      int

	// InconsistentCollationColumns lists "table.column" names whose collation is not
	// case-sensitive or differs from DatabaseCollation. It is reset to nil when every
	// column matches ExpectedCollation or every column matches DatabaseCollation.
	InconsistentCollationColumns []string
}
    29  
    30  func findAvailableCollationsMySQL(x *xorm.Engine) (ret container.Set[string], err error) {
    31  	var res []struct {
    32  		Collation string
    33  	}
    34  	if err = x.SQL("SHOW COLLATION WHERE (Collation = 'utf8mb4_bin') OR (Collation LIKE '%\\_as\\_cs%')").Find(&res); err != nil {
    35  		return nil, err
    36  	}
    37  	ret = make(container.Set[string], len(res))
    38  	for _, r := range res {
    39  		ret.Add(r.Collation)
    40  	}
    41  	return ret, nil
    42  }
    43  
    44  func findAvailableCollationsMSSQL(x *xorm.Engine) (ret container.Set[string], err error) {
    45  	var res []struct {
    46  		Name string
    47  	}
    48  	if err = x.SQL("SELECT * FROM sys.fn_helpcollations() WHERE name LIKE '%[_]CS[_]AS%'").Find(&res); err != nil {
    49  		return nil, err
    50  	}
    51  	ret = make(container.Set[string], len(res))
    52  	for _, r := range res {
    53  		ret.Add(r.Name)
    54  	}
    55  	return ret, nil
    56  }
    57  
    58  func CheckCollations(x *xorm.Engine) (*CheckCollationsResult, error) {
    59  	dbTables, err := x.DBMetas()
    60  	if err != nil {
    61  		return nil, err
    62  	}
    63  
    64  	res := &CheckCollationsResult{
    65  		ExistingTableNumber: len(dbTables),
    66  		CollationEquals:     func(a, b string) bool { return a == b },
    67  	}
    68  
    69  	var candidateCollations []string
    70  	if x.Dialect().URI().DBType == schemas.MYSQL {
    71  		if _, err = x.SQL("SELECT @@collation_database").Get(&res.DatabaseCollation); err != nil {
    72  			return nil, err
    73  		}
    74  		res.IsCollationCaseSensitive = func(s string) bool {
    75  			return s == "utf8mb4_bin" || strings.HasSuffix(s, "_as_cs")
    76  		}
    77  		candidateCollations = []string{"utf8mb4_0900_as_cs", "uca1400_as_cs", "utf8mb4_bin"}
    78  		res.AvailableCollation, err = findAvailableCollationsMySQL(x)
    79  		if err != nil {
    80  			return nil, err
    81  		}
    82  		res.CollationEquals = func(a, b string) bool {
    83  			// MariaDB adds the "utf8mb4_" prefix, eg: "utf8mb4_uca1400_as_cs", but not the name "uca1400_as_cs" in "SHOW COLLATION"
    84  			// At the moment, it's safe to ignore the database difference, just trim the prefix and compare. It could be fixed easily if there is any problem in the future.
    85  			return a == b || strings.TrimPrefix(a, "utf8mb4_") == strings.TrimPrefix(b, "utf8mb4_")
    86  		}
    87  	} else if x.Dialect().URI().DBType == schemas.MSSQL {
    88  		if _, err = x.SQL("SELECT DATABASEPROPERTYEX(DB_NAME(), 'Collation')").Get(&res.DatabaseCollation); err != nil {
    89  			return nil, err
    90  		}
    91  		res.IsCollationCaseSensitive = func(s string) bool {
    92  			return strings.HasSuffix(s, "_CS_AS")
    93  		}
    94  		candidateCollations = []string{"Latin1_General_CS_AS"}
    95  		res.AvailableCollation, err = findAvailableCollationsMSSQL(x)
    96  		if err != nil {
    97  			return nil, err
    98  		}
    99  	} else {
   100  		return nil, nil
   101  	}
   102  
   103  	if res.DatabaseCollation == "" {
   104  		return nil, errors.New("unable to get collation for current database")
   105  	}
   106  
   107  	res.ExpectedCollation = setting.Database.CharsetCollation
   108  	if res.ExpectedCollation == "" {
   109  		for _, collation := range candidateCollations {
   110  			if res.AvailableCollation.Contains(collation) {
   111  				res.ExpectedCollation = collation
   112  				break
   113  			}
   114  		}
   115  	}
   116  
   117  	if res.ExpectedCollation == "" {
   118  		return nil, errors.New("unable to find a suitable collation for current database")
   119  	}
   120  
   121  	allColumnsMatchExpected := true
   122  	allColumnsMatchDatabase := true
   123  	for _, table := range dbTables {
   124  		for _, col := range table.Columns() {
   125  			if col.Collation != "" {
   126  				allColumnsMatchExpected = allColumnsMatchExpected && res.CollationEquals(col.Collation, res.ExpectedCollation)
   127  				allColumnsMatchDatabase = allColumnsMatchDatabase && res.CollationEquals(col.Collation, res.DatabaseCollation)
   128  				if !res.IsCollationCaseSensitive(col.Collation) || !res.CollationEquals(col.Collation, res.DatabaseCollation) {
   129  					res.InconsistentCollationColumns = append(res.InconsistentCollationColumns, fmt.Sprintf("%s.%s", table.Name, col.Name))
   130  				}
   131  			}
   132  		}
   133  	}
   134  	// if all columns match expected collation or all match database collation, then it could also be considered as "consistent"
   135  	if allColumnsMatchExpected || allColumnsMatchDatabase {
   136  		res.InconsistentCollationColumns = nil
   137  	}
   138  	return res, nil
   139  }
   140  
// CheckCollationsDefaultEngine runs CheckCollations against x, which is not a parameter
// here and so refers to an identifier declared elsewhere in this package.
func CheckCollationsDefaultEngine() (*CheckCollationsResult, error) {
	return CheckCollations(x)
}
   144  
   145  func alterDatabaseCollation(x *xorm.Engine, collation string) error {
   146  	if x.Dialect().URI().DBType == schemas.MYSQL {
   147  		_, err := x.Exec("ALTER DATABASE CHARACTER SET utf8mb4 COLLATE " + collation)
   148  		return err
   149  	} else if x.Dialect().URI().DBType == schemas.MSSQL {
   150  		// TODO: MSSQL has many limitations on changing database collation, it could fail in many cases.
   151  		_, err := x.Exec("ALTER DATABASE CURRENT COLLATE " + collation)
   152  		return err
   153  	}
   154  	return errors.New("unsupported database type")
   155  }
   156  
   157  // preprocessDatabaseCollation checks database & table column collation, and alter the database collation if needed
   158  func preprocessDatabaseCollation(x *xorm.Engine) {
   159  	r, err := CheckCollations(x)
   160  	if err != nil {
   161  		log.Error("Failed to check database collation: %v", err)
   162  	}
   163  	if r == nil {
   164  		return // no check result means the database doesn't need to do such check/process (at the moment ....)
   165  	}
   166  
   167  	// try to alter database collation to expected if the database is empty, it might fail in some cases (and it isn't necessary to succeed)
   168  	// at the moment, there is no "altering" solution for MSSQL, site admin should manually change the database collation
   169  	if !r.CollationEquals(r.DatabaseCollation, r.ExpectedCollation) && r.ExistingTableNumber == 0 {
   170  		if err = alterDatabaseCollation(x, r.ExpectedCollation); err != nil {
   171  			log.Error("Failed to change database collation to %q: %v", r.ExpectedCollation, err)
   172  		} else {
   173  			_, _ = x.Exec("SELECT 1") // after "altering", MSSQL's session becomes invalid, so make a simple query to "refresh" the session
   174  			if r, err = CheckCollations(x); err != nil {
   175  				log.Error("Failed to check database collation again after altering: %v", err) // impossible case
   176  				return
   177  			}
   178  			log.Warn("Current database has been altered to use collation %q", r.DatabaseCollation)
   179  		}
   180  	}
   181  
   182  	// check column collation, and show warning/error to end users -- no need to fatal, do not block the startup
   183  	if !r.IsCollationCaseSensitive(r.DatabaseCollation) {
   184  		log.Warn("Current database is using a case-insensitive collation %q, although Gitea could work with it, there might be some rare cases which don't work as expected.", r.DatabaseCollation)
   185  	}
   186  
   187  	if len(r.InconsistentCollationColumns) > 0 {
   188  		log.Error("There are %d table columns using inconsistent collation, they should use %q. Please go to admin panel Self Check page", len(r.InconsistentCollationColumns), r.DatabaseCollation)
   189  	}
   190  }