github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/doltdb/ignore.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package doltdb
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"io"
    21  	"regexp"
    22  	"strings"
    23  
    24  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
    25  	"github.com/dolthub/dolt/go/store/types"
    26  	"github.com/dolthub/dolt/go/store/val"
    27  )
    28  
// IgnorePattern is a single table-name pattern together with the flag that says
// what to do with tables matching it. When a pattern is matched against a table
// name (see compilePattern), ? stands for any single character and * or % for
// any run of characters. Ignore is true when matching tables should be ignored
// and false when they should explicitly not be ignored.
type IgnorePattern struct {
	Pattern string
	Ignore  bool
}
    33  
    34  func NewIgnorePattern(pattern string, ignore bool) IgnorePattern {
    35  	return IgnorePattern{Pattern: pattern, Ignore: ignore}
    36  }
    37  
// IgnoredTables contains the results of comparing a series of tables to a set of dolt_ignore patterns.
//
// NOTE(review): this type is not populated by the code in view here; presumably
// Ignore holds the names of tables that should be ignored, DontIgnore the names
// of tables that should not, and Conflicts one error per table whose matching
// patterns disagree — confirm against the code that fills it in.
type IgnoredTables struct {
	Ignore     []string
	DontIgnore []string
	Conflicts  []DoltIgnoreConflictError
}
    44  
// IgnoreResult is an enum containing the result of matching a table name against the list of ignored table patterns.
//
// In this file ErrorOccurred is only ever returned alongside a non-nil error, and
// IgnorePatternConflict alongside a DoltIgnoreConflictError describing the conflict.
type IgnoreResult int

// The possible outcomes of matching a table name against the ignore patterns.
const (
	Ignore                IgnoreResult = iota // The table should be ignored.
	DontIgnore                                // The table should not be ignored.
	IgnorePatternConflict                     // The table matched multiple conflicting patterns.
	ErrorOccurred                             // An error occurred.
)
    54  
// IgnorePatterns is a list of dolt_ignore patterns. Its IsTableNameIgnored
// method matches a table name against every pattern in the list.
type IgnorePatterns []IgnorePattern
    56  
// GetIgnoredTablePatterns reads every row of the dolt_ignore table in the working root
// (roots.Working) and returns the rows as IgnorePatterns.
//
// It returns an empty pattern list and a nil error when the dolt_ignore table does not
// exist or when the table uses the legacy LD_1 storage format. It returns an error when
// the table, its row data or its schema cannot be loaded, when the key or value
// descriptor is not the expected shape (a single string key column and a single
// nullable int8 value column), or when a row cannot be read.
func GetIgnoredTablePatterns(ctx context.Context, roots Roots) (IgnorePatterns, error) {
	var ignorePatterns []IgnorePattern
	workingSet := roots.Working
	table, found, err := workingSet.GetTable(ctx, TableName{Name: IgnoreTableName})
	if err != nil {
		return nil, err
	}
	if !found {
		// dolt_ignore doesn't exist, so don't filter any tables.
		return ignorePatterns, nil
	}
	// NOTE(review): the storage-format check below runs before err is inspected, so an
	// error from GetRowData is silently dropped for LD_1 tables — confirm this ordering
	// is intentional.
	index, err := table.GetRowData(ctx)
	if table.Format() == types.Format_LD_1 {
		// dolt_ignore is not supported for the legacy storage format.
		return ignorePatterns, nil
	}
	if err != nil {
		return nil, err
	}
	ignoreTableSchema, err := table.GetSchema(ctx)
	if err != nil {
		return nil, err
	}
	keyDesc, valueDesc := ignoreTableSchema.GetMapDescriptors()

	// Validate the tuple layout before decoding: the key must be one string column
	// (the pattern) and the value one nullable int8 column (the ignore flag).
	if !keyDesc.Equals(val.NewTupleDescriptor(val.Type{Enc: val.StringEnc})) {
		return nil, fmt.Errorf("dolt_ignore had unexpected key type, this should never happen")
	}
	if !valueDesc.Equals(val.NewTupleDescriptor(val.Type{Enc: val.Int8Enc, Nullable: true})) {
		return nil, fmt.Errorf("dolt_ignore had unexpected value type, this should never happen")
	}

	ignoreTableMap, err := durable.ProllyMapFromIndex(index).IterAll(ctx)
	if err != nil {
		return nil, err
	}
	// Walk every row; io.EOF marks the end of the iterator.
	for {
		keyTuple, valueTuple, err := ignoreTableMap.Next(ctx)
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}

		pattern, ok := keyDesc.GetString(0, keyTuple)
		if !ok {
			return nil, fmt.Errorf("could not read pattern")
		}
		// NOTE(review): ok is not checked after GetBool. Presumably ok is false for a
		// NULL value, in which case ignore is false and the row is recorded as a
		// "don't ignore" pattern — confirm that is the intended handling of NULL.
		ignore, ok := valueDesc.GetBool(0, valueTuple)
		ignorePatterns = append(ignorePatterns, NewIgnorePattern(pattern, ignore))
	}
	return ignorePatterns, nil
}
   111  
   112  // ExcludeIgnoredTables takes a list of table names and removes any tables that should be ignored,
   113  // as determined by the patterns in the dolt_ignore table.
   114  // The ignore patterns are read from the dolt_ignore table in the working set.
   115  func ExcludeIgnoredTables(ctx context.Context, roots Roots, tables []string) ([]string, error) {
   116  	ignorePatterns, err := GetIgnoredTablePatterns(ctx, roots)
   117  	if err != nil {
   118  		return nil, err
   119  	}
   120  	filteredTables := []string{}
   121  	for _, tbl := range tables {
   122  		ignored, err := ignorePatterns.IsTableNameIgnored(tbl)
   123  		if err != nil {
   124  			return nil, err
   125  		}
   126  		if conflict := AsDoltIgnoreInConflict(err); conflict != nil {
   127  			// no-op
   128  		} else if err != nil {
   129  			return nil, err
   130  		} else if ignored == DontIgnore {
   131  			// no-op
   132  		} else if ignored == Ignore {
   133  			continue
   134  		} else {
   135  			return nil, fmt.Errorf("IsTableNameIgnored returned ErrorOccurred but no error!")
   136  		}
   137  		filteredTables = append(filteredTables, tbl)
   138  	}
   139  	return filteredTables, nil
   140  }
   141  
   142  // compilePattern takes a dolt_ignore pattern and generate a Regexp that matches against the same table names as the pattern.
   143  func compilePattern(pattern string) (*regexp.Regexp, error) {
   144  	pattern = "^" + regexp.QuoteMeta(pattern) + "$"
   145  	pattern = strings.Replace(pattern, "\\?", ".", -1)
   146  	pattern = strings.Replace(pattern, "\\*", ".*", -1)
   147  	pattern = strings.Replace(pattern, "%", ".*", -1)
   148  	return regexp.Compile(pattern)
   149  }
   150  
   151  // getMoreSpecificPatterns takes a dolt_ignore pattern and generates a Regexp that matches against all patterns
   152  // that are "more specific" than it. (a pattern A is more specific than a pattern B if all names that match A also
   153  // match pattern B, but not vice versa.)
   154  func getMoreSpecificPatterns(lessSpecific string) (*regexp.Regexp, error) {
   155  	pattern := "^" + regexp.QuoteMeta(lessSpecific) + "$"
   156  	// A ? can expand to any character except for a * or %, since that also has special meaning in patterns.
   157  
   158  	pattern = strings.Replace(pattern, "\\?", "[^\\*%]", -1)
   159  	pattern = strings.Replace(pattern, "\\*", ".*", -1)
   160  	pattern = strings.Replace(pattern, "%", ".*", -1)
   161  	return regexp.Compile(pattern)
   162  }
   163  
   164  // normalizePattern generates an equivalent pattern, such that all equivalent patterns have the same normalized pattern.
   165  // It accomplishes this by replacing all * with %, and removing multiple adjacent %.
   166  // This will get a lot harder to implement once we support escaped characters in patterns.
   167  func normalizePattern(pattern string) string {
   168  	pattern = strings.Replace(pattern, "*", "%", -1)
   169  	for {
   170  		newPattern := strings.Replace(pattern, "%%", "%", -1)
   171  		if newPattern == pattern {
   172  			break
   173  		}
   174  		pattern = newPattern
   175  	}
   176  	return pattern
   177  }
   178  
   179  func resolveConflictingPatterns(trueMatches, falseMatches []string, tableName string) (IgnoreResult, error) {
   180  	trueMatchesToRemove := map[string]struct{}{}
   181  	falseMatchesToRemove := map[string]struct{}{}
   182  	for _, trueMatch := range trueMatches {
   183  		trueMatchRegExp, err := getMoreSpecificPatterns(trueMatch)
   184  		if err != nil {
   185  			return ErrorOccurred, err
   186  		}
   187  		for _, falseMatch := range falseMatches {
   188  			if normalizePattern(trueMatch) == normalizePattern(falseMatch) {
   189  				return IgnorePatternConflict, DoltIgnoreConflictError{Table: tableName, TruePatterns: []string{trueMatch}, FalsePatterns: []string{falseMatch}}
   190  			}
   191  			if trueMatchRegExp.MatchString(falseMatch) {
   192  				trueMatchesToRemove[trueMatch] = struct{}{}
   193  			}
   194  		}
   195  	}
   196  	for _, falseMatch := range falseMatches {
   197  		falseMatchRegExp, err := getMoreSpecificPatterns(falseMatch)
   198  		if err != nil {
   199  			return ErrorOccurred, err
   200  		}
   201  		for _, trueMatch := range trueMatches {
   202  			if falseMatchRegExp.MatchString(trueMatch) {
   203  				falseMatchesToRemove[falseMatch] = struct{}{}
   204  			}
   205  		}
   206  	}
   207  	if len(trueMatchesToRemove) == len(trueMatches) {
   208  		return DontIgnore, nil
   209  	}
   210  	if len(falseMatchesToRemove) == len(falseMatches) {
   211  		return Ignore, nil
   212  	}
   213  
   214  	// There's a conflict. Remove the less specific patterns so that only the conflict remains.
   215  
   216  	var conflictingTrueMatches []string
   217  	var conflictingFalseMatches []string
   218  
   219  	for _, trueMatch := range trueMatches {
   220  		if _, ok := trueMatchesToRemove[trueMatch]; !ok {
   221  			conflictingTrueMatches = append(conflictingTrueMatches, trueMatch)
   222  		}
   223  	}
   224  
   225  	for _, falseMatch := range falseMatches {
   226  		if _, ok := trueMatchesToRemove[falseMatch]; !ok {
   227  			conflictingFalseMatches = append(conflictingFalseMatches, falseMatch)
   228  		}
   229  	}
   230  
   231  	return IgnorePatternConflict, DoltIgnoreConflictError{Table: tableName, TruePatterns: conflictingTrueMatches, FalsePatterns: conflictingFalseMatches}
   232  }
   233  
   234  func (ip *IgnorePatterns) IsTableNameIgnored(tableName string) (IgnoreResult, error) {
   235  	// The dolt_rebase table is automatically ignored by Dolt – it shouldn't ever
   236  	// be checked in to a Dolt database.
   237  	if strings.ToLower(tableName) == strings.ToLower(RebaseTableName) {
   238  		return Ignore, nil
   239  	}
   240  
   241  	trueMatches := []string{}
   242  	falseMatches := []string{}
   243  	for _, patternIgnore := range *ip {
   244  		pattern := patternIgnore.Pattern
   245  		ignore := patternIgnore.Ignore
   246  		patternRegExp, err := compilePattern(pattern)
   247  		if err != nil {
   248  			return ErrorOccurred, err
   249  		}
   250  		if patternRegExp.MatchString(tableName) {
   251  			if ignore {
   252  				trueMatches = append(trueMatches, pattern)
   253  			} else {
   254  				falseMatches = append(falseMatches, pattern)
   255  			}
   256  		}
   257  	}
   258  	if len(trueMatches) == 0 {
   259  		return DontIgnore, nil
   260  	}
   261  	if len(falseMatches) == 0 {
   262  		return Ignore, nil
   263  	}
   264  	// The table name matched both positive and negative patterns.
   265  	// More specific patterns override less specific patterns.
   266  	return resolveConflictingPatterns(trueMatches, falseMatches, tableName)
   267  }