github.com/mutagen-io/mutagen@v0.18.0-rc1/pkg/filesystem/behavior/unicode.go (about)

     1  package behavior
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"runtime"
     9  	"strings"
    10  
    11  	"github.com/mutagen-io/mutagen/pkg/filesystem"
    12  )
    13  
const (
	// composedFileNamePrefix is the prefix used for temporary files created by
	// the Unicode decomposition test. It is in NFC form: the accented letter is
	// encoded as the single precomposed code point U+00E9 (UTF-8 bytes 0xC3
	// 0xA9). The bytes are written as escapes so that the normalization form
	// of the literal cannot be changed by an editor or other tooling.
	composedFileNamePrefix = filesystem.TemporaryNamePrefix + "unicode-test-\xc3\xa9ntry"
	// decomposedFileNamePrefix is the NFD equivalent of composedFileNamePrefix:
	// the same letter is encoded as a plain "e" (0x65) followed by the
	// combining acute accent U+0301 (UTF-8 bytes 0xCC 0x81).
	decomposedFileNamePrefix = filesystem.TemporaryNamePrefix + "unicode-test-\x65\xcc\x81ntry"
)
    21  
    22  // DecomposesUnicodeByPath determines whether or not the filesystem on which the
    23  // directory at the specified path resides decomposes Unicode filenames. The
    24  // second value returned by this function indicates whether or not probe files
    25  // were used in determining behavior.
    26  func DecomposesUnicodeByPath(path string, probeMode ProbeMode) (bool, bool, error) {
    27  	// Check the filesystem probing mode and see if we can return an assumption.
    28  	if probeMode == ProbeMode_ProbeModeAssume {
    29  		return assumeUnicodeDecomposition, false, nil
    30  	} else if !probeMode.Supported() {
    31  		panic("invalid probe mode")
    32  	}
    33  
    34  	// Check if we have a fast test that will work.
    35  	if result, ok := probeUnicodeDecompositionFastByPath(path); ok {
    36  		return result, false, nil
    37  	} else if runtime.GOOS == "windows" {
    38  		panic("fast path not used on Windows")
    39  	}
    40  
    41  	// Create and close a temporary file using the composed filename.
    42  	file, err := os.CreateTemp(path, composedFileNamePrefix)
    43  	if err != nil {
    44  		return false, true, fmt.Errorf("unable to create test file: %w", err)
    45  	} else if err = file.Close(); err != nil {
    46  		return false, true, fmt.Errorf("unable to close test file: %w", err)
    47  	}
    48  
    49  	// Grab the file's name. This is calculated from the parameters passed to
    50  	// TempFile, not by reading from the OS, so it will still be in a composed
    51  	// form. Also calculate a decomposed variant.
    52  	composedFilename := filepath.Base(file.Name())
    53  	decomposedFilename := strings.Replace(
    54  		composedFilename,
    55  		composedFileNamePrefix,
    56  		decomposedFileNamePrefix,
    57  		1,
    58  	)
    59  
    60  	// Defer removal of the file. Since we don't know whether the filesystem is
    61  	// also normalization-insensitive, we try both compositions.
    62  	defer func() {
    63  		if os.Remove(filepath.Join(path, composedFilename)) != nil {
    64  			os.Remove(filepath.Join(path, decomposedFilename))
    65  		}
    66  	}()
    67  
    68  	// Grab the contents of the path.
    69  	contents, err := filesystem.DirectoryContentsByPath(path)
    70  	if err != nil {
    71  		return false, true, fmt.Errorf("unable to read directory contents: %w", err)
    72  	}
    73  
    74  	// Loop through contents and see if we find a match for the decomposed file
    75  	// name. It doesn't even need to be our file, though it probably will be.
    76  	for _, c := range contents {
    77  		name := c.Name()
    78  		if name == decomposedFilename {
    79  			return true, true, nil
    80  		} else if name == composedFilename {
    81  			return false, true, nil
    82  		}
    83  	}
    84  
    85  	// If we didn't find any match, something's fishy.
    86  	return false, true, errors.New("unable to find test file after creation")
    87  }
    88  
    89  // DecomposesUnicode determines whether or not the specified directory (and its
    90  // underlying filesystem) decomposes Unicode filenames. The second value
    91  // returned by this function indicates whether or not probe files were used in
    92  // determining behavior.
    93  func DecomposesUnicode(directory *filesystem.Directory, probeMode ProbeMode) (bool, bool, error) {
    94  	// Check the filesystem probing mode and see if we can return an assumption.
    95  	if probeMode == ProbeMode_ProbeModeAssume {
    96  		return assumeUnicodeDecomposition, false, nil
    97  	} else if !probeMode.Supported() {
    98  		panic("invalid probe mode")
    99  	}
   100  
   101  	// Check if we have a fast test that will work.
   102  	if result, ok := probeUnicodeDecompositionFast(directory); ok {
   103  		return result, false, nil
   104  	} else if runtime.GOOS == "windows" {
   105  		panic("fast path not used on Windows")
   106  	}
   107  
   108  	// Create and close a temporary file using the composed filename.
   109  	composedName, file, err := directory.CreateTemporaryFile(composedFileNamePrefix)
   110  	if err != nil {
   111  		return false, true, fmt.Errorf("unable to create test file: %w", err)
   112  	} else if err = file.Close(); err != nil {
   113  		return false, true, fmt.Errorf("unable to close test file: %w", err)
   114  	}
   115  
   116  	// The name returned from CreateTemporaryFile is calculated from the
   117  	// provided pattern, so it will still be in a composed form. Compute the
   118  	// decomposed variant.
   119  	decomposedName := strings.Replace(
   120  		composedName,
   121  		composedFileNamePrefix,
   122  		decomposedFileNamePrefix,
   123  		1,
   124  	)
   125  
   126  	// Defer removal of the file. Since we don't know whether the filesystem is
   127  	// also normalization-insensitive, we try both compositions.
   128  	defer func() {
   129  		if directory.RemoveFile(composedName) != nil {
   130  			directory.RemoveFile(decomposedName)
   131  		}
   132  	}()
   133  
   134  	// HACK: If we're on Linux, then re-open the directory after creating the
   135  	// temporary file (and defer closure of the re-opened copy). This is
   136  	// necessary to work around an issue with osxfs where a directory descriptor
   137  	// can't be used to list contents created after the descriptor was opened
   138  	// (due either to aggressive caching or some sort of implementation bug).
   139  	// See issue #73 for more details. Ideally we'd restrict this workaround to
   140  	// osxfs, but we can't actually detect osxfs specifically because the statfs
   141  	// type field just indicates that it's a FUSE filesystem. Even if we wanted
   142  	// to restrict this behavior to just FUSE filesystems, the statfs call is
   143  	// going to be about the same cost (if not more expensive) than the re-open
   144  	// call, so it's best to just do this in all cases on Linux. This isn't such
   145  	// a big deal since this function is only called once per scan, and we may
   146  	// hit a fast path above anyway.
   147  	directoryForContentRead := directory
   148  	if runtime.GOOS == "linux" {
   149  		directoryForContentRead, err = directory.OpenDirectory(".")
   150  		if err != nil {
   151  			return false, true, fmt.Errorf("unable to re-open directory: %w", err)
   152  		}
   153  		defer directoryForContentRead.Close()
   154  	}
   155  
   156  	// Grab the content names in the directory.
   157  	names, err := directoryForContentRead.ReadContentNames()
   158  	if err != nil {
   159  		return false, true, fmt.Errorf("unable to read directory content names: %w", err)
   160  	}
   161  
   162  	// Loop through the names and see if we find a match for either the composed
   163  	// or decomposed name.
   164  	for _, name := range names {
   165  		if name == decomposedName {
   166  			return true, true, nil
   167  		} else if name == composedName {
   168  			return false, true, nil
   169  		}
   170  	}
   171  
   172  	// If we didn't find any match, something's fishy.
   173  	return false, true, errors.New("unable to find test file after creation")
   174  }