github.com/grailbio/base@v0.0.11/fileio/names.go (about)

     1  // Copyright 2017 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache-2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package fileio
     6  
     7  import (
     8  	"regexp"
     9  	"strings"
    10  )
    11  
    12  // FileType represents the type of a file based on its filename
    13  type FileType int
    14  
    15  const (
    16  	// Other represents a filetype other than the ones supported here.
    17  	Other FileType = iota
    18  	// Gzip file.
    19  	Gzip
    20  	// Bzip2 file.
    21  	Bzip2
    22  	// GrailRIO recordio.
    23  	GrailRIO
    24  	// GrailRIOPacked packed recordio.
    25  	GrailRIOPacked
    26  	// GrailRIOPackedEncrypted encrypted packed recordio.
    27  	GrailRIOPackedEncrypted
    28  	// GrailRIOPackedCompressed compressed packed recordio.
    29  	GrailRIOPackedCompressed
    30  	// GrailRIOPackedCompressedAndEncrypted compressed and encrypted packed recordio.
    31  	GrailRIOPackedCompressedAndEncrypted
    32  	// JSON text file
    33  	JSON
    34  	// Zstd format.
    35  	// https://facebook.github.io/zstd/
    36  	// https://tools.ietf.org/html/rfc8478
    37  	Zstd
    38  )
    39  
    40  var lookup = map[string]FileType{
    41  	".gz":              Gzip,
    42  	".bz2":             Bzip2,
    43  	".grail-rio":       GrailRIO,
    44  	".grail-rpk":       GrailRIOPacked,
    45  	".grail-rpk-kd":    GrailRIOPackedEncrypted,
    46  	".grail-rpk-gz":    GrailRIOPackedCompressed,
    47  	".grail-rpk-gz-kd": GrailRIOPackedCompressedAndEncrypted,
    48  	".json":            JSON,
    49  	".zst":             Zstd,
    50  }
    51  
    52  // StorageAPI represents the Storage API required to access a file.
    53  type StorageAPI int
    54  
    55  const (
    56  	// LocalAPI represents a local fileystem accessible via a unix/posix API
    57  	// and hence the io/os packages.
    58  	LocalAPI StorageAPI = iota
    59  	// S3API represents an Amazon S3 API.
    60  	S3API
    61  )
    62  
    63  // DetermineAPI determines the Storage API that stores the file
    64  // referred to by pathname.
    65  func DetermineAPI(pathname string) StorageAPI {
    66  	if strings.HasPrefix(pathname, "s3://") {
    67  		return S3API
    68  	}
    69  	return LocalAPI
    70  }
    71  
    72  // DetermineType determines the type of the file given its filename.
    73  func DetermineType(filename string) FileType {
    74  	idx := strings.LastIndexByte(filename, '.')
    75  	if idx < 0 {
    76  		return Other
    77  	}
    78  	suffix := filename[idx:]
    79  	return lookup[suffix]
    80  }
    81  
    82  // FileSuffix returns the filename suffix associated with the specified
    83  // FileType.
    84  func FileSuffix(typ FileType) string {
    85  	for k, v := range lookup {
    86  		if v == typ {
    87  			return string(k)
    88  		}
    89  	}
    90  	return ""
    91  }
    92  
    93  // IsGrailRecordio returns true if the filetype is one of the Grail recordio
    94  // types.
    95  func IsGrailRecordio(ft FileType) bool {
    96  	switch ft {
    97  	case GrailRIO, GrailRIOPacked,
    98  		GrailRIOPackedEncrypted,
    99  		GrailRIOPackedCompressed,
   100  		GrailRIOPackedCompressedAndEncrypted:
   101  		return true
   102  	}
   103  	return false
   104  }
   105  
   106  var (
   107  	s3re0 = regexp.MustCompile("^s3://[^/]+.*$")
   108  	s3re1 = regexp.MustCompile("^s3:/*(.*)$")
   109  	s3re2 = regexp.MustCompile("^s:/+(.*)$")
   110  	s3re3 = regexp.MustCompile("^s3/+(.*)$")
   111  )
   112  
   113  // SpellCorrectS3 returns true if the S3 path looks like an S3 path and returns
   114  // the spell corrected path. That is, it returns true for common mispellings
   115  // such as those show below along with the corrected s3://<path>
   116  // s3:///<path>
   117  // s3:<path>
   118  // s3:/<path>
   119  // s://<path>
   120  // s:/<path>
   121  // s3//<path>
   122  func SpellCorrectS3(s3path string) (StorageAPI, bool, string) {
   123  	if s3path == "s3://" || s3re0.MatchString(s3path) {
   124  		return S3API, false, s3path
   125  	}
   126  	if strings.HasPrefix(s3path, "s3:") {
   127  		fixed := s3re1.FindStringSubmatch(s3path)
   128  		return S3API, true, "s3://" + fixed[1]
   129  	}
   130  	if strings.HasPrefix(s3path, "s:") {
   131  		fixed := s3re2.FindStringSubmatch(s3path)
   132  		return S3API, true, "s3://" + fixed[1]
   133  	}
   134  	if strings.HasPrefix(s3path, "s3/") {
   135  		fixed := s3re3.FindStringSubmatch(s3path)
   136  		return S3API, true, "s3://" + fixed[1]
   137  	}
   138  	return LocalAPI, false, s3path
   139  }