github.com/grailbio/base@v0.0.11/fileio/names.go (about) 1 // Copyright 2017 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache-2.0 3 // license that can be found in the LICENSE file. 4 5 package fileio 6 7 import ( 8 "regexp" 9 "strings" 10 ) 11 12 // FileType represents the type of a file based on its filename 13 type FileType int 14 15 const ( 16 // Other represents a filetype other than the ones supported here. 17 Other FileType = iota 18 // Gzip file. 19 Gzip 20 // Bzip2 file. 21 Bzip2 22 // GrailRIO recordio. 23 GrailRIO 24 // GrailRIOPacked packed recordio. 25 GrailRIOPacked 26 // GrailRIOPackedEncrypted encrypted packed recordio. 27 GrailRIOPackedEncrypted 28 // GrailRIOPackedCompressed compressed packed recordio. 29 GrailRIOPackedCompressed 30 // GrailRIOPackedCompressedAndEncrypted compressed and encrypted packed recordio. 31 GrailRIOPackedCompressedAndEncrypted 32 // JSON text file 33 JSON 34 // Zstd format. 35 // https://facebook.github.io/zstd/ 36 // https://tools.ietf.org/html/rfc8478 37 Zstd 38 ) 39 40 var lookup = map[string]FileType{ 41 ".gz": Gzip, 42 ".bz2": Bzip2, 43 ".grail-rio": GrailRIO, 44 ".grail-rpk": GrailRIOPacked, 45 ".grail-rpk-kd": GrailRIOPackedEncrypted, 46 ".grail-rpk-gz": GrailRIOPackedCompressed, 47 ".grail-rpk-gz-kd": GrailRIOPackedCompressedAndEncrypted, 48 ".json": JSON, 49 ".zst": Zstd, 50 } 51 52 // StorageAPI represents the Storage API required to access a file. 53 type StorageAPI int 54 55 const ( 56 // LocalAPI represents a local fileystem accessible via a unix/posix API 57 // and hence the io/os packages. 58 LocalAPI StorageAPI = iota 59 // S3API represents an Amazon S3 API. 60 S3API 61 ) 62 63 // DetermineAPI determines the Storage API that stores the file 64 // referred to by pathname. 65 func DetermineAPI(pathname string) StorageAPI { 66 if strings.HasPrefix(pathname, "s3://") { 67 return S3API 68 } 69 return LocalAPI 70 } 71 72 // DetermineType determines the type of the file given its filename. 73 func DetermineType(filename string) FileType { 74 idx := strings.LastIndexByte(filename, '.') 75 if idx < 0 { 76 return Other 77 } 78 suffix := filename[idx:] 79 return lookup[suffix] 80 } 81 82 // FileSuffix returns the filename suffix associated with the specified 83 // FileType. 84 func FileSuffix(typ FileType) string { 85 for k, v := range lookup { 86 if v == typ { 87 return string(k) 88 } 89 } 90 return "" 91 } 92 93 // IsGrailRecordio returns true if the filetype is one of the Grail recordio 94 // types. 95 func IsGrailRecordio(ft FileType) bool { 96 switch ft { 97 case GrailRIO, GrailRIOPacked, 98 GrailRIOPackedEncrypted, 99 GrailRIOPackedCompressed, 100 GrailRIOPackedCompressedAndEncrypted: 101 return true 102 } 103 return false 104 } 105 106 var ( 107 s3re0 = regexp.MustCompile("^s3://[^/]+.*$") 108 s3re1 = regexp.MustCompile("^s3:/*(.*)$") 109 s3re2 = regexp.MustCompile("^s:/+(.*)$") 110 s3re3 = regexp.MustCompile("^s3/+(.*)$") 111 ) 112 113 // SpellCorrectS3 returns true if the S3 path looks like an S3 path and returns 114 // the spell corrected path. That is, it returns true for common mispellings 115 // such as those show below along with the corrected s3://<path> 116 // s3:///<path> 117 // s3:<path> 118 // s3:/<path> 119 // s://<path> 120 // s:/<path> 121 // s3//<path> 122 func SpellCorrectS3(s3path string) (StorageAPI, bool, string) { 123 if s3path == "s3://" || s3re0.MatchString(s3path) { 124 return S3API, false, s3path 125 } 126 if strings.HasPrefix(s3path, "s3:") { 127 fixed := s3re1.FindStringSubmatch(s3path) 128 return S3API, true, "s3://" + fixed[1] 129 } 130 if strings.HasPrefix(s3path, "s:") { 131 fixed := s3re2.FindStringSubmatch(s3path) 132 return S3API, true, "s3://" + fixed[1] 133 } 134 if strings.HasPrefix(s3path, "s3/") { 135 fixed := s3re3.FindStringSubmatch(s3path) 136 return S3API, true, "s3://" + fixed[1] 137 } 138 return LocalAPI, false, s3path 139 }