github.com/viant/toolbox@v0.34.5/storage/http_service.go (about) 1 package storage 2 3 import ( 4 "fmt" 5 "github.com/pkg/errors" 6 "github.com/viant/toolbox" 7 "github.com/viant/toolbox/cred" 8 "io" 9 "io/ioutil" 10 "net/http" 11 "net/url" 12 "os" 13 "path" 14 "path/filepath" 15 "strings" 16 "time" 17 ) 18 19 //httpStorageService represents basic http storage service (only limited listing and full download are supported) 20 type httpStorageService struct { 21 Credential *cred.Config 22 } 23 24 //HTTPClientProvider represents http client provider 25 var HTTPClientProvider = func() (*http.Client, error) { 26 return toolbox.NewHttpClient(&toolbox.HttpOptions{Key: "MaxIdleConns", Value: 0}) 27 } 28 29 func (s *httpStorageService) addCredentialToURLIfNeeded(URL string) string { 30 if s.Credential == nil || s.Credential.Password == "" || s.Credential.Username == "" { 31 return URL 32 } 33 prasedURL, err := url.Parse(URL) 34 if err != nil { 35 return URL 36 } 37 if prasedURL.User != nil { 38 return URL 39 } 40 return strings.Replace(URL, "://", fmt.Sprintf("://%v:%v@", s.Credential.Username, s.Credential.Password), 1) 41 } 42 43 type hRef struct { 44 URL string 45 Value string 46 } 47 48 func extractLinks(body string) []*hRef { 49 var result = make([]*hRef, 0) 50 var linkContents = strings.Split(string(body), "href=\"") 51 for i := 1; i < len(linkContents); i++ { 52 var linkContent = linkContents[i] 53 linkEndPosition := strings.Index(linkContent, "\"") 54 if linkEndPosition == -1 { 55 continue 56 } 57 linkHref := string(linkContent[:linkEndPosition]) 58 var content = "" 59 contentStartPosition := strings.Index(linkContent, ">") 60 if contentStartPosition != 1 { 61 content = string(linkContent[contentStartPosition+1:]) 62 contentEndPosition := strings.Index(content, "<") 63 if contentEndPosition != -1 { 64 content = string(content[:contentEndPosition]) 65 } 66 } 67 link := &hRef{ 68 URL: linkHref, 69 Value: strings.Trim(content, " \t\r\n"), 70 } 71 result = append(result, link) 72 73 } 74 return result 75 } 76 77 //List returns a list of object for supplied url 78 func (s *httpStorageService) List(URL string) ([]Object, error) { 79 listURL := s.addCredentialToURLIfNeeded(URL) 80 client, err := HTTPClientProvider() 81 if err != nil { 82 return nil, err 83 } 84 response, err := client.Get(listURL) 85 86 if err != nil { 87 return nil, err 88 } 89 90 body, err := ioutil.ReadAll(response.Body) 91 if err != nil { 92 return nil, err 93 } 94 95 now := time.Now() 96 contentType := response.Header.Get("Content-Type") 97 var result = make([]Object, 0) 98 99 if response.Status != "200 OK" { 100 return nil, fmt.Errorf("Invalid response code: %v", response.Status) 101 } 102 103 isGitUrl := strings.Contains(URL, "github.") 104 isPublicGit := strings.Contains(URL, "github.com") 105 if strings.Contains(contentType, "text/html") { 106 107 links := extractLinks(string(body)) 108 var indexedLinks = map[string]bool{} 109 if isGitUrl { 110 111 for _, link := range links { 112 if !((strings.Contains(link.URL, "/blob/") || strings.Contains(link.URL, "/tree/")) && strings.HasSuffix(link.URL, link.Value)) { 113 continue 114 } 115 linkType := StorageObjectContentType 116 _, name := toolbox.URLSplit(link.URL) 117 if path.Ext(name) == "" { 118 linkType = StorageObjectFolderType 119 } 120 121 baseURL := toolbox.URLBase(URL) 122 123 objectURL := link.URL 124 if !strings.Contains(objectURL, baseURL) { 125 objectURL = toolbox.URLPathJoin(baseURL, link.URL) 126 } 127 128 if linkType == StorageObjectContentType && strings.Contains(objectURL, "/master/") { 129 objectURL = strings.Replace(objectURL, "/blob/", "/", 1) 130 if isPublicGit { 131 objectURL = strings.Replace(objectURL, "github.com", "raw.githubusercontent.com", 1) 132 } else { 133 objectURL = strings.Replace(objectURL, ".com/", ".com/raw/", 1) 134 } 135 } 136 if linkType == StorageObjectContentType && !strings.Contains(objectURL, "raw") { 137 continue 138 } 139 if _, ok := indexedLinks[objectURL]; ok { 140 continue 141 } 142 storageObject := newHttpFileObject(objectURL, linkType, nil, now, 1) 143 indexedLinks[objectURL] = true 144 result = append(result, storageObject) 145 } 146 147 } else { 148 149 for _, link := range links { 150 if link.URL == "" || strings.Contains(link.URL, ":") || strings.HasPrefix(link.URL, "#") || strings.HasPrefix(link.URL, "?") || strings.HasPrefix(link.URL, ".") || strings.HasPrefix(link.URL, "/") { 151 continue 152 } 153 linkType := StorageObjectContentType 154 if strings.HasSuffix(link.URL, "/") { 155 linkType = StorageObjectFolderType 156 } 157 objectURL := toolbox.URLPathJoin(URL, link.URL) 158 storageObject := newHttpFileObject(objectURL, linkType, nil, now, 1) 159 result = append(result, storageObject) 160 } 161 } 162 } 163 164 if strings.Contains(string(body), ">..<") { 165 return result, err 166 } 167 storageObject := newHttpFileObject(URL, StorageObjectContentType, nil, now, response.ContentLength) 168 result = append(result, storageObject) 169 return result, err 170 } 171 172 //Exists returns true if resource exists 173 func (s *httpStorageService) Exists(URL string) (bool, error) { 174 client, err := HTTPClientProvider() 175 if err != nil { 176 return false, err 177 } 178 response, err := client.Get(URL) 179 if err != nil { 180 return false, err 181 } 182 return response.StatusCode == 200, nil 183 } 184 185 //Object returns a Object for supplied url 186 func (s *httpStorageService) StorageObject(URL string) (Object, error) { 187 objects, err := s.List(URL) 188 if err != nil { 189 return nil, err 190 } 191 if len(objects) == 0 { 192 return nil, fmt.Errorf("resource not found: %v", URL) 193 } 194 195 return objects[0], nil 196 } 197 198 //Download returns reader for downloaded storage object 199 func (s *httpStorageService) Download(object Object) (io.ReadCloser, error) { 200 client, err := HTTPClientProvider() 201 if err != nil { 202 return nil, err 203 } 204 response, err := client.Get(s.addCredentialToURLIfNeeded(object.URL())) 205 return response.Body, err 206 } 207 208 //Upload uploads provided reader content for supplied url. 209 func (s *httpStorageService) Upload(URL string, reader io.Reader) error { 210 return errors.New("unsupported") 211 } 212 213 //Upload uploads provided reader content for supplied url. 214 func (s *httpStorageService) UploadWithMode(URL string, mode os.FileMode, reader io.Reader) error { 215 return errors.New("unsupported") 216 } 217 218 func (s *httpStorageService) Register(schema string, service Service) error { 219 return errors.New("unsupported") 220 } 221 222 //Delete removes passed in storage object 223 func (s *httpStorageService) Delete(object Object) error { 224 fileName := toolbox.Filename(object.URL()) 225 return os.Remove(fileName) 226 } 227 228 func (s *httpStorageService) Close() error { 229 return nil 230 } 231 232 //DownloadWithURL downloads content for passed in object URL 233 func (s *httpStorageService) DownloadWithURL(URL string) (io.ReadCloser, error) { 234 object, err := s.StorageObject(URL) 235 if err != nil { 236 return nil, err 237 } 238 return s.Download(object) 239 } 240 241 func NewHttpStorageService(credential *cred.Config) Service { 242 return &httpStorageService{ 243 Credential: credential, 244 } 245 } 246 247 type httpStorageObject struct { 248 *AbstractObject 249 } 250 251 func (o *httpStorageObject) Unwrap(target interface{}) error { 252 return fmt.Errorf("unsuported target %T", target) 253 } 254 255 func newHttpFileObject(url string, objectType int, source interface{}, lastModified time.Time, size int64) Object { 256 var isDir = objectType == StorageObjectFolderType 257 var _, name = toolbox.URLSplit(url) 258 var fileMode, _ = NewFileMode("-r--r--r--") 259 if isDir { 260 fileMode, _ = NewFileMode("dr--r--r--") 261 } 262 fileInfo := NewFileInfo(name, size, fileMode, lastModified, isDir) 263 abstract := NewAbstractStorageObject(url, source, fileInfo) 264 result := &httpStorageObject{ 265 AbstractObject: abstract, 266 } 267 result.AbstractObject.Object = result 268 return result 269 } 270 271 const HttpProviderScheme = "http" 272 const HttpsProviderScheme = "https" 273 274 func init() { 275 Registry().Registry[HttpsProviderScheme] = httpServiceProvider 276 Registry().Registry[HttpProviderScheme] = httpServiceProvider 277 278 } 279 280 func httpServiceProvider(credentialFile string) (Service, error) { 281 282 if credentialFile == "" { 283 return NewHttpStorageService(nil), nil 284 } 285 286 if !strings.HasPrefix(credentialFile, "/") { 287 dir, _ := filepath.Abs(filepath.Dir(os.Args[0])) 288 credentialFile = path.Join(dir, credentialFile) 289 } 290 config, err := cred.NewConfig(credentialFile) 291 if err != nil { 292 return nil, err 293 } 294 return NewHttpStorageService(config), nil 295 }