github.com/viant/toolbox@v0.34.5/storage/http_service.go (about)

     1  package storage
     2  
     3  import (
     4  	"fmt"
     5  	"github.com/pkg/errors"
     6  	"github.com/viant/toolbox"
     7  	"github.com/viant/toolbox/cred"
     8  	"io"
     9  	"io/ioutil"
    10  	"net/http"
    11  	"net/url"
    12  	"os"
    13  	"path"
    14  	"path/filepath"
    15  	"strings"
    16  	"time"
    17  )
    18  
// httpStorageService represents basic http storage service (only limited listing and full download are supported).
type httpStorageService struct {
	// Credential optionally holds the username and password that
	// addCredentialToURLIfNeeded embeds into request URLs; it may be nil.
	Credential *cred.Config
}
    23  
// HTTPClientProvider represents http client provider. It is a package-level
// variable, so it can be reassigned to supply a differently configured client.
// The default builds a client through toolbox.NewHttpClient with the
// "MaxIdleConns" option set to 0.
var HTTPClientProvider = func() (*http.Client, error) {
	return toolbox.NewHttpClient(&toolbox.HttpOptions{Key: "MaxIdleConns", Value: 0})
}
    28  
    29  func (s *httpStorageService) addCredentialToURLIfNeeded(URL string) string {
    30  	if s.Credential == nil || s.Credential.Password == "" || s.Credential.Username == "" {
    31  		return URL
    32  	}
    33  	prasedURL, err := url.Parse(URL)
    34  	if err != nil {
    35  		return URL
    36  	}
    37  	if prasedURL.User != nil {
    38  		return URL
    39  	}
    40  	return strings.Replace(URL, "://", fmt.Sprintf("://%v:%v@", s.Credential.Username, s.Credential.Password), 1)
    41  }
    42  
    43  type hRef struct {
    44  	URL   string
    45  	Value string
    46  }
    47  
    48  func extractLinks(body string) []*hRef {
    49  	var result = make([]*hRef, 0)
    50  	var linkContents = strings.Split(string(body), "href=\"")
    51  	for i := 1; i < len(linkContents); i++ {
    52  		var linkContent = linkContents[i]
    53  		linkEndPosition := strings.Index(linkContent, "\"")
    54  		if linkEndPosition == -1 {
    55  			continue
    56  		}
    57  		linkHref := string(linkContent[:linkEndPosition])
    58  		var content = ""
    59  		contentStartPosition := strings.Index(linkContent, ">")
    60  		if contentStartPosition != 1 {
    61  			content = string(linkContent[contentStartPosition+1:])
    62  			contentEndPosition := strings.Index(content, "<")
    63  			if contentEndPosition != -1 {
    64  				content = string(content[:contentEndPosition])
    65  			}
    66  		}
    67  		link := &hRef{
    68  			URL:   linkHref,
    69  			Value: strings.Trim(content, " \t\r\n"),
    70  		}
    71  		result = append(result, link)
    72  
    73  	}
    74  	return result
    75  }
    76  
    77  //List returns a list of object for supplied url
    78  func (s *httpStorageService) List(URL string) ([]Object, error) {
    79  	listURL := s.addCredentialToURLIfNeeded(URL)
    80  	client, err := HTTPClientProvider()
    81  	if err != nil {
    82  		return nil, err
    83  	}
    84  	response, err := client.Get(listURL)
    85  
    86  	if err != nil {
    87  		return nil, err
    88  	}
    89  
    90  	body, err := ioutil.ReadAll(response.Body)
    91  	if err != nil {
    92  		return nil, err
    93  	}
    94  
    95  	now := time.Now()
    96  	contentType := response.Header.Get("Content-Type")
    97  	var result = make([]Object, 0)
    98  
    99  	if response.Status != "200 OK" {
   100  		return nil, fmt.Errorf("Invalid response code: %v", response.Status)
   101  	}
   102  
   103  	isGitUrl := strings.Contains(URL, "github.")
   104  	isPublicGit := strings.Contains(URL, "github.com")
   105  	if strings.Contains(contentType, "text/html") {
   106  
   107  		links := extractLinks(string(body))
   108  		var indexedLinks = map[string]bool{}
   109  		if isGitUrl {
   110  
   111  			for _, link := range links {
   112  				if !((strings.Contains(link.URL, "/blob/") || strings.Contains(link.URL, "/tree/")) && strings.HasSuffix(link.URL, link.Value)) {
   113  					continue
   114  				}
   115  				linkType := StorageObjectContentType
   116  				_, name := toolbox.URLSplit(link.URL)
   117  				if path.Ext(name) == "" {
   118  					linkType = StorageObjectFolderType
   119  				}
   120  
   121  				baseURL := toolbox.URLBase(URL)
   122  
   123  				objectURL := link.URL
   124  				if !strings.Contains(objectURL, baseURL) {
   125  					objectURL = toolbox.URLPathJoin(baseURL, link.URL)
   126  				}
   127  
   128  				if linkType == StorageObjectContentType && strings.Contains(objectURL, "/master/") {
   129  					objectURL = strings.Replace(objectURL, "/blob/", "/", 1)
   130  					if isPublicGit {
   131  						objectURL = strings.Replace(objectURL, "github.com", "raw.githubusercontent.com", 1)
   132  					} else {
   133  						objectURL = strings.Replace(objectURL, ".com/", ".com/raw/", 1)
   134  					}
   135  				}
   136  				if linkType == StorageObjectContentType && !strings.Contains(objectURL, "raw") {
   137  					continue
   138  				}
   139  				if _, ok := indexedLinks[objectURL]; ok {
   140  					continue
   141  				}
   142  				storageObject := newHttpFileObject(objectURL, linkType, nil, now, 1)
   143  				indexedLinks[objectURL] = true
   144  				result = append(result, storageObject)
   145  			}
   146  
   147  		} else {
   148  
   149  			for _, link := range links {
   150  				if link.URL == "" || strings.Contains(link.URL, ":") || strings.HasPrefix(link.URL, "#") || strings.HasPrefix(link.URL, "?") || strings.HasPrefix(link.URL, ".") || strings.HasPrefix(link.URL, "/") {
   151  					continue
   152  				}
   153  				linkType := StorageObjectContentType
   154  				if strings.HasSuffix(link.URL, "/") {
   155  					linkType = StorageObjectFolderType
   156  				}
   157  				objectURL := toolbox.URLPathJoin(URL, link.URL)
   158  				storageObject := newHttpFileObject(objectURL, linkType, nil, now, 1)
   159  				result = append(result, storageObject)
   160  			}
   161  		}
   162  	}
   163  
   164  	if strings.Contains(string(body), ">..<") {
   165  		return result, err
   166  	}
   167  	storageObject := newHttpFileObject(URL, StorageObjectContentType, nil, now, response.ContentLength)
   168  	result = append(result, storageObject)
   169  	return result, err
   170  }
   171  
   172  //Exists returns true if resource exists
   173  func (s *httpStorageService) Exists(URL string) (bool, error) {
   174  	client, err := HTTPClientProvider()
   175  	if err != nil {
   176  		return false, err
   177  	}
   178  	response, err := client.Get(URL)
   179  	if err != nil {
   180  		return false, err
   181  	}
   182  	return response.StatusCode == 200, nil
   183  }
   184  
   185  //Object returns a Object for supplied url
   186  func (s *httpStorageService) StorageObject(URL string) (Object, error) {
   187  	objects, err := s.List(URL)
   188  	if err != nil {
   189  		return nil, err
   190  	}
   191  	if len(objects) == 0 {
   192  		return nil, fmt.Errorf("resource  not found: %v", URL)
   193  	}
   194  
   195  	return objects[0], nil
   196  }
   197  
   198  //Download returns reader for downloaded storage object
   199  func (s *httpStorageService) Download(object Object) (io.ReadCloser, error) {
   200  	client, err := HTTPClientProvider()
   201  	if err != nil {
   202  		return nil, err
   203  	}
   204  	response, err := client.Get(s.addCredentialToURLIfNeeded(object.URL()))
   205  	return response.Body, err
   206  }
   207  
// Upload uploads provided reader content for supplied url.
// The http storage service does not implement uploads: URL and reader are
// ignored and an "unsupported" error is always returned.
func (s *httpStorageService) Upload(URL string, reader io.Reader) error {
	return errors.New("unsupported")
}
   212  
// UploadWithMode uploads provided reader content for supplied url with the given file mode.
// The http storage service does not implement uploads: all arguments are
// ignored and an "unsupported" error is always returned.
func (s *httpStorageService) UploadWithMode(URL string, mode os.FileMode, reader io.Reader) error {
	return errors.New("unsupported")
}
   217  
// Register is not implemented by the http storage service: schema and service
// are ignored and an "unsupported" error is always returned.
func (s *httpStorageService) Register(schema string, service Service) error {
	return errors.New("unsupported")
}
   221  
// Delete removes passed in storage object.
// It derives a file name from the object URL with toolbox.Filename and removes
// that path with os.Remove; no HTTP request is issued.
// NOTE(review): this acts on the local file system rather than on the remote
// resource, unlike Upload which reports "unsupported" — confirm this is intended.
func (s *httpStorageService) Delete(object Object) error {
	fileName := toolbox.Filename(object.URL())
	return os.Remove(fileName)
}
   227  
// Close is a no-op for the http storage service and always returns nil.
func (s *httpStorageService) Close() error {
	return nil
}
   231  
   232  //DownloadWithURL downloads content for passed in object URL
   233  func (s *httpStorageService) DownloadWithURL(URL string) (io.ReadCloser, error) {
   234  	object, err := s.StorageObject(URL)
   235  	if err != nil {
   236  		return nil, err
   237  	}
   238  	return s.Download(object)
   239  }
   240  
   241  func NewHttpStorageService(credential *cred.Config) Service {
   242  	return &httpStorageService{
   243  		Credential: credential,
   244  	}
   245  }
   246  
// httpStorageObject is the storage object type produced by the http storage
// service; it embeds *AbstractObject and adds an Unwrap method.
type httpStorageObject struct {
	*AbstractObject
}
   250  
// Unwrap always returns an error that reports the type of target; no target
// type is supported by http storage objects.
func (o *httpStorageObject) Unwrap(target interface{}) error {
	return fmt.Errorf("unsuported target %T", target)
}
   254  
   255  func newHttpFileObject(url string, objectType int, source interface{}, lastModified time.Time, size int64) Object {
   256  	var isDir = objectType == StorageObjectFolderType
   257  	var _, name = toolbox.URLSplit(url)
   258  	var fileMode, _ = NewFileMode("-r--r--r--")
   259  	if isDir {
   260  		fileMode, _ = NewFileMode("dr--r--r--")
   261  	}
   262  	fileInfo := NewFileInfo(name, size, fileMode, lastModified, isDir)
   263  	abstract := NewAbstractStorageObject(url, source, fileInfo)
   264  	result := &httpStorageObject{
   265  		AbstractObject: abstract,
   266  	}
   267  	result.AbstractObject.Object = result
   268  	return result
   269  }
   270  
// HttpProviderScheme is the registry key under which init registers the http storage service provider.
const HttpProviderScheme = "http"

// HttpsProviderScheme is the registry key under which init registers the same provider for https URLs.
const HttpsProviderScheme = "https"
   273  
   274  func init() {
   275  	Registry().Registry[HttpsProviderScheme] = httpServiceProvider
   276  	Registry().Registry[HttpProviderScheme] = httpServiceProvider
   277  
   278  }
   279  
   280  func httpServiceProvider(credentialFile string) (Service, error) {
   281  
   282  	if credentialFile == "" {
   283  		return NewHttpStorageService(nil), nil
   284  	}
   285  
   286  	if !strings.HasPrefix(credentialFile, "/") {
   287  		dir, _ := filepath.Abs(filepath.Dir(os.Args[0]))
   288  		credentialFile = path.Join(dir, credentialFile)
   289  	}
   290  	config, err := cred.NewConfig(credentialFile)
   291  	if err != nil {
   292  		return nil, err
   293  	}
   294  	return NewHttpStorageService(config), nil
   295  }