github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/gateway/sig/v2.go (about)

     1  package sig
     2  
     3  import (
     4  	"crypto/hmac"
     5  	"crypto/sha1" //nolint:gosec
     6  	"encoding/base64"
     7  	"fmt"
     8  	"net/http"
     9  	"net/url"
    10  	"regexp"
    11  	"sort"
    12  	"strings"
    13  
    14  	"github.com/treeverse/lakefs/pkg/auth/model"
    15  	"github.com/treeverse/lakefs/pkg/gateway/errors"
    16  	"github.com/treeverse/lakefs/pkg/httputil"
    17  	"github.com/treeverse/lakefs/pkg/logging"
    18  )
    19  
const (
	// v2authHeaderName is the HTTP request header from which the SigV2
	// credentials (access key ID and signature) are read.
	v2authHeaderName = "Authorization"
)
    23  
var (
	// V2AuthHeaderRegexp matches an Authorization header value of the form
	// "AWS <AccessKeyId>:<Signature>". The access key ID (3 to 20 characters)
	// and the signature (base64 alphabet) are captured as named groups.
	// NOTE(review): the pattern is not anchored, so it also matches when this
	// form appears inside a longer value - confirm that this is intended.
	V2AuthHeaderRegexp = regexp.MustCompile(`AWS (?P<AccessKeyId>.{3,20}):(?P<Signature>[A-Za-z0-9+/=]+)`)
	// Both "interesting" lists are kept sorted, so collecting the matching items by iterating
	// over them in order yields a result that is already sorted.
	interestingHeaders   = [...]string{"content-md5", "content-type", "date"}
	interestingResources []string // initialized and sorted by the init function

)
    31  
    32  //nolint:gochecknoinits
    33  func init() {
    34  	interestingResourcesContainer := []string{
    35  		"accelerate", "acl", "copy-source", "cors", "defaultObjectAcl",
    36  		"location", "logging", "partNumber", "policy",
    37  		"requestPayment", "torrent",
    38  		"versioning", "versionId", "versions", "website",
    39  		"uploads", "uploadId", "response-content-type",
    40  		"response-content-language", "response-expires",
    41  		"response-cache-control", "response-content-disposition",
    42  		"response-content-encoding", "delete", "lifecycle",
    43  		"tagging", "restore", "storageClass", "notification",
    44  		"replication", "analytics", "metrics",
    45  		"inventory", "select", "select-type",
    46  	}
    47  	sort.Strings(interestingResourcesContainer)
    48  	// check for duplicates in the array - if it happens it is a programmer error that will happen only when that
    49  	// query parameter is used - may be very hard to find.
    50  	tempMap := map[string]bool{}
    51  	var sortedArray []string
    52  	for _, word := range interestingResourcesContainer {
    53  		if _, ok := tempMap[word]; ok {
    54  			logging.ContextUnavailable().
    55  				WithField("word", word).
    56  				Warn("appears twice in sig\v2.go array interestingResourcesContainer. a programmer error")
    57  		} else {
    58  			tempMap[word] = true
    59  		}
    60  	}
    61  	for key := range tempMap {
    62  		sortedArray = append(sortedArray, key)
    63  	}
    64  	sort.Strings(sortedArray)
    65  	interestingResources = sortedArray
    66  }
    67  
// v2Context holds the credentials parsed out of a SigV2 Authorization header.
type v2Context struct {
	// accessKeyID is the "AccessKeyId" group captured from the header.
	accessKeyID string
	// signature is the header's signature after base64 decoding.
	signature []byte
}
    72  
    73  func (a v2Context) GetAccessKeyID() string {
    74  	return a.accessKeyID
    75  }
    76  
// V2SigAuthenticator parses and verifies the SigV2 signature of a single HTTP
// request.
type V2SigAuthenticator struct {
	// req is the request being authenticated.
	req *http.Request
	// bareDomain is compared against the request host when building the path
	// that goes into the string to sign.
	bareDomain string
	// sigCtx is filled in by Parse and read by Verify.
	sigCtx v2Context
}
    82  
    83  func NewV2SigAuthenticator(r *http.Request, bareDomain string) *V2SigAuthenticator {
    84  	return &V2SigAuthenticator{
    85  		req:        r,
    86  		bareDomain: bareDomain,
    87  	}
    88  }
    89  
    90  func (a *V2SigAuthenticator) Parse() (SigContext, error) {
    91  	ctx := a.req.Context()
    92  	headerValue := a.req.Header.Get(v2authHeaderName)
    93  	if len(headerValue) > 0 {
    94  		match := V2AuthHeaderRegexp.FindStringSubmatch(headerValue)
    95  		if len(match) == 0 {
    96  			logging.FromContext(ctx).Error("log header does not match v2 structure")
    97  			return nil, ErrHeaderMalformed
    98  		}
    99  		result := make(map[string]string)
   100  		for i, name := range V2AuthHeaderRegexp.SubexpNames() {
   101  			if i != 0 && name != "" {
   102  				result[name] = match[i]
   103  			}
   104  		}
   105  		sigCtx := v2Context{
   106  			accessKeyID: result["AccessKeyId"],
   107  		}
   108  		// parse signature
   109  		sig, err := base64.StdEncoding.DecodeString(result["Signature"])
   110  		if err != nil {
   111  			logging.FromContext(ctx).Error("log header does not match v2 structure (isn't proper base64)")
   112  			return nil, ErrHeaderMalformed
   113  		}
   114  		sigCtx.signature = sig
   115  		a.sigCtx = sigCtx
   116  		return sigCtx, nil
   117  	}
   118  	return nil, ErrHeaderMalformed
   119  }
   120  
   121  func headerValueToString(val []string) string {
   122  	var returnStr string
   123  	for i, item := range val {
   124  		if i == 0 {
   125  			returnStr = strings.TrimSpace(item)
   126  		} else {
   127  			returnStr += "," + strings.TrimSpace(item)
   128  		}
   129  	}
   130  	return returnStr
   131  }
   132  
   133  func canonicalStandardHeaders(headers http.Header) string {
   134  	var returnStr string
   135  	for _, hoi := range interestingHeaders {
   136  		foundHoi := false
   137  		for key, val := range headers {
   138  			if len(val) > 0 && strings.ToLower(key) == hoi {
   139  				returnStr += headerValueToString(val) + "\n"
   140  				foundHoi = true
   141  				break
   142  			}
   143  		}
   144  		if !foundHoi {
   145  			returnStr += "\n"
   146  		}
   147  	}
   148  	return returnStr
   149  }
   150  
   151  func canonicalCustomHeaders(headers http.Header) string {
   152  	var returnStr string
   153  	var foundKeys []string
   154  	for key := range headers {
   155  		if strings.HasPrefix(strings.ToLower(key), "x-amz-") {
   156  			foundKeys = append(foundKeys, key)
   157  		}
   158  	}
   159  	if len(foundKeys) == 0 {
   160  		return returnStr
   161  	}
   162  	sort.Strings(foundKeys)
   163  	for _, key := range foundKeys {
   164  		returnStr += fmt.Sprint(strings.ToLower(key), ":", headerValueToString(headers[key]), "\n")
   165  	}
   166  	return returnStr
   167  }
   168  
   169  func canonicalResources(query url.Values, authPath string) string {
   170  	var foundResources []string
   171  	var foundResourcesStr string
   172  	lowercaseQuery := make(url.Values)
   173  	if len(query) > 0 {
   174  		for key, val := range query {
   175  			lowercaseQuery[strings.ToLower(key)] = val
   176  		}
   177  		for _, r := range interestingResources { // the resulting array will be sorted by resource name, because interesting resources array is sorted
   178  			val, ok := lowercaseQuery[r]
   179  			if ok {
   180  				newValue := r
   181  				if len(strings.Join(val, "")) > 0 {
   182  					newValue += "=" + strings.Join(val, ",")
   183  				}
   184  				foundResources = append(foundResources, newValue)
   185  			}
   186  		}
   187  		if len(foundResources) > 0 {
   188  			foundResourcesStr = "?" + strings.Join(foundResources, "&")
   189  		}
   190  	}
   191  	return authPath + foundResourcesStr
   192  }
   193  
   194  func canonicalString(method string, query url.Values, path string, headers http.Header) string {
   195  	cs := strings.ToUpper(method) + "\n"
   196  	cs += canonicalStandardHeaders(headers)
   197  	cs += canonicalCustomHeaders(headers)
   198  	cs += canonicalResources(query, path)
   199  	return cs
   200  }
   201  
   202  func signCanonicalString(msg string, signature []byte) (digest []byte) {
   203  	h := hmac.New(sha1.New, signature)
   204  	_, _ = h.Write([]byte(msg))
   205  	digest = h.Sum(nil)
   206  	return
   207  }
   208  
   209  func buildPath(host string, bareDomain string, path string) string {
   210  	h := httputil.HostOnly(host)
   211  	b := httputil.HostOnly(bareDomain)
   212  	if h == b {
   213  		return path
   214  	}
   215  	bareSuffix := "." + b
   216  	if strings.HasSuffix(h, bareSuffix) {
   217  		prePath := strings.TrimSuffix(h, bareSuffix)
   218  		return "/" + prePath + path
   219  	}
   220  	// bareDomain is not suffix of the path probably a bug
   221  	logging.ContextUnavailable().
   222  		WithFields(logging.Fields{"request_host": host, "bare_domain": bareDomain}).
   223  		Error("request host mismatch")
   224  	return ""
   225  }
   226  
   227  func (a *V2SigAuthenticator) Verify(creds *model.Credential) error {
   228  	/*
   229  		s3 sigV2 implementation:
   230  		the s3 signature is somewhat different from general aws signature implementation.
   231  		in boto3 configuration their value is 's3' and 's3v4' respectively, while the general aws signatures are
   232  		'v2' and 'v4'.
   233  		in 2020, the GO aws sdk does not implement 's3' signature, So I will "translate" it from boto3.
   234  		source is class botocore.auth.HmacV1Auth
   235  		steps in building the string to be signed:
   236  		1. create initial string, with uppercase http method + '\n'
   237  		2. collect all required headers(in order):
   238  			- standard headers - 'content-md5', 'content-type', 'date' - if one of those does not appear, it is replaces with an
   239  			empty line '\n'. sorted and stringify
   240  			- custom headers - any header that starts with 'x-amz-'. if the header appears more than once - the values
   241  			are joined with ',' separator. sorted and stringify.
   242  			- path of the object
   243  			- QSA(Query String Arguments) - query arguments are searched for "interesting Resources".
   244  	*/
   245  
   246  	// Prefer the raw path if it exists -- *this* is what SigV2 signs
   247  	rawPath := a.req.URL.EscapedPath()
   248  
   249  	path := buildPath(a.req.Host, a.bareDomain, rawPath)
   250  	stringToSign := canonicalString(a.req.Method, a.req.URL.Query(), path, a.req.Header)
   251  	digest := signCanonicalString(stringToSign, []byte(creds.SecretAccessKey))
   252  	if !Equal(digest, a.sigCtx.signature) {
   253  		return errors.ErrSignatureDoesNotMatch
   254  	}
   255  	return nil
   256  }
   257  
   258  func (a *V2SigAuthenticator) String() string {
   259  	return "sigv2"
   260  }