go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/internal/artifactcontent/server.go (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package artifactcontent
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"net/http"
    21  	"strconv"
    22  	"strings"
    23  	"time"
    24  
    25  	"cloud.google.com/go/spanner"
    26  	"google.golang.org/genproto/googleapis/bytestream"
    27  	"google.golang.org/grpc/codes"
    28  
    29  	"go.chromium.org/luci/common/clock"
    30  	"go.chromium.org/luci/common/errors"
    31  	"go.chromium.org/luci/common/logging"
    32  	"go.chromium.org/luci/common/retry/transient"
    33  	"go.chromium.org/luci/grpc/appstatus"
    34  	"go.chromium.org/luci/grpc/grpcutil"
    35  	"go.chromium.org/luci/resultdb/internal/artifacts"
    36  	"go.chromium.org/luci/resultdb/internal/invocations"
    37  	"go.chromium.org/luci/resultdb/internal/spanutil"
    38  	"go.chromium.org/luci/resultdb/pbutil"
    39  	"go.chromium.org/luci/server/router"
    40  	"go.chromium.org/luci/server/span"
    41  	"go.chromium.org/luci/server/tokens"
    42  )
    43  
// artifactNameTokenKind describes the tokens embedded into signed artifact
// content URLs.
//
// GenerateSignedURL generates a token from the artifact name and checkAccess
// validates the token of an incoming request against the requested artifact
// name, so a token is only accepted for the artifact it was issued for.
// The Expiration value is also used by GenerateSignedURL to compute the
// expiration time it reports to callers.
var artifactNameTokenKind = tokens.TokenKind{
	Algo:       tokens.TokenAlgoHmacSHA256,
	Expiration: time.Hour,
	SecretKey:  "artifact_name",
	Version:    1,
}
    50  
// HostnameProvider returns a hostname to use in generated signed URLs.
//
// As input it accepts the `host` metadata value of requests such as
// GetArtifacts. The input may be an empty string; HostnameProvider must still
// return some hostname in that case, because GenerateSignedURL fails with an
// error when the returned hostname is empty.
type HostnameProvider func(requestHost string) string
    57  
// Server can serve artifact content, and generate signed URLs to the content.
//
// Content is served by the HTTP handlers registered with InstallHandlers;
// signed URLs pointing at those handlers are produced by GenerateSignedURL.
type Server struct {
	// Use http:// (not https://) for generated URLs.
	InsecureURLs bool

	// Returns a hostname to use in generated signed URLs.
	// GenerateSignedURL calls it unconditionally and rejects an empty result.
	HostnameProvider HostnameProvider

	// Reads a blob from RBE-CAS.
	ReadCASBlob func(ctx context.Context, req *bytestream.ReadRequest) (bytestream.ByteStream_ReadClient, error)

	// Full name of the RBE-CAS instance used to store artifacts,
	// e.g. "projects/luci-resultdb/instances/artifacts".
	RBECASInstanceName string
}
    73  
    74  // InstallHandlers installs handlers to serve artifact content.
    75  //
    76  // May be called multiple times to install the handler into multiple virtual
    77  // hosts.
    78  func (s *Server) InstallHandlers(r *router.Router) {
    79  	// TODO(nodir): use OAuth2.0 middleware to allow OAuth credentials.
    80  
    81  	// Ideally we use a more narrow pattern, but we cannot because of
    82  	// https://github.com/julienschmidt/httprouter/issues/208
    83  	// This is triggered by URL-escaped test IDs.
    84  	r.GET("/invocations/*rest", nil, s.handleGET)
    85  	r.OPTIONS("/invocations/*rest", nil, s.handleOPTIONS)
    86  }
    87  
    88  func (s *Server) handleGET(c *router.Context) {
    89  	req := &contentRequest{Server: s, w: c.Writer}
    90  	req.handle(c)
    91  }
    92  
    93  func (s *Server) handleOPTIONS(c *router.Context) {
    94  	s.setAccessControlHeaders(c, true)
    95  	c.Writer.WriteHeader(http.StatusOK)
    96  }
    97  
    98  // setAccessControlHeaders allows CORS.
    99  func (s *Server) setAccessControlHeaders(c *router.Context, preflight bool) {
   100  	h := c.Writer.Header()
   101  	h.Add("Access-Control-Allow-Origin", "*")
   102  	h.Add("Access-Control-Allow-Credentials", "false")
   103  
   104  	if preflight {
   105  		h.Add("Access-Control-Allow-Headers", "Origin, Authorization")
   106  		h.Add("Access-Control-Allow-Methods", "OPTIONS, GET")
   107  	}
   108  }
   109  
// contentRequest holds the state of a single artifact content request.
//
// It embeds the Server that handles the request. The remaining fields are
// filled in while the request is processed by handle.
type contentRequest struct {
	*Server
	// Response writer of the request being served.
	w http.ResponseWriter

	// Escaped URL path of the request with leading and trailing slashes
	// trimmed; set by parseRequest and parsed as an artifact name.
	artifactName string

	// Components of artifactName, set by parseRequest. Together they form the
	// key of the row read from the Artifacts table.
	invID      invocations.ID
	parentID   string
	artifactID string
	// Value of the `n` query parameter, in bytes; zero if the parameter is
	// absent. When positive, it caps the Content-Length written by
	// writeContentHeaders.
	limit int64

	// ContentType and Size columns of the artifact row, read by handle.
	contentType spanner.NullString
	size        spanner.NullInt64
}
   124  
   125  func (r *contentRequest) handle(c *router.Context) {
   126  	r.setAccessControlHeaders(c, false)
   127  
   128  	if err := r.parseRequest(c.Request.Context(), c.Request); err != nil {
   129  		r.sendError(c.Request.Context(), appstatus.BadRequest(err))
   130  		return
   131  	}
   132  
   133  	if err := r.checkAccess(c.Request.Context(), c.Request); err != nil {
   134  		r.sendError(c.Request.Context(), err)
   135  		return
   136  	}
   137  
   138  	// Read the state from database.
   139  	var rbeCASHash spanner.NullString
   140  	key := r.invID.Key(r.parentID, r.artifactID)
   141  	err := spanutil.ReadRow(span.Single(c.Request.Context()), "Artifacts", key, map[string]any{
   142  		"ContentType": &r.contentType,
   143  		"Size":        &r.size,
   144  		"RBECASHash":  &rbeCASHash,
   145  	})
   146  
   147  	// Check the error and write content to the response body.
   148  	switch {
   149  	case spanner.ErrCode(err) == codes.NotFound:
   150  		err = appstatus.Attachf(err, codes.NotFound, "%s not found", r.artifactName)
   151  		r.sendError(c.Request.Context(), err)
   152  
   153  	case err != nil:
   154  		r.sendError(c.Request.Context(), err)
   155  
   156  	case rbeCASHash.Valid:
   157  		mw := NewMetricsWriter(c)
   158  		defer mw.Download(c.Request.Context(), r.size.Int64)
   159  		r.handleRBECASContent(c, rbeCASHash.StringVal)
   160  
   161  	default:
   162  		err = appstatus.Attachf(err, codes.NotFound, "%s not found", r.artifactName)
   163  		r.sendError(c.Request.Context(), err)
   164  	}
   165  }
   166  
   167  func (r *contentRequest) parseRequest(ctx context.Context, req *http.Request) error {
   168  	// We should not use URL.Path because it is important to preserve escaping
   169  	// of test IDs.
   170  	r.artifactName = strings.Trim(req.URL.EscapedPath(), "/")
   171  
   172  	invID, testID, resultID, artifactID, err := pbutil.ParseArtifactName(r.artifactName)
   173  	if err != nil {
   174  		return errors.Annotate(err, "invalid artifact name %q", r.artifactName).Err()
   175  	}
   176  	r.invID = invocations.ID(invID)
   177  	r.parentID = artifacts.ParentID(testID, resultID)
   178  	r.artifactID = artifactID
   179  
   180  	limitStr := req.URL.Query().Get("n")
   181  	if limitStr == "" {
   182  		return nil
   183  	}
   184  
   185  	r.limit, err = strconv.ParseInt(limitStr, 10, 64)
   186  	if err != nil || r.limit <= 0 {
   187  		return errors.Annotate(err, "query parmeter n must be a positive integer, but got %q", limitStr).Err()
   188  	}
   189  	return nil
   190  }
   191  
   192  // checkAccess ensures that the requester has access to the artifact content.
   193  //
   194  // Checks access using signed token query string param.
   195  func (r *contentRequest) checkAccess(ctx context.Context, req *http.Request) error {
   196  	token := req.URL.Query().Get("token")
   197  	if token == "" {
   198  		return appstatus.Errorf(codes.Unauthenticated, "no token")
   199  	}
   200  
   201  	_, err := artifactNameTokenKind.Validate(ctx, token, []byte(r.artifactName))
   202  	if !transient.Tag.In(err) {
   203  		return appstatus.Attachf(err, codes.PermissionDenied, "invalid token")
   204  	}
   205  	return err
   206  }
   207  
   208  func (r *contentRequest) sendError(ctx context.Context, err error) {
   209  	if err == nil {
   210  		panic("err is nil")
   211  	}
   212  	st, ok := appstatus.Get(err)
   213  	httpCode := grpcutil.CodeStatus(st.Code())
   214  	if !ok || httpCode == http.StatusInternalServerError {
   215  		logging.Errorf(ctx, "responding with: %s", err)
   216  		http.Error(r.w, "Internal server error", http.StatusInternalServerError)
   217  	} else {
   218  		logging.Warningf(ctx, "responding with: %s", st.Message())
   219  		http.Error(r.w, st.Message(), httpCode)
   220  	}
   221  }
   222  
   223  func (r *contentRequest) writeContentHeaders() {
   224  	if r.contentType.Valid {
   225  		r.w.Header().Set("Content-Type", r.contentType.StringVal)
   226  	}
   227  	if r.size.Valid {
   228  		length := r.size.Int64
   229  		if r.limit > 0 && r.limit < length {
   230  			length = r.limit
   231  		}
   232  		r.w.Header().Set("Content-Length", strconv.FormatInt(length, 10))
   233  	}
   234  }
   235  
   236  // GenerateSignedURL generates a signed HTTPS URL back to this server.
   237  // The returned token works only with the same artifact name.
   238  func (s *Server) GenerateSignedURL(ctx context.Context, requestHost, artifactName string) (url string, expiration time.Time, err error) {
   239  	now := clock.Now(ctx).UTC()
   240  
   241  	tok, err := artifactNameTokenKind.Generate(ctx, []byte(artifactName), nil, artifactNameTokenKind.Expiration)
   242  	if err != nil {
   243  		return "", time.Time{}, err
   244  	}
   245  
   246  	scheme := "https"
   247  	if s.InsecureURLs {
   248  		scheme = "http"
   249  	}
   250  
   251  	// Derive the hostname for generated URL from the request host. This is used
   252  	// to make sure GetArtifacts requests that hit "canary.*" API host also get
   253  	// "canary.*" artifact links.
   254  	hostname := s.HostnameProvider(requestHost)
   255  	if hostname == "" {
   256  		return "", time.Time{}, errors.Reason("empty content hostname").Err()
   257  	}
   258  
   259  	// Using url.URL here is hard because it escapes artifact name which we don't want.
   260  	url = fmt.Sprintf("%s://%s/%s?token=%s", scheme, hostname, artifactName, tok)
   261  	expiration = now.Add(artifactNameTokenKind.Expiration)
   262  	return
   263  }