go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/scheduler/appengine/task/urlfetch/urlfetch.go (about)

     1  // Copyright 2015 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package urlfetch implements tasks that just make HTTP calls.
    16  package urlfetch
    17  
    18  import (
    19  	"bytes"
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"net/http"
    25  	"net/url"
    26  	"strings"
    27  	"time"
    28  
    29  	"github.com/golang/protobuf/proto"
    30  
    31  	"google.golang.org/api/pubsub/v1"
    32  
    33  	"go.chromium.org/luci/common/clock"
    34  	"go.chromium.org/luci/common/logging"
    35  	"go.chromium.org/luci/config/validation"
    36  	"go.chromium.org/luci/gae/service/urlfetch"
    37  	"go.chromium.org/luci/scheduler/appengine/internal"
    38  	"go.chromium.org/luci/scheduler/appengine/messages"
    39  	"go.chromium.org/luci/scheduler/appengine/task"
    40  )
    41  
    42  // TaskManager implements task.Manager interface for tasks defined with
    43  // UrlFetchTask proto message
    44  type TaskManager struct {
    45  }
    46  
    47  // Name is part of Manager interface.
    48  func (m TaskManager) Name() string {
    49  	return "url_fetch"
    50  }
    51  
    52  // ProtoMessageType is part of Manager interface.
    53  func (m TaskManager) ProtoMessageType() proto.Message {
    54  	return (*messages.UrlFetchTask)(nil)
    55  }
    56  
    57  // Traits is part of Manager interface.
    58  func (m TaskManager) Traits() task.Traits {
    59  	return task.Traits{
    60  		Multistage: false, // we don't use task.StatusRunning state
    61  	}
    62  }
    63  
    64  // ValidateProtoMessage is part of Manager interface.
    65  func (m TaskManager) ValidateProtoMessage(c *validation.Context, msg proto.Message, realmID string) {
    66  	cfg, ok := msg.(*messages.UrlFetchTask)
    67  	if !ok {
    68  		c.Errorf("wrong type %T, expecting *messages.UrlFetchTask", msg)
    69  		return
    70  	}
    71  	if cfg == nil {
    72  		c.Errorf("expecting a non-empty UrlFetchTask")
    73  		return
    74  	}
    75  
    76  	// Validate 'method' field.
    77  	// TODO(vadimsh): Add more methods (POST, PUT) when 'Body' is added.
    78  	goodMethods := map[string]bool{"GET": true}
    79  	if cfg.Method != "" && !goodMethods[cfg.Method] {
    80  		c.Errorf("unsupported HTTP method: %q", cfg.Method)
    81  	}
    82  
    83  	// Validate 'url' field.
    84  	if cfg.Url == "" {
    85  		c.Errorf("field 'url' is required")
    86  	} else {
    87  		u, err := url.Parse(cfg.Url)
    88  		if err != nil {
    89  			c.Errorf("invalid URL %q: %s", cfg.Url, err)
    90  		} else if !u.IsAbs() {
    91  			c.Errorf("not an absolute url: %q", cfg.Url)
    92  		}
    93  	}
    94  
    95  	// Validate 'timeout_sec' field. GAE task queue request deadline is 10 min, so
    96  	// limit URL fetch call duration to 8 min (giving 2 min to spare).
    97  	if cfg.TimeoutSec != 0 {
    98  		if cfg.TimeoutSec < 1 {
    99  			c.Errorf("minimum allowed 'timeout_sec' is 1 sec, got %d", cfg.TimeoutSec)
   100  		}
   101  		if cfg.TimeoutSec > 480 {
   102  			c.Errorf("maximum allowed 'timeout_sec' is 480 sec, got %d", cfg.TimeoutSec)
   103  		}
   104  	}
   105  }
   106  
   107  // LaunchTask is part of Manager interface.
   108  func (m TaskManager) LaunchTask(c context.Context, ctl task.Controller) error {
   109  	cfg := ctl.Task().(*messages.UrlFetchTask)
   110  	started := clock.Now(c)
   111  
   112  	// Defaults.
   113  	method := cfg.Method
   114  	if method == "" {
   115  		method = "GET"
   116  	}
   117  	timeout := cfg.TimeoutSec
   118  	if timeout == 0 {
   119  		timeout = 60
   120  	}
   121  
   122  	ctl.DebugLog("%s %s", method, cfg.Url)
   123  
   124  	// There must be no errors here in reality, since cfg is validated already by
   125  	// ValidateProtoMessage.
   126  	u, err := url.Parse(cfg.Url)
   127  	if err != nil {
   128  		return err
   129  	}
   130  
   131  	type tuple struct {
   132  		resp *http.Response
   133  		body []byte // first 4Kb of the response, for debug log
   134  		err  error
   135  	}
   136  	result := make(chan tuple)
   137  
   138  	// Do the fetch asynchronously with datastore update.
   139  	go func() {
   140  		defer close(result)
   141  		c, cancel := clock.WithTimeout(c, time.Duration(timeout)*time.Second)
   142  		defer cancel()
   143  		client := &http.Client{Transport: urlfetch.Get(c)}
   144  		resp, err := client.Do(&http.Request{
   145  			Method: method,
   146  			URL:    u,
   147  		})
   148  		if err != nil {
   149  			result <- tuple{nil, nil, err}
   150  			return
   151  		}
   152  		defer resp.Body.Close()
   153  		// Ignore read errors here. HTTP status code is set, it's the main output
   154  		// of the operation. Read 4K only since we use body only for debug message
   155  		// that is limited in size.
   156  		buf := bytes.Buffer{}
   157  		io.CopyN(&buf, resp.Body, 4096)
   158  		result <- tuple{resp, buf.Bytes(), nil}
   159  	}()
   160  
   161  	// Save the invocation log now (since URL fetch can take up to 8 minutes).
   162  	// Ignore errors. As long as final Save is OK, we don't care about this one.
   163  	// Do NOT set status to StatusRunning, because by doing so we take
   164  	// responsibility to detect crashes below and we don't want to do it (let
   165  	// the scheduler retry LaunchTask automatically instead).
   166  	if err := ctl.Save(c); err != nil {
   167  		logging.Warningf(c, "Failed to save invocation state: %s", err)
   168  	}
   169  
   170  	// Wait for completion.
   171  	res := <-result
   172  
   173  	duration := clock.Now(c).Sub(started)
   174  	status := task.StatusSucceeded
   175  	if res.err != nil || res.resp.StatusCode >= 400 {
   176  		status = task.StatusFailed
   177  	}
   178  
   179  	ctl.DebugLog("Finished with overall status %s in %s", status, duration)
   180  	if res.err != nil {
   181  		ctl.DebugLog("URL fetch error: %s", res.err)
   182  	} else {
   183  		ctl.DebugLog(dumpResponse(res.resp, res.body))
   184  	}
   185  	ctl.State().Status = status
   186  	return nil
   187  }
   188  
   189  // AbortTask is part of Manager interface.
   190  func (m TaskManager) AbortTask(c context.Context, ctl task.Controller) error {
   191  	return nil
   192  }
   193  
   194  // ExamineNotification is part of Manager interface.
   195  func (m TaskManager) ExamineNotification(c context.Context, msg *pubsub.PubsubMessage) string {
   196  	return ""
   197  }
   198  
   199  // HandleNotification is part of Manager interface.
   200  func (m TaskManager) HandleNotification(c context.Context, ctl task.Controller, msg *pubsub.PubsubMessage) error {
   201  	return errors.New("not implemented")
   202  }
   203  
   204  // HandleTimer is part of Manager interface.
   205  func (m TaskManager) HandleTimer(c context.Context, ctl task.Controller, name string, payload []byte) error {
   206  	return errors.New("not implemented")
   207  }
   208  
   209  // GetDebugState is part of Manager interface.
   210  func (m TaskManager) GetDebugState(c context.Context, ctl task.ControllerReadOnly) (*internal.DebugManagerState, error) {
   211  	return nil, fmt.Errorf("no debug state")
   212  }
   213  
   214  ////////////////////////////////////////////////////////////////////////////////
   215  
   216  // dumpResponse converts http.Response to text for the invocation debug log.
   217  func dumpResponse(resp *http.Response, body []byte) string {
   218  	out := &bytes.Buffer{}
   219  	fmt.Fprintln(out, resp.Status)
   220  	resp.Header.Write(out)
   221  	fmt.Fprintln(out)
   222  	if len(body) == 0 {
   223  		fmt.Fprintln(out, "<empty body>")
   224  	} else if isTextContent(resp.Header) {
   225  		out.Write(body)
   226  		if body[len(body)-1] != '\n' {
   227  			fmt.Fprintln(out)
   228  		}
   229  		if int64(len(body)) < resp.ContentLength {
   230  			fmt.Fprintln(out, "<truncated>")
   231  		}
   232  	} else {
   233  		fmt.Fprintln(out, "<binary response>")
   234  	}
   235  	return out.String()
   236  }
   237  
   238  var textContentTypes = []string{
   239  	"text/",
   240  	"application/json",
   241  	"application/xml",
   242  }
   243  
   244  // isTextContent returns True if Content-Type header corresponds to some
   245  // readable text type.
   246  func isTextContent(h http.Header) bool {
   247  	for _, header := range h["Content-Type"] {
   248  		for _, good := range textContentTypes {
   249  			if strings.HasPrefix(header, good) {
   250  				return true
   251  			}
   252  		}
   253  	}
   254  	return false
   255  }