go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/scheduler/appengine/task/urlfetch/urlfetch.go (about) 1 // Copyright 2015 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package urlfetch implements tasks that just make HTTP calls. 16 package urlfetch 17 18 import ( 19 "bytes" 20 "context" 21 "errors" 22 "fmt" 23 "io" 24 "net/http" 25 "net/url" 26 "strings" 27 "time" 28 29 "github.com/golang/protobuf/proto" 30 31 "google.golang.org/api/pubsub/v1" 32 33 "go.chromium.org/luci/common/clock" 34 "go.chromium.org/luci/common/logging" 35 "go.chromium.org/luci/config/validation" 36 "go.chromium.org/luci/gae/service/urlfetch" 37 "go.chromium.org/luci/scheduler/appengine/internal" 38 "go.chromium.org/luci/scheduler/appengine/messages" 39 "go.chromium.org/luci/scheduler/appengine/task" 40 ) 41 42 // TaskManager implements task.Manager interface for tasks defined with 43 // UrlFetchTask proto message 44 type TaskManager struct { 45 } 46 47 // Name is part of Manager interface. 48 func (m TaskManager) Name() string { 49 return "url_fetch" 50 } 51 52 // ProtoMessageType is part of Manager interface. 53 func (m TaskManager) ProtoMessageType() proto.Message { 54 return (*messages.UrlFetchTask)(nil) 55 } 56 57 // Traits is part of Manager interface. 
58 func (m TaskManager) Traits() task.Traits { 59 return task.Traits{ 60 Multistage: false, // we don't use task.StatusRunning state 61 } 62 } 63 64 // ValidateProtoMessage is part of Manager interface. 65 func (m TaskManager) ValidateProtoMessage(c *validation.Context, msg proto.Message, realmID string) { 66 cfg, ok := msg.(*messages.UrlFetchTask) 67 if !ok { 68 c.Errorf("wrong type %T, expecting *messages.UrlFetchTask", msg) 69 return 70 } 71 if cfg == nil { 72 c.Errorf("expecting a non-empty UrlFetchTask") 73 return 74 } 75 76 // Validate 'method' field. 77 // TODO(vadimsh): Add more methods (POST, PUT) when 'Body' is added. 78 goodMethods := map[string]bool{"GET": true} 79 if cfg.Method != "" && !goodMethods[cfg.Method] { 80 c.Errorf("unsupported HTTP method: %q", cfg.Method) 81 } 82 83 // Validate 'url' field. 84 if cfg.Url == "" { 85 c.Errorf("field 'url' is required") 86 } else { 87 u, err := url.Parse(cfg.Url) 88 if err != nil { 89 c.Errorf("invalid URL %q: %s", cfg.Url, err) 90 } else if !u.IsAbs() { 91 c.Errorf("not an absolute url: %q", cfg.Url) 92 } 93 } 94 95 // Validate 'timeout_sec' field. GAE task queue request deadline is 10 min, so 96 // limit URL fetch call duration to 8 min (giving 2 min to spare). 97 if cfg.TimeoutSec != 0 { 98 if cfg.TimeoutSec < 1 { 99 c.Errorf("minimum allowed 'timeout_sec' is 1 sec, got %d", cfg.TimeoutSec) 100 } 101 if cfg.TimeoutSec > 480 { 102 c.Errorf("maximum allowed 'timeout_sec' is 480 sec, got %d", cfg.TimeoutSec) 103 } 104 } 105 } 106 107 // LaunchTask is part of Manager interface. 108 func (m TaskManager) LaunchTask(c context.Context, ctl task.Controller) error { 109 cfg := ctl.Task().(*messages.UrlFetchTask) 110 started := clock.Now(c) 111 112 // Defaults. 
113 method := cfg.Method 114 if method == "" { 115 method = "GET" 116 } 117 timeout := cfg.TimeoutSec 118 if timeout == 0 { 119 timeout = 60 120 } 121 122 ctl.DebugLog("%s %s", method, cfg.Url) 123 124 // There must be no errors here in reality, since cfg is validated already by 125 // ValidateProtoMessage. 126 u, err := url.Parse(cfg.Url) 127 if err != nil { 128 return err 129 } 130 131 type tuple struct { 132 resp *http.Response 133 body []byte // first 4Kb of the response, for debug log 134 err error 135 } 136 result := make(chan tuple) 137 138 // Do the fetch asynchronously with datastore update. 139 go func() { 140 defer close(result) 141 c, cancel := clock.WithTimeout(c, time.Duration(timeout)*time.Second) 142 defer cancel() 143 client := &http.Client{Transport: urlfetch.Get(c)} 144 resp, err := client.Do(&http.Request{ 145 Method: method, 146 URL: u, 147 }) 148 if err != nil { 149 result <- tuple{nil, nil, err} 150 return 151 } 152 defer resp.Body.Close() 153 // Ignore read errors here. HTTP status code is set, it's the main output 154 // of the operation. Read 4K only since we use body only for debug message 155 // that is limited in size. 156 buf := bytes.Buffer{} 157 io.CopyN(&buf, resp.Body, 4096) 158 result <- tuple{resp, buf.Bytes(), nil} 159 }() 160 161 // Save the invocation log now (since URL fetch can take up to 8 minutes). 162 // Ignore errors. As long as final Save is OK, we don't care about this one. 163 // Do NOT set status to StatusRunning, because by doing so we take 164 // responsibility to detect crashes below and we don't want to do it (let 165 // the scheduler retry LaunchTask automatically instead). 166 if err := ctl.Save(c); err != nil { 167 logging.Warningf(c, "Failed to save invocation state: %s", err) 168 } 169 170 // Wait for completion. 
171 res := <-result 172 173 duration := clock.Now(c).Sub(started) 174 status := task.StatusSucceeded 175 if res.err != nil || res.resp.StatusCode >= 400 { 176 status = task.StatusFailed 177 } 178 179 ctl.DebugLog("Finished with overall status %s in %s", status, duration) 180 if res.err != nil { 181 ctl.DebugLog("URL fetch error: %s", res.err) 182 } else { 183 ctl.DebugLog(dumpResponse(res.resp, res.body)) 184 } 185 ctl.State().Status = status 186 return nil 187 } 188 189 // AbortTask is part of Manager interface. 190 func (m TaskManager) AbortTask(c context.Context, ctl task.Controller) error { 191 return nil 192 } 193 194 // ExamineNotification is part of Manager interface. 195 func (m TaskManager) ExamineNotification(c context.Context, msg *pubsub.PubsubMessage) string { 196 return "" 197 } 198 199 // HandleNotification is part of Manager interface. 200 func (m TaskManager) HandleNotification(c context.Context, ctl task.Controller, msg *pubsub.PubsubMessage) error { 201 return errors.New("not implemented") 202 } 203 204 // HandleTimer is part of Manager interface. 205 func (m TaskManager) HandleTimer(c context.Context, ctl task.Controller, name string, payload []byte) error { 206 return errors.New("not implemented") 207 } 208 209 // GetDebugState is part of Manager interface. 210 func (m TaskManager) GetDebugState(c context.Context, ctl task.ControllerReadOnly) (*internal.DebugManagerState, error) { 211 return nil, fmt.Errorf("no debug state") 212 } 213 214 //////////////////////////////////////////////////////////////////////////////// 215 216 // dumpResponse converts http.Response to text for the invocation debug log. 
// The dump consists of the status line, the response headers, a blank line
// and then one of:
//   - "<empty body>" when body has no bytes;
//   - the body itself when the Content-Type looks like text, followed by
//     "<truncated>" if body is shorter than resp.ContentLength;
//   - "<binary response>" for any other content type.
//
// body is expected to be the (possibly partial) prefix of the response body
// that the caller has already read; resp.Body is not touched here.
func dumpResponse(resp *http.Response, body []byte) string {
	out := &bytes.Buffer{}
	fmt.Fprintln(out, resp.Status)
	// Writes into a bytes.Buffer do not fail, so the error carries no signal.
	_ = resp.Header.Write(out)
	fmt.Fprintln(out)

	switch {
	case len(body) == 0:
		fmt.Fprintln(out, "<empty body>")
	case isTextContent(resp.Header):
		out.Write(body)
		// Keep every marker on its own line even if the body lacks a final EOL.
		if body[len(body)-1] != '\n' {
			fmt.Fprintln(out)
		}
		// A negative ContentLength never satisfies this comparison, so such
		// responses are never marked as truncated.
		if int64(len(body)) < resp.ContentLength {
			fmt.Fprintln(out, "<truncated>")
		}
	default:
		fmt.Fprintln(out, "<binary response>")
	}
	return out.String()
}

// textContentTypes lists lowercase Content-Type prefixes that are treated as
// human-readable text.
var textContentTypes = []string{
	"text/",
	"application/json",
	"application/xml",
}

// isTextContent returns true if any Content-Type header value corresponds to
// some readable text type.
//
// Media type names are case-insensitive, so each value is lowercased (and
// stripped of surrounding whitespace) before it is compared against the
// prefixes in textContentTypes.
func isTextContent(h http.Header) bool {
	for _, value := range h["Content-Type"] {
		mediaType := strings.ToLower(strings.TrimSpace(value))
		for _, good := range textContentTypes {
			if strings.HasPrefix(mediaType, good) {
				return true
			}
		}
	}
	return false
}