github.com/mholt/caddy-l4@v0.0.0-20241104153248-ec8fae209322/modules/l4http/httpmatcher.go (about) 1 // Copyright 2020 Matthew Holt 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package l4http 16 17 import ( 18 "bufio" 19 "bytes" 20 "encoding/json" 21 "fmt" 22 "io" 23 "net/http" 24 "net/url" 25 26 "github.com/caddyserver/caddy/v2" 27 "github.com/caddyserver/caddy/v2/caddyconfig/caddyfile" 28 "github.com/caddyserver/caddy/v2/modules/caddyhttp" 29 "golang.org/x/net/http2" 30 "golang.org/x/net/http2/hpack" 31 32 "github.com/mholt/caddy-l4/layer4" 33 "github.com/mholt/caddy-l4/modules/l4tls" 34 ) 35 36 func init() { 37 caddy.RegisterModule(&MatchHTTP{}) 38 } 39 40 // MatchHTTP is able to match HTTP connections. The auto-generated 41 // documentation for this type is wrong; instead of an object, it 42 // is [an array of matcher set objects](https://caddyserver.com/docs/json/apps/http/servers/routes/match/). 43 type MatchHTTP struct { 44 MatcherSetsRaw caddyhttp.RawMatcherSets `json:"-" caddy:"namespace=http.matchers"` 45 matcherSets caddyhttp.MatcherSets 46 } 47 48 // CaddyModule returns the Caddy module information. 49 func (*MatchHTTP) CaddyModule() caddy.ModuleInfo { 50 return caddy.ModuleInfo{ 51 ID: "layer4.matchers.http", 52 New: func() caddy.Module { return new(MatchHTTP) }, 53 } 54 } 55 56 // UnmarshalJSON satisfies the json.Unmarshaler interface. 57 func (m *MatchHTTP) UnmarshalJSON(b []byte) error { 58 return json.Unmarshal(b, &m.MatcherSetsRaw) 59 } 60 61 // MarshalJSON satisfies the json.Marshaler interface. 62 func (m *MatchHTTP) MarshalJSON() ([]byte, error) { 63 return json.Marshal(m.MatcherSetsRaw) 64 } 65 66 // Provision sets up the handler. 67 func (m *MatchHTTP) Provision(ctx caddy.Context) error { 68 matchersIface, err := ctx.LoadModule(m, "MatcherSetsRaw") 69 if err != nil { 70 return fmt.Errorf("loading matcher modules: %v", err) 71 } 72 err = m.matcherSets.FromInterface(matchersIface) 73 if err != nil { 74 return err 75 } 76 return nil 77 } 78 79 // Match returns true if the conn starts with an HTTP request. 80 func (m *MatchHTTP) Match(cx *layer4.Connection) (bool, error) { 81 // TODO: do we need a more standardized way to amortize matchers? or at least to remember decoded results from previous matchers? 82 req, ok := cx.GetVar("http_request").(*http.Request) 83 if !ok { 84 var err error 85 86 data := cx.MatchingBytes() 87 needMore, matched := m.isHttp(data) 88 if needMore { 89 if len(data) >= layer4.MaxMatchingBytes { 90 return false, layer4.ErrMatchingBufferFull 91 } 92 return false, layer4.ErrConsumedAllPrefetchedBytes 93 } 94 if !matched { 95 return false, nil 96 } 97 98 // use bufio reader which exactly matches the size of prefetched data, 99 // to not trigger all bytes consumed error 100 bufReader := bufio.NewReaderSize(cx, len(data)) 101 req, err = http.ReadRequest(bufReader) 102 if err != nil { 103 return false, err 104 } 105 106 // check if req is a http2 request made with prior knowledge and if so parse it 107 err = m.handleHttp2WithPriorKnowledge(bufReader, req) 108 if err != nil { 109 return false, err 110 } 111 112 // if the tls handler was used before fill in the TLS field of the request 113 // with the last aka innermost tls connection state 114 if connectionStates := l4tls.GetConnectionStates(cx); len(connectionStates) > 0 { 115 req.TLS = connectionStates[len(connectionStates)-1] 116 } 117 118 // in order to use request matchers, we have to populate the request context 119 req = caddyhttp.PrepareRequest(req, caddy.NewReplacer(), nil, nil) 120 121 // remember this for future use 122 cx.SetVar("http_request", req) 123 124 // also add values to the replacer (TODO: we could probably find a way to use the http app's replacer values) 125 repl := cx.Context.Value(layer4.ReplacerCtxKey).(*caddy.Replacer) 126 repl.Set("l4.http.host", req.Host) 127 } 128 129 // we have a valid HTTP request, so we can drill down further if there are 130 // any more matchers configured 131 return m.matcherSets.AnyMatch(req), nil 132 } 133 134 // isHttp test if the buffered data looks like HTTP by looking at the first line. 135 // first boolean determines if more data is required 136 func (m MatchHTTP) isHttp(data []byte) (bool, bool) { 137 // try to find the end of a http request line, for example " HTTP/1.1\r\n" 138 i := bytes.IndexByte(data, 0x0a) // find first new line 139 if i < 10 { 140 return true, false 141 } 142 // assume only \n line ending 143 start := i - 9 // position of space in front of HTTP 144 end := i - 3 // cut off version number "1.1" or "2.0" 145 // if we got a correct \r\n line ending shift the calculated start & end to the left 146 if data[i-1] == 0x0d { 147 start -= 1 148 end -= 1 149 } 150 return false, bytes.Compare(data[start:end], []byte(" HTTP/")) == 0 151 } 152 153 // Parses information from a http2 request with prior knowledge (RFC 7540 Section 3.4) 154 func (m *MatchHTTP) handleHttp2WithPriorKnowledge(reader io.Reader, req *http.Request) error { 155 // Does req contain a valid http2 magic? 156 // https://github.com/golang/net/blob/a630d4f3e7a22f21271532b4b88e1693824a838f/http2/h2c/h2c.go#L74 157 if req.Method != "PRI" || len(req.Header) != 0 || req.URL.Path != "*" || req.Proto != "HTTP/2.0" { 158 return nil 159 } 160 161 const expectedBody = "SM\r\n\r\n" 162 163 body := make([]byte, len(expectedBody)) 164 n, err := io.ReadFull(reader, body) 165 if err != nil { 166 return err 167 } 168 169 if string(body[:n]) != expectedBody { 170 return nil 171 } 172 173 framer := http2.NewFramer(io.Discard, reader) 174 175 // read the first 10 frames until we get a headers frame (skipping settings, window update & priority frames) 176 var frame http2.Frame 177 maxAttempts := 10 178 for i := 0; i < maxAttempts; i++ { 179 frame, err = framer.ReadFrame() 180 if err != nil { 181 return err 182 } 183 if frame.Header().Type == http2.FrameHeaders { 184 maxAttempts = 0 185 break 186 } 187 } 188 if maxAttempts != 0 { 189 return fmt.Errorf("failed to read a http2 headers frame after %d attempts", maxAttempts) 190 } 191 192 decoder := hpack.NewDecoder(4096, nil) // max table size 4096 from http2.initialHeaderTableSize 193 headers, err := decoder.DecodeFull((frame.(*http2.HeadersFrame)).HeaderBlockFragment()) 194 if err != nil { 195 return err 196 } 197 198 var scheme string 199 var authority string 200 var path string 201 202 for _, h := range headers { 203 if h.Name == ":method" { 204 req.Method = h.Value 205 } else if h.Name == ":path" { 206 path = h.Value 207 req.RequestURI = h.Value 208 } else if h.Name == ":scheme" { 209 scheme = h.Value 210 } else if h.Name == ":authority" { 211 authority = h.Value 212 req.Host = h.Value 213 } else { 214 req.Header.Add(h.Name, h.Value) 215 } 216 } 217 218 // According to http.Request.URL docs it only contains the value of RequestURI (so path only), 219 // but we can fill in more information 220 req.URL, err = url.Parse(fmt.Sprintf("%s://%s%s", scheme, authority, path)) 221 return err 222 } 223 224 // UnmarshalCaddyfile sets up the MatchHTTP from Caddyfile tokens. Syntax: 225 // 226 // http { 227 // <matcher> [<args...>] 228 // not <matcher> [<args...>] 229 // not { 230 // <matcher> [<args...>] 231 // } 232 // } 233 // http <matcher> [<args...>] 234 // http not <matcher> [<args...>] 235 // 236 // Note: as per https://caddyserver.com/docs/json/apps/http/servers/routes/match/, 237 // matchers within a set are AND'ed together. Arguments of this http matcher constitute 238 // a single matcher set, thus no OR logic is supported. Instead, use multiple http matchers. 239 func (m *MatchHTTP) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { 240 d.Next() // consume wrapper name 241 242 matcherSet, err := caddyhttp.ParseCaddyfileNestedMatcherSet(d) 243 if err != nil { 244 return err 245 } 246 m.MatcherSetsRaw = append(m.MatcherSetsRaw, matcherSet) 247 248 return nil 249 } 250 251 // Interface guards 252 var ( 253 _ caddy.Provisioner = (*MatchHTTP)(nil) 254 _ caddyfile.Unmarshaler = (*MatchHTTP)(nil) 255 _ json.Marshaler = (*MatchHTTP)(nil) 256 _ json.Unmarshaler = (*MatchHTTP)(nil) 257 _ layer4.ConnMatcher = (*MatchHTTP)(nil) 258 )