github.com/mholt/caddy-l4@v0.0.0-20241104153248-ec8fae209322/modules/l4http/httpmatcher.go (about)

     1  // Copyright 2020 Matthew Holt
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package l4http
    16  
    17  import (
    18  	"bufio"
    19  	"bytes"
    20  	"encoding/json"
    21  	"fmt"
    22  	"io"
    23  	"net/http"
    24  	"net/url"
    25  
    26  	"github.com/caddyserver/caddy/v2"
    27  	"github.com/caddyserver/caddy/v2/caddyconfig/caddyfile"
    28  	"github.com/caddyserver/caddy/v2/modules/caddyhttp"
    29  	"golang.org/x/net/http2"
    30  	"golang.org/x/net/http2/hpack"
    31  
    32  	"github.com/mholt/caddy-l4/layer4"
    33  	"github.com/mholt/caddy-l4/modules/l4tls"
    34  )
    35  
    36  func init() {
    37  	caddy.RegisterModule(&MatchHTTP{})
    38  }
    39  
// MatchHTTP is able to match HTTP connections. The auto-generated
// documentation for this type is wrong; instead of an object, it
// is [an array of matcher set objects](https://caddyserver.com/docs/json/apps/http/servers/routes/match/).
type MatchHTTP struct {
	// MatcherSetsRaw holds the configured matcher sets in their raw form.
	// It is hidden from the default struct encoding (`json:"-"`) because
	// UnmarshalJSON and MarshalJSON (de)serialize it as the entire value of
	// this type. The caddy tag names the namespace its modules belong to.
	MatcherSetsRaw caddyhttp.RawMatcherSets `json:"-" caddy:"namespace=http.matchers"`
	// matcherSets is filled in by Provision from the modules loaded for
	// MatcherSetsRaw, and is what Match evaluates parsed requests against.
	matcherSets    caddyhttp.MatcherSets
}
    47  
    48  // CaddyModule returns the Caddy module information.
    49  func (*MatchHTTP) CaddyModule() caddy.ModuleInfo {
    50  	return caddy.ModuleInfo{
    51  		ID:  "layer4.matchers.http",
    52  		New: func() caddy.Module { return new(MatchHTTP) },
    53  	}
    54  }
    55  
    56  // UnmarshalJSON satisfies the json.Unmarshaler interface.
    57  func (m *MatchHTTP) UnmarshalJSON(b []byte) error {
    58  	return json.Unmarshal(b, &m.MatcherSetsRaw)
    59  }
    60  
    61  // MarshalJSON satisfies the json.Marshaler interface.
    62  func (m *MatchHTTP) MarshalJSON() ([]byte, error) {
    63  	return json.Marshal(m.MatcherSetsRaw)
    64  }
    65  
    66  // Provision sets up the handler.
    67  func (m *MatchHTTP) Provision(ctx caddy.Context) error {
    68  	matchersIface, err := ctx.LoadModule(m, "MatcherSetsRaw")
    69  	if err != nil {
    70  		return fmt.Errorf("loading matcher modules: %v", err)
    71  	}
    72  	err = m.matcherSets.FromInterface(matchersIface)
    73  	if err != nil {
    74  		return err
    75  	}
    76  	return nil
    77  }
    78  
    79  // Match returns true if the conn starts with an HTTP request.
    80  func (m *MatchHTTP) Match(cx *layer4.Connection) (bool, error) {
    81  	// TODO: do we need a more standardized way to amortize matchers? or at least to remember decoded results from previous matchers?
    82  	req, ok := cx.GetVar("http_request").(*http.Request)
    83  	if !ok {
    84  		var err error
    85  
    86  		data := cx.MatchingBytes()
    87  		needMore, matched := m.isHttp(data)
    88  		if needMore {
    89  			if len(data) >= layer4.MaxMatchingBytes {
    90  				return false, layer4.ErrMatchingBufferFull
    91  			}
    92  			return false, layer4.ErrConsumedAllPrefetchedBytes
    93  		}
    94  		if !matched {
    95  			return false, nil
    96  		}
    97  
    98  		// use bufio reader which exactly matches the size of prefetched data,
    99  		// to not trigger all bytes consumed error
   100  		bufReader := bufio.NewReaderSize(cx, len(data))
   101  		req, err = http.ReadRequest(bufReader)
   102  		if err != nil {
   103  			return false, err
   104  		}
   105  
   106  		// check if req is a http2 request made with prior knowledge and if so parse it
   107  		err = m.handleHttp2WithPriorKnowledge(bufReader, req)
   108  		if err != nil {
   109  			return false, err
   110  		}
   111  
   112  		// if the tls handler was used before fill in the TLS field of the request
   113  		// with the last aka innermost tls connection state
   114  		if connectionStates := l4tls.GetConnectionStates(cx); len(connectionStates) > 0 {
   115  			req.TLS = connectionStates[len(connectionStates)-1]
   116  		}
   117  
   118  		// in order to use request matchers, we have to populate the request context
   119  		req = caddyhttp.PrepareRequest(req, caddy.NewReplacer(), nil, nil)
   120  
   121  		// remember this for future use
   122  		cx.SetVar("http_request", req)
   123  
   124  		// also add values to the replacer (TODO: we could probably find a way to use the http app's replacer values)
   125  		repl := cx.Context.Value(layer4.ReplacerCtxKey).(*caddy.Replacer)
   126  		repl.Set("l4.http.host", req.Host)
   127  	}
   128  
   129  	// we have a valid HTTP request, so we can drill down further if there are
   130  	// any more matchers configured
   131  	return m.matcherSets.AnyMatch(req), nil
   132  }
   133  
   134  // isHttp test if the buffered data looks like HTTP by looking at the first line.
   135  // first boolean determines if more data is required
   136  func (m MatchHTTP) isHttp(data []byte) (bool, bool) {
   137  	// try to find the end of a http request line, for example " HTTP/1.1\r\n"
   138  	i := bytes.IndexByte(data, 0x0a) // find first new line
   139  	if i < 10 {
   140  		return true, false
   141  	}
   142  	// assume only \n line ending
   143  	start := i - 9 // position of space in front of HTTP
   144  	end := i - 3   // cut off version number "1.1" or "2.0"
   145  	// if we got a correct \r\n line ending shift the calculated start & end to the left
   146  	if data[i-1] == 0x0d {
   147  		start -= 1
   148  		end -= 1
   149  	}
   150  	return false, bytes.Compare(data[start:end], []byte(" HTTP/")) == 0
   151  }
   152  
   153  // Parses information from a http2 request with prior knowledge (RFC 7540 Section 3.4)
   154  func (m *MatchHTTP) handleHttp2WithPriorKnowledge(reader io.Reader, req *http.Request) error {
   155  	// Does req contain a valid http2 magic?
   156  	// https://github.com/golang/net/blob/a630d4f3e7a22f21271532b4b88e1693824a838f/http2/h2c/h2c.go#L74
   157  	if req.Method != "PRI" || len(req.Header) != 0 || req.URL.Path != "*" || req.Proto != "HTTP/2.0" {
   158  		return nil
   159  	}
   160  
   161  	const expectedBody = "SM\r\n\r\n"
   162  
   163  	body := make([]byte, len(expectedBody))
   164  	n, err := io.ReadFull(reader, body)
   165  	if err != nil {
   166  		return err
   167  	}
   168  
   169  	if string(body[:n]) != expectedBody {
   170  		return nil
   171  	}
   172  
   173  	framer := http2.NewFramer(io.Discard, reader)
   174  
   175  	// read the first 10 frames until we get a headers frame (skipping settings, window update & priority frames)
   176  	var frame http2.Frame
   177  	maxAttempts := 10
   178  	for i := 0; i < maxAttempts; i++ {
   179  		frame, err = framer.ReadFrame()
   180  		if err != nil {
   181  			return err
   182  		}
   183  		if frame.Header().Type == http2.FrameHeaders {
   184  			maxAttempts = 0
   185  			break
   186  		}
   187  	}
   188  	if maxAttempts != 0 {
   189  		return fmt.Errorf("failed to read a http2 headers frame after %d attempts", maxAttempts)
   190  	}
   191  
   192  	decoder := hpack.NewDecoder(4096, nil) // max table size 4096 from http2.initialHeaderTableSize
   193  	headers, err := decoder.DecodeFull((frame.(*http2.HeadersFrame)).HeaderBlockFragment())
   194  	if err != nil {
   195  		return err
   196  	}
   197  
   198  	var scheme string
   199  	var authority string
   200  	var path string
   201  
   202  	for _, h := range headers {
   203  		if h.Name == ":method" {
   204  			req.Method = h.Value
   205  		} else if h.Name == ":path" {
   206  			path = h.Value
   207  			req.RequestURI = h.Value
   208  		} else if h.Name == ":scheme" {
   209  			scheme = h.Value
   210  		} else if h.Name == ":authority" {
   211  			authority = h.Value
   212  			req.Host = h.Value
   213  		} else {
   214  			req.Header.Add(h.Name, h.Value)
   215  		}
   216  	}
   217  
   218  	// According to http.Request.URL docs it only contains the value of RequestURI (so path only),
   219  	// but we can fill in more information
   220  	req.URL, err = url.Parse(fmt.Sprintf("%s://%s%s", scheme, authority, path))
   221  	return err
   222  }
   223  
   224  // UnmarshalCaddyfile sets up the MatchHTTP from Caddyfile tokens. Syntax:
   225  //
   226  //	http {
   227  //		<matcher> [<args...>]
   228  //		not <matcher> [<args...>]
   229  //		not {
   230  //			<matcher> [<args...>]
   231  //		}
   232  //	}
   233  //	http <matcher> [<args...>]
   234  //	http not <matcher> [<args...>]
   235  //
   236  // Note: as per https://caddyserver.com/docs/json/apps/http/servers/routes/match/,
   237  // matchers within a set are AND'ed together. Arguments of this http matcher constitute
   238  // a single matcher set, thus no OR logic is supported. Instead, use multiple http matchers.
   239  func (m *MatchHTTP) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
   240  	d.Next() // consume wrapper name
   241  
   242  	matcherSet, err := caddyhttp.ParseCaddyfileNestedMatcherSet(d)
   243  	if err != nil {
   244  		return err
   245  	}
   246  	m.MatcherSetsRaw = append(m.MatcherSetsRaw, matcherSet)
   247  
   248  	return nil
   249  }
   250  
// Interface guards: compile-time assertions that *MatchHTTP implements
// every interface listed below. Each line assigns a nil *MatchHTTP to the
// blank identifier, so the build fails if a required method is missing.
var (
	_ caddy.Provisioner     = (*MatchHTTP)(nil)
	_ caddyfile.Unmarshaler = (*MatchHTTP)(nil)
	_ json.Marshaler        = (*MatchHTTP)(nil)
	_ json.Unmarshaler      = (*MatchHTTP)(nil)
	_ layer4.ConnMatcher    = (*MatchHTTP)(nil)
)