github.com/TeaOSLab/EdgeNode@v1.3.8/internal/nodes/http_request_utils.go (about)

     1  package nodes
     2  
     3  import (
     4  	"crypto/rand"
     5  	"fmt"
     6  	teaconst "github.com/TeaOSLab/EdgeNode/internal/const"
     7  	"github.com/TeaOSLab/EdgeNode/internal/utils/fasttime"
     8  	"github.com/TeaOSLab/EdgeNode/internal/utils/ranges"
     9  	"github.com/iwind/TeaGo/types"
    10  	"io"
    11  	"net/http"
    12  	"net/url"
    13  	"regexp"
    14  	"strconv"
    15  	"strings"
    16  	"sync/atomic"
    17  )
    18  
    19  // 搜索引擎和爬虫正则
    20  var searchEngineRegex = regexp.MustCompile(`(?i)(60spider|adldxbot|adsbot-google|applebot|admantx|alexa|baidu|bingbot|bingpreview|facebookexternalhit|googlebot|proximic|slurp|sogou|twitterbot|yandex)`)
    21  var spiderRegexp = regexp.MustCompile(`(?i)(python|pycurl|http-client|httpclient|apachebench|nethttp|http_request|java|perl|ruby|scrapy|php|rust)`)
    22  
    23  // 内容范围正则,其中的每个括号里的内容都在被引用,不能轻易修改
    24  var contentRangeRegexp = regexp.MustCompile(`^bytes (\d+)-(\d+)/(\d+|\*)`)
    25  
    26  // URL协议前缀
    27  var urlSchemeRegexp = regexp.MustCompile("^(?i)(http|https|ftp)://")
    28  
    29  // 分解Range
    30  func httpRequestParseRangeHeader(rangeValue string) (result []rangeutils.Range, ok bool) {
    31  	// 参考RFC:https://tools.ietf.org/html/rfc7233
    32  	index := strings.Index(rangeValue, "=")
    33  	if index == -1 {
    34  		return
    35  	}
    36  	unit := rangeValue[:index]
    37  	if unit != "bytes" {
    38  		return
    39  	}
    40  
    41  	var rangeSetString = rangeValue[index+1:]
    42  	if len(rangeSetString) == 0 {
    43  		ok = true
    44  		return
    45  	}
    46  
    47  	var pieces = strings.Split(rangeSetString, ", ")
    48  	for _, piece := range pieces {
    49  		index = strings.Index(piece, "-")
    50  		if index == -1 {
    51  			return
    52  		}
    53  		first := piece[:index]
    54  		firstInt := int64(-1)
    55  
    56  		var err error
    57  		last := piece[index+1:]
    58  		var lastInt = int64(-1)
    59  
    60  		if len(first) > 0 {
    61  			firstInt, err = strconv.ParseInt(first, 10, 64)
    62  			if err != nil {
    63  				return
    64  			}
    65  
    66  			if len(last) > 0 {
    67  				lastInt, err = strconv.ParseInt(last, 10, 64)
    68  				if err != nil {
    69  					return
    70  				}
    71  				if lastInt < firstInt {
    72  					return
    73  				}
    74  			}
    75  		} else {
    76  			if len(last) == 0 {
    77  				return
    78  			}
    79  
    80  			lastInt, err = strconv.ParseInt(last, 10, 64)
    81  			if err != nil {
    82  				return
    83  			}
    84  			lastInt = -lastInt
    85  		}
    86  
    87  		result = append(result, [2]int64{firstInt, lastInt})
    88  	}
    89  
    90  	ok = true
    91  	return
    92  }
    93  
    94  // 读取内容Range
    95  func httpRequestReadRange(reader io.Reader, buf []byte, start int64, end int64, callback func(buf []byte, n int) error) (ok bool, err error) {
    96  	if start < 0 || end < 0 {
    97  		return
    98  	}
    99  	seeker, ok := reader.(io.Seeker)
   100  	if !ok {
   101  		return
   102  	}
   103  	_, err = seeker.Seek(start, io.SeekStart)
   104  	if err != nil {
   105  		return false, nil
   106  	}
   107  
   108  	offset := start
   109  	for {
   110  		n, err := reader.Read(buf)
   111  		if n > 0 {
   112  			offset += int64(n)
   113  			if end < offset {
   114  				err = callback(buf, n-int(offset-end-1))
   115  				if err != nil {
   116  					return false, err
   117  				}
   118  				return true, nil
   119  			} else {
   120  				err = callback(buf, n)
   121  				if err != nil {
   122  					return false, err
   123  				}
   124  			}
   125  		}
   126  
   127  		if err != nil {
   128  			if err == io.EOF {
   129  				return true, nil
   130  			}
   131  			return false, err
   132  		}
   133  	}
   134  }
   135  
   136  // 分解Content-Range
   137  func httpRequestParseContentRangeHeader(contentRange string) (start int64, total int64) {
   138  	var matches = contentRangeRegexp.FindStringSubmatch(contentRange)
   139  	if len(matches) < 4 {
   140  		return -1, -1
   141  	}
   142  
   143  	start = types.Int64(matches[1])
   144  	var sizeString = matches[3]
   145  	if sizeString != "*" {
   146  		total = types.Int64(sizeString)
   147  	}
   148  	return
   149  }
   150  
   151  // 生成boundary
   152  // 仿照Golang自带的函数(multipart包)
   153  func httpRequestGenBoundary() string {
   154  	var buf [8]byte
   155  	_, err := io.ReadFull(rand.Reader, buf[:])
   156  	if err != nil {
   157  		panic(err)
   158  	}
   159  	return fmt.Sprintf("%x", buf[:])
   160  }
   161  
   162  // 从content-type中读取boundary
   163  func httpRequestParseBoundary(contentType string) string {
   164  	var delim = "boundary="
   165  	var boundaryIndex = strings.Index(contentType, delim)
   166  	if boundaryIndex < 0 {
   167  		return ""
   168  	}
   169  	var boundary = contentType[boundaryIndex+len(delim):]
   170  	semicolonIndex := strings.Index(boundary, ";")
   171  	if semicolonIndex >= 0 {
   172  		return boundary[:semicolonIndex]
   173  	}
   174  	return boundary
   175  }
   176  
   177  // 判断状态是否为跳转
   178  func httpStatusIsRedirect(statusCode int) bool {
   179  	return statusCode == http.StatusPermanentRedirect ||
   180  		statusCode == http.StatusTemporaryRedirect ||
   181  		statusCode == http.StatusMovedPermanently ||
   182  		statusCode == http.StatusSeeOther ||
   183  		statusCode == http.StatusFound
   184  }
   185  
   186  // 生成请求ID
   187  var httpRequestTimestamp int64
   188  var httpRequestId int32 = 1_000_000
   189  
   190  func httpRequestNextId() string {
   191  	unixTime, unixTimeString := fasttime.Now().UnixMilliString()
   192  	if unixTime > httpRequestTimestamp {
   193  		atomic.StoreInt32(&httpRequestId, 1_000_000)
   194  		httpRequestTimestamp = unixTime
   195  	}
   196  
   197  	// timestamp + nodeId + requestId
   198  	return unixTimeString + teaconst.NodeIdString + strconv.Itoa(int(atomic.AddInt32(&httpRequestId, 1)))
   199  }
   200  
   201  // 检查是否可以接受某个编码
   202  func httpAcceptEncoding(acceptEncodings string, encoding string) bool {
   203  	if len(acceptEncodings) == 0 {
   204  		return false
   205  	}
   206  	var pieces = strings.Split(acceptEncodings, ",")
   207  	for _, piece := range pieces {
   208  		var qualityIndex = strings.Index(piece, ";")
   209  		if qualityIndex >= 0 {
   210  			piece = piece[:qualityIndex]
   211  		}
   212  
   213  		if strings.TrimSpace(piece) == encoding {
   214  			return true
   215  		}
   216  	}
   217  	return false
   218  }
   219  
   220  // 跳转到某个URL
   221  func httpRedirect(writer http.ResponseWriter, req *http.Request, url string, code int) {
   222  	if len(writer.Header().Get("Content-Type")) == 0 {
   223  		// 设置Content-Type,是为了让页面不输出链接
   224  		writer.Header().Set("Content-Type", "text/html; charset=utf-8")
   225  	}
   226  
   227  	http.Redirect(writer, req, url, code)
   228  }
   229  
   230  // 分析URL中的Host部分
   231  func httpParseHost(urlString string) (host string, err error) {
   232  	if !urlSchemeRegexp.MatchString(urlString) {
   233  		urlString = "https://" + urlString
   234  	}
   235  
   236  	u, err := url.Parse(urlString)
   237  	if err != nil && u != nil {
   238  		return "", err
   239  	}
   240  	return u.Host, nil
   241  }