github.com/TeaOSLab/EdgeNode@v1.3.8/internal/nodes/http_request_utils.go (about) 1 package nodes 2 3 import ( 4 "crypto/rand" 5 "fmt" 6 teaconst "github.com/TeaOSLab/EdgeNode/internal/const" 7 "github.com/TeaOSLab/EdgeNode/internal/utils/fasttime" 8 "github.com/TeaOSLab/EdgeNode/internal/utils/ranges" 9 "github.com/iwind/TeaGo/types" 10 "io" 11 "net/http" 12 "net/url" 13 "regexp" 14 "strconv" 15 "strings" 16 "sync/atomic" 17 ) 18 19 // 搜索引擎和爬虫正则 20 var searchEngineRegex = regexp.MustCompile(`(?i)(60spider|adldxbot|adsbot-google|applebot|admantx|alexa|baidu|bingbot|bingpreview|facebookexternalhit|googlebot|proximic|slurp|sogou|twitterbot|yandex)`) 21 var spiderRegexp = regexp.MustCompile(`(?i)(python|pycurl|http-client|httpclient|apachebench|nethttp|http_request|java|perl|ruby|scrapy|php|rust)`) 22 23 // 内容范围正则,其中的每个括号里的内容都在被引用,不能轻易修改 24 var contentRangeRegexp = regexp.MustCompile(`^bytes (\d+)-(\d+)/(\d+|\*)`) 25 26 // URL协议前缀 27 var urlSchemeRegexp = regexp.MustCompile("^(?i)(http|https|ftp)://") 28 29 // 分解Range 30 func httpRequestParseRangeHeader(rangeValue string) (result []rangeutils.Range, ok bool) { 31 // 参考RFC:https://tools.ietf.org/html/rfc7233 32 index := strings.Index(rangeValue, "=") 33 if index == -1 { 34 return 35 } 36 unit := rangeValue[:index] 37 if unit != "bytes" { 38 return 39 } 40 41 var rangeSetString = rangeValue[index+1:] 42 if len(rangeSetString) == 0 { 43 ok = true 44 return 45 } 46 47 var pieces = strings.Split(rangeSetString, ", ") 48 for _, piece := range pieces { 49 index = strings.Index(piece, "-") 50 if index == -1 { 51 return 52 } 53 first := piece[:index] 54 firstInt := int64(-1) 55 56 var err error 57 last := piece[index+1:] 58 var lastInt = int64(-1) 59 60 if len(first) > 0 { 61 firstInt, err = strconv.ParseInt(first, 10, 64) 62 if err != nil { 63 return 64 } 65 66 if len(last) > 0 { 67 lastInt, err = strconv.ParseInt(last, 10, 64) 68 if err != nil { 69 return 70 } 71 if lastInt < firstInt { 72 return 73 } 74 } 75 } else { 76 if len(last) == 0 { 77 return 78 } 79 80 lastInt, err = strconv.ParseInt(last, 10, 64) 81 if err != nil { 82 return 83 } 84 lastInt = -lastInt 85 } 86 87 result = append(result, [2]int64{firstInt, lastInt}) 88 } 89 90 ok = true 91 return 92 } 93 94 // 读取内容Range 95 func httpRequestReadRange(reader io.Reader, buf []byte, start int64, end int64, callback func(buf []byte, n int) error) (ok bool, err error) { 96 if start < 0 || end < 0 { 97 return 98 } 99 seeker, ok := reader.(io.Seeker) 100 if !ok { 101 return 102 } 103 _, err = seeker.Seek(start, io.SeekStart) 104 if err != nil { 105 return false, nil 106 } 107 108 offset := start 109 for { 110 n, err := reader.Read(buf) 111 if n > 0 { 112 offset += int64(n) 113 if end < offset { 114 err = callback(buf, n-int(offset-end-1)) 115 if err != nil { 116 return false, err 117 } 118 return true, nil 119 } else { 120 err = callback(buf, n) 121 if err != nil { 122 return false, err 123 } 124 } 125 } 126 127 if err != nil { 128 if err == io.EOF { 129 return true, nil 130 } 131 return false, err 132 } 133 } 134 } 135 136 // 分解Content-Range 137 func httpRequestParseContentRangeHeader(contentRange string) (start int64, total int64) { 138 var matches = contentRangeRegexp.FindStringSubmatch(contentRange) 139 if len(matches) < 4 { 140 return -1, -1 141 } 142 143 start = types.Int64(matches[1]) 144 var sizeString = matches[3] 145 if sizeString != "*" { 146 total = types.Int64(sizeString) 147 } 148 return 149 } 150 151 // 生成boundary 152 // 仿照Golang自带的函数(multipart包) 153 func httpRequestGenBoundary() string { 154 var buf [8]byte 155 _, err := io.ReadFull(rand.Reader, buf[:]) 156 if err != nil { 157 panic(err) 158 } 159 return fmt.Sprintf("%x", buf[:]) 160 } 161 162 // 从content-type中读取boundary 163 func httpRequestParseBoundary(contentType string) string { 164 var delim = "boundary=" 165 var boundaryIndex = strings.Index(contentType, delim) 166 if boundaryIndex < 0 { 167 return "" 168 } 169 var boundary = contentType[boundaryIndex+len(delim):] 170 semicolonIndex := strings.Index(boundary, ";") 171 if semicolonIndex >= 0 { 172 return boundary[:semicolonIndex] 173 } 174 return boundary 175 } 176 177 // 判断状态是否为跳转 178 func httpStatusIsRedirect(statusCode int) bool { 179 return statusCode == http.StatusPermanentRedirect || 180 statusCode == http.StatusTemporaryRedirect || 181 statusCode == http.StatusMovedPermanently || 182 statusCode == http.StatusSeeOther || 183 statusCode == http.StatusFound 184 } 185 186 // 生成请求ID 187 var httpRequestTimestamp int64 188 var httpRequestId int32 = 1_000_000 189 190 func httpRequestNextId() string { 191 unixTime, unixTimeString := fasttime.Now().UnixMilliString() 192 if unixTime > httpRequestTimestamp { 193 atomic.StoreInt32(&httpRequestId, 1_000_000) 194 httpRequestTimestamp = unixTime 195 } 196 197 // timestamp + nodeId + requestId 198 return unixTimeString + teaconst.NodeIdString + strconv.Itoa(int(atomic.AddInt32(&httpRequestId, 1))) 199 } 200 201 // 检查是否可以接受某个编码 202 func httpAcceptEncoding(acceptEncodings string, encoding string) bool { 203 if len(acceptEncodings) == 0 { 204 return false 205 } 206 var pieces = strings.Split(acceptEncodings, ",") 207 for _, piece := range pieces { 208 var qualityIndex = strings.Index(piece, ";") 209 if qualityIndex >= 0 { 210 piece = piece[:qualityIndex] 211 } 212 213 if strings.TrimSpace(piece) == encoding { 214 return true 215 } 216 } 217 return false 218 } 219 220 // 跳转到某个URL 221 func httpRedirect(writer http.ResponseWriter, req *http.Request, url string, code int) { 222 if len(writer.Header().Get("Content-Type")) == 0 { 223 // 设置Content-Type,是为了让页面不输出链接 224 writer.Header().Set("Content-Type", "text/html; charset=utf-8") 225 } 226 227 http.Redirect(writer, req, url, code) 228 } 229 230 // 分析URL中的Host部分 231 func httpParseHost(urlString string) (host string, err error) { 232 if !urlSchemeRegexp.MatchString(urlString) { 233 urlString = "https://" + urlString 234 } 235 236 u, err := url.Parse(urlString) 237 if err != nil && u != nil { 238 return "", err 239 } 240 return u.Host, nil 241 }