github.com/Psiphon-Labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/common/redact.go (about) 1 /* 2 * Copyright (c) 2022, Psiphon Inc. 3 * All rights reserved. 4 * 5 * This program is free software: you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation, either version 3 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package common 21 22 import ( 23 std_errors "errors" 24 "net/url" 25 "path/filepath" 26 "regexp" 27 "strings" 28 ) 29 30 // RedactURLError transforms an error, when it is a url.Error, removing 31 // the URL value. This is to avoid logging private user data in cases 32 // where the URL may be a user input value. 33 // This function is used with errors returned by net/http and net/url, 34 // which are (currently) of type url.Error. 
// In particular, the round trip
// function used by our HttpProxy, http.Client.Do, returns errors of type
// url.Error, with the URL being the url sent from the user's tunneled
// applications:
// https://github.com/golang/go/blob/release-branch.go1.4/src/net/http/client.go#L394
//
// A url.Error anywhere in err's wrap chain is detected (via errors.As), not
// only one at the top level. In that case the returned error is a fresh
// url.Error built from the matched one with an empty URL; any outer wrapping
// is dropped, since a wrapper's message typically embeds the wrapped message
// and hence the URL. All other errors, including nil, are returned unchanged.
func RedactURLError(err error) error {
	var urlErr *url.Error
	if !std_errors.As(err, &urlErr) {
		return err
	}
	return &url.Error{
		Op:  urlErr.Op,
		URL: "",
		Err: urlErr.Err,
	}
}

// redactIPAddressAndPortRegex matches an IPv4 or IPv6 address, with optional
// brackets around an IPv6 address and an optional trailing ":port".
var redactIPAddressAndPortRegex = regexp.MustCompile(
	// IP address
	`(` +
		// IPv4
		//
		// An IPv4 address can also be represented as an unsigned integer, or with
		// octal or with hex octet values, but we do not check for any of these
		// uncommon representations as some may match non-IP values and we don't
		// expect the "net" package, etc., to emit them.

		`\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|` +

		// IPv6
		//
		// Optional brackets for IPv6 with port
		`\[?` +
		`(` +
		// Uncompressed IPv6; ensure there are 8 segments to avoid matching, e.g., a
		// timestamp
		`(([a-fA-F0-9]{1,4}:){7}[a-fA-F0-9]{1,4})|` +
		// Compressed IPv6
		`([a-fA-F0-9:]*::[a-fA-F0-9:]+)|([a-fA-F0-9:]+::[a-fA-F0-9:]*)` +
		`)` +
		// Optional mapped/translated/embedded IPv4 suffix. The leading dot is
		// escaped so that only a literal "." continues the address; an
		// unescaped "." would also consume, e.g., a space and most of a
		// following, separate IPv4 address, leaving its last octet unredacted.
		`(\.\d{1,3}\.\d{1,3}\.\d{1,3})?` +
		`\]?` +
		`)` +

		// Optional port number
		`(:\d+)?`)

// RedactIPAddresses returns a copy of the input with all IP addresses (and
// optional ports) replaced by "[redacted]". This is intended to be used to
// redact addresses from "net" package I/O error messages and otherwise avoid
// inadvertently recording direct server IPs via error message logs; and, in
// metrics, to reduce the error space due to superfluous source port data.
//
// RedactIPAddresses uses a simple regex match which liberally matches IP
// address-like patterns and will match invalid addresses; for example, it
// will match port numbers greater than 65535. We err on the side of redaction
// and are not as concerned, in this context, with false positive matches. If
// a user configures an upstream proxy address with an invalid IP or port
// value, we prefer to redact it.
//
// See the redactIPAddressAndPortRegex comment for some uncommon IP address
// representations that are not matched.
func RedactIPAddresses(b []byte) []byte {
	return redactIPAddressAndPortRegex.ReplaceAll(b, []byte("[redacted]"))
}

// RedactIPAddressesString is RedactIPAddresses for strings.
func RedactIPAddressesString(s string) string {
	return redactIPAddressAndPortRegex.ReplaceAllString(s, "[redacted]")
}

// EscapeRedactIPAddressString escapes the IP or IP:port addresses in the
// input in such a way that they won't be redacted when part of the input to
// RedactIPAddresses.
//
// The escape encoding is not guaranteed to be reversible or suitable for
// machine processing; the goal is to simply ensure the original value is
// human readable.
func EscapeRedactIPAddressString(address string) string {
	address = strings.ReplaceAll(address, ".", "\\.")
	address = strings.ReplaceAll(address, ":", "\\:")
	return address
}

// redactFilePathRegex matches any run of non-space characters that contains
// at least one "/" path separator.
var redactFilePathRegex = regexp.MustCompile(
	// File path
	`(` +
		// Leading characters
		`[^ ]*` +
		// At least one path separator
		`/` +
		// Path component; take until next space
		`[^ ]*` +
		`)+`)

// RedactFilePaths returns a copy of the input with all file paths
// replaced by "[redacted]". First any occurrences of the provided file paths
// are replaced and then an attempt is made to replace any other file paths by
// searching with a heuristic. The latter is a best effort attempt; it is not
// guaranteed that it will catch every file path.
//
// Empty entries in filePaths are ignored.
func RedactFilePaths(s string, filePaths ...string) string {
	for _, filePath := range filePaths {
		if filePath == "" {
			// strings.ReplaceAll with an empty old string inserts the
			// replacement around every character of s, which would destroy
			// the message rather than redact a path.
			continue
		}
		s = strings.ReplaceAll(s, filePath, "[redacted]")
	}
	// Normalize OS-specific separators to "/" so the heuristic regex, which
	// only looks for "/", also applies to paths using the OS separator.
	return redactFilePathRegex.ReplaceAllLiteralString(filepath.ToSlash(s), "[redacted]")
}

// RedactFilePathsError is RedactFilePaths for errors. A nil err is returned
// as nil. For a non-nil err, the result is a new error carrying only the
// redacted message; the original error's type and wrap chain are not
// preserved.
func RedactFilePathsError(err error, filePaths ...string) error {
	if err == nil {
		return nil
	}
	return std_errors.New(RedactFilePaths(err.Error(), filePaths...))
}

// RedactNetError removes network address information from a "net" package
// error message. Addresses may be domains or IP addresses.
//
// Everything before the first ": " in the message is replaced with
// "[redacted]". A nil error, or an error whose message contains no ": ", is
// returned unchanged.
//
// Limitations: some non-address error context can be lost; this function
// makes assumptions about how the Go "net" package error messages are
// formatted and will fail to redact network addresses if these assumptions
// become untrue.
func RedactNetError(err error) error {

	// Example "net" package error messages:
	//
	// - lookup <domain>: no such host
	// - lookup <domain>: No address associated with hostname
	// - dial tcp <address>: connectex: No connection could be made because the target machine actively refused it
	// - write tcp <address>-><address>: write: connection refused

	if err == nil {
		return nil
	}

	errstr := err.Error()
	index := strings.Index(errstr, ": ")
	if index == -1 {
		return err
	}

	return std_errors.New("[redacted]" + errstr[index:])
}