github.com/psiphon-labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/common/redact.go (about)

     1  /*
     2   * Copyright (c) 2022, Psiphon Inc.
     3   * All rights reserved.
     4   *
     5   * This program is free software: you can redistribute it and/or modify
     6   * it under the terms of the GNU General Public License as published by
     7   * the Free Software Foundation, either version 3 of the License, or
     8   * (at your option) any later version.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  package common
    21  
    22  import (
    23  	std_errors "errors"
    24  	"net/url"
    25  	"path/filepath"
    26  	"regexp"
    27  	"strings"
    28  )
    29  
    30  // RedactURLError transforms an error, when it is a url.Error, removing
    31  // the URL value. This is to avoid logging private user data in cases
    32  // where the URL may be a user input value.
    33  // This function is used with errors returned by net/http and net/url,
    34  // which are (currently) of type url.Error. In particular, the round trip
    35  // function used by our HttpProxy, http.Client.Do, returns errors of type
    36  // url.Error, with the URL being the url sent from the user's tunneled
    37  // applications:
    38  // https://github.com/golang/go/blob/release-branch.go1.4/src/net/http/client.go#L394
    39  func RedactURLError(err error) error {
    40  	if urlErr, ok := err.(*url.Error); ok {
    41  		err = &url.Error{
    42  			Op:  urlErr.Op,
    43  			URL: "",
    44  			Err: urlErr.Err,
    45  		}
    46  	}
    47  	return err
    48  }
    49  
    50  var redactIPAddressAndPortRegex = regexp.MustCompile(
    51  	// IP address
    52  	`(` +
    53  		// IPv4
    54  		//
    55  		// An IPv4 address can also be represented as an unsigned integer, or with
    56  		// octal or with hex octet values, but we do not check for any of these
    57  		// uncommon representations as some may match non-IP values and we don't
    58  		// expect the "net" package, etc., to emit them.)
    59  
    60  		`\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|` +
    61  
    62  		// IPv6
    63  		//
    64  		// Optional brackets for IPv6 with port
    65  		`\[?` +
    66  		`(` +
    67  		// Uncompressed IPv6; ensure there are 8 segments to avoid matching, e.g., a
    68  		// timestamp
    69  		`(([a-fA-F0-9]{1,4}:){7}[a-fA-F0-9]{1,4})|` +
    70  		// Compressed IPv6
    71  		`([a-fA-F0-9:]*::[a-fA-F0-9:]+)|([a-fA-F0-9:]+::[a-fA-F0-9:]*)` +
    72  		`)` +
    73  		// Optional mapped/translated/embeded IPv4 suffix
    74  		`(.\d{1,3}\.\d{1,3}\.\d{1,3})?` +
    75  		`\]?` +
    76  		`)` +
    77  
    78  		// Optional port number
    79  		`(:\d+)?`)
    80  
    81  // RedactIPAddresses returns a copy of the input with all IP addresses (and
    82  // optional ports) replaced by "[redacted]". This is intended to be used to
    83  // redact addresses from "net" package I/O error messages and otherwise avoid
    84  // inadvertently recording direct server IPs via error message logs; and, in
    85  // metrics, to reduce the error space due to superfluous source port data.
    86  //
    87  // RedactIPAddresses uses a simple regex match which liberally matches IP
    88  // address-like patterns and will match invalid addresses; for example, it
    89  // will match port numbers greater than 65535. We err on the side of redaction
    90  // and are not as concerned, in this context, with false positive matches. If
    91  // a user configures an upstream proxy address with an invalid IP or port
    92  // value, we prefer to redact it.
    93  //
    94  // See the redactIPAddressAndPortRegex comment for some uncommon IP address
    95  // representations that are not matched.
    96  func RedactIPAddresses(b []byte) []byte {
    97  	return redactIPAddressAndPortRegex.ReplaceAll(b, []byte("[redacted]"))
    98  }
    99  
   100  // RedactIPAddressesString is RedactIPAddresses for strings.
   101  func RedactIPAddressesString(s string) string {
   102  	return redactIPAddressAndPortRegex.ReplaceAllString(s, "[redacted]")
   103  }
   104  
   105  // EscapeRedactIPAddressString escapes the IP or IP:port addresses in the
   106  // input in such a way that they won't be redacted when part of the input to
   107  // RedactIPAddresses.
   108  //
   109  // The escape encoding is not guaranteed to be reversable or suitable for
   110  // machine processing; the goal is to simply ensure the original value is
   111  // human readable.
   112  func EscapeRedactIPAddressString(address string) string {
   113  	address = strings.ReplaceAll(address, ".", "\\.")
   114  	address = strings.ReplaceAll(address, ":", "\\:")
   115  	return address
   116  }
   117  
   118  var redactFilePathRegex = regexp.MustCompile(
   119  	// File path
   120  	`(` +
   121  		// Leading characters
   122  		`[^ ]*` +
   123  		// At least one path separator
   124  		`/` +
   125  		// Path component; take until next space
   126  		`[^ ]*` +
   127  		`)+`)
   128  
   129  // RedactFilePaths returns a copy of the input with all file paths
   130  // replaced by "[redacted]". First any occurrences of the provided file paths
   131  // are replaced and then an attempt is made to replace any other file paths by
   132  // searching with a heuristic. The latter is a best effort attempt it is not
   133  // guaranteed that it will catch every file path.
   134  func RedactFilePaths(s string, filePaths ...string) string {
   135  	for _, filePath := range filePaths {
   136  		s = strings.ReplaceAll(s, filePath, "[redacted]")
   137  	}
   138  	return redactFilePathRegex.ReplaceAllLiteralString(filepath.ToSlash(s), "[redacted]")
   139  }
   140  
   141  // RedactFilePathsError is RedactFilePaths for errors.
   142  func RedactFilePathsError(err error, filePaths ...string) error {
   143  	return std_errors.New(RedactFilePaths(err.Error(), filePaths...))
   144  }
   145  
   146  // RedactNetError removes network address information from a "net" package
   147  // error message. Addresses may be domains or IP addresses.
   148  //
   149  // Limitations: some non-address error context can be lost; this function
   150  // makes assumptions about how the Go "net" package error messages are
   151  // formatted and will fail to redact network addresses if this assumptions
   152  // become untrue.
   153  func RedactNetError(err error) error {
   154  
   155  	// Example "net" package error messages:
   156  	//
   157  	// - lookup <domain>: no such host
   158  	// - lookup <domain>: No address associated with hostname
   159  	// - dial tcp <address>: connectex: No connection could be made because the target machine actively refused it
   160  	// - write tcp <address>-><address>: write: connection refused
   161  
   162  	if err == nil {
   163  		return err
   164  	}
   165  
   166  	errstr := err.Error()
   167  	index := strings.Index(errstr, ": ")
   168  	if index == -1 {
   169  		return err
   170  	}
   171  
   172  	return std_errors.New("[redacted]" + errstr[index:])
   173  }