github.com/gitbundle/modules@v0.0.0-20231025071548-85b91c5c3b01/util/sanitize.go (about)

     1  // Copyright 2023 The GitBundle Inc. All rights reserved.
     2  // Copyright 2017 The Gitea Authors. All rights reserved.
     3  // Use of this source code is governed by a MIT-style
     4  // license that can be found in the LICENSE file.
     5  
     6  package util
     7  
     8  import (
     9  	"bytes"
    10  	"unicode"
    11  
    12  	"github.com/yuin/goldmark/util"
    13  )
    14  
    15  type sanitizedError struct {
    16  	err error
    17  }
    18  
    19  func (err sanitizedError) Error() string {
    20  	return SanitizeCredentialURLs(err.err.Error())
    21  }
    22  
    23  func (err sanitizedError) Unwrap() error {
    24  	return err.err
    25  }
    26  
    27  // SanitizeErrorCredentialURLs wraps the error and make sure the returned error message doesn't contain sensitive credentials in URLs
    28  func SanitizeErrorCredentialURLs(err error) error {
    29  	return sanitizedError{err: err}
    30  }
    31  
    32  const userPlaceholder = "sanitized-credential"
    33  
    34  var schemeSep = []byte("://")
    35  
    36  // SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"
    37  func SanitizeCredentialURLs(s string) string {
    38  	bs := util.StringToReadOnlyBytes(s)
    39  	schemeSepPos := bytes.Index(bs, schemeSep)
    40  	if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 {
    41  		return s // fast return if there is no URL scheme or no userinfo
    42  	}
    43  	out := make([]byte, 0, len(bs)+len(userPlaceholder))
    44  	for schemeSepPos != -1 {
    45  		schemeSepPos += 3         // skip the "://"
    46  		sepAtPos := -1            // the possible '@' position: "https://foo@[^here]host"
    47  		sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"
    48  	sepLoop:
    49  		for ; sepEndPos < len(bs); sepEndPos++ {
    50  			c := bs[sepEndPos]
    51  			if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') {
    52  				continue
    53  			}
    54  			switch c {
    55  			case '@':
    56  				sepAtPos = sepEndPos
    57  			case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%':
    58  				continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
    59  			default:
    60  				break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
    61  			}
    62  		}
    63  		// if there is '@', and the string is like "s://u@h", then hide the "u" part
    64  		if sepAtPos != -1 && (schemeSepPos >= 4 && unicode.IsLetter(rune(bs[schemeSepPos-4]))) && sepAtPos-schemeSepPos > 0 && sepEndPos-sepAtPos > 0 {
    65  			out = append(out, bs[:schemeSepPos]...)
    66  			out = append(out, userPlaceholder...)
    67  			out = append(out, bs[sepAtPos:sepEndPos]...)
    68  		} else {
    69  			out = append(out, bs[:sepEndPos]...)
    70  		}
    71  		bs = bs[sepEndPos:]
    72  		schemeSepPos = bytes.Index(bs, schemeSep)
    73  	}
    74  	out = append(out, bs...)
    75  	return util.BytesToReadOnlyString(out)
    76  }