code.gitea.io/gitea@v1.19.3/modules/util/sanitize.go (about)

     1  // Copyright 2021 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package util
     5  
     6  import (
     7  	"bytes"
     8  	"unicode"
     9  
    10  	"github.com/yuin/goldmark/util"
    11  )
    12  
    13  type sanitizedError struct {
    14  	err error
    15  }
    16  
    17  func (err sanitizedError) Error() string {
    18  	return SanitizeCredentialURLs(err.err.Error())
    19  }
    20  
    21  func (err sanitizedError) Unwrap() error {
    22  	return err.err
    23  }
    24  
    25  // SanitizeErrorCredentialURLs wraps the error and make sure the returned error message doesn't contain sensitive credentials in URLs
    26  func SanitizeErrorCredentialURLs(err error) error {
    27  	return sanitizedError{err: err}
    28  }
    29  
// userPlaceholder is the text SanitizeCredentialURLs writes in place of the
// userinfo part of a URL.
const userPlaceholder = "sanitized-credential"

// schemeSep is the byte sequence SanitizeCredentialURLs searches for to locate
// URLs in its input.
var schemeSep = []byte("://")
    33  
    34  // SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"
    35  func SanitizeCredentialURLs(s string) string {
    36  	bs := util.StringToReadOnlyBytes(s)
    37  	schemeSepPos := bytes.Index(bs, schemeSep)
    38  	if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 {
    39  		return s // fast return if there is no URL scheme or no userinfo
    40  	}
    41  	out := make([]byte, 0, len(bs)+len(userPlaceholder))
    42  	for schemeSepPos != -1 {
    43  		schemeSepPos += 3         // skip the "://"
    44  		sepAtPos := -1            // the possible '@' position: "https://foo@[^here]host"
    45  		sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"
    46  	sepLoop:
    47  		for ; sepEndPos < len(bs); sepEndPos++ {
    48  			c := bs[sepEndPos]
    49  			if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') {
    50  				continue
    51  			}
    52  			switch c {
    53  			case '@':
    54  				sepAtPos = sepEndPos
    55  			case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%':
    56  				continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
    57  			default:
    58  				break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
    59  			}
    60  		}
    61  		// if there is '@', and the string is like "s://u@h", then hide the "u" part
    62  		if sepAtPos != -1 && (schemeSepPos >= 4 && unicode.IsLetter(rune(bs[schemeSepPos-4]))) && sepAtPos-schemeSepPos > 0 && sepEndPos-sepAtPos > 0 {
    63  			out = append(out, bs[:schemeSepPos]...)
    64  			out = append(out, userPlaceholder...)
    65  			out = append(out, bs[sepAtPos:sepEndPos]...)
    66  		} else {
    67  			out = append(out, bs[:sepEndPos]...)
    68  		}
    69  		bs = bs[sepEndPos:]
    70  		schemeSepPos = bytes.Index(bs, schemeSep)
    71  	}
    72  	out = append(out, bs...)
    73  	return util.BytesToReadOnlyString(out)
    74  }