github.com/afumu/libc@v0.0.6/musl/src/regex/fnmatch.c (about)

     1  /*
     2   * An implementation of what I call the "Sea of Stars" algorithm for
     3   * POSIX fnmatch(). The basic idea is that we factor the pattern into
     4   * a head component (which we match first and can reject without ever
     5   * measuring the length of the string), an optional tail component
     6   * (which only exists if the pattern contains at least one star), and
     7   * an optional "sea of stars", a set of star-separated components
     8   * between the head and tail. After the head and tail matches have
     9   * been removed from the input string, the components in the "sea of
    10   * stars" are matched sequentially by searching for their first
    11   * occurrence past the end of the previous match.
    12   *
    13   * - Rich Felker, April 2012
    14   */
    15  
    16  #include <string.h>
    17  #include <fnmatch.h>
    18  #include <stdlib.h>
    19  #include <wchar.h>
    20  #include <wctype.h>
    21  #include "locale_impl.h"
    22  
    23  #define END 0
    24  #define UNMATCHABLE -2
    25  #define BRACKET -3
    26  #define QUESTION -4
    27  #define STAR -5
    28  
    29  static int str_next(const char *str, size_t n, size_t *step)
    30  {
    31  	if (!n) {
    32  		*step = 0;
    33  		return 0;
    34  	}
    35  	if (str[0] >= 128U) {
    36  		wchar_t wc;
    37  		int k = mbtowc(&wc, str, n);
    38  		if (k<0) {
    39  			*step = 1;
    40  			return -1;
    41  		}
    42  		*step = k;
    43  		return wc;
    44  	}
    45  	*step = 1;
    46  	return str[0];
    47  }
    48  
    49  static int pat_next(const char *pat, size_t m, size_t *step, int flags)
    50  {
    51  	int esc = 0;
    52  	if (!m || !*pat) {
    53  		*step = 0;
    54  		return END;
    55  	}
    56  	*step = 1;
    57  	if (pat[0]=='\\' && pat[1] && !(flags & FNM_NOESCAPE)) {
    58  		*step = 2;
    59  		pat++;
    60  		esc = 1;
    61  		goto escaped;
    62  	}
    63  	if (pat[0]=='[') {
    64  		size_t k = 1;
    65  		if (k<m) if (pat[k] == '^' || pat[k] == '!') k++;
    66  		if (k<m) if (pat[k] == ']') k++;
    67  		for (; k<m && pat[k] && pat[k]!=']'; k++) {
    68  			if (k+1<m && pat[k+1] && pat[k]=='[' && (pat[k+1]==':' || pat[k+1]=='.' || pat[k+1]=='=')) {
    69  				int z = pat[k+1];
    70  				k+=2;
    71  				if (k<m && pat[k]) k++;
    72  				while (k<m && pat[k] && (pat[k-1]!=z || pat[k]!=']')) k++;
    73  				if (k==m || !pat[k]) break;
    74  			}
    75  		}
    76  		if (k==m || !pat[k]) {
    77  			*step = 1;
    78  			return '[';
    79  		}
    80  		*step = k+1;
    81  		return BRACKET;
    82  	}
    83  	if (pat[0] == '*')
    84  		return STAR;
    85  	if (pat[0] == '?')
    86  		return QUESTION;
    87  escaped:
    88  	if (pat[0] >= 128U) {
    89  		wchar_t wc;
    90  		int k = mbtowc(&wc, pat, m);
    91  		if (k<0) {
    92  			*step = 0;
    93  			return UNMATCHABLE;
    94  		}
    95  		*step = k + esc;
    96  		return wc;
    97  	}
    98  	return pat[0];
    99  }
   100  
   101  static int casefold(int k)
   102  {
   103  	int c = towupper(k);
   104  	return c == k ? towlower(k) : c;
   105  }
   106  
   107  static int match_bracket(const char *p, int k, int kfold)
   108  {
   109  	wchar_t wc;
   110  	int inv = 0;
   111  	p++;
   112  	if (*p=='^' || *p=='!') {
   113  		inv = 1;
   114  		p++;
   115  	}
   116  	if (*p==']') {
   117  		if (k==']') return !inv;
   118  		p++;
   119  	} else if (*p=='-') {
   120  		if (k=='-') return !inv;
   121  		p++;
   122  	}
   123  	wc = p[-1];
   124  	for (; *p != ']'; p++) {
   125  		if (p[0]=='-' && p[1]!=']') {
   126  			wchar_t wc2;
   127  			int l = mbtowc(&wc2, p+1, 4);
   128  			if (l < 0) return 0;
   129  			if (wc <= wc2)
   130  				if ((unsigned)k-wc <= wc2-wc ||
   131  				    (unsigned)kfold-wc <= wc2-wc)
   132  					return !inv;
   133  			p += l-1;
   134  			continue;
   135  		}
   136  		if (p[0]=='[' && (p[1]==':' || p[1]=='.' || p[1]=='=')) {
   137  			const char *p0 = p+2;
   138  			int z = p[1];
   139  			p+=3;
   140  			while (p[-1]!=z || p[0]!=']') p++;
   141  			if (z == ':' && p-1-p0 < 16) {
   142  				char buf[16];
   143  				memcpy(buf, p0, p-1-p0);
   144  				buf[p-1-p0] = 0;
   145  				if (iswctype(k, wctype(buf)) ||
   146  				    iswctype(kfold, wctype(buf)))
   147  					return !inv;
   148  			}
   149  			continue;
   150  		}
   151  		if (*p < 128U) {
   152  			wc = (unsigned char)*p;
   153  		} else {
   154  			int l = mbtowc(&wc, p, 4);
   155  			if (l < 0) return 0;
   156  			p += l-1;
   157  		}
   158  		if (wc==k || wc==kfold) return !inv;
   159  	}
   160  	return inv;
   161  }
   162  
   163  static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n, int flags)
   164  {
   165  	const char *p, *ptail, *endpat;
   166  	const char *s, *stail, *endstr;
   167  	size_t pinc, sinc, tailcnt=0;
   168  	int c, k, kfold;
   169  
   170  	if (flags & FNM_PERIOD) {
   171  		if (*str == '.' && *pat != '.')
   172  			return FNM_NOMATCH;
   173  	}
   174  	for (;;) {
   175  		switch ((c = pat_next(pat, m, &pinc, flags))) {
   176  		case UNMATCHABLE:
   177  			return FNM_NOMATCH;
   178  		case STAR:
   179  			pat++;
   180  			m--;
   181  			break;
   182  		default:
   183  			k = str_next(str, n, &sinc);
   184  			if (k <= 0)
   185  				return (c==END) ? 0 : FNM_NOMATCH;
   186  			str += sinc;
   187  			n -= sinc;
   188  			kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
   189  			if (c == BRACKET) {
   190  				if (!match_bracket(pat, k, kfold))
   191  					return FNM_NOMATCH;
   192  			} else if (c != QUESTION && k != c && kfold != c) {
   193  				return FNM_NOMATCH;
   194  			}
   195  			pat+=pinc;
   196  			m-=pinc;
   197  			continue;
   198  		}
   199  		break;
   200  	}
   201  
   202  	/* Compute real pat length if it was initially unknown/-1 */
   203  	m = strnlen(pat, m);
   204  	endpat = pat + m;
   205  
   206  	/* Find the last * in pat and count chars needed after it */
   207  	for (p=ptail=pat; p<endpat; p+=pinc) {
   208  		switch (pat_next(p, endpat-p, &pinc, flags)) {
   209  		case UNMATCHABLE:
   210  			return FNM_NOMATCH;
   211  		case STAR:
   212  			tailcnt=0;
   213  			ptail = p+1;
   214  			break;
   215  		default:
   216  			tailcnt++;
   217  			break;
   218  		}
   219  	}
   220  
   221  	/* Past this point we need not check for UNMATCHABLE in pat,
   222  	 * because all of pat has already been parsed once. */
   223  
   224  	/* Compute real str length if it was initially unknown/-1 */
   225  	n = strnlen(str, n);
   226  	endstr = str + n;
   227  	if (n < tailcnt) return FNM_NOMATCH;
   228  
   229  	/* Find the final tailcnt chars of str, accounting for UTF-8.
   230  	 * On illegal sequences we may get it wrong, but in that case
   231  	 * we necessarily have a matching failure anyway. */
   232  	for (s=endstr; s>str && tailcnt; tailcnt--) {
   233  		if (s[-1] < 128U || MB_CUR_MAX==1) s--;
   234  		else while ((unsigned char)*--s-0x80U<0x40 && s>str);
   235  	}
   236  	if (tailcnt) return FNM_NOMATCH;
   237  	stail = s;
   238  
   239  	/* Check that the pat and str tails match */
   240  	p = ptail;
   241  	for (;;) {
   242  		c = pat_next(p, endpat-p, &pinc, flags);
   243  		p += pinc;
   244  		if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
   245  			if (c != END) return FNM_NOMATCH;
   246  			break;
   247  		}
   248  		s += sinc;
   249  		kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
   250  		if (c == BRACKET) {
   251  			if (!match_bracket(p-pinc, k, kfold))
   252  				return FNM_NOMATCH;
   253  		} else if (c != QUESTION && k != c && kfold != c) {
   254  			return FNM_NOMATCH;
   255  		}
   256  	}
   257  
   258  	/* We're all done with the tails now, so throw them out */
   259  	endstr = stail;
   260  	endpat = ptail;
   261  
   262  	/* Match pattern components until there are none left */
   263  	while (pat<endpat) {
   264  		p = pat;
   265  		s = str;
   266  		for (;;) {
   267  			c = pat_next(p, endpat-p, &pinc, flags);
   268  			p += pinc;
   269  			/* Encountering * completes/commits a component */
   270  			if (c == STAR) {
   271  				pat = p;
   272  				str = s;
   273  				break;
   274  			}
   275  			k = str_next(s, endstr-s, &sinc);
   276  			if (!k)
   277  				return FNM_NOMATCH;
   278  			kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
   279  			if (c == BRACKET) {
   280  				if (!match_bracket(p-pinc, k, kfold))
   281  					break;
   282  			} else if (c != QUESTION && k != c && kfold != c) {
   283  				break;
   284  			}
   285  			s += sinc;
   286  		}
   287  		if (c == STAR) continue;
   288  		/* If we failed, advance str, by 1 char if it's a valid
   289  		 * char, or past all invalid bytes otherwise. */
   290  		k = str_next(str, endstr-str, &sinc);
   291  		if (k > 0) str += sinc;
   292  		else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
   293  	}
   294  
   295  	return 0;
   296  }
   297  
   298  int fnmatch(const char *pat, const char *str, int flags)
   299  {
   300  	const char *s, *p;
   301  	size_t inc;
   302  	int c;
   303  	if (flags & FNM_PATHNAME) for (;;) {
   304  		for (s=str; *s && *s!='/'; s++);
   305  		for (p=pat; (c=pat_next(p, -1, &inc, flags))!=END && c!='/'; p+=inc);
   306  		if (c!=*s && (!*s || !(flags & FNM_LEADING_DIR)))
   307  			return FNM_NOMATCH;
   308  		if (fnmatch_internal(pat, p-pat, str, s-str, flags))
   309  			return FNM_NOMATCH;
   310  		if (!c) return 0;
   311  		str = s+1;
   312  		pat = p+inc;
   313  	} else if (flags & FNM_LEADING_DIR) {
   314  		for (s=str; *s; s++) {
   315  			if (*s != '/') continue;
   316  			if (!fnmatch_internal(pat, -1, str, s-str, flags))
   317  				return 0;
   318  		}
   319  	}
   320  	return fnmatch_internal(pat, -1, str, -1, flags);
   321  }