| //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements a glob pattern matcher. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/Support/GlobPattern.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/Optional.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/Support/Errc.h" |
| |
| using namespace llvm; |
| |
| static bool hasWildcard(StringRef S) { |
| return S.find_first_of("?*[\\") != StringRef::npos; |
| } |
| |
| // Expands character ranges and returns a bitmap. |
| // For example, "a-cf-hz" is expanded to "abcfghz". |
| static Expected<BitVector> expand(StringRef S, StringRef Original) { |
| BitVector BV(256, false); |
| |
| // Expand X-Y. |
| for (;;) { |
| if (S.size() < 3) |
| break; |
| |
| uint8_t Start = S[0]; |
| uint8_t End = S[2]; |
| |
| // If it doesn't start with something like X-Y, |
| // consume the first character and proceed. |
| if (S[1] != '-') { |
| BV[Start] = true; |
| S = S.substr(1); |
| continue; |
| } |
| |
| // It must be in the form of X-Y. |
| // Validate it and then interpret the range. |
| if (Start > End) |
| return make_error<StringError>("invalid glob pattern: " + Original, |
| errc::invalid_argument); |
| |
| for (int C = Start; C <= End; ++C) |
| BV[(uint8_t)C] = true; |
| S = S.substr(3); |
| } |
| |
| for (char C : S) |
| BV[(uint8_t)C] = true; |
| return BV; |
| } |
| |
| // This is a scanner for the glob pattern. |
| // A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]" |
| // (which is a negative form of "[<chars>]"), "[!<chars>]" (which is |
| // equivalent to "[^<chars>]"), or a non-meta character. |
| // This function returns the first token in S. |
| static Expected<BitVector> scan(StringRef &S, StringRef Original) { |
| switch (S[0]) { |
| case '*': |
| S = S.substr(1); |
| // '*' is represented by an empty bitvector. |
| // All other bitvectors are 256-bit long. |
| return BitVector(); |
| case '?': |
| S = S.substr(1); |
| return BitVector(256, true); |
| case '[': { |
| // ']' is allowed as the first character of a character class. '[]' is |
| // invalid. So, just skip the first character. |
| size_t End = S.find(']', 2); |
| if (End == StringRef::npos) |
| return make_error<StringError>("invalid glob pattern: " + Original, |
| errc::invalid_argument); |
| |
| StringRef Chars = S.substr(1, End - 1); |
| S = S.substr(End + 1); |
| if (Chars.startswith("^") || Chars.startswith("!")) { |
| Expected<BitVector> BV = expand(Chars.substr(1), Original); |
| if (!BV) |
| return BV.takeError(); |
| return BV->flip(); |
| } |
| return expand(Chars, Original); |
| } |
| case '\\': |
| // Eat this character and fall through below to treat it like a non-meta |
| // character. |
| S = S.substr(1); |
| LLVM_FALLTHROUGH; |
| default: |
| BitVector BV(256, false); |
| BV[(uint8_t)S[0]] = true; |
| S = S.substr(1); |
| return BV; |
| } |
| } |
| |
| Expected<GlobPattern> GlobPattern::create(StringRef S) { |
| GlobPattern Pat; |
| |
| // S doesn't contain any metacharacter, |
| // so the regular string comparison should work. |
| if (!hasWildcard(S)) { |
| Pat.Exact = S; |
| return Pat; |
| } |
| |
| // S is something like "foo*", and the "* is not escaped. We can use |
| // startswith(). |
| if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) { |
| Pat.Prefix = S.drop_back(); |
| return Pat; |
| } |
| |
| // S is something like "*foo". We can use endswith(). |
| if (S.startswith("*") && !hasWildcard(S.drop_front())) { |
| Pat.Suffix = S.drop_front(); |
| return Pat; |
| } |
| |
| // Otherwise, we need to do real glob pattern matching. |
| // Parse the pattern now. |
| StringRef Original = S; |
| while (!S.empty()) { |
| Expected<BitVector> BV = scan(S, Original); |
| if (!BV) |
| return BV.takeError(); |
| Pat.Tokens.push_back(*BV); |
| } |
| return Pat; |
| } |
| |
| bool GlobPattern::match(StringRef S) const { |
| if (Exact) |
| return S == *Exact; |
| if (Prefix) |
| return S.startswith(*Prefix); |
| if (Suffix) |
| return S.endswith(*Suffix); |
| return matchOne(Tokens, S); |
| } |
| |
| // Runs glob pattern Pats against string S. |
| bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const { |
| for (;;) { |
| if (Pats.empty()) |
| return S.empty(); |
| |
| // If Pats[0] is '*', try to match Pats[1..] against all possible |
| // tail strings of S to see at least one pattern succeeds. |
| if (Pats[0].size() == 0) { |
| Pats = Pats.slice(1); |
| if (Pats.empty()) |
| // Fast path. If a pattern is '*', it matches anything. |
| return true; |
| for (size_t I = 0, E = S.size(); I < E; ++I) |
| if (matchOne(Pats, S.substr(I))) |
| return true; |
| return false; |
| } |
| |
| // If Pats[0] is not '*', it must consume one character. |
| if (S.empty() || !Pats[0][(uint8_t)S[0]]) |
| return false; |
| Pats = Pats.slice(1); |
| S = S.substr(1); |
| } |
| } |