blob: 0eb1802bdb53c5d8594e06a08cfe4d0e5f8f6a94 [file] [log] [blame]
"""Filename matching with shell patterns.

fnmatch(FILENAME, PATTERN) matches according to the local convention.
fnmatchcase(FILENAME, PATTERN) always takes case into account.

The functions operate by translating the pattern into a regular
expression.  They cache the compiled regular expressions for speed.

The function translate(PATTERN) returns a regular expression
corresponding to PATTERN.  (It does not compile it.)
"""
12import os
13import posixpath
14import re
15import functools
16
__all__ = [
    "filter",
    "fnmatch",
    "fnmatchcase",
    "translate",
]

# Regexp group names must stay unique across calls, so build a
# thread-safe incrementing counter to number them.  The helper module
# is dropped again afterwards so that only the bound method remains in
# this module's namespace.
import itertools as _itertools
_nextgroupnum = _itertools.count().__next__
del _itertools
24
def fnmatch(name, pat):
    """Report whether FILENAME matches the shell-style PATTERN.

    Recognized wildcards:

    *       matches everything
    ?       matches any single character
    [seq]   matches any character in seq
    [!seq]  matches any char not in seq

    A leading period in FILENAME gets no special treatment.
    FILENAME and PATTERN are both passed through os.path.normcase()
    first, so they are case-normalized if the operating system requires
    it.  Use fnmatchcase(FILENAME, PATTERN) to skip that normalization.
    """
    return fnmatchcase(os.path.normcase(name), os.path.normcase(pat))
43
@functools.lru_cache(maxsize=256, typed=True)
def _compile_pattern(pat):
    """Return the bound ``match`` method of the regexp compiled from PAT.

    PAT may be str or bytes.  translate() is only ever handed a str, so
    a bytes pattern is decoded as ISO-8859-1 before translation and the
    resulting expression is encoded back the same way before compiling.
    Results are memoized by the lru_cache decorator.
    """
    if not isinstance(pat, bytes):
        regex = translate(pat)
    else:
        decoded = str(pat, 'ISO-8859-1')
        regex = bytes(translate(decoded), 'ISO-8859-1')
    compiled = re.compile(regex)
    return compiled.match
53
def filter(names, pat):
    """Return a new list of the entries of NAMES that match PAT.

    PAT is case-normalized with os.path.normcase() before it is
    compiled.  Matching entries are returned exactly as they appeared in
    NAMES, in their original order.
    """
    matcher = _compile_pattern(os.path.normcase(pat))
    if os.path is posixpath:
        # normcase on posix is a no-op, so leave it out of the loop.
        return [candidate for candidate in names if matcher(candidate)]
    normalize = os.path.normcase
    return [candidate for candidate in names if matcher(normalize(candidate))]
69
def fnmatchcase(name, pat):
    """Report whether FILENAME matches PATTERN, respecting case.

    Unlike fnmatch(), neither argument is case-normalized before the
    comparison.
    """
    return _compile_pattern(pat)(name) is not None
78
79
def translate(pat):
    """Translate a shell PATTERN to a regular expression.

    There is no way to quote meta-characters.

    PAT is scanned left to right:

    *       (any run of consecutive stars) matches any sequence of
            characters
    ?       matches exactly one character
    [seq]   becomes a regular-expression character set; a leading '!'
            negates it, and a ']' placed directly after the opening
            bracket (or after the '!') is a literal member of the set
    [       with no closing ']' is matched literally

    Every other character is passed through re.escape().

    A range whose upper bound sorts below its lower bound (for example
    [c-a]) is treated as an empty range rather than being copied into
    the regular expression, where it would make re.compile() fail.  A
    set left with no members never matches; a negated set left with no
    members matches any single character.

    Returns the regular expression as a string (it is not compiled).
    """

    # Unique marker standing in for "one or more consecutive `*`"; the
    # stars are expanded in a second pass below.
    STAR = object()
    res = []
    add = res.append
    i, n = 0, len(pat)
    while i < n:
        c = pat[i]
        i = i+1
        if c == '*':
            # compress consecutive `*` into one
            if (not res) or res[-1] is not STAR:
                add(STAR)
        elif c == '?':
            add('.')
        elif c == '[':
            # Look for the closing bracket.  A `!` and/or a `]` right
            # after the opening bracket belong to the set itself.
            j = i
            if j < n and pat[j] == '!':
                j = j+1
            if j < n and pat[j] == ']':
                j = j+1
            while j < n and pat[j] != ']':
                j = j+1
            if j >= n:
                # No closing bracket: the `[` is an ordinary character.
                add('\\[')
            else:
                stuff = pat[i:j]
                if '-' not in stuff:
                    stuff = stuff.replace('\\', r'\\')
                else:
                    # Split the set at every hyphen that forms a range,
                    # so that chunks[k-1][-1] and chunks[k][0] are the
                    # lower and upper bound of one range.
                    chunks = []
                    k = i+2 if pat[i] == '!' else i+1
                    while True:
                        k = pat.find('-', k, j)
                        if k < 0:
                            break
                        chunks.append(pat[i:k])
                        i = k+1
                        # The next range hyphen can be no closer than
                        # three characters further on.
                        k = k+3
                    chunk = pat[i:j]
                    if chunk:
                        chunks.append(chunk)
                    else:
                        # The set ends in a hyphen: it is a literal.
                        chunks[-1] += '-'
                    # Remove empty ranges -- invalid in RE.
                    for k in range(len(chunks)-1, 0, -1):
                        if chunks[k-1][-1] > chunks[k][0]:
                            chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
                            del chunks[k]
                    # Escape backslashes and hyphens for set difference (--).
                    # Hyphens that create ranges shouldn't be escaped.
                    stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
                                     for s in chunks)
                # Escape set operations (&&, ~~ and ||).
                stuff = re.sub(r'([&~|])', r'\\\1', stuff)
                i = j+1
                if not stuff:
                    # Empty range: never match.
                    add('(?!)')
                elif stuff == '!':
                    # Negated empty range: match any character.
                    add('.')
                else:
                    if stuff[0] == '!':
                        stuff = '^' + stuff[1:]
                    elif stuff[0] in ('^', '['):
                        stuff = '\\' + stuff
                    add(f'[{stuff}]')
        else:
            add(re.escape(c))
    assert i == n

    # Deal with STARs.
    inp = res
    res = []
    add = res.append
    i, n = 0, len(inp)
    # Fixed pieces at the start?
    while i < n and inp[i] is not STAR:
        add(inp[i])
        i += 1
    # Now deal with STAR fixed STAR fixed ...
    # For an interior `STAR fixed` pairing, we want to do a minimal
    # .*? match followed by `fixed`, with no possibility of backtracking.
    # We can't spell that directly, but can trick it into working by matching
    #    .*?fixed
    # in a lookahead assertion, save the matched part in a group, then
    # consume that group via a backreference. If the overall match fails,
    # the lookahead assertion won't try alternatives. So the translation is:
    #     (?=(?P<name>.*?fixed))(?P=name)
    # Group names are created as needed: g0, g1, g2, ...
    # The numbers are obtained from _nextgroupnum() to ensure they're unique
    # across calls and across threads. This is because people rely on the
    # undocumented ability to join multiple translate() results together via
    # "|" to build large regexps matching "one of many" shell patterns.
    while i < n:
        assert inp[i] is STAR
        i += 1
        if i == n:
            # Trailing star: swallow the rest of the name.
            add(".*")
            break
        assert inp[i] is not STAR
        fixed = []
        while i < n and inp[i] is not STAR:
            fixed.append(inp[i])
            i += 1
        fixed = "".join(fixed)
        if i == n:
            # Last star in the pattern: a plain greedy match is enough.
            add(".*")
            add(fixed)
        else:
            groupnum = _nextgroupnum()
            add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
    assert i == n
    res = "".join(res)
    return fr'(?s:{res})\Z'