"""Filename globbing utility."""
| 2 | |
| 3 | import os |
| 4 | import re |
| 5 | import fnmatch |
| 6 | import sys |
| 7 | |
| 8 | __all__ = ["glob", "iglob", "escape"] |
| 9 | |
def glob(pathname, *, recursive=False):
    """Collect every path matching *pathname* into a list.

    *pathname* may use the simple shell-style wildcards understood by
    fnmatch.  Unlike fnmatch, however, names beginning with a dot are
    treated specially: '*' and '?' patterns do not match them.

    When *recursive* is true, the pattern '**' matches any files and zero
    or more directories and subdirectories.

    This is the eager counterpart of iglob(): the iterator it returns is
    drained into a list before returning.
    """
    matches = iglob(pathname, recursive=recursive)
    return list(matches)
| 22 | |
def iglob(pathname, *, recursive=False):
    """Return an iterator over the paths matching *pathname*.

    *pathname* may use the simple shell-style wildcards understood by
    fnmatch.  Unlike fnmatch, however, names beginning with a dot are
    treated specially: '*' and '?' patterns do not match them.

    When *recursive* is true, the pattern '**' matches any files and zero
    or more directories and subdirectories.

    A "glob.glob" audit event is raised with *pathname* and *recursive*
    before any matching takes place.
    """
    sys.audit("glob.glob", pathname, recursive)
    matches = _iglob(pathname, recursive, False)
    if recursive and _isrecursive(pathname):
        # A bare '**' pattern makes the recursive helper emit an empty
        # string first (standing for the starting directory itself); drop
        # it so callers never see it.
        leading = next(matches)
        assert not leading
    return matches
| 40 | |
def _iglob(pathname, recursive, dironly):
    """Yield the paths matching *pathname* (generator behind iglob()).

    The pattern is split into a directory part and a final component.  The
    directory part is expanded first (recursively, when it contains magic
    characters itself) and the final component is then matched inside each
    resulting directory.

    *recursive* enables the special meaning of a '**' component.
    *dironly* is forwarded to the per-directory helpers; it is set to true
    when expanding the directory part of a longer pattern.
    """
    dirname, basename = os.path.split(pathname)

    # No wildcard anywhere: the pattern names at most one path.
    if not has_magic(pathname):
        assert not dironly
        if basename:
            exists = os.path.lexists(pathname)
        else:
            # Patterns ending with a slash should match only directories
            exists = os.path.isdir(dirname)
        if exists:
            yield pathname
        return

    # Wildcards but no directory part: match inside the current directory
    # and yield the bare names.
    if not dirname:
        if recursive and _isrecursive(basename):
            finder = _glob2
        else:
            finder = _glob1
        yield from finder(dirname, basename, dironly)
        return

    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if dirname != pathname and has_magic(dirname):
        parents = _iglob(dirname, recursive, True)
    else:
        parents = [dirname]

    # Choose how the final component is matched inside each parent.
    if not has_magic(basename):
        glob_in_dir = _glob0
    elif recursive and _isrecursive(basename):
        glob_in_dir = _glob2
    else:
        glob_in_dir = _glob1

    for parent in parents:
        for name in glob_in_dir(parent, basename, dironly):
            yield os.path.join(parent, name)
| 76 | |
# These two helper functions glob non-recursively inside a literal directory
# and return a list of basenames.  _glob1 accepts a pattern, while _glob0
# takes a literal basename (so it only has to check that it exists).
| 80 | |
def _glob1(dirname, pattern, dironly):
    """Return the basenames inside *dirname* that match *pattern*.

    The directory is listed through _iterdir(), so *dironly* restricts the
    candidates to directories.  Hidden names (leading dot) are discarded
    unless *pattern* itself starts with a dot.  Matching is delegated to
    fnmatch.filter().
    """
    candidates = list(_iterdir(dirname, dironly))
    if not _ishidden(pattern):
        candidates = [name for name in candidates if not _ishidden(name)]
    return fnmatch.filter(candidates, pattern)
| 86 | |
def _glob0(dirname, basename, dironly):
    """Return ``[basename]`` if the literal name exists in *dirname*, else ``[]``.

    *basename* contains no wildcards, so only an existence check is needed.
    *dironly* is accepted for signature parity with the other per-directory
    helpers and is not consulted here.
    """
    if basename:
        # lexists() also reports broken symbolic links as present.
        found = os.path.lexists(os.path.join(dirname, basename))
    else:
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator.  'q*x/' should match only directories.
        found = os.path.isdir(dirname)
    return [basename] if found else []
| 97 | |
# The following functions are not public, but third-party code may use them.
| 99 | |
def glob0(dirname, pattern):
    """Check for the literal name *pattern* inside *dirname*.

    Thin wrapper around _glob0() with directory-only filtering disabled.
    """
    return _glob0(dirname, pattern, dironly=False)
| 102 | |
def glob1(dirname, pattern):
    """Return the basenames inside *dirname* that match *pattern*.

    Thin wrapper around _glob1() with directory-only filtering disabled.
    """
    return _glob1(dirname, pattern, dironly=False)
| 105 | |
# This helper function handles the recursive '**' pattern: it yields
# pathnames, relative to a literal directory, for everything beneath it.

def _glob2(dirname, pattern, dironly):
    """Yield relative pathnames for a '**' component inside *dirname*.

    The first value is always an empty string of the same type as
    *pattern* (str or bytes), standing for *dirname* itself; the remaining
    values come from _rlistdir().
    """
    assert _isrecursive(pattern)
    # Slicing keeps the str/bytes type of the pattern.
    yield pattern[:0]
    for relpath in _rlistdir(dirname, dironly):
        yield relpath
| 113 | |
# Yields the names of the entries inside a directory: every entry when
# dironly is false, only directories when dironly is true.
def _iterdir(dirname, dironly):
    """Yield entry names found directly inside *dirname*.

    An empty *dirname* stands for the current directory, keeping the
    str/bytes type of the argument.  Any OSError raised while opening or
    scanning the directory, or while inspecting an entry, is swallowed:
    an unreadable directory simply yields nothing.
    """
    if not dirname:
        dirname = (os.curdir.encode('ASCII')
                   if isinstance(dirname, bytes) else os.curdir)
    try:
        with os.scandir(dirname) as entries:
            for entry in entries:
                try:
                    if dironly and not entry.is_dir():
                        continue
                    yield entry.name
                except OSError:
                    pass
    except OSError:
        return
| 132 | |
# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dironly):
    """Yield every non-hidden pathname below *dirname*, relative to it.

    Each visible entry is yielded first, followed by the entries found
    beneath it (joined onto its name).  Hidden entries are skipped and
    never descended into.  Descending into a non-directory is harmless
    because _iterdir() yields nothing when the scan fails.
    """
    # The listing is materialised before iterating, so the directory scan
    # is finished before recursion starts.
    for name in list(_iterdir(dirname, dironly)):
        if _ishidden(name):
            continue
        yield name
        child = os.path.join(dirname, name) if dirname else name
        for relpath in _rlistdir(child, dironly):
            yield os.path.join(name, relpath)
| 142 | |
| 143 | |
# Matches any one of the glob metacharacters '*', '?' and '['; one compiled
# pattern for str input and one for bytes input.
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')

def has_magic(s):
    """Return True if *s* (str or bytes) contains '*', '?' or '['."""
    checker = magic_check_bytes if isinstance(s, bytes) else magic_check
    return checker.search(s) is not None
| 153 | |
def _ishidden(path):
    """Return True if *path* names a hidden entry, i.e. starts with a dot.

    *path* may be a str or bytes basename.  An empty name is reported as
    not hidden rather than raising IndexError, so helpers such as glob1()
    can safely be handed an empty pattern.
    """
    # Slice instead of indexing so the empty name is tolerated, and pick the
    # dot of the matching type so str and bytes are never compared with each
    # other (which would emit BytesWarning under ``python -b``).
    dot = '.' if isinstance(path, str) else b'.'
    return path[:1] == dot
| 156 | |
def _isrecursive(pattern):
    """Return True if *pattern* is exactly the recursive wildcard '**'.

    Works for both str and bytes patterns; the comparison literal is
    chosen to match the type of *pattern*.
    """
    wildcard = b'**' if isinstance(pattern, bytes) else '**'
    return pattern == wildcard
| 162 | |
def escape(pathname):
    """Escape all special characters in *pathname*.

    Each of '*', '?' and '[' is wrapped in square brackets so that it is
    matched literally.  Accepts str or bytes and returns the same type.
    """
    # Metacharacters do not work in the drive part and shouldn't be escaped,
    # so the drive is split off and re-attached untouched.
    drive, rest = os.path.splitdrive(pathname)
    if isinstance(rest, bytes):
        escaped = magic_check_bytes.sub(br'[\1]', rest)
    else:
        escaped = magic_check.sub(r'[\1]', rest)
    return drive + escaped