blob: ecb4e5a8f7072c52effe4606b7406190a2dd7787 [file] [log] [blame]
"""Common operations on Posix pathnames.

Instead of importing this module directly, import os and refer to
this module as os.path.  The "os.path" name is an alias for this
module on Posix systems; on other systems (e.g. Windows),
os.path provides the same operations in a manner specific to that
platform, and is an alias to another module (e.g. ntpath).

Some of this can actually be useful on non-Posix systems too, e.g.
for manipulation of the pathname component of URLs.
"""
12
# Path-related string constants.
# They exist mainly so that other code can import them; the functions in
# this module hardcode the equivalent literals instead of reading these.
# They should be assigned before the imports below to resolve a cyclic
# import dependency.
curdir = "."
pardir = ".."
extsep = "."
sep = "/"
pathsep = ":"
defpath = "/bin:/usr/bin"
altsep = None
devnull = "/dev/null"
24
25import os
26import sys
27import stat
28import genericpath
29from genericpath import *
30
31__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
32 "basename","dirname","commonprefix","getsize","getmtime",
33 "getatime","getctime","islink","exists","lexists","isdir","isfile",
34 "ismount", "expanduser","expandvars","normpath","abspath",
35 "samefile","sameopenfile","samestat",
36 "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
37 "devnull","realpath","supports_unicode_filenames","relpath",
38 "commonpath"]
39
40
41def _get_sep(path):
42 if isinstance(path, bytes):
43 return b'/'
44 else:
45 return '/'
46
# Normalize the case of a pathname.  On Posix there is nothing to do, so
# the path is only converted through os.fspath().  Other normalizations
# (such as optimizing '../' away) are deliberately not done here.

def normcase(s):
    """Normalize case of pathname.  Has no effect under Posix"""
    unchanged = os.fspath(s)
    return unchanged
55
56
# Return whether a path is absolute.
# On Posix a path is absolute exactly when it begins with a slash.

def isabs(s):
    """Test whether a path is absolute"""
    s = os.fspath(s)
    return s.startswith(_get_sep(s))
65
66
# Join pathnames.
# An absolute component discards everything joined so far.
# A '/' is inserted between components unless the accumulated path is
# empty or already ends in '/'.

def join(a, *p):
    """Join two or more pathname components, inserting '/' as needed.
    If any component is an absolute path, all previous path components
    will be discarded.  An empty last part will result in a path that
    ends with a separator."""
    a = os.fspath(a)
    sep = _get_sep(a)
    result = a
    try:
        if not p:
            # Issue 23780: with no extra components, still exercise the
            # concatenation so an incompatible type is reported.
            result[:0] + sep
        for part in map(os.fspath, p):
            if part.startswith(sep):
                # Absolute component: restart from it.
                result = part
            elif result and not result.endswith(sep):
                result += sep + part
            else:
                result += part
    except (TypeError, AttributeError, BytesWarning):
        # Raise a clearer error for mixed str/bytes arguments if that is the
        # cause; otherwise re-raise the original exception.
        genericpath._check_arg_types('join', a, *p)
        raise
    return result
93
94
# Split a path into head (everything up to the last '/') and tail (the
# rest).  A path ending in '/' has an empty tail; a path without any '/'
# has an empty head.  Trailing slashes are removed from head unless head
# consists only of slashes (i.e. it is the root).

def split(p):
    """Split a pathname.  Returns tuple "(head, tail)" where "tail" is
    everything after the final slash.  Either part may be empty."""
    p = os.fspath(p)
    sep = _get_sep(p)
    cut = p.rfind(sep) + 1
    head = p[:cut]
    tail = p[cut:]
    # Stripping an all-slash (or empty) head yields an empty string, in
    # which case the head is kept as it was.
    head = head.rstrip(sep) or head
    return head, tail
110
111
# Split a path into root and extension.
# The extension starts at the last dot of the last pathname component and
# the root is everything before it, so root + ext == p always holds.

def splitext(p):
    p = os.fspath(p)
    # Pass separators of the same type as the path (no alternate separator
    # on Posix).
    if isinstance(p, bytes):
        return genericpath._splitext(p, b'/', None, b'.')
    return genericpath._splitext(p, '/', None, '.')
splitext.__doc__ = genericpath._splitext.__doc__
127
# Split a pathname into a drive specification and the rest of the path.
# Posix has no drives, so the drive part is always empty.

def splitdrive(p):
    """Split a pathname into drive and path. On Posix, drive is always
    empty."""
    p = os.fspath(p)
    # Slicing keeps the empty drive the same type (str or bytes) as p.
    no_drive = p[:0]
    return no_drive, p
136
137
# Return the tail (basename) part of a path, i.e. split(path)[1].

def basename(p):
    """Returns the final component of a pathname"""
    p = os.fspath(p)
    last_sep = p.rfind(_get_sep(p))
    # rfind() gives -1 when there is no slash, so the slice is then p[0:].
    return p[last_sep + 1:]
146
147
# Return the head (dirname) part of a path, i.e. split(path)[0].

def dirname(p):
    """Returns the directory component of a pathname"""
    p = os.fspath(p)
    sep = _get_sep(p)
    head = p[:p.rfind(sep) + 1]
    # Drop trailing slashes, except when the head is nothing but slashes
    # (or empty): stripping then yields '' and the head is kept as is.
    return head.rstrip(sep) or head
159
160
# Is a path a symbolic link?
# Always answers False on systems where os.lstat does not exist.

def islink(path):
    """Test whether a path is a symbolic link"""
    try:
        info = os.lstat(path)
    except (OSError, ValueError, AttributeError):
        # Nonexistent path, invalid path value, or no os.lstat at all.
        return False
    else:
        return stat.S_ISLNK(info.st_mode)
171
# Unlike a plain existence test, this is also true for dangling symbolic
# links, because the link itself is examined rather than its target.

def lexists(path):
    """Test whether a path exists.  Returns True for broken symbolic links"""
    try:
        os.lstat(path)
    except (OSError, ValueError):
        return False
    else:
        return True
181
182
# Is a path a mount point?
# (Does this work for all UNIXes?  Is it even guaranteed to work by Posix?)

def ismount(path):
    """Test whether a path is a mount point"""
    try:
        s1 = os.lstat(path)
    except (OSError, ValueError):
        # The path cannot be examined, so it is not a mount point.
        return False
    # A symlink can never be a mount point.
    if stat.S_ISLNK(s1.st_mode):
        return False

    dotdot = b'..' if isinstance(path, bytes) else '..'
    parent = realpath(join(path, dotdot))
    try:
        s2 = os.lstat(parent)
    except (OSError, ValueError):
        return False

    # It is a mount point when path/.. lives on a different device than
    # path, or when path/.. is the very same i-node as path.
    return s1.st_dev != s2.st_dev or s1.st_ino == s2.st_ino
217
218
# Expand paths beginning with '~' or '~user'.
# '~' means $HOME; '~user' means that user's home directory.
# If the path doesn't begin with '~', or if the user or $HOME is unknown,
# the path is returned unchanged (leaving error reporting to whatever
# function is called with the expanded path as argument).
# See also module 'glob' for expansion of *, ? and [...] in pathnames.

def expanduser(path):
    """Expand ~ and ~user constructions.  If user or $HOME is unknown,
    do nothing.

    *path* may be str, bytes or a path-like object; the result has the same
    type as os.fspath(path).  The path is returned unchanged when it does
    not start with a tilde, when the user cannot be found in the password
    database, or when the pwd module is not available."""
    path = os.fspath(path)
    tilde = b'~' if isinstance(path, bytes) else '~'
    if not path.startswith(tilde):
        return path
    sep = _get_sep(path)
    # i marks the end of the '~' or '~user' prefix.
    i = path.find(sep, 1)
    if i < 0:
        i = len(path)
    if i == 1:
        # Bare '~': prefer $HOME, fall back to the password database.
        if 'HOME' not in os.environ:
            try:
                import pwd
            except ImportError:
                # No password database on this platform.
                return path
            try:
                userhome = pwd.getpwuid(os.getuid()).pw_dir
            except KeyError:
                # bpo-10496: if the current user identifier doesn't exist in
                # the password database, return the path unchanged
                return path
        else:
            userhome = os.environ['HOME']
    else:
        try:
            import pwd
        except ImportError:
            # No password database on this platform.
            return path
        name = path[1:i]
        if isinstance(name, bytes):
            # Decode with the filesystem encoding and error handler so that
            # non-ASCII user names do not raise UnicodeDecodeError.
            name = os.fsdecode(name)
        try:
            pwent = pwd.getpwnam(name)
        except KeyError:
            # bpo-10496: if the user name from the path doesn't exist in the
            # password database, return the path unchanged
            return path
        userhome = pwent.pw_dir
    if isinstance(path, bytes):
        userhome = os.fsencode(userhome)
        root = b'/'
    else:
        root = '/'
    # Avoid a doubled slash when the home directory ends in '/'; if that
    # leaves nothing at all (home is '/'), the result is the root.
    userhome = userhome.rstrip(root)
    return (userhome + path[i:]) or root
272
273
# Expand paths containing shell variable substitutions.
# Only the forms $variable and ${variable} are expanded.
# Variables that are not set are left in the path untouched.

# Compiled patterns for str and bytes paths, created on first use.
_varprog = None
_varprogb = None

def expandvars(path):
    """Expand shell variables of form $var and ${var}.  Unknown variables
    are left unchanged."""
    global _varprog, _varprogb
    path = os.fspath(path)
    if isinstance(path, bytes):
        if b'$' not in path:
            return path
        if not _varprogb:
            import re
            _varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
        find = _varprogb.search
        lbrace, rbrace = b'{', b'}'
        # None when the platform has no bytes environment; handled below.
        environ = getattr(os, 'environb', None)
    else:
        if '$' not in path:
            return path
        if not _varprog:
            import re
            _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
        find = _varprog.search
        lbrace, rbrace = '{', '}'
        environ = os.environ

    pos = 0
    while True:
        match = find(path, pos)
        if not match:
            return path
        begin, stop = match.span(0)
        name = match.group(1)
        if name.startswith(lbrace) and name.endswith(rbrace):
            name = name[1:-1]
        try:
            if environ is None:
                value = os.fsencode(os.environ[os.fsdecode(name)])
            else:
                value = environ[name]
        except KeyError:
            # Unknown variable: keep its text and search after it.
            pos = stop
            continue
        # Splice the value in and resume after it, so the substituted text
        # is not itself scanned for variables.
        expanded = path[:begin] + value
        pos = len(expanded)
        path = expanded + path[stop:]
328
329
# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
# Be aware that this can change the meaning of a path that contains
# symbolic links!

def normpath(path):
    """Normalize path, eliminating double slashes, etc."""
    path = os.fspath(path)
    if isinstance(path, bytes):
        sep, empty, dot, dotdot = b'/', b'', b'.', b'..'
    else:
        sep, empty, dot, dotdot = '/', '', '.', '..'
    if path == empty:
        return dot

    # POSIX allows one or two initial slashes, but treats three or more
    # as single slash.
    if path.startswith(sep * 2) and not path.startswith(sep * 3):
        leading = 2
    elif path.startswith(sep):
        leading = 1
    else:
        leading = 0

    kept = []
    for part in path.split(sep):
        if part == empty or part == dot:
            continue
        if part == dotdot:
            if kept and kept[-1] != dotdot:
                # '..' cancels the preceding real component.
                kept.pop()
                continue
            if not kept and leading:
                # '..' directly under the root is dropped.
                continue
        kept.append(part)

    normalized = sep.join(kept)
    if leading:
        normalized = sep * leading + normalized
    return normalized or dot
370
371
def abspath(path):
    """Return an absolute path."""
    path = os.fspath(path)
    if isabs(path):
        return normpath(path)
    # Relative path: anchor it at the current directory, using the bytes
    # variant of the cwd for bytes paths.
    cwd = os.getcwdb() if isinstance(path, bytes) else os.getcwd()
    return normpath(join(cwd, path))
382
383
# Return a canonical path (i.e. the absolute location of a file on the
# filesystem).

def realpath(filename):
    """Return the canonical path of the specified filename, eliminating any
    symbolic links encountered in the path."""
    filename = os.fspath(filename)
    # Start from an empty path of the same type (str or bytes) as filename.
    resolved, _ = _joinrealpath(filename[:0], filename, {})
    return abspath(resolved)
393
394# Join two paths, normalizing and eliminating any symbolic links
395# encountered in the second path.
396def _joinrealpath(path, rest, seen):
397 if isinstance(path, bytes):
398 sep = b'/'
399 curdir = b'.'
400 pardir = b'..'
401 else:
402 sep = '/'
403 curdir = '.'
404 pardir = '..'
405
406 if isabs(rest):
407 rest = rest[1:]
408 path = sep
409
410 while rest:
411 name, _, rest = rest.partition(sep)
412 if not name or name == curdir:
413 # current dir
414 continue
415 if name == pardir:
416 # parent dir
417 if path:
418 path, name = split(path)
419 if name == pardir:
420 path = join(path, pardir, pardir)
421 else:
422 path = pardir
423 continue
424 newpath = join(path, name)
425 if not islink(newpath):
426 path = newpath
427 continue
428 # Resolve the symbolic link
429 if newpath in seen:
430 # Already seen this path
431 path = seen[newpath]
432 if path is not None:
433 # use cached value
434 continue
435 # The symlink is not resolved, so we must have a symlink loop.
436 # Return already resolved part + rest of the path unchanged.
437 return join(newpath, rest), False
438 seen[newpath] = None # not resolved symlink
439 path, ok = _joinrealpath(path, os.readlink(newpath), seen)
440 if not ok:
441 return join(path, rest), False
442 seen[newpath] = path # resolved symlink
443
444 return path, True
445
446
# Set to True only when running on macOS (sys.platform == 'darwin').
supports_unicode_filenames = sys.platform == 'darwin'
448
def relpath(path, start=None):
    """Return a relative version of a path

    *path* is expressed relative to *start*, which defaults to the current
    directory.  Both are made absolute first; the filesystem is not
    consulted to check that they exist.

    Raises ValueError if *path* is empty, including a path-like object
    whose os.fspath() value is empty.  Raises TypeError if str and bytes
    arguments are mixed."""

    if not path:
        raise ValueError("no path specified")

    path = os.fspath(path)
    # A path-like object is truthy even when it converts to an empty
    # string, so the check has to be repeated on the converted value.
    if not path:
        raise ValueError("no path specified")

    if isinstance(path, bytes):
        curdir = b'.'
        sep = b'/'
        pardir = b'..'
    else:
        curdir = '.'
        sep = '/'
        pardir = '..'

    if start is None:
        start = curdir
    else:
        start = os.fspath(start)

    try:
        start_list = [x for x in abspath(start).split(sep) if x]
        path_list = [x for x in abspath(path).split(sep) if x]
        # Work out how much of the filepath is shared by start and path.
        i = len(commonprefix([start_list, path_list]))

        # Climb out of the unshared part of start, then descend into path.
        rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
        if not rel_list:
            return curdir
        return join(*rel_list)
    except (TypeError, AttributeError, BytesWarning, DeprecationWarning):
        # Raise a clearer error for mixed str/bytes arguments if that is the
        # cause; otherwise re-raise the original exception.
        genericpath._check_arg_types('relpath', path, start)
        raise
483
484
# Return the longest common sub-path of the sequence of paths given as input.
# The paths are not normalized before comparing them (this is the
# responsibility of the caller). Any trailing separator is stripped from the
# returned path.

def commonpath(paths):
    """Given a sequence of path names, returns the longest common sub-path.

    *paths* may be any iterable of str, bytes or path-like objects.

    Raises ValueError if *paths* is empty or mixes absolute and relative
    paths, and TypeError if str and bytes paths are mixed."""

    if not paths:
        raise ValueError('commonpath() arg is an empty sequence')

    paths = tuple(map(os.fspath, paths))
    # Iterators and generators are truthy even when exhausted, so emptiness
    # can only be detected reliably after materializing them.
    if not paths:
        raise ValueError('commonpath() arg is an empty sequence')

    if isinstance(paths[0], bytes):
        sep = b'/'
        curdir = b'.'
    else:
        sep = '/'
        curdir = '.'

    try:
        split_paths = [path.split(sep) for path in paths]

        try:
            # The set holds a single value only when all paths agree on
            # being absolute or relative; otherwise unpacking fails.
            absolute, = set(p[:1] == sep for p in paths)
        except ValueError:
            raise ValueError("Can't mix absolute and relative paths") from None

        # Ignore empty components (repeated slashes) and '.' components.
        split_paths = [[c for c in s if c and c != curdir] for s in split_paths]
        # The common prefix of all paths equals the common prefix of the
        # lexicographically smallest and largest component lists.
        s1 = min(split_paths)
        s2 = max(split_paths)
        common = s1
        for i, c in enumerate(s1):
            if c != s2[i]:
                common = s1[:i]
                break

        prefix = sep if absolute else sep[:0]
        return prefix + sep.join(common)
    except (TypeError, AttributeError):
        # Raise a clearer error for mixed str/bytes arguments if that is the
        # cause; otherwise re-raise the original exception.
        genericpath._check_arg_types('commonpath', *paths)
        raise