blob: ccd2f50fa4ad2ea7f9f31a531a453569e2ca9289 [file] [log] [blame]
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001#!/usr/bin/env python
2#
3#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
4#
Andrew Walbran16937d02019-10-22 13:54:20 +01005# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01008#
9#===------------------------------------------------------------------------===#
10
Andrew Walbran3d2c1972020-04-07 12:24:26 +010011r"""
12clang-format git integration
13============================
14
15This file provides a clang-format integration for git. Put it somewhere in your
16path and ensure that it is executable. Then, "git clang-format" will invoke
17clang-format on the changes in current files or a specific commit.
18
19For further details, run:
20git clang-format -h
21
22Requires Python 2.7 or Python 3
23"""
Andrew Scull5e1ddfa2018-08-14 10:06:54 +010024
Andrew Walbran16937d02019-10-22 13:54:20 +010025from __future__ import absolute_import, division, print_function
Andrew Scull5e1ddfa2018-08-14 10:06:54 +010026import argparse
27import collections
28import contextlib
29import errno
30import os
31import re
32import subprocess
33import sys
34
# Usage synopsis handed to argparse in place of its auto-generated one.
usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

# Long description handed to argparse for the --help output.
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD. Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
 clangFormat.binary
 clangFormat.commit
 clangFormat.extensions
 clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of changed lines in a file: its first line and the number of lines.
Range = collections.namedtuple('Range', 'start, count')
58
59
def main():
    """Parse the command line, run clang-format on changed lines, and report.

    The changed lines are computed between the working directory and a commit,
    or between two commits (which requires --diff).  The formatted result is
    either printed as a diff (--diff) or applied to the working directory.
    Invalid invocations terminate the process through die()."""
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves. (Setting nargs='*' throws away the '--', while
    # nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx',  # C++
        'cu', 'cuh',  # CUDA
        # Other languages that clang-format supports
        'proto', 'protodevel',  # Protocol Buffers
        'java',  # Java
        'js',  # JavaScript
        'ts',  # TypeScript
        'cs',  # C Sharp
    ])

    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      default_extensions),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    # We gather all the remaining positional arguments into 'args' since we
    # need to use some heuristics to determine whether or not <commit> was
    # present.  However, to print pretty messages, we make use of metavar and
    # help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help='if specified, only consider differences in these files')
    opts = p.parse_args(argv)

    # Fold -q into -v so that a single signed verbosity level remains.
    opts.verbose -= opts.quiet
    del opts.quiet

    commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    if len(commits) > 1:
        if not opts.diff:
            die('--diff is required when two commits are given')
        # This check must live on the multi-commit path; placed in the
        # `else` of the test above it could never trigger.
        if len(commits) > 2:
            die('at most two commits allowed; %d given' % len(commits))
    changed_lines = compute_diff_and_extract_lines(commits, files)
    if opts.verbose >= 1:
        # Remember every file in the diff so the filtered-out ones can be
        # reported below.
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print('Ignoring changes in the following files (wrong extension):')
            for filename in ignored_files:
                print(' %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print(' %s' % filename)
    if not changed_lines:
        if opts.verbose >= 0:
            print('no modified files to format')
        return
    # The computed diff outputs absolute paths, so we must cd before accessing
    # those files.
    cd_to_toplevel()
    if len(commits) > 1:
        old_tree = commits[1]
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     revision=commits[1],
                                                     binary=opts.binary,
                                                     style=opts.style)
    else:
        old_tree = create_tree_from_workdir(changed_lines)
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     binary=opts.binary,
                                                     style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)
    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
    elif opts.diff:
        print_diff(old_tree, new_tree)
    else:
        changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                      patch_mode=opts.patch)
        if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
            print('changed files:')
            for filename in changed_files:
                print(' %s' % filename)
183
184
def load_git_config(non_string_options=None):
    """Read `git config --list` into a dictionary keyed by option name.

    Every value is a string, except for options named in
    `non_string_options`: that argument is a dictionary mapping an option name
    (in lower case) to either "--bool" or "--int", and the value of such an
    option is obtained by querying git again with that flag."""
    typed_options = {} if non_string_options is None else non_string_options
    config = {}
    listing = run('git', 'config', '--list', '--null')
    for record in listing.split('\0'):
        if not record:
            continue
        # With --null the name and the value are separated by a newline.  A
        # record without one is a setting with no '=', which is implicitly
        # 'true'.
        name, separator, value = record.partition('\n')
        if not separator:
            value = 'true'
        if name in typed_options:
            value = run('git', 'config', typed_options[name], name)
        config[name] = value
    return config
206
207
def interpret_args(args, dash_dash, default_commit):
    """Split `args` of the form "[commits] [--] [files]" into (commits, files).

    The caller must already have moved "--" and everything after it out of
    `args` and into `dash_dash`.

    When `dash_dash` is non-empty, all of `args` are taken as commits and each
    one is verified to be a commit or a tag; the files are whatever follows
    the "--".  Otherwise `args` is consumed from the left for as long as the
    leading item is a revision, and the remainder is treated as files.  If no
    commits are found, a list holding `default_commit` is used."""
    if dash_dash:
        commits = args if len(args) != 0 else [default_commit]
        for candidate in commits:
            kind = get_object_type(candidate)
            if kind in ('commit', 'tag'):
                continue
            if kind is None:
                die("'%s' is not a commit" % candidate)
            else:
                die("'%s' is a %s, but a commit was expected" % (candidate, kind))
        return commits, dash_dash[1:]

    if not args:
        return [default_commit], []

    commits = []
    # Leading revisions are popped off `args`; what is left are the files.
    while args and disambiguate_revision(args[0]):
        commits.append(args.pop(0))
    if not commits:
        commits = [default_commit]
    return commits, args
244
245
def disambiguate_revision(value):
    """Return True if `value` names a revision and False if it names a file.

    Terminates the process if `git rev-parse` rejects `value`, or if `value`
    is a git object that is neither a commit nor a tag."""
    # If `value` is ambiguous (neither a commit nor a file), this command
    # fails and run() exits; the output itself is not needed.
    run('git', 'rev-parse', value, verbose=False)
    kind = get_object_type(value)
    if kind is None:
        return False
    if kind not in ('commit', 'tag'):
        die('`%s` is a %s, but a commit or filename was expected' %
            (value, kind))
    return True
258
259
def get_object_type(value):
    """Return the type `git cat-file -t` reports for `value`, as a string.

    Returns None when the command exits with a non-zero status, i.e. when
    `value` is not a valid git object."""
    query = subprocess.Popen(['git', 'cat-file', '-t', value],
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stderr is piped and then discarded.
    output, _ = query.communicate()
    if query.returncode == 0:
        return convert_string(output.strip())
    return None
269
270
def compute_diff_and_extract_lines(commits, files):
    """Run compute_diff() and feed its output through extract_lines().

    Returns the result of extract_lines().  Exits the process with status 2
    if the diff command finishes with a non-zero status."""
    diff = compute_diff(commits, files)
    line_ranges = extract_lines(diff.stdout)
    diff.stdout.close()
    if diff.wait() != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return line_ranges
281
282
def compute_diff(commits, files):
    """Start the git diff command for `commits` and return its process object.

    The returned process's `stdout` file object produces a patch with the
    differences between the working directory and the commit if a single one
    was given (`git diff-index`), or between both commits if two were given
    (`git diff-tree`), restricted to `files` when that list is non-empty.
    The patch is generated with zero lines of context."""
    git_tool = 'diff-tree' if len(commits) > 1 else 'diff-index'
    cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--']
    cmd += files
    diff = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # Nothing is ever written to the command, so close its input right away.
    diff.stdin.close()
    return diff
298
299
def extract_lines(patch_file):
    """Collect the line ranges added or changed by the patch in `patch_file`.

    `patch_file` is iterated line by line and must hold a unified diff that
    was produced with ``-U0`` (zero lines of context).

    Returns a dict mapping each filename to a list of `Range`s, one per hunk
    that leaves at least one line in the new version of the file."""
    ranges_by_file = {}
    for raw_line in patch_file:
        text = convert_string(raw_line)
        # "+++ <prefix>/<path>" names the file the following hunks belong to.
        header = re.search(r'^\+\+\+\ [^/]+/(.*)', text)
        if header:
            filename = header.group(1).rstrip('\r\n')
        hunk = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', text)
        if not hunk:
            continue
        first, _, count = hunk.groups()
        # A hunk header without an explicit count covers exactly one line.
        line_count = int(count) if count else 1
        if line_count > 0:
            ranges_by_file.setdefault(filename, []).append(
                Range(int(first), line_count))
    return ranges_by_file
324
325
def filter_by_extension(dictionary, allowed_extensions):
    """Remove, in place, each key of `dictionary` whose extension is not allowed.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period.  The extension of a key is whatever follows its
    last period, compared case-insensitively.  A key without any period is
    kept only if the empty string is among the allowed extensions."""
    permitted = frozenset(allowed_extensions)
    # Snapshot the keys: entries are deleted while looping.
    for name in list(dictionary):
        _, period, suffix = name.rpartition('.')
        if period:
            keep = suffix.lower() in permitted
        else:
            keep = '' in permitted
        if not keep:
            del dictionary[name]
338
339
def cd_to_toplevel():
    """Make the top-level directory of the git repository the current one."""
    os.chdir(run('git', 'rev-parse', '--show-toplevel'))
344
345
def create_tree_from_workdir(filenames):
    """Build a git tree out of `filenames` as found in the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    tree_id = create_tree(filenames, mode='--stdin')
    return tree_id
351
352
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
    """Format every file in `changed_lines` and store the results as a git tree.

    `changed_lines` maps each filename to the line ranges to format.  If
    `revision` is given, file modes and contents are taken from that
    revision; otherwise they come from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    def mode_from_revision(filename):
        # Field one of the `git ls-tree` entry is the file mode, in octal.
        ls_tree_cmd = ['git', 'ls-tree',
                       '%s:%s' % (revision, os.path.dirname(filename)),
                       os.path.basename(filename)]
        ls_tree = subprocess.Popen(ls_tree_cmd, stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE)
        listing = ls_tree.communicate()[0]
        return int(listing.split()[0], 8)

    def index_info_entries():
        for filename in changed_lines:
            line_ranges = changed_lines[filename]
            if revision:
                mode = oct(mode_from_revision(filename))
            else:
                mode = oct(os.stat(filename).st_mode)
            # Adjust python3 octal format so that it matches what git expects
            if mode.startswith('0o'):
                mode = '0' + mode[2:]
            blob_id = clang_format_to_blob(filename, line_ranges,
                                           revision=revision,
                                           binary=binary,
                                           style=style)
            yield '%s %s\t%s' % (mode, blob_id, filename)

    return create_tree(index_info_entries(), '--index-info')
384
385
def create_tree(input_lines, mode):
    """Feed `input_lines` to `git update-index` and write the resulting tree.

    If mode is '--stdin', `input_lines` must be a list of filenames.  If mode
    is '--index-info', it must be a list of values suitable for "git
    update-index --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".
    Any other mode is invalid.

    The work happens in a temporary index file.  Returns the object ID of the
    written tree."""
    assert mode in ('--stdin', '--index-info')
    cmd = ['git', 'update-index', '--add', '-z', mode]
    with temporary_index_file():
        updater = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        # -z: every record handed to git is terminated by a NUL byte.
        for entry in input_lines:
            updater.stdin.write(to_bytes('%s\0' % entry))
        updater.stdin.close()
        exit_status = updater.wait()
        if exit_status != 0:
            die('`%s` failed' % ' '.join(cmd))
        tree_id = run('git', 'write-tree')
    return tree_id
404
405
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
    """Format `filename` with clang-format and store the output as a git blob.

    Only the lines in `line_ranges` (pairs of first line and line count) are
    formatted.  The content of the file as of `revision` is used if that is
    not None; otherwise the file in the working directory is used.

    Returns the object ID (SHA-1) of the created blob.  Terminates the process
    if clang-format cannot be found or if any of the commands fails."""
    formatter_cmd = [binary]
    if style:
        formatter_cmd.append('-style=' + style)
    for first, count in line_ranges:
        formatter_cmd.append('-lines=%s:%s' % (first, first + count - 1))

    if revision:
        # clang-format reads the revision's content from `git cat-file`.
        formatter_cmd.append('-assume-filename=' + filename)
        show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
        show = subprocess.Popen(show_cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
        show.stdin.close()
        formatter_input = show.stdout
    else:
        formatter_cmd.append(filename)
        show = None
        formatter_input = subprocess.PIPE

    try:
        formatter = subprocess.Popen(formatter_cmd, stdin=formatter_input,
                                     stdout=subprocess.PIPE)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise
        die('cannot find executable "%s"' % binary)
    # Close this process's end of whatever pipe feeds clang-format.
    if show is None:
        formatter.stdin.close()
    else:
        formatter_input.close()

    hasher_cmd = ['git', 'hash-object', '-w', '--path=' + filename, '--stdin']
    hasher = subprocess.Popen(hasher_cmd, stdin=formatter.stdout,
                              stdout=subprocess.PIPE)
    formatter.stdout.close()
    blob_output = hasher.communicate()[0]

    if hasher.returncode != 0:
        die('`%s` failed' % ' '.join(hasher_cmd))
    if formatter.wait() != 0:
        die('`%s` failed' % ' '.join(formatter_cmd))
    if show and show.wait() != 0:
        die('`%s` failed' % ' '.join(show_cmd))
    return convert_string(blob_output).rstrip('\r\n')
454
455
@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager that points GIT_INDEX_FILE at a temporary index file.

    The index is created from `tree` by create_temporary_index().  On exit the
    previous value of GIT_INDEX_FILE is restored (or the variable is removed
    if it was not set before) and the temporary file is deleted."""
    index_path = create_temporary_index(tree)
    previous = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = index_path
    try:
        yield
    finally:
        if previous is not None:
            os.environ['GIT_INDEX_FILE'] = previous
        else:
            del os.environ['GIT_INDEX_FILE']
        os.remove(index_path)
471
472
def create_temporary_index(tree=None):
    """Write a temporary index file inside the git directory; return its path.

    The index is populated from `tree` if that is not None; otherwise an
    empty index is created."""
    git_dir = run('git', 'rev-parse', '--git-dir')
    index_path = os.path.join(git_dir, temp_index_basename)
    source = '--empty' if tree is None else tree
    run('git', 'read-tree', '--index-output=' + index_path, source)
    return index_path
484
485
def print_diff(old_tree, new_tree):
    """Show the differences between the two trees on stdout."""
    # The porcelain 'diff' is used rather than the plumbing 'diff-tree'
    # because the output is meant for the user, and only the former does nice
    # things like color and pagination.
    #
    # The output is limited to modified files: `new_tree` only contains the
    # files that were modified, so without the filter every unmodified file
    # would show up as deleted.
    diff_cmd = ['git', 'diff', '--diff-filter=M', old_tree, new_tree, '--']
    subprocess.check_call(diff_cmd)
497
498
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Unless `force` is set, exits with status 2 if any of the files that differ
    between `old_tree` and `new_tree` has unstaged changes.  If `patch_mode`
    is set, runs `git checkout --patch` to select hunks interactively.

    Returns the list of files that are modified between the two trees."""
    changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                        '--name-only', old_tree,
                        new_tree).rstrip('\0').split('\0')
    if not force:
        # The '--' keeps a path that starts with '-' from being parsed as an
        # option by git.
        unstaged_files = run('git', 'diff-files', '--name-status', '--',
                             *changed_files)
        if unstaged_files:
            print('The following files would be modified but '
                  'have unstaged changes:', file=sys.stderr)
            print(unstaged_files, file=sys.stderr)
            print('Please commit, stage, or stash them first.', file=sys.stderr)
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented
        # with a message saying "Discard ... from worktree".  Instead, we use
        # the old tree as the index and checkout from new_tree, which gives
        # the slightly better message, "Apply ... to index and worktree".
        # This is not quite right, since it won't be applied to the user's
        # index, but oh well.
        with temporary_index_file(old_tree):
            subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-a', '-f')
    return changed_files
529
530
def run(*args, **kwargs):
    """Run the command given by `args` and return its standard output.

    Keyword arguments:
      stdin: data to send to the command's standard input (default: none).
        Text is encoded as UTF-8 before being sent.
      verbose: if true (the default), print a line naming the command before
        its stderr output or when reporting a non-zero exit status.
      strip: if true (the default), remove trailing CR/LF characters from the
        returned output.

    Whatever the command prints to stderr is forwarded to stderr.  If the
    command exits with a non-zero status, the process exits with status 2.
    Raises TypeError for any other keyword argument."""
    stdin = kwargs.pop('stdin', '')
    verbose = kwargs.pop('verbose', True)
    strip = kwargs.pop('strip', True)
    for name in kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE)
    # The pipes are opened in binary mode, so the input has to be bytes;
    # Python 3 raises TypeError when handed a non-empty str here.
    stdout, stderr = p.communicate(input=to_bytes(stdin))

    stdout = convert_string(stdout)
    stderr = convert_string(stderr)

    if p.returncode == 0:
        if stderr:
            if verbose:
                print('`%s` printed to stderr:' % ' '.join(args), file=sys.stderr)
            print(stderr.rstrip(), file=sys.stderr)
        if strip:
            stdout = stdout.rstrip('\r\n')
        return stdout
    if verbose:
        print('`%s` returned %s' % (' '.join(args), p.returncode), file=sys.stderr)
    if stderr:
        print(stderr.rstrip(), file=sys.stderr)
    sys.exit(2)
557
558
def die(message):
    """Print `message` to stderr, prefixed with "error:", and exit with
    status 2."""
    print('error: %s' % (message,), file=sys.stderr)
    raise SystemExit(2)
562
563
def to_bytes(str_input):
    """Return `str_input` as binary data.

    A bytes object is returned unchanged; anything else is encoded as
    UTF-8."""
    already_binary = isinstance(str_input, bytes)
    return str_input if already_binary else str_input.encode('utf-8')
569
570
def to_string(bytes_input):
    """Return `bytes_input` as the interpreter's native `str` type.

    A `str` is returned unchanged.  A `bytes` object (Python 3) is decoded
    from UTF-8, and any other text type (Python 2 `unicode`) is encoded to
    UTF-8.

    Raises UnicodeError if the conversion is not possible."""
    if isinstance(bytes_input, str):
        return bytes_input
    if isinstance(bytes_input, bytes):
        # Only reachable on Python 3, where bytes has no encode() method; on
        # Python 2 `bytes` is `str` and was handled above.
        return bytes_input.decode('utf-8')
    return bytes_input.encode('utf-8')
575
576
def convert_string(bytes_input):
    """Return `bytes_input` as a native string, decoding it as UTF-8.

    Falls back to `str(bytes_input)` when the input cannot be decoded."""
    try:
        decoded = bytes_input.decode('utf-8')
        return to_string(decoded)
    except (AttributeError, UnicodeError):
        # AttributeError: 'str' object has no attribute 'decode'.
        # UnicodeError: the data is not valid UTF-8.
        return str(bytes_input)
584
# Run the tool only when this file is executed as a script.
if __name__ == '__main__':
    main()