blob: 2696d099e4c7f71900cce571d5b04616b3767dd9 [file] [log] [blame]
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001#!/usr/bin/env python
2#
3#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
4#
Andrew Walbran16937d02019-10-22 13:54:20 +01005# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01008#
9#===------------------------------------------------------------------------===#
10
11r"""
12clang-format git integration
13============================
14
15This file provides a clang-format integration for git. Put it somewhere in your
16path and ensure that it is executable. Then, "git clang-format" will invoke
17clang-format on the changes in current files or a specific commit.
18
19For further details, run:
20git clang-format -h
21
22Requires Python 2.7 or Python 3
23"""
24
Andrew Walbran16937d02019-10-22 13:54:20 +010025from __future__ import absolute_import, division, print_function
Andrew Scull5e1ddfa2018-08-14 10:06:54 +010026import argparse
27import collections
28import contextlib
29import errno
30import os
31import re
32import subprocess
33import sys
34
# One-line synopsis shown by argparse in place of its auto-generated usage.
usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

# Long description printed by `git clang-format -h`. The git-config names
# listed here must match the keys looked up in main() (git reports them in
# lower case).
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD. Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
 clangFormat.binary
 clangFormat.commit
 clangFormat.extensions
 clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of changed lines: the first line number and the number of lines.
Range = collections.namedtuple('Range', 'start, count')
58
59
def main():
    """Command-line entry point.

    Parses the command line (with defaults taken from the git configuration),
    determines which lines changed, runs clang-format on them and then either
    prints the resulting diff (--diff) or applies it to the working directory.
    Exits through die()/sys.exit() on invalid arguments or tool failures."""
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves. (Setting nargs='*' throws away the '--', while
    # nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
        'cu',  # CUDA
        # Other languages that clang-format supports
        'proto', 'protodevel',  # Protocol Buffers
        'java',  # Java
        'js',  # JavaScript
        'ts',  # TypeScript
    ])

    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      default_extensions),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    # We gather all the remaining positional arguments into 'args' since we
    # need to use some heuristics to determine whether or not <commit> was
    # present. However, to print pretty messages, we make use of metavar and
    # help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help='if specified, only consider differences in these '
                        'files')
    opts = p.parse_args(argv)

    # Fold -q into -v so that a single signed verbosity level remains.
    opts.verbose -= opts.quiet
    del opts.quiet

    commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    # Reject too many commits first; otherwise this check would be shadowed
    # by the two-commit handling below and never trigger.
    if len(commits) > 2:
        die('at most two commits allowed; %d given' % len(commits))
    if len(commits) == 2 and not opts.diff:
        die('--diff is required when two commits are given')
    changed_lines = compute_diff_and_extract_lines(commits, files)
    if opts.verbose >= 1:
        # Remember every file in the diff so the ones dropped by the
        # extension filter can be reported below.
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print('Ignoring changes in the following files (wrong extension):')
            for filename in ignored_files:
                print(' %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print(' %s' % filename)
    if not changed_lines:
        print('no modified files to format')
        return
    # The computed diff outputs absolute paths, so we must cd before accessing
    # those files.
    cd_to_toplevel()
    if len(commits) > 1:
        old_tree = commits[1]
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     revision=commits[1],
                                                     binary=opts.binary,
                                                     style=opts.style)
    else:
        old_tree = create_tree_from_workdir(changed_lines)
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     binary=opts.binary,
                                                     style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)
    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
    elif opts.diff:
        print_diff(old_tree, new_tree)
    else:
        changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                      patch_mode=opts.patch)
        if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
            print('changed files:')
            for filename in changed_files:
                print(' %s' % filename)
181
182
def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    All options are assumed to be strings unless listed in
    `non_string_options`, which is a dictionary mapping option name (in lower
    case) to either "--bool" or "--int"; such options are re-read through
    `git config <type> <name>` so that git normalizes their value.

    A key that is set without any value maps to the empty string."""
    if non_string_options is None:
        non_string_options = {}
    out = {}
    for entry in run('git', 'config', '--list', '--null').split('\0'):
        if not entry:
            continue
        # With --null each entry is "<name>\n<value>". A valueless key is
        # emitted as just "<name>", so partition() is used instead of a
        # two-way unpack of split(), which would raise ValueError.
        name, _, value = entry.partition('\n')
        if name in non_string_options:
            value = run('git', 'config', non_string_options[name], name)
        out[name] = value
    return out
199
200
def interpret_args(args, dash_dash, default_commit):
    """Split positional arguments into `(commits, files)`.

    `args` holds the positionals found before any "--"; `dash_dash` holds the
    "--" itself plus everything after it (or is empty if no "--" was given).

    With "--" present, everything in `args` must name a commit (or tag) and
    everything after the "--" is a file. Without it, arguments are consumed
    from the left as commits for as long as they resolve to revisions; the
    rest are files (note that this pops the consumed entries off `args`).
    When no commit is found, `[default_commit]` is used."""
    if dash_dash:
        commits = args if args else [default_commit]
        for commit in commits:
            kind = get_object_type(commit)
            if kind in ('commit', 'tag'):
                continue
            if kind is None:
                die("'%s' is not a commit" % commit)
            else:
                die("'%s' is a %s, but a commit was expected" % (commit, kind))
        return commits, dash_dash[1:]

    if not args:
        return [default_commit], []

    commits = []
    while args and disambiguate_revision(args[0]):
        commits.append(args.pop(0))
    if not commits:
        commits = [default_commit]
    return commits, args
237
238
def disambiguate_revision(value):
    """Tell whether `value` names a revision (True) or a file (False).

    Exits the program if `value` is neither, or if it names a git object that
    is not a commit or tag."""
    # `git rev-parse` fails when `value` is ambiguous (neither a commit nor a
    # file); run() then reports the failure and exits.
    run('git', 'rev-parse', value, verbose=False)
    kind = get_object_type(value)
    if kind in ('commit', 'tag'):
        return True
    if kind is None:
        return False
    die('`%s` is a %s, but a commit or filename was expected' %
        (value, kind))
251
252
def get_object_type(value):
    """Return the type name git reports for object `value` (the output of
    `git cat-file -t`), or None if git does not recognize it as an object."""
    proc = subprocess.Popen(['git', 'cat-file', '-t', value],
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stderr is captured only so git's complaint is not shown to the user.
    output, _ = proc.communicate()
    if proc.returncode == 0:
        return convert_string(output.strip())
    return None
262
263
def compute_diff_and_extract_lines(commits, files):
    """Run compute_diff() and feed its output through extract_lines().

    Returns the mapping produced by extract_lines(). Exits with status 2 if
    the diff command fails."""
    diff_process = compute_diff(commits, files)
    changed_lines = extract_lines(diff_process.stdout)
    diff_process.stdout.close()
    if diff_process.wait() != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return changed_lines
274
275
def compute_diff(commits, files):
    """Start the git process that produces the diff for `commits`.

    The returned subprocess object's `stdout` yields a patch with zero lines
    of context. With a single commit the patch is produced by `git
    diff-index` for that commit; with more than one it is produced by `git
    diff-tree` for the given commits. When `files` is non-empty the diff is
    restricted to those paths."""
    tool = 'diff-tree' if len(commits) > 1 else 'diff-index'
    cmd = ['git', tool, '-p', '-U0']
    cmd.extend(commits)
    cmd.append('--')
    cmd.extend(files)
    proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # git gets no input; close our end of the pipe right away.
    proc.stdin.close()
    return proc
291
292
def extract_lines(patch_file):
    """Collect the changed line ranges from a zero-context unified diff.

    `patch_file` is an iterable of lines and must have been produced with
    ``-U0``. The result is a dict mapping each filename to a list of `Range`s
    (start line and line count on the new side of the diff); hunks that add
    no lines are skipped."""
    file_header = re.compile(r'^\+\+\+\ [^/]+/(.*)')
    hunk_header = re.compile(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?')
    matches = {}
    for raw_line in patch_file:
        line = convert_string(raw_line)
        header = file_header.search(line)
        if header:
            # Remember the file that the following hunks belong to.
            filename = header.group(1).rstrip('\r\n')
        hunk = hunk_header.search(line)
        if not hunk:
            continue
        start_line = int(hunk.group(1))
        # An omitted count in the hunk header means a single line.
        line_count = int(hunk.group(3)) if hunk.group(3) else 1
        if line_count > 0:
            matches.setdefault(filename, []).append(
                Range(start_line, line_count))
    return matches
317
318
def filter_by_extension(dictionary, allowed_extensions):
    """Remove, in place, every key of `dictionary` whose file extension is
    not allowed.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period. The comparison lower-cases the filename's
    extension. A filename without any period is kept only if the empty
    string is among the allowed extensions."""
    allowed = frozenset(allowed_extensions)
    # Iterate over a snapshot of the keys since entries are deleted on the go.
    for path in list(dictionary):
        _, dot, extension = path.rpartition('.')
        if dot:
            keep = extension.lower() in allowed
        else:
            keep = '' in allowed
        if not keep:
            del dictionary[path]
331
332
def cd_to_toplevel():
    """Make the top-level directory of the git repository the current
    working directory."""
    os.chdir(run('git', 'rev-parse', '--show-toplevel'))
337
338
def create_tree_from_workdir(filenames):
    """Build a git tree out of the working-directory versions of `filenames`.

    Returns the object ID (SHA-1) of the created tree."""
    tree_id = create_tree(filenames, '--stdin')
    return tree_id
344
345
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
    """Format every file in `changed_lines` and store the results in a tree.

    `changed_lines` maps filename to the line ranges to format. With
    `revision` set, file contents and modes are taken from that revision;
    otherwise they come from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    def file_mode(filename):
        # Octal mode string for `filename`, in the form git expects.
        if revision:
            ls_tree_cmd = ['git', 'ls-tree',
                           '%s:%s' % (revision, os.path.dirname(filename)),
                           os.path.basename(filename)]
            ls_tree = subprocess.Popen(ls_tree_cmd, stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE)
            listing = ls_tree.communicate()[0]
            # The mode is the first field of the ls-tree output.
            mode = oct(int(listing.split()[0], 8))
        else:
            mode = oct(os.stat(filename).st_mode)
        # Adjust python3 octal format so that it matches what git expects
        if mode.startswith('0o'):
            mode = '0' + mode[2:]
        return mode

    def index_info_generator():
        # Lazily yields one "<mode> <blob>\t<path>" entry per file; the work
        # happens while create_tree() consumes the generator.
        # dict.iteritems() exists on Python 2 only; fall back to items().
        pairs = getattr(changed_lines, 'iteritems', changed_lines.items)
        for filename, line_ranges in pairs():
            mode = file_mode(filename)
            blob_id = clang_format_to_blob(filename, line_ranges,
                                           revision=revision,
                                           binary=binary,
                                           style=style)
            yield '%s %s\t%s' % (mode, blob_id, filename)

    return create_tree(index_info_generator(), '--index-info')
377
378
def create_tree(input_lines, mode):
    """Write a tree object built from `input_lines` and return its ID.

    `mode` selects how `git update-index` interprets the input: with
    '--stdin' each item is a filename; with '--index-info' each item is a
    line suitable for "git update-index --index-info", such as
    "<mode> <SP> <sha1> <TAB> <filename>". Any other mode is invalid.

    The tree is assembled in a temporary index file."""
    assert mode in ('--stdin', '--index-info')
    cmd = ['git', 'update-index', '--add', '-z', mode]
    with temporary_index_file():
        updater = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        for entry in input_lines:
            # -z makes update-index expect NUL-terminated records.
            updater.stdin.write(to_bytes('%s\0' % entry))
        updater.stdin.close()
        if updater.wait() != 0:
            die('`%s` failed' % ' '.join(cmd))
        return run('git', 'write-tree')
397
398
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
    """Format `filename` with clang-format and store the output as a blob.

    Only the lines covered by `line_ranges` (pairs of start line and line
    count) are formatted. If `revision` is not None the file content is read
    from that revision and piped into clang-format; otherwise clang-format
    reads the file from the working directory.

    Returns the object ID (SHA-1) of the created blob. Exits the program if
    any of the involved commands fails."""
    clang_format_cmd = [binary]
    if style:
        clang_format_cmd.append('-style=' + style)
    for start_line, line_count in line_ranges:
        last_line = start_line + line_count - 1
        clang_format_cmd.append('-lines=%s:%s' % (start_line, last_line))
    if revision:
        # The content arrives on stdin, so tell clang-format the file name.
        clang_format_cmd.append('-assume-filename=' + filename)
        git_show_cmd = ['git', 'cat-file', 'blob',
                        '%s:%s' % (revision, filename)]
        git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        git_show.stdin.close()
        clang_format_stdin = git_show.stdout
    else:
        clang_format_cmd.append(filename)
        git_show = None
        clang_format_stdin = subprocess.PIPE
    try:
        clang_format = subprocess.Popen(clang_format_cmd,
                                        stdin=clang_format_stdin,
                                        stdout=subprocess.PIPE)
        if clang_format_stdin == subprocess.PIPE:
            clang_format_stdin = clang_format.stdin
    except OSError as e:
        if e.errno == errno.ENOENT:
            die('cannot find executable "%s"' % binary)
        else:
            raise
    # Close our handle on clang-format's input (either our own pipe or our
    # copy of git's stdout).
    clang_format_stdin.close()
    hash_object_cmd = ['git', 'hash-object', '-w', '--path=' + filename,
                       '--stdin']
    hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                   stdout=subprocess.PIPE)
    clang_format.stdout.close()
    blob_output = hash_object.communicate()[0]
    if hash_object.returncode != 0:
        die('`%s` failed' % ' '.join(hash_object_cmd))
    if clang_format.wait() != 0:
        die('`%s` failed' % ' '.join(clang_format_cmd))
    if git_show and git_show.wait() != 0:
        die('`%s` failed' % ' '.join(git_show_cmd))
    return convert_string(blob_output).rstrip('\r\n')
447
448
@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager that points GIT_INDEX_FILE at a freshly created
    temporary index for the duration of the block.

    On exit the previous GIT_INDEX_FILE setting (or its absence) is restored
    and the temporary index file is deleted."""
    index_path = create_temporary_index(tree)
    saved_index_path = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = index_path
    try:
        yield
    finally:
        if saved_index_path is not None:
            os.environ['GIT_INDEX_FILE'] = saved_index_path
        else:
            del os.environ['GIT_INDEX_FILE']
        os.remove(index_path)
464
465
def create_temporary_index(tree=None):
    """Create the temporary index file inside the git directory and return
    its path.

    The index is populated from `tree` when given; with `tree` left as None
    an empty index is created."""
    gitdir = run('git', 'rev-parse', '--git-dir')
    path = os.path.join(gitdir, temp_index_basename)
    source = '--empty' if tree is None else tree
    run('git', 'read-tree', '--index-output=' + path, source)
    return path
477
478
def print_diff(old_tree, new_tree):
    """Show the differences between `old_tree` and `new_tree` on stdout."""
    # The porcelain 'diff' is used rather than the plumbing 'diff-tree'
    # because the output is meant to be read by the user, and only the former
    # does nice things like color and pagination.
    #
    # Only modified files are shown: `new_tree` contains just the files that
    # were modified, so without the filter every unmodified file would show up
    # as deleted.
    cmd = ['git', 'diff', '--diff-filter=M', old_tree, new_tree, '--']
    subprocess.check_call(cmd)
490
491
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Bails out (exit status 2) if any of the affected files has unstaged
    changes, unless `force` is set. If `patch_mode`, runs
    `git checkout --patch` to select hunks interactively.

    Returns the list of files that differ between `old_tree` and
    `new_tree`."""
    changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                        '--name-only', old_tree,
                        new_tree).rstrip('\0').split('\0')
    if not force:
        # '--' keeps a filename that starts with '-' from being parsed as an
        # option by git.
        unstaged_files = run('git', 'diff-files', '--name-status', '--',
                             *changed_files)
        if unstaged_files:
            print('The following files would be modified but '
                  'have unstaged changes:', file=sys.stderr)
            print(unstaged_files, file=sys.stderr)
            print('Please commit, stage, or stash them first.',
                  file=sys.stderr)
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented
        # with a message saying "Discard ... from worktree". Instead, we use
        # the old tree as the index and checkout from new_tree, which gives
        # the slightly better message, "Apply ... to index and worktree". This
        # is not quite right, since it won't be applied to the user's index,
        # but oh well.
        with temporary_index_file(old_tree):
            subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-a', '-f')
    return changed_files
522
523
def run(*args, **kwargs):
    """Run the command `args` and return its standard output as a string.

    Keyword arguments:
      stdin:   data to send to the command's standard input (default: none).
               Text is encoded as UTF-8 before being sent.
      verbose: if true (the default), also report which command wrote to
               stderr or failed.
      strip:   if true (the default), strip trailing newline characters from
               the returned output.

    Anything the command writes to stderr is forwarded to our stderr. If the
    command exits with a non-zero status, the program exits with status 2.

    Raises TypeError for any other keyword argument."""
    stdin = kwargs.pop('stdin', '')
    verbose = kwargs.pop('verbose', True)
    strip = kwargs.pop('strip', True)
    for name in kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    # The pipes are binary, so text input must be encoded first; passing a
    # non-empty text string straight to communicate() fails on Python 3.
    if stdin is not None and not isinstance(stdin, bytes):
        stdin = stdin.encode('utf-8')
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE)
    stdout, stderr = p.communicate(input=stdin)

    stdout = convert_string(stdout)
    stderr = convert_string(stderr)

    if p.returncode == 0:
        if stderr:
            if verbose:
                print('`%s` printed to stderr:' % ' '.join(args),
                      file=sys.stderr)
            print(stderr.rstrip(), file=sys.stderr)
        if strip:
            stdout = stdout.rstrip('\r\n')
        return stdout
    if verbose:
        print('`%s` returned %s' % (' '.join(args), p.returncode),
              file=sys.stderr)
    if stderr:
        print(stderr.rstrip(), file=sys.stderr)
    sys.exit(2)
550
551
def die(message):
    """Print `message` to stderr, prefixed with "error:", and terminate the
    program with exit status 2."""
    print('error:', message, file=sys.stderr)
    raise SystemExit(2)
555
556
def to_bytes(str_input):
    """Return `str_input` as binary data: bytes are passed through unchanged,
    anything else is encoded as UTF-8."""
    already_binary = isinstance(str_input, bytes)
    return str_input if already_binary else str_input.encode('utf-8')
562
563
def to_string(bytes_input):
    """Return `bytes_input` as the native `str` type.

    A `str` is returned unchanged; any other value is UTF-8 encoded
    (presumably for Python 2, where decoded text is `unicode` rather than
    `str`)."""
    if not isinstance(bytes_input, str):
        return bytes_input.encode('utf-8')
    return bytes_input
568
569
def convert_string(bytes_input):
    """Convert subprocess output to the native `str` type.

    The input is decoded as UTF-8. If it cannot be decoded (it has no
    `decode` method, or the data is not valid UTF-8), `str(bytes_input)` is
    returned instead."""
    try:
        decoded = bytes_input.decode('utf-8')
        return to_string(decoded)
    except (AttributeError, UnicodeError):
        # AttributeError: 'str' object has no attribute 'decode'.
        return str(bytes_input)
577
# Run the tool only when executed as a script, not when imported.
if __name__ == '__main__':
    main()