blob: 0b2103962a31e8a04e7b44b1116e874034396ac1 [file] [log] [blame]
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001#!/usr/bin/env python
2#
3#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
4#
5# The LLVM Compiler Infrastructure
6#
7# This file is distributed under the University of Illinois Open Source
8# License. See LICENSE.TXT for details.
9#
10#===------------------------------------------------------------------------===#
11
12r"""
13clang-format git integration
14============================
15
16This file provides a clang-format integration for git. Put it somewhere in your
17path and ensure that it is executable. Then, "git clang-format" will invoke
18clang-format on the changes in current files or a specific commit.
19
20For further details, run:
21git clang-format -h
22
23Requires Python 2.7 or Python 3
24"""
25
26from __future__ import print_function
27import argparse
28import collections
29import contextlib
30import errno
31import os
32import re
33import subprocess
34import sys
35
# One-line synopsis shown by argparse in --help and in error messages.
usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

# Long description shown by --help.  The git-config names listed here must
# match the keys that main() looks up (compared in lower case).
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD. Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of changed lines: 1-based `start` line number and the `count` of lines.
Range = collections.namedtuple('Range', 'start, count')
59
60
def main():
    """Parse the command line and run clang-format on the changed lines.

    Option defaults come from the git configuration.  The lines that differ
    (between the working directory and a commit, or between two commits) are
    computed, files with a non-matching extension are dropped, clang-format is
    run on the remaining ranges, and the result is either printed as a diff
    (--diff) or applied to the working directory.

    Exits through die() on invalid commit combinations.
    """
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
    # nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
        'cu',  # CUDA
        # Other languages that clang-format supports
        'proto', 'protodevel',  # Protocol Buffers
        'java',  # Java
        'js',  # JavaScript
        'ts',  # TypeScript
    ])

    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      default_extensions),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    # We gather all the remaining positional arguments into 'args' since we
    # need to use some heuristics to determine whether or not <commit> was
    # present.  However, to print pretty messages, we make use of metavar and
    # help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help='if specified, only consider differences in these files')
    opts = p.parse_args(argv)

    # Fold -q into -v so that a single signed verbosity level remains.
    opts.verbose -= opts.quiet
    del opts.quiet

    commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    if len(commits) > 1 and not opts.diff:
        die('--diff is required when two commits are given')
    # This check must not be nested under the "one commit or fewer" case,
    # otherwise it can never fire and three or more commits slip through.
    if len(commits) > 2:
        die('at most two commits allowed; %d given' % len(commits))
    changed_lines = compute_diff_and_extract_lines(commits, files)
    if opts.verbose >= 1:
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print('Ignoring changes in the following files (wrong extension):')
            for filename in ignored_files:
                print('    %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print('    %s' % filename)
    if not changed_lines:
        print('no modified files to format')
        return
    # The computed diff outputs absolute paths, so we must cd before accessing
    # those files.
    cd_to_toplevel()
    if len(commits) > 1:
        # Two commits: format the file contents as recorded in the second one.
        old_tree = commits[1]
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     revision=commits[1],
                                                     binary=opts.binary,
                                                     style=opts.style)
    else:
        old_tree = create_tree_from_workdir(changed_lines)
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     binary=opts.binary,
                                                     style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)
    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
    elif opts.diff:
        print_diff(old_tree, new_tree)
    else:
        changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                      patch_mode=opts.patch)
        if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
            print('changed files:')
            for filename in changed_files:
                print('    %s' % filename)
182
183
def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    All options are assumed to be strings unless they appear in
    `non_string_options`, which is a dictionary mapping option name (in lower
    case) to either "--bool" or "--int"; such options are re-read through
    `git config <type> <name>`.

    A variable that is set without a value is reported as the string 'true'.
    """
    if non_string_options is None:
        non_string_options = {}
    out = {}
    for entry in run('git', 'config', '--list', '--null').split('\0'):
        if not entry:
            continue
        if '\n' in entry:
            # With --null, a newline separates the key from its value.
            name, value = entry.split('\n', 1)
        else:
            # A variable written without '=' has no newline in the --null
            # output; git treats such a variable as boolean true.
            name, value = entry, 'true'
        if name in non_string_options:
            value = run('git', 'config', non_string_options[name], name)
        out[name] = value
    return out
200
201
def interpret_args(args, dash_dash, default_commit):
    """Split `args` of the form "[commits] [--] [files]" into (commits, files).

    The caller must already have moved "--" and everything after it out of
    `args` and into `dash_dash`.

    With a non-empty `dash_dash`, everything in `args` is taken to be a commit
    (each one is validated) and the files are whatever follows the "--".
    Without it, leading arguments are consumed from `args` as commits for as
    long as they name revisions, and the remainder are files.  When no commit
    is found, `[default_commit]` is used.
    """
    if not dash_dash:
        if not args:
            return [default_commit], []
        leading_commits = []
        # Consume revisions from the front; the first non-revision ends them.
        while args and disambiguate_revision(args[0]):
            leading_commits.append(args.pop(0))
        return (leading_commits or [default_commit]), args

    commits = args if len(args) != 0 else [default_commit]
    for commit in commits:
        object_type = get_object_type(commit)
        if object_type in ('commit', 'tag'):
            continue
        if object_type is None:
            die("'%s' is not a commit" % commit)
        else:
            die("'%s' is a %s, but a commit was expected" % (commit, object_type))
    # dash_dash[0] is the "--" separator itself.
    return commits, dash_dash[1:]
238
239
def disambiguate_revision(value):
    """Tell whether `value` names a revision (True) or a file (False).

    Dies if `value` is a git object of some other kind."""
    # `git rev-parse` fails for a value that is neither a revision nor a file;
    # run() then reports the error and exits.
    run('git', 'rev-parse', value, verbose=False)
    kind = get_object_type(value)
    if kind in ('commit', 'tag'):
        return True
    if kind is None:
        return False
    die('`%s` is a %s, but a commit or filename was expected' % (value, kind))
252
253
def get_object_type(value):
    """Return the type name git reports for `value` via `git cat-file -t`,
    or None when that command fails."""
    query = subprocess.Popen(['git', 'cat-file', '-t', value],
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stderr is captured only to keep it off the terminal.
    output = query.communicate()[0]
    if query.returncode == 0:
        return convert_string(output.strip())
    return None
263
264
def compute_diff_and_extract_lines(commits, files):
    """Run compute_diff() and feed its output through extract_lines().

    Exits with status 2 if the diff command fails."""
    diff = compute_diff(commits, files)
    line_ranges = extract_lines(diff.stdout)
    diff.stdout.close()
    if diff.wait() != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return line_ranges
275
276
def compute_diff(commits, files):
    """Start a git process producing the diff for `commits`.

    The returned subprocess object's `stdout` yields a patch: with a single
    commit, `git diff-index` is used against that commit; with more than one,
    `git diff-tree` is used between them.  The patch is restricted to `files`
    when that list is non-empty and is generated with zero lines of context."""
    git_tool = 'diff-tree' if len(commits) > 1 else 'diff-index'
    cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--']
    cmd += files
    diff = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # Nothing is ever written to the child's stdin.
    diff.stdin.close()
    return diff
292
293
def extract_lines(patch_file):
    """Collect the changed line ranges listed in `patch_file`.

    `patch_file` is iterated line by line and must be a unified diff produced
    with ``-U0`` (zero lines of context).

    Returns a dict mapping each filename to a list of `Range`s giving the
    start line and line count of every hunk on the new side.  Hunks that add
    no lines (count 0) are skipped."""
    header_re = re.compile(r'^\+\+\+\ [^/]+/(.*)')
    hunk_re = re.compile(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?')
    matches = {}
    for raw_line in patch_file:
        line = convert_string(raw_line)
        header = header_re.search(line)
        if header:
            # The pattern drops the first path component of the "+++" line.
            filename = header.group(1).rstrip('\r\n')
        hunk = hunk_re.search(line)
        if not hunk:
            continue
        start_line = int(hunk.group(1))
        count_text = hunk.group(3)
        # A hunk header without an explicit count covers exactly one line.
        line_count = int(count_text) if count_text else 1
        if line_count > 0:
            matches.setdefault(filename, []).append(
                Range(start_line, line_count))
    return matches
318
319
def filter_by_extension(dictionary, allowed_extensions):
    """Remove from `dictionary`, in place, each key whose extension is not
    allowed.

    `allowed_extensions` is a collection of lowercase extensions without the
    leading period.  A key with no period at all is kept only when the empty
    string is among the allowed extensions."""
    allowed = frozenset(allowed_extensions)
    keep_extensionless = '' in allowed
    rejected = []
    for filename in dictionary:
        _, dot, extension = filename.rpartition('.')
        if dot:
            keep = extension.lower() in allowed
        else:
            keep = keep_extensionless
        if not keep:
            rejected.append(filename)
    # Delete afterwards so the dict is not modified while being iterated.
    for filename in rejected:
        del dictionary[filename]
332
333
def cd_to_toplevel():
    """Make the directory reported by `git rev-parse --show-toplevel` the
    current working directory."""
    os.chdir(run('git', 'rev-parse', '--show-toplevel'))
338
339
def create_tree_from_workdir(filenames):
    """Build a git tree holding `filenames` as found in the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    tree_id = create_tree(filenames, '--stdin')
    return tree_id
345
346
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
    """Format every file in `changed_lines` and store the results in a git tree.

    `changed_lines` maps filename to the line ranges to format.  When
    `revision` is given, file modes and contents are taken from that revision;
    otherwise they come from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    def file_mode(filename):
        # Octal mode string for the index entry of `filename`.
        if revision:
            ls_tree_cmd = ['git', 'ls-tree',
                           '%s:%s' % (revision, os.path.dirname(filename)),
                           os.path.basename(filename)]
            ls_tree = subprocess.Popen(ls_tree_cmd, stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE)
            listing = ls_tree.communicate()[0]
            # The first field of the ls-tree output is the mode.
            text = oct(int(listing.split()[0], 8))
        else:
            text = oct(os.stat(filename).st_mode)
        # Adjust python3 octal format so that it matches what git expects
        if text.startswith('0o'):
            text = '0' + text[2:]
        return text

    def index_entries():
        # Python 2 dicts provide iteritems(); Python 3 dicts only items().
        if hasattr(changed_lines, 'iteritems'):
            pairs = changed_lines.iteritems()
        else:
            pairs = changed_lines.items()
        for filename, line_ranges in pairs:
            mode = file_mode(filename)
            blob_id = clang_format_to_blob(filename, line_ranges,
                                           revision=revision,
                                           binary=binary,
                                           style=style)
            yield '%s %s\t%s' % (mode, blob_id, filename)

    return create_tree(index_entries(), '--index-info')
378
379
def create_tree(input_lines, mode):
    """Write a tree object built from `input_lines` and return its ID.

    With mode '--stdin', `input_lines` must be an iterable of filenames.  With
    mode '--index-info', it must be an iterable of entries suitable for
    "git update-index --index-info", such as
    "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode is invalid.

    The entries are fed, NUL-terminated, to `git update-index` operating on a
    temporary index, which is then turned into a tree with `git write-tree`."""
    assert mode in ('--stdin', '--index-info')
    cmd = ['git', 'update-index', '--add', '-z', mode]
    with temporary_index_file():
        updater = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        feed = updater.stdin
        for line in input_lines:
            feed.write(to_bytes('%s\0' % line))
        feed.close()
        status = updater.wait()
        if status != 0:
            die('`%s` failed' % ' '.join(cmd))
        return run('git', 'write-tree')
398
399
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
    """Run clang-format on the given file and save the result to a git blob.

    Runs on the file in `revision` if not None, or on the file in the working
    directory if `revision` is None.

    `line_ranges` is an iterable of (start_line, line_count) pairs; each one
    becomes a `-lines=` argument.  `binary` is the clang-format executable and
    `style`, when set, is passed through as `-style=`.

    Dies if the executable cannot be found or if any command in the pipeline
    fails.

    Returns the object ID (SHA-1) of the created blob."""
    clang_format_cmd = [binary]
    if style:
        clang_format_cmd.extend(['-style='+style])
    # -lines takes an inclusive start:end pair, hence the "- 1".
    clang_format_cmd.extend([
        '-lines=%s:%s' % (start_line, start_line+line_count-1)
        for start_line, line_count in line_ranges])
    if revision:
        # The content comes from git on stdin, so clang-format is only told
        # the file name rather than being given a file to open.
        clang_format_cmd.extend(['-assume-filename='+filename])
        git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
        git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        git_show.stdin.close()
        clang_format_stdin = git_show.stdout
    else:
        # clang-format reads the working-directory file itself.
        clang_format_cmd.extend([filename])
        git_show = None
        clang_format_stdin = subprocess.PIPE
    try:
        clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin,
                                        stdout=subprocess.PIPE)
        # When a fresh pipe was requested, switch to the actual file object
        # so that it can be closed below.
        if clang_format_stdin == subprocess.PIPE:
            clang_format_stdin = clang_format.stdin
    except OSError as e:
        if e.errno == errno.ENOENT:
            die('cannot find executable "%s"' % binary)
        else:
            raise
    # Close this process's handle on clang-format's input (either the pipe
    # from `git cat-file` or the unused stdin pipe).
    clang_format_stdin.close()
    # Store clang-format's output as a blob; --path names the file the
    # content belongs to.
    hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
    hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                   stdout=subprocess.PIPE)
    # hash-object now holds the read end; drop this process's handle.
    clang_format.stdout.close()
    stdout = hash_object.communicate()[0]
    # Check the pipeline from its last stage back to its first.
    if hash_object.returncode != 0:
        die('`%s` failed' % ' '.join(hash_object_cmd))
    if clang_format.wait() != 0:
        die('`%s` failed' % ' '.join(clang_format_cmd))
    if git_show and git_show.wait() != 0:
        die('`%s` failed' % ' '.join(git_show_cmd))
    return convert_string(stdout).rstrip('\r\n')
448
449
@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager that points GIT_INDEX_FILE at a temporary index.

    The index is created from `tree` (see create_temporary_index).  On exit
    the previous value of GIT_INDEX_FILE is restored, or the variable is
    removed if it was not set before, and the temporary file is deleted."""
    index_path = create_temporary_index(tree)
    saved_value = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = index_path
    try:
        yield
    finally:
        if saved_value is not None:
            os.environ['GIT_INDEX_FILE'] = saved_value
        else:
            del os.environ['GIT_INDEX_FILE']
        os.remove(index_path)
465
466
def create_temporary_index(tree=None):
    """Create the temporary index file inside the git directory and return
    its path.

    The index is populated from `tree` when one is given; with `tree` None an
    empty index is written."""
    path = os.path.join(run('git', 'rev-parse', '--git-dir'),
                        temp_index_basename)
    source = '--empty' if tree is None else tree
    run('git', 'read-tree', '--index-output='+path, source)
    return path
478
479
def print_diff(old_tree, new_tree):
    """Show the differences between the two trees on stdout."""
    # The porcelain 'diff' is used instead of the plumbing 'diff-tree' because
    # the output is meant to be read by the user, and only the former does
    # nice things like color and pagination.
    #
    # Only modified files are shown: `new_tree` contains just the files that
    # were formatted, so without the filter every other file would be listed
    # as deleted.
    diff_cmd = ['git', 'diff', '--diff-filter=M', old_tree, new_tree, '--']
    subprocess.check_call(diff_cmd)
491
492
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Exits with status 2 if any of the affected files has unstaged changes,
    unless `force` is set.  If `patch_mode`, runs `git checkout --patch` to
    select hunks interactively.

    Returns the list of files that differ between `old_tree` and `new_tree`."""
    changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                        '--name-only', old_tree,
                        new_tree).rstrip('\0').split('\0')
    if not force:
        # '--' keeps file names from being parsed as options.
        unstaged_files = run('git', 'diff-files', '--name-status', '--',
                             *changed_files)
        if unstaged_files:
            print('The following files would be modified but '
                  'have unstaged changes:', file=sys.stderr)
            print(unstaged_files, file=sys.stderr)
            print('Please commit, stage, or stash them first.', file=sys.stderr)
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented
        # with a message saying "Discard ... from worktree".  Instead, we use
        # the old tree as the index and checkout from new_tree, which gives
        # the slightly better message, "Apply ... to index and worktree".
        # This is not quite right, since it won't be applied to the user's
        # index, but oh well.
        with temporary_index_file(old_tree):
            subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-a', '-f')
    return changed_files
523
524
def run(*args, **kwargs):
    """Run the command `args` and return its standard output as a string.

    Keyword arguments:
      stdin: data written to the command's standard input (default: empty).
        Text is encoded as UTF-8 before being written.
      verbose: if true (the default), also print a line naming the command
        when it writes to stderr or fails.
      strip: if true (the default), strip trailing newlines from the output.

    Anything the command wrote to stderr is forwarded to stderr.  If the
    command exits with a non-zero status, the script exits with status 2.

    Raises TypeError for any other keyword argument."""
    stdin = kwargs.pop('stdin', '')
    verbose = kwargs.pop('verbose', True)
    strip = kwargs.pop('strip', True)
    for name in kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    # The child's pipes are binary, so text input must be encoded first;
    # on Python 3 a non-empty str would otherwise be rejected.
    if stdin is not None and not isinstance(stdin, bytes):
        stdin = stdin.encode('utf-8')
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE)
    stdout, stderr = p.communicate(input=stdin)

    stdout = convert_string(stdout)
    stderr = convert_string(stderr)

    if p.returncode == 0:
        if stderr:
            if verbose:
                print('`%s` printed to stderr:' % ' '.join(args),
                      file=sys.stderr)
            print(stderr.rstrip(), file=sys.stderr)
        if strip:
            stdout = stdout.rstrip('\r\n')
        return stdout
    if verbose:
        print('`%s` returned %s' % (' '.join(args), p.returncode),
              file=sys.stderr)
    # The command's own error output is shown even when not verbose.
    if stderr:
        print(stderr.rstrip(), file=sys.stderr)
    sys.exit(2)
551
552
def die(message):
    """Print `message` to stderr, prefixed with "error:", and exit with
    status 2."""
    print('error:', message, file=sys.stderr)
    raise SystemExit(2)
556
557
def to_bytes(str_input):
    """Return `str_input` as binary data: bytes are passed through unchanged,
    anything else is encoded as UTF-8."""
    already_binary = isinstance(str_input, bytes)
    return str_input if already_binary else str_input.encode('utf-8')
563
564
def to_string(bytes_input):
    """Return `bytes_input` as the interpreter's native `str` type.

    A native `str` is returned unchanged.  Binary data (Python 3 `bytes` or a
    `bytearray`) is decoded as UTF-8.  Any other text type (Python 2
    `unicode`) is encoded as UTF-8, which yields a Python 2 `str`."""
    if isinstance(bytes_input, str):
        return bytes_input
    if isinstance(bytes_input, (bytes, bytearray)):
        # Only reachable on Python 3 for bytes: there `bytes` is not `str`
        # and has no encode() method, so it must be decoded instead.
        return bytes_input.decode('utf-8')
    return bytes_input.encode('utf-8')
569
570
def convert_string(bytes_input):
    """Convert command output to a native string.

    The input is decoded as UTF-8 and passed through to_string().  If it has
    no decode() method, or cannot be decoded, `str(bytes_input)` is returned
    instead."""
    try:
        decoded = bytes_input.decode('utf-8')
        return to_string(decoded)
    except (AttributeError, UnicodeError):
        # AttributeError: 'str' object has no attribute 'decode'.
        return str(bytes_input)
578
# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()