Fathi Boudra | 422bf77 | 2019-12-02 11:10:16 +0200 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | # |
| 3 | # Copyright (c) 2019, Arm Limited. All rights reserved. |
| 4 | # |
| 5 | # SPDX-License-Identifier: BSD-3-Clause |
| 6 | # |
| 7 | |
| 8 | import argparse |
| 9 | import codecs |
| 10 | import os |
| 11 | import re |
| 12 | import sys |
| 13 | import utils |
| 14 | |
| 15 | |
# Only files whose name ends with one of these extensions are checked.
VALID_FILE_EXTENSIONS = ('.c', '.S', '.h')


# Directories (relative to the tree root) that are skipped. Hidden folders and
# files are always ignored. Entries must not end in '/'.
IGNORED_FOLDERS = (
    "include/lib/stdlib",
    "include/lib/libc",
    "include/lib/libfdt",
    "lib/libfdt",
    "lib/libc",
    "lib/stdlib",
)

# Individual files to skip even though their folder is not ignored.
IGNORED_FILES = ()
| 32 | |
def line_remove_comments(line):
    '''Remove C comments within a line and return the resulting string.

    This code doesn't know if the line is commented in a multi line comment
    that involves more lines than itself: only comments that start on this
    line are handled. A "/* ... */" comment is cut out; a "/*" without a
    matching "*/" later on the same line, or a "//", discards the rest of the
    line.'''

    # Block comments. The terminator is searched for only *after* the opener:
    # a stray "*/" located before the first "/*" must not be taken as its end
    # (doing so makes the line grow on every pass and the loop never ends),
    # and the '*' of "/*" cannot double as the '*' of "*/" (as in "/*/").
    start_comment = line.find("/*")
    while start_comment != -1:
        end_comment = line.find("*/", start_comment + 2)
        if end_comment == -1:
            # The comment doesn't end on this line: drop everything after it.
            line = line[:start_comment]
            break
        # Skip the two characters of "*/" as well.
        line = line[:start_comment] + line[end_comment + 2:]
        # Rescan from the beginning: joining the two halves may have produced
        # a new "/*".
        start_comment = line.find("/*")

    # Single line comments
    comment = line.find("//")
    if comment != -1:
        line = line[:comment]

    return line
| 53 | |
| 54 | |
def line_get_include_path(line):
    '''It takes a line of code with an include directive and returns the file
    path with < or the first " included to tell them apart.

    The trailing ".h>" / '.h"' is not part of the returned string, e.g.
    '#include <stdio.h>' gives '<stdio' and '#include "a/b.h"' gives '"a/b'.
    Returns None if the line has no '<' or '"', or if the matching ".h"
    terminator does not follow the opening delimiter.'''

    sys_start = line.find('<')
    usr_start = line.find('"')

    # Whichever delimiter comes first decides the kind of include, so that a
    # '<' appearing later in the line (for instance inside a trailing comment)
    # cannot turn a user include into a system one.
    if sys_start != -1 and (usr_start == -1 or sys_start < usr_start):
        start, terminator = sys_start, '.h>'
    elif usr_start != -1:
        start, terminator = usr_start, '.h"'
    else:
        return None

    # Only look for the terminator after the opening delimiter.
    end = line.find(terminator, start + 1)
    if end == -1:
        return None

    return line[start:end]
| 70 | |
| 71 | |
def file_get_include_list(path, _encoding='ascii'):
    '''Reads all lines from a file and returns a list of include paths.

    It tries to read the file in ASCII mode and, if decoding fails, retries in
    UTF-8 mode (reporting the file as non-ASCII). Each entry of the returned
    list is what line_get_include_path() returns for an #include line.

    Returns None if the file can't be opened or can't be decoded as UTF-8
    either. Any other error while parsing is reported and the includes
    collected so far are returned.'''

    try:
        f = codecs.open(path, encoding=_encoding)
    except Exception:
        print("ERROR:" + path + ":open() error!")
        utils.print_exception_info()
        return None

    # Allow spaces in between, but not comments.
    pattern = re.compile(r"^\s*#\s*include\s\s*[\"<]")

    inc_list = []

    # The context manager closes the file on every exit path, including the
    # early return below.
    with f:
        try:
            for line in f:
                if pattern.match(line):
                    # Strip comments first so that a trailing comment can't
                    # confuse the delimiter detection.
                    line = line_remove_comments(line)
                    inc = line_get_include_path(line)
                    if inc is not None:
                        inc_list.append(inc)

        except UnicodeDecodeError:
            # Capture exceptions caused by non-ASCII encoded files.
            if _encoding == 'ascii':
                # Reopen the file in UTF-8 mode. Python allows a file to be
                # opened more than once at a time. Exceptions for the
                # recursively called function will be handled inside it.
                print("ERROR:" + path + ":Non-ASCII encoded file!")
                inc_list = file_get_include_list(path, 'utf-8')
            else:
                # Already tried to decode in UTF-8 mode. Don't try again.
                print("ERROR:" + path + ":Failed to decode UTF-8!")
                utils.print_exception_info()
                return None
        except Exception:
            print("ERROR:" + path + ":error while parsing!")
            utils.print_exception_info()

    return inc_list
| 123 | |
| 124 | |
def inc_order_is_correct(inc_list, path, commit_hash=""):
    '''Tell whether the given include list is correctly ordered.

    Entries start with '<' (system include) or '"' (user include). The list is
    correct when no system include follows a user include and each of the two
    groups is sorted. One error message per violated rule is printed to
    stdout, prefixed with the optional commit hash and the path.
    Returns True if the order is correct, False otherwise.'''

    # Zero or one include can't be out of order.
    if len(inc_list) < 2:
        return True

    # Common prefix of every error message ("ERROR:[hash:]path").
    hash_prefix = commit_hash + ":" if commit_hash != "" else commit_hash
    error_prefix = "ERROR:" + hash_prefix + path

    # Rule 1: once a user include has been seen, no system include may follow.
    seen_user_include = False
    sys_after_user = False
    for entry in inc_list:
        kind = entry[0]
        if kind == '"':
            seen_user_include = True
        elif kind == '<' and seen_user_include:
            sys_after_user = True

    # Rules 2 and 3: each group, taken separately, must already be sorted.
    system_group = [entry for entry in inc_list if entry.startswith('<')]
    user_group = [entry for entry in inc_list if entry.startswith('"')]
    sys_order_wrong = system_group != sorted(system_group)
    user_order_wrong = user_group != sorted(user_group)

    # Report every violated rule.
    if sys_after_user:
        print(error_prefix + ":System include after user include.")
    if sys_order_wrong:
        print(error_prefix + ":System includes not in order.")
    if user_order_wrong:
        print(error_prefix + ":User includes not in order.")

    if sys_after_user or sys_order_wrong or user_order_wrong:
        return False
    return True
| 177 | |
| 178 | |
def file_is_correct(path):
    '''Return True if the includes of the file at the given path are in the
    right order, False if they aren't or if the file couldn't be read.'''

    includes = file_get_include_list(path)

    # None means the include list couldn't be obtained: flag as incorrect.
    if includes is None:
        return False

    return inc_order_is_correct(includes, path)
| 189 | |
| 190 | |
def directory_tree_is_correct():
    '''Check every file listed by "git ls-files" in the current directory,
    skipping the ones filtered out by utils.file_is_ignored().
    Returns True if all checked files are correct, False if any of them isn't
    or if the git command returns a non-zero code.'''

    # Ask git for the list of tracked files.
    rc, stdout, stderr = utils.shell_command(['git', 'ls-files'])
    if rc != 0:
        return False

    tree_ok = True

    for tracked_file in stdout.splitlines():
        if utils.file_is_ignored(tracked_file, VALID_FILE_EXTENSIONS,
                                 IGNORED_FILES, IGNORED_FOLDERS):
            continue
        # Remember the failure but keep going, so that every incorrect file
        # gets reported in a single run.
        if not file_is_correct(tracked_file):
            tree_ok = False

    return tree_ok
| 213 | |
| 214 | |
def patch_is_correct(base_commit, end_commit):
    '''Get the output of a git log with patches for the range
    base_commit..end_commit and analyse the includes of each modified file.

    Only the include lines visible in the patch (added lines and context
    lines) are checked, separately for each file of each commit.
    Returns True if every checked file is correct, False if any of them isn't
    or if the git command returns a non-zero code.'''

    # Get patches of the affected commits with one line of context.
    (rc, stdout, _stderr) = utils.shell_command(['git', 'log', '--unified=1',
                                                 '--pretty="commit %h"',
                                                 base_commit + '..' + end_commit])

    if rc != 0:
        return False

    # Parse stdout to get all renamed, modified and added file paths.
    # Then, check order of new includes. The log output begins with each commit
    # comment and then a list of files and differences.
    lines = stdout.splitlines()

    all_files_correct = True

    # All files without a valid extension are ignored. /dev/null is also used
    # by git patch to tell that a file has been deleted, and it doesn't have a
    # valid extension, so it will be used as a reset value.
    path = "/dev/null"
    commit_hash = "0"
    # There are only 2 states: commit msg or file. Start inside commit message
    # because the include list is not checked when changing from this state.
    inside_commit_message = True
    inc_list = []

    # Allow spaces in between, but not comments.
    # Check for lines with "+" or " " at the beginning (added or not modified)
    pattern = re.compile(r"^[+ ]\s*#\s*include\s\s*[\"<]")

    # The log doesn't have any indicator of the end, so the last line has to
    # be recognised by its index in order to check the last file.
    last_line_index = len(lines) - 1

    for i, line in enumerate(lines):

        # 1. Check which kind of line this is. If this line means that the
        # file being analysed is finished, don't update the path or hash until
        # after checking the order of includes, they are used in error
        # messages. Check for any includes in case this is the last line of
        # the log.

        # Line format: <"commit 0000000"> (quotes present in stdout)
        new_commit = line.startswith('"commit ')
        # Line format: <+++ b/path>
        new_file = not new_commit and line.startswith("+++ b/")
        log_last_line = i == last_line_index

        # Any other line: check for includes inside files, not in the commit
        # message.
        if not (new_commit or new_file or inside_commit_message):
            if pattern.match(line):
                # Strip comments first so that a trailing comment can't
                # confuse the delimiter detection.
                inc = line_get_include_path(line_remove_comments(line))
                if inc is not None:
                    inc_list.append(inc)

        # 2. Check order of includes if the file that was being analysed has
        # finished. Print hash and path of the analysed file in the error
        # messages.

        if new_commit or new_file or log_last_line:
            if not inside_commit_message:  # If a file is being analysed
                if not utils.file_is_ignored(path, VALID_FILE_EXTENSIONS,
                                             IGNORED_FILES, IGNORED_FOLDERS):
                    if not inc_order_is_correct(inc_list, path, commit_hash):
                        all_files_correct = False
            inc_list = []  # Reset the include list for the next file (if any)

        # 3. Update path or hash for the new file or commit. Update state.

        if new_commit:  # New commit, save hash
            inside_commit_message = True  # Enter commit message state
            commit_hash = line[8:-1]  # Skip '"commit ', discard last "
        elif new_file:  # New file, save path.
            inside_commit_message = False  # Exit commit message state
            path = line[6:]  # Skip "+++ b/"

    return all_files_correct
| 299 | |
| 300 | |
| 301 | |
def parse_cmd_line(argv, prog_name):
    '''Parse the command line arguments in argv (program name excluded) and
    return the resulting argparse namespace, with attributes tree, patch,
    from_ref and to_ref. prog_name is the name shown in the help output.'''

    description = "Check alphabetical order of #includes"
    epilog = """
For each source file in the tree, checks that #include's C preprocessor
directives are ordered alphabetically (as mandated by the Trusted
Firmware coding style). System header includes must come before user
header includes.
"""
    patch_help = """
Patch mode.
Instead of checking all files in the source tree, the script will consider
only files that are modified by the latest patch(es)."""

    # RawTextHelpFormatter keeps the line breaks of the texts above as written.
    cmd_parser = argparse.ArgumentParser(
        prog=prog_name,
        formatter_class=argparse.RawTextHelpFormatter,
        description=description,
        epilog=epilog)

    cmd_parser.add_argument(
        "--tree", "-t",
        default=os.curdir,
        help="Path to the source tree to check (default: %(default)s)")
    cmd_parser.add_argument(
        "--patch", "-p",
        action="store_true",
        help=patch_help)
    cmd_parser.add_argument(
        "--from-ref",
        default="master",
        help="Base commit in patch mode (default: %(default)s)")
    cmd_parser.add_argument(
        "--to-ref",
        default="HEAD",
        help="Final commit in patch mode (default: %(default)s)")

    return cmd_parser.parse_args(argv)
| 331 | |
| 332 | |
if __name__ == "__main__":
    # Script entry point: exits with code 1 if any checked file has its
    # includes in the wrong order, and with code 0 otherwise.
    args = parse_cmd_line(sys.argv[1:], sys.argv[0])

    # Resolve the tree path before changing directory: once inside the tree, a
    # relative path would be resolved against the tree itself and the message
    # below would show a wrong location (e.g. ".../src/src" for "--tree src").
    tree_path = os.path.abspath(args.tree)
    os.chdir(tree_path)

    if args.patch:
        print("Checking files modified between patches " + args.from_ref
              + " and " + args.to_ref + "...")
        if not patch_is_correct(args.from_ref, args.to_ref):
            sys.exit(1)
    else:
        print("Checking all files in directory '%s'..." % tree_path)
        if not directory_tree_is_correct():
            sys.exit(1)

    # All source code files are correct.
    sys.exit(0)