Sync scripts with Arm internal CI
This patch syncs the utility and static-check scripts in the
script directory with the internal CI.
Where a path update is required,
the changes have been commented out.
Signed-off-by: Zelalem <zelalem.aweke@arm.com>
Change-Id: Ifa4bd805e345184d1378e8423e5f878a2fbfbcd4
diff --git a/script/static-checks/check-include-order.py b/script/static-checks/check-include-order.py
index 481ca42..4f605f3 100755
--- a/script/static-checks/check-include-order.py
+++ b/script/static-checks/check-include-order.py
@@ -1,304 +1,197 @@
#!/usr/bin/env python3
#
-# Copyright (c) 2019, Arm Limited. All rights reserved.
+# Copyright (c) 2019-2020, Arm Limited. All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#
import argparse
import codecs
+import collections
import os
import re
+import subprocess
import sys
import utils
+import yaml
+import logging
# File extensions to check
-VALID_FILE_EXTENSIONS = ('.c', '.S', '.h')
+VALID_FILE_EXTENSIONS = (".c", ".S", ".h")
# Paths inside the tree to ignore. Hidden folders and files are always ignored.
# They mustn't end in '/'.
-IGNORED_FOLDERS = ("include/lib/stdlib",
- "include/lib/libc",
- "include/lib/libfdt",
- "lib/libfdt",
- "lib/libc",
- "lib/stdlib")
-
-# List of ignored files in folders that aren't ignored
-IGNORED_FILES = (
+IGNORED_FOLDERS = (
+ "include/lib/stdlib",
+ "include/lib/libc",
+ "include/lib/libfdt",
+ "lib/libfdt",
+ "lib/libc",
+ "lib/stdlib",
)
-def line_remove_comments(line):
- '''Remove C comments within a line. This code doesn't know if the line is
- commented in a multi line comment that involves more lines than itself.'''
+# List of ignored files in folders that aren't ignored
+IGNORED_FILES = ()
- # Multi line comments
- while line.find("/*") != -1:
- start_comment = line.find("/*")
- end_comment = line.find("*/")
- if end_comment != -1:
- end_comment = end_comment + 2 # Skip the "*/"
- line = line[ : start_comment ] + line[ end_comment : ]
- else: # The comment doesn't end this line.
- line = line[ : start_comment ]
-
- # Single line comments
- comment = line.find("//")
- if comment != -1:
- line = line[ : comment ]
-
- return line
+INCLUDE_RE = re.compile(r"^\s*#\s*include\s\s*(?P<path>[\"<].+[\">])")
+INCLUDE_RE_DIFF = re.compile(r"^\+?\s*#\s*include\s\s*(?P<path>[\"<].+[\">])")
-def line_get_include_path(line):
- '''It takes a line of code with an include directive and returns the file
- path with < or the first " included to tell them apart.'''
- if line.find('<') != -1:
- if line.find('.h>') == -1:
- return None
- inc = line[ line.find('<') : line.find('.h>') ]
- elif line.find('"') != -1:
- if line.find('.h"') == -1:
- return None
- inc = line[ line.find('"') : line.find('.h"') ]
- else:
- inc = None
-
- return inc
+def include_paths(lines, diff_mode=False):
+ """List all include paths in a file. Ignore starting `+` in diff mode."""
+ pattern = INCLUDE_RE_DIFF if diff_mode else INCLUDE_RE
+ matches = (pattern.match(line) for line in lines)
+ return [m.group("path") for m in matches if m]
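+# Illustrative example (hypothetical input lines): include_paths() returns the
+# include paths with their delimiters, e.g.
+#   include_paths(['#include <assert.h>', '+#include "platform_def.h"'],
+#                 diff_mode=True)
+#   -> ['<assert.h>', '"platform_def.h"']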
-def file_get_include_list(path, _encoding='ascii'):
- '''Reads all lines from a file and returns a list of include paths. It
- tries to read the file in ASCII mode and UTF-8 if it fails. If it succeeds
- it will return a list of include paths. If it fails it will return None.'''
-
- inc_list = []
-
+def file_include_list(path):
+ """Return a list of all include paths in a file or None on failure."""
try:
- f = codecs.open(path, encoding=_encoding)
- except:
- print("ERROR:" + path + ":open() error!")
- utils.print_exception_info()
+ with codecs.open(path, encoding="utf-8") as f:
+ return include_paths(f)
+ except Exception:
+ logging.exception(path + ":error while parsing.")
return None
- # Allow spaces in between, but not comments.
- pattern = re.compile(r"^\s*#\s*include\s\s*[\"<]")
-
- fatal_error = False
-
- try:
- for line in f:
- if pattern.match(line):
- line_remove_comments(line)
- inc = line_get_include_path(line)
- if inc != None:
- inc_list.append(inc)
-
- except UnicodeDecodeError:
- # Capture exceptions caused by non-ASCII encoded files.
- if _encoding == 'ascii':
- # Reopen the file in UTF-8 mode. Python allows a file to be opened
- # more than once at a time. Exceptions for the recursively called
- # function will be handled inside it.
- # Output a warning.
- print("ERROR:" + path + ":Non-ASCII encoded file!")
- inc_list = file_get_include_list(path,'utf-8')
- else:
- # Already tried to decode in UTF-8 mode. Don't try again.
- print("ERROR:" + path + ":Failed to decode UTF-8!")
- fatal_error = True # Can't return while file is still open.
- utils.print_exception_info()
- except:
- print("ERROR:" + path + ":error while parsing!")
- utils.print_exception_info()
-
- f.close()
-
- if fatal_error:
- return None
-
- return inc_list
-
def inc_order_is_correct(inc_list, path, commit_hash=""):
- '''Returns true if the provided list is in order. If not, output error
- messages to stdout.'''
+ """Returns true if the provided list is in order. If not, output error
+ messages to stdout."""
# If there are less than 2 includes there's no need to check.
if len(inc_list) < 2:
return True
if commit_hash != "":
- commit_hash = commit_hash + ":" # For formatting
+ commit_hash = commit_hash + ":"
- sys_after_user = False
- sys_order_wrong = False
- user_order_wrong = False
+ # Get list of system includes from libc include directory.
+ libc_incs = [f for f in os.listdir("include/lib/libc") if f.endswith(".h")]
- # First, check if all system includes are before the user includes.
- previous_delimiter = '<' # Begin with system includes.
+ # First, check if all includes are in the appropriate group.
+ inc_group = "System"
+ incs = collections.defaultdict(list)
+ error_msgs = []
for inc in inc_list:
- delimiter = inc[0]
- if previous_delimiter == '<' and delimiter == '"':
- previous_delimiter = '"' # Started user includes.
- elif previous_delimiter == '"' and delimiter == '<':
- sys_after_user = True
+ if inc[1:-1] in libc_incs:
+ if inc_group != "System":
+ error_msgs.append(inc[1:-1] + " should be in system group, at the top")
+ elif (
+ "plat/" in inc
+ or "platform" in inc
+ or (inc.startswith('"') and "plat" in path)
+ ):
+ inc_group = "Platform"
+ elif inc_group in ("Project", "System"):
+ inc_group = "Project"
+ else:
+ error_msgs.append(
+ inc[1:-1] + " should be in project group, after system group"
+ )
+ incs[inc_group].append(inc[1:-1])
- # Then, check alphabetic order (system and user separately).
- usr_incs = []
- sys_incs = []
-
- for inc in inc_list:
- if inc.startswith('<'):
- sys_incs.append(inc)
- elif inc.startswith('"'):
- usr_incs.append(inc)
-
- if sorted(sys_incs) != sys_incs:
- sys_order_wrong = True
- if sorted(usr_incs) != usr_incs:
- user_order_wrong = True
+ # Then, check alphabetic order (system, project and user separately).
+ if not error_msgs:
+ for name, inc_list in incs.items():
+ if sorted(inc_list) != inc_list:
+ error_msgs.append("{} includes not in order.".format(name))
# Output error messages.
- if sys_after_user:
- print("ERROR:" + commit_hash + path +
- ":System include after user include.")
- if sys_order_wrong:
- print("ERROR:" + commit_hash + path +
- ":System includes not in order.")
- if user_order_wrong:
- print("ERROR:" + commit_hash + path +
- ":User includes not in order.")
-
- return not ( sys_after_user or sys_order_wrong or user_order_wrong )
+ if error_msgs:
+ print(yaml.dump({commit_hash + path: error_msgs}))
+ return False
+ else:
+ return True
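+# For illustration, the grouping above expects system (libc) headers first,
+# then project headers, then platform headers, each group sorted
+# alphabetically, e.g. (hypothetical file):
+#   #include <assert.h>
+#   #include <stdint.h>
+#   #include <common/debug.h>
+#   #include <platform_def.h>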
def file_is_correct(path):
- '''Checks whether the order of includes in the file specified in the path
- is correct or not.'''
-
- inc_list = file_get_include_list(path)
-
- if inc_list == None: # Failed to decode - Flag as incorrect.
- return False
-
- return inc_order_is_correct(inc_list, path)
+ """Checks whether the order of includes in the file specified in the path
+ is correct or not."""
+ inc_list = file_include_list(path)
+ return inc_list is not None and inc_order_is_correct(inc_list, path)
def directory_tree_is_correct():
- '''Checks all tracked files in the current git repository, except the ones
+ """Checks all tracked files in the current git repository, except the ones
explicitly ignored by this script.
- Returns True if all files are correct.'''
-
- # Get list of files tracked by git
- (rc, stdout, stderr) = utils.shell_command([ 'git', 'ls-files' ])
+ Returns True if all files are correct."""
+ (rc, stdout, stderr) = utils.shell_command(["git", "ls-files"])
if rc != 0:
return False
-
all_files_correct = True
-
- files = stdout.splitlines()
-
- for f in files:
- if not utils.file_is_ignored(f, VALID_FILE_EXTENSIONS, IGNORED_FILES, IGNORED_FOLDERS):
- if not file_is_correct(f):
- # Make the script end with an error code, but continue
- # checking files even if one of them is incorrect.
- all_files_correct = False
-
+ for f in stdout.splitlines():
+ if not utils.file_is_ignored(
+ f, VALID_FILE_EXTENSIONS, IGNORED_FILES, IGNORED_FOLDERS
+ ):
+ all_files_correct &= file_is_correct(f)
return all_files_correct
+def group_lines(patchlines, starting_with):
+ """Generator of (name, lines) almost the same as itertools.groupby
+
+ This function's control flow is non-trivial. In particular, the clearing
+ of the lines variable, marked with [1], is intentional and must come
+ after the yield. That's because we must yield the (name, lines) tuple
+ after we have found the name of the next section but before we assign the
+ name and start collecting lines. Further, [2] is required to yield the
+ last block as there will not be a block start delimiter at the end of
+ the stream.
+ """
+ lines = []
+ name = None
+ for line in patchlines:
+ if line.startswith(starting_with):
+ if name:
+ yield name, lines
+ name = line[len(starting_with) :]
+ lines = [] # [1]
+ else:
+ lines.append(line)
+ yield name, lines # [2]
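+# Illustrative example (hypothetical stream): with starting_with="commit ",
+#   ["commit aaa", "+++ b/foo.c", "commit bbb", "+++ b/bar.c"]
+# yields ("aaa", ["+++ b/foo.c"]) and then ("bbb", ["+++ b/bar.c"]).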
+
+
+def group_files(commitlines):
+ """Generator of (commit hash, lines) almost the same as itertools.groupby"""
+ return group_lines(commitlines, "+++ b/")
+
+
+def group_commits(commitlines):
+ """Generator of (file name, lines) almost the same as itertools.groupby"""
+ return group_lines(commitlines, "commit ")
+
+
def patch_is_correct(base_commit, end_commit):
- '''Get the output of a git diff and analyse each modified file.'''
+ """Get the output of a git diff and analyse each modified file."""
# Get patches of the affected commits with one line of context.
- (rc, stdout, stderr) = utils.shell_command([ 'git', 'log', '--unified=1',
- '--pretty="commit %h"',
- base_commit + '..' + end_commit ])
+ gitlog = subprocess.run(
+ [
+ "git",
+ "log",
+ "--unified=1",
+ "--pretty=commit %h",
+ base_commit + ".." + end_commit,
+ ],
+ stdout=subprocess.PIPE,
+ )
- if rc != 0:
+ if gitlog.returncode != 0:
return False
- # Parse stdout to get all renamed, modified and added file paths.
- # Then, check order of new includes. The log output begins with each commit
- # comment and then a list of files and differences.
- lines = stdout.splitlines()
-
+ gitlines = gitlog.stdout.decode("utf-8").splitlines()
all_files_correct = True
-
- # All files without a valid extension are ignored. /dev/null is also used by
- # git patch to tell that a file has been deleted, and it doesn't have a
- # valid extension, so it will be used as a reset value.
- path = "/dev/null"
- commit_hash = "0"
- # There are only 2 states: commit msg or file. Start inside commit message
- # because the include list is not checked when changing from this state.
- inside_commit_message = True
- inc_list = []
-
- # Allow spaces in between, but not comments.
- # Check for lines with "+" or " " at the beginning (added or not modified)
- pattern = re.compile(r"^[+ ]\s*#\s*include\s\s*[\"<]")
-
- total_line_num = len(lines)
- # By iterating this way the loop can detect if it's the last iteration and
- # check the last file (the log doesn't have any indicator of the end)
- for i, line in enumerate(lines): # Save line number in i
-
- new_commit = False
- new_file = False
- log_last_line = i == total_line_num-1
-
- # 1. Check which kind of line this is. If this line means that the file
- # being analysed is finished, don't update the path or hash until after
- # checking the order of includes, they are used in error messages. Check
- # for any includes in case this is the last line of the log.
-
- # Line format: <"commit 0000000"> (quotes present in stdout)
- if line.startswith('"commit '): # New commit
- new_commit = True
- # Line format: <+++ b/path>
- elif line.startswith("+++ b/"): # New file.
- new_file = True
- # Any other line
- else: # Check for includes inside files, not in the commit message.
- if not inside_commit_message:
- if pattern.match(line):
- line_remove_comments(line)
- inc = line_get_include_path(line)
- if inc != None:
- inc_list.append(inc)
-
- # 2. Check order of includes if the file that was being analysed has
- # finished. Print hash and path of the analised file in the error
- # messages.
-
- if new_commit or new_file or log_last_line:
- if not inside_commit_message: # If a file is being analysed
- if not utils.file_is_ignored(path, VALID_FILE_EXTENSIONS,
- IGNORED_FILES, IGNORED_FOLDERS):
- if not inc_order_is_correct(inc_list, path, commit_hash):
- all_files_correct = False
- inc_list = [] # Reset the include list for the next file (if any)
-
- # 3. Update path or hash for the new file or commit. Update state.
-
- if new_commit: # New commit, save hash
- inside_commit_message = True # Enter commit message state
- commit_hash = line[ 8 : -1 ] # Discard last "
- elif new_file: # New file, save path.
- inside_commit_message = False # Save path, exit commit message state
- # A deleted file will appear as /dev/null so it will be ignored.
- path = line[ 6 : ]
-
+ for commit, comlines in group_commits(gitlines):
+ for path, lines in group_files(comlines):
+ all_files_correct &= inc_order_is_correct(
+ include_paths(lines, diff_mode=True), path, commit
+ )
return all_files_correct
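+# For illustration, the git log output grouped above looks roughly like
+# (hypothetical hash and path):
+#   commit 1234abc
+#   diff --git a/bl31/bl31_main.c b/bl31/bl31_main.c
+#   +++ b/bl31/bl31_main.c
+#   +#include <assert.h>
+# so group_commits() yields one block per commit and group_files() yields one
+# block per "+++ b/<path>" section within it.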
-
def parse_cmd_line(argv, prog_name):
parser = argparse.ArgumentParser(
prog=prog_name,
@@ -309,23 +202,34 @@
directives are ordered alphabetically (as mandated by the Trusted
Firmware coding style). System header includes must come before user
header includes.
-""")
+""",
+ )
- parser.add_argument("--tree", "-t",
- help="Path to the source tree to check (default: %(default)s)",
- default=os.curdir)
- parser.add_argument("--patch", "-p",
- help="""
+ parser.add_argument(
+ "--tree",
+ "-t",
+ help="Path to the source tree to check (default: %(default)s)",
+ default=os.curdir,
+ )
+ parser.add_argument(
+ "--patch",
+ "-p",
+ help="""
Patch mode.
Instead of checking all files in the source tree, the script will consider
only files that are modified by the latest patch(es).""",
- action="store_true")
- parser.add_argument("--from-ref",
- help="Base commit in patch mode (default: %(default)s)",
- default="master")
- parser.add_argument("--to-ref",
- help="Final commit in patch mode (default: %(default)s)",
- default="HEAD")
+ action="store_true",
+ )
+ parser.add_argument(
+ "--from-ref",
+ help="Base commit in patch mode (default: %(default)s)",
+ default="master",
+ )
+ parser.add_argument(
+ "--to-ref",
+ help="Final commit in patch mode (default: %(default)s)",
+ default="HEAD",
+ )
args = parser.parse_args(argv)
return args
@@ -336,8 +240,13 @@
os.chdir(args.tree)
if args.patch:
- print("Checking files modified between patches " + args.from_ref
- + " and " + args.to_ref + "...")
+ print(
+ "Checking files modified between patches "
+ + args.from_ref
+ + " and "
+ + args.to_ref
+ + "..."
+ )
if not patch_is_correct(args.from_ref, args.to_ref):
sys.exit(1)
else: