blob: 98961f920c9767f207ef1ebc58e1b27c05798bba [file] [log] [blame]
Leonardo Sandoval314eed82020-08-05 13:32:04 -05001#!/usr/bin/env python3
2#
Xinyu Zhang235d5ae2021-02-07 10:42:38 +08003# Copyright (c) 2019-2021, Arm Limited. All rights reserved.
Leonardo Sandoval314eed82020-08-05 13:32:04 -05004#
5# SPDX-License-Identifier: BSD-3-Clause
6#
7
8"""
9Check if a given file includes the copyright boilerplate.
10This checker supports the following comment styles:
11 /*
12 *
13 //
14 #
15"""
16
17import argparse
18import datetime
19import collections
20import fnmatch
21import shlex
22import os
23import re
24import sys
25import utils
26from itertools import islice
27
# File extensions to check
VALID_FILE_EXTENSIONS = (
    '.c', '.conf', '.dts', '.dtsi', '.editorconfig',
    '.h', '.i', '.ld', 'Makefile', '.mk', '.msvc',
    '.py', '.S', '.scat', '.sh',
)

# Paths inside the tree to ignore. Hidden folders and files are always ignored.
# They mustn't end in '/'.
IGNORED_FOLDERS = (
    'platform/ext',
    'bl2/ext',
    'docs',
    'lib',
    'tools',
)

# List of ignored files in folders that aren't ignored
IGNORED_FILES = ()

# NOTE: the regex fragments below that contain backslashes are raw strings.
# Sequences such as '\*', '\#' and '\)' are not valid Python string escapes
# and trigger a warning in a normal string literal; as raw strings they keep
# exactly the same value without the warning.

# Supported comment styles (Python regex)
COMMENT_PATTERN = r'(\*|/\*|\#|//)'

# Any combination of spaces and/or tabs.
# Deliberately a normal string: '\t' is a valid escape, and keeping a literal
# tab here leaves the assembled pattern text unchanged.
SPACING = '[ \t]*'

# Line must start with a comment and optional spacing
LINE_START = '^' + SPACING + COMMENT_PATTERN + SPACING

# Line end with optional spacing
EOL = SPACING + '$'

# Year or period as YYYY or YYYY-YYYY
TIME_PERIOD = '[0-9]{4}(-[0-9]{4})?'

# Any string with valid license ID, don't allow adding postfix
LICENSE_ID = r'.*(BSD-3-Clause|BSD-2-Clause-FreeBSD)([ ,.\);].*)?'

# File must contain both lines to pass the check
COPYRIGHT_LINE = LINE_START + 'Copyright' + '.*' + TIME_PERIOD + '.*' + EOL
LICENSE_ID_LINE = LINE_START + 'SPDX-License-Identifier:' + LICENSE_ID + EOL

# Compiled license patterns. MULTILINE lets '^' and '$' anchor on every line
# of a file that has been read into a single string.
COPYRIGHT_PATTERN = re.compile(COPYRIGHT_LINE, re.MULTILINE)
LICENSE_ID_PATTERN = re.compile(LICENSE_ID_LINE, re.MULTILINE)

# Year used to flag copyright lines that have not been refreshed
CURRENT_YEAR = str(datetime.datetime.now().year)

# Result codes of the check (also used as the process exit status)
COPYRIGHT_OK = 0
COPYRIGHT_ERROR = 1
76
def check_copyright(path, args, encoding='utf-8'):
    """Check one file for a copyright line and an SPDX license line.

    The whole file is read and searched with COPYRIGHT_PATTERN and
    LICENSE_ID_PATTERN. A missing copyright line or a missing/invalid
    license identifier is reported as an error. A copyright line that does
    not mention CURRENT_YEAR only produces a warning and does not affect
    the result.

    Args:
        path: File to check.
        args: Parsed command-line options (not used by this function).
        encoding: Text encoding used to read the file.

    Returns:
        COPYRIGHT_OK when both lines are present, COPYRIGHT_ERROR otherwise.
    """
    with open(path, encoding=encoding) as source:
        text = source.read()
        name = source.name

    status = COPYRIGHT_OK

    found = COPYRIGHT_PATTERN.search(text)
    if found is None:
        print("ERROR: Missing copyright in " + name)
        status = COPYRIGHT_ERROR
    else:
        matched_line = found.group()
        if CURRENT_YEAR not in matched_line:
            # Stale year: informational only, the file still passes.
            print("WARNING: Copyright is out of date in " + name + ": '" +
                  matched_line + "'")

    if LICENSE_ID_PATTERN.search(text) is None:
        print("ERROR: License ID error in " + name)
        status = COPYRIGHT_ERROR

    return status
98
def main(args):
    """Run the copyright check over the files selected by *args*.

    In patch mode the file list comes from ``git diff`` between
    ``args.from_ref`` and ``args.to_ref`` (added, copied, modified, renamed
    and type-changed files only); otherwise it comes from ``git ls-files``.
    Files rejected by ``utils.file_is_ignored`` are skipped, every other
    file is passed to ``check_copyright``, and a summary is printed.

    Args:
        args: Parsed command-line options (verbose, patch, from_ref, to_ref).

    Returns:
        COPYRIGHT_OK when no file failed, COPYRIGHT_ERROR when at least one
        file failed or the git command returned a non-zero status.
    """
    print("Checking the copyrights in the code...")

    if args.verbose:
        print("Copyright regexp: " + COPYRIGHT_LINE)
        print("License regexp: " + LICENSE_ID_LINE)

    # Pick the git command that produces the list of candidate files.
    if args.patch:
        print("Checking files modified between patches " + args.from_ref
              + " and " + args.to_ref + "...")
        git_command = ['git', 'diff', '--diff-filter=ACMRT', '--name-only',
                       args.from_ref, args.to_ref]
    else:
        print("Checking all files tracked by git...")
        git_command = ['git', 'ls-files']

    status, listing, _ = utils.shell_command(git_command)
    if status:
        return COPYRIGHT_ERROR

    passed = 0
    failed = 0

    for candidate in listing.splitlines():
        if utils.file_is_ignored(candidate, VALID_FILE_EXTENSIONS,
                                 IGNORED_FILES, IGNORED_FOLDERS):
            if args.verbose:
                print("Ignoring file " + candidate)
            continue

        if args.verbose:
            print("Checking file " + candidate)

        outcome = check_copyright(candidate, args)
        if outcome == COPYRIGHT_OK:
            passed += 1
        elif outcome == COPYRIGHT_ERROR:
            failed += 1

    print("\nSummary:")
    print("\t{} files analyzed".format(passed + failed))

    if failed:
        print("\t{} errors found".format(failed))
        return COPYRIGHT_ERROR

    print("\tNo errors found")
    return COPYRIGHT_OK
156
def _default_from_ref(tree):
    """Return the merge base of HEAD and origin/master for the repo in *tree*.

    Exits the process when git fails or reports no merge base, because patch
    mode cannot work without a base commit.
    """
    # '-C' makes git operate on the tree being checked rather than on the
    # directory the script happens to be started from.
    (rc, stdout, stderr) = utils.shell_command(
        ['git', '-C', tree, 'merge-base', 'HEAD', 'origin/master'])
    if rc:
        print("Git merge-base command failed. Cannot determine base commit.")
        sys.exit(rc)

    merge_bases = stdout.splitlines()
    if not merge_bases:
        print("Git merge-base returned no commit. Cannot determine base commit.")
        sys.exit(1)

    # This should not happen, but it's better to be safe.
    if len(merge_bases) > 1:
        print("WARNING: Multiple merge bases found. Using the first one as base commit.")

    return merge_bases[0]


def parse_cmd_line(argv, prog_name):
    """Parse the command line of the copyright checker.

    The default value of ``--from-ref`` (the merge base of ``HEAD`` and
    ``origin/master``) is resolved lazily: git is only queried in patch mode
    and only when ``--from-ref`` was not given. Full-tree checks and explicit
    references therefore do not depend on ``origin/master`` being available.

    Args:
        argv: Command-line arguments, without the program name.
        prog_name: Program name shown in usage and help output.

    Returns:
        The ``argparse.Namespace`` with ``tree``, ``verbose``, ``patch``,
        ``from_ref`` and ``to_ref``. ``from_ref`` is ``None`` when the script
        is not in patch mode and no ``--from-ref`` was supplied.
    """
    parser = argparse.ArgumentParser(
        prog=prog_name,
        formatter_class=argparse.RawTextHelpFormatter,
        description="Check copyright of all files of codebase",
        epilog="""
For each source file in the tree, checks that the copyright header
has the correct format.
""")

    parser.add_argument("--tree", "-t",
                        help="Path to the source tree to check (default: %(default)s)",
                        default=os.curdir)

    parser.add_argument("--verbose", "-v",
                        help="Increase verbosity (default: %(default)s)",
                        action='store_true', default=False)

    parser.add_argument("--patch", "-p",
                        help="""
Patch mode.
Instead of checking all files in the source tree, the script will consider
only files that are modified by the latest patch(es).""",
                        action="store_true")

    parser.add_argument("--from-ref",
                        help="Base commit in patch mode "
                             "(default: merge base of HEAD and origin/master)",
                        default=None)
    parser.add_argument("--to-ref",
                        help="Final commit in patch mode (default: %(default)s)",
                        default="HEAD")

    args = parser.parse_args(argv)

    # Only pay for (and risk failing on) the git query when it is needed.
    if args.patch and args.from_ref is None:
        args.from_ref = _default_from_ref(args.tree)

    return args
201
202
if __name__ == "__main__":
    # Parse the options, move into the tree to check so that the relative
    # paths reported by git resolve, then exit with the check's result code.
    cli_args = parse_cmd_line(sys.argv[1:], sys.argv[0])
    os.chdir(cli_args.tree)
    sys.exit(main(cli_args))
210 sys.exit(rc)