blob: 63db4605d2b338a76731ae32df26cc58adb93228 [file] [log] [blame]
#!/usr/bin/env python3
#
# Copyright (c) 2019-2020, Arm Limited. All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#
7
8"""
Check if a given file includes the copyright boilerplate.
This checker supports the following comment styles:
    /*
    *
    //
    #
Fathi Boudra422bf772019-12-02 11:10:16 +020015"""
16
17import argparse
18import datetime
19import collections
20import fnmatch
21import shlex
22import os
23import re
24import sys
25import utils
26from itertools import islice
27
# File extensions to check
VALID_FILE_EXTENSIONS = ('.c', '.conf', '.dts', '.dtsi', '.editorconfig',
                         '.h', '.i', '.ld', 'Makefile', '.mk', '.msvc',
                         '.py', '.S', '.scat', '.sh')

# Paths inside the tree to ignore. Hidden folders and files are always ignored.
# They mustn't end in '/'.
IGNORED_FOLDERS = (
    'include/lib/libfdt',
    'lib/compiler-rt',
    'lib/libfdt',
    'lib/zlib'
)

# List of ignored files in folders that aren't ignored.
# The trailing comma is required: without it the parentheses are only
# grouping and the value is a plain string instead of a one-element tuple.
IGNORED_FILES = (
    'include/tools_share/uuid.h',
)

# Supported comment styles (Python regex).
# Raw string so that the backslashes reach the regex engine without relying
# on Python keeping unknown escape sequences such as '\*' untouched.
COMMENT_PATTERN = r'(\*|/\*|\#|//)'

# Any combination of spaces and/or tabs
SPACING = '[ \t]*'

# Line must start with a comment and optional spacing
LINE_START = '^' + SPACING + COMMENT_PATTERN + SPACING

# Line end with optional spacing
EOL = SPACING + '$'

# Year or period as YYYY or YYYY-YYYY
TIME_PERIOD = '[0-9]{4}(-[0-9]{4})?'

# Any string with valid license ID, don't allow adding postfix
LICENSE_ID = r'.*(BSD-3-Clause|BSD-2-Clause-FreeBSD)([ ,.\);].*)?'

# File must contain both lines to pass the check
COPYRIGHT_LINE = LINE_START + 'Copyright' + '.*' + TIME_PERIOD + '.*' + EOL
LICENSE_ID_LINE = LINE_START + 'SPDX-License-Identifier:' + LICENSE_ID + EOL

# Compiled license patterns. MULTILINE makes '^' and '$' match at every line
# boundary, so the patterns can be searched against a whole file at once.
COPYRIGHT_PATTERN = re.compile(COPYRIGHT_LINE, re.MULTILINE)
LICENSE_ID_PATTERN = re.compile(LICENSE_ID_LINE, re.MULTILINE)

# Year at the time the script is started, as a string, used to detect
# copyright lines that do not mention the current year.
CURRENT_YEAR = str(datetime.datetime.now().year)

# Return codes of check_copyright() and main()
COPYRIGHT_OK = 0
COPYRIGHT_ERROR = 1
Fathi Boudra422bf772019-12-02 11:10:16 +020077
def check_copyright(path, args, encoding='utf-8'):
    '''Check one file for a copyright line and an SPDX license line.

    path:     file to read.
    args:     parsed command-line options; not used by this function.
    encoding: text encoding used to decode the file.

    Returns COPYRIGHT_ERROR when the copyright line or the license ID line
    is missing, COPYRIGHT_OK otherwise. A copyright line that does not
    contain the current year only produces a warning. Exceptions raised
    while opening or reading the file are not caught here.
    '''

    with open(path, encoding=encoding) as source:
        text = source.read()

    # The 'name' attribute stays readable after the file has been closed.
    shown_name = source.name
    status = COPYRIGHT_OK

    header = COPYRIGHT_PATTERN.search(text)
    if header is None:
        print("ERROR: Missing copyright in " + shown_name)
        status = COPYRIGHT_ERROR
    else:
        matched_line = header.group()
        if CURRENT_YEAR not in matched_line:
            # Out-of-date years are reported but do not fail the check.
            print("WARNING: Copyright is out of date in " + shown_name + ": '" +
                  matched_line + "'")

    if LICENSE_ID_PATTERN.search(text) is None:
        print("ERROR: License ID error in " + shown_name)
        status = COPYRIGHT_ERROR

    return status
Fathi Boudra422bf772019-12-02 11:10:16 +020099
def main(args):
    '''Check the copyright headers of the selected files and print a summary.

    In patch mode (args.patch) only the files reported by 'git diff' between
    args.from_ref and args.to_ref are considered; otherwise every file listed
    by 'git ls-files' is. Files for which utils.file_is_ignored() is true are
    skipped.

    Returns COPYRIGHT_OK when no checked file reported an error, and
    COPYRIGHT_ERROR when at least one did or when the git command failed.
    '''
    print("Checking the copyrights in the code...")

    if args.verbose:
        print("Copyright regexp: " + COPYRIGHT_LINE)
        print("License regexp: " + LICENSE_ID_LINE)

    if args.patch:
        print("Checking files modified between patches " + args.from_ref
              + " and " + args.to_ref + "...")
        # ACMRT selects added, copied, modified, renamed and type-changed
        # files; deleted files are left out of the list.
        git_cmd = ['git', 'diff', '--diff-filter=ACMRT', '--name-only',
                   args.from_ref, args.to_ref]
    else:
        print("Checking all files tracked by git...")
        git_cmd = ['git', 'ls-files']

    (rc, stdout, _) = utils.shell_command(git_cmd)
    if rc:
        # Say why the run is failing instead of exiting without a message.
        print("ERROR: '" + " ".join(git_cmd)
              + "' failed. Cannot determine the files to check.")
        return COPYRIGHT_ERROR

    files = stdout.splitlines()

    count_ok = 0
    count_error = 0

    for f in files:

        if utils.file_is_ignored(f, VALID_FILE_EXTENSIONS, IGNORED_FILES, IGNORED_FOLDERS):
            if args.verbose:
                print("Ignoring file " + f)
            continue

        if args.verbose:
            print("Checking file " + f)

        rc = check_copyright(f, args)

        if rc == COPYRIGHT_OK:
            count_ok += 1
        elif rc == COPYRIGHT_ERROR:
            count_error += 1

    print("\nSummary:")
    print("\t{} files analyzed".format(count_ok + count_error))

    if count_error == 0:
        print("\tNo errors found")
        return COPYRIGHT_OK

    print("\t{} errors found".format(count_error))
    return COPYRIGHT_ERROR
Fathi Boudra422bf772019-12-02 11:10:16 +0200157
def _default_base_commit(tree):
    '''Return the merge base of HEAD and refs/remotes/origin/master in tree.

    Exits the script when git fails or reports no merge base.
    '''
    # 'git -C <path>' runs the command as if git had been started in <path>,
    # so the base commit is looked up in the tree that is going to be checked.
    (rc, stdout, _) = utils.shell_command(
        ['git', '-C', tree, 'merge-base', 'HEAD', 'refs/remotes/origin/master'])
    if rc:
        print("Git merge-base command failed. Cannot determine base commit.")
        sys.exit(rc)

    merge_bases = stdout.splitlines()
    if not merge_bases:
        print("Git merge-base returned no commit. Cannot determine base commit.")
        sys.exit(1)

    # This should not happen, but it's better to be safe.
    if len(merge_bases) > 1:
        print("WARNING: Multiple merge bases found. Using the first one as base commit.")

    return merge_bases[0]


def parse_cmd_line(argv, prog_name):
    '''Parse the command line and return the argparse namespace.

    argv:      list of arguments, without the program name.
    prog_name: program name shown in the usage and help messages.

    The returned namespace has the attributes tree, verbose, patch, from_ref
    and to_ref. In patch mode, when --from-ref is not given, from_ref is set
    to the merge base of HEAD and refs/remotes/origin/master; the script
    exits if that commit cannot be determined. Outside patch mode no git
    command is run and from_ref is None unless --from-ref was given.
    '''
    parser = argparse.ArgumentParser(
        prog=prog_name,
        formatter_class=argparse.RawTextHelpFormatter,
        description="Check copyright of all files of codebase",
        epilog="""
For each source file in the tree, checks that the copyright header
has the correct format.
""")

    parser.add_argument("--tree", "-t",
                        help="Path to the source tree to check (default: %(default)s)",
                        default=os.curdir)

    parser.add_argument("--verbose", "-v",
                        help="Increase verbosity (default: %(default)s)",
                        action='store_true', default=False)

    parser.add_argument("--patch", "-p",
                        help="""
Patch mode.
Instead of checking all files in the source tree, the script will consider
only files that are modified by the latest patch(es).""",
                        action="store_true")

    # The default is resolved after parsing: asking git for the merge base is
    # only needed in patch mode and only when no base commit was given.
    parser.add_argument("--from-ref",
                        help="Base commit in patch mode (default: merge base of HEAD\n"
                             "and refs/remotes/origin/master)",
                        default=None)
    parser.add_argument("--to-ref",
                        help="Final commit in patch mode (default: %(default)s)",
                        default="HEAD")

    args = parser.parse_args(argv)

    if args.patch and args.from_ref is None:
        args.from_ref = _default_base_commit(args.tree)

    return args
202
203
if __name__ == "__main__":
    # Parse the options first: the tree to check comes from the command line.
    cli_args = parse_cmd_line(sys.argv[1:], sys.argv[0])

    # The checks are run from inside the source tree.
    os.chdir(cli_args.tree)

    # The result of main() becomes the exit status of the script.
    sys.exit(main(cli_args))
211 sys.exit(rc)