blob: 227a11e6fa6d964b2c3ef5d7201c59e6ad88bf50 [file] [log] [blame]
Fathi Boudra422bf772019-12-02 11:10:16 +02001#!/usr/bin/env python3
2#
Zelalem219df412020-05-17 19:21:20 -05003# Copyright (c) 2019-2020, Arm Limited. All rights reserved.
Fathi Boudra422bf772019-12-02 11:10:16 +02004#
5# SPDX-License-Identifier: BSD-3-Clause
6#
7
8"""
9Check if a given file includes the copyright boiler plate.
10This checker supports the following comment styles:
Zelalem219df412020-05-17 19:21:20 -050011 /*
12 *
13 //
14 #
Fathi Boudra422bf772019-12-02 11:10:16 +020015"""
16
17import argparse
18import datetime
19import collections
20import fnmatch
21import shlex
22import os
23import re
24import sys
25import utils
26from itertools import islice
27
# File extensions to check
VALID_FILE_EXTENSIONS = ('.c', '.conf', '.dts', '.dtsi', '.editorconfig',
                         '.h', '.i', '.ld', 'Makefile', '.mk', '.msvc',
                         '.py', '.S', '.scat', '.sh')

# Paths inside the tree to ignore. Hidden folders and files are always ignored.
# They mustn't end in '/'.
IGNORED_FOLDERS = (
    'include/lib/hob',
    'include/lib/libfdt',
    'lib/compiler-rt',
    'lib/hob',
    'lib/libfdt',
    'lib/zlib',
)

# List of ignored files in folders that aren't ignored.
# The trailing comma is required: without it the parentheses are only grouping
# and this would be a plain string instead of a one-element tuple, turning any
# membership test against it into a substring match.
IGNORED_FILES = (
    'include/tools_share/uuid.h',
)
48
# All patterns below are raw strings so that regex escapes such as '\*' reach
# the 're' module untouched instead of being (invalid) string escapes.

# Supported comment styles (Python regex)
COMMENT_PATTERN = r'(\*|/\*|\#|//)'

# Any combination of spaces and/or tabs
SPACING = r'[ \t]*'

# Line must start with a comment and optional spacing
LINE_START = '^' + SPACING + COMMENT_PATTERN + SPACING

# Line end with optional spacing
EOL = SPACING + '$'

# Year or period as YYYY or YYYY-YYYY
TIME_PERIOD = r'[0-9]{4}(-[0-9]{4})?'

# Any string with valid license ID, don't allow adding postfix.
# The '.' in 'Apache-2.0' is escaped so it only matches a literal dot.
LICENSE_ID = (r'.*(BSD-3-Clause|BSD-2-Clause-FreeBSD|MIT|Apache-2\.0)'
              r'([ ,.\);].*)?')

# File must contain both lines to pass the check
COPYRIGHT_LINE = LINE_START + 'Copyright' + '.*' + TIME_PERIOD + '.*' + EOL
LICENSE_ID_LINE = LINE_START + 'SPDX-License-Identifier:' + LICENSE_ID + EOL

# Compiled license patterns. MULTILINE lets '^' and '$' anchor on every line
# of the file content rather than only at its start and end.
COPYRIGHT_PATTERN = re.compile(COPYRIGHT_LINE, re.MULTILINE)
LICENSE_ID_PATTERN = re.compile(LICENSE_ID_LINE, re.MULTILINE)
74
# Current calendar year (local time) as a string, e.g. '2024'; compared
# against the text of the matched copyright line.
CURRENT_YEAR = str(datetime.date.today().year)

# Result codes returned by the checker; they double as process exit status.
COPYRIGHT_OK, COPYRIGHT_ERROR = 0, 1
Fathi Boudra422bf772019-12-02 11:10:16 +020079
def check_copyright(path, args, encoding='utf-8'):
    '''Check one file for a copyright line and an SPDX license line.

    path     -- file to read.
    args     -- parsed command line options; not used by this function.
    encoding -- text encoding used to open the file.

    Prints an ERROR line for a missing copyright line or a missing/invalid
    license identifier, and a WARNING line when the first matching copyright
    line does not contain the current year.

    Returns COPYRIGHT_ERROR if either required line is missing, otherwise
    COPYRIGHT_OK. An out-of-date year alone does not cause an error.
    '''

    with open(path, encoding=encoding) as source:
        name = source.name
        text = source.read()

    status = COPYRIGHT_OK

    match = COPYRIGHT_PATTERN.search(text)
    if match is None:
        print("ERROR: Missing copyright in " + name)
        status = COPYRIGHT_ERROR
    else:
        matched_line = match.group()
        # Only warn: a stale year is reported but does not fail the check.
        if CURRENT_YEAR not in matched_line:
            print("WARNING: Copyright is out of date in " + name + ": '" +
                  matched_line + "'")

    if LICENSE_ID_PATTERN.search(text) is None:
        print("ERROR: License ID error in " + name)
        status = COPYRIGHT_ERROR

    return status
Fathi Boudra422bf772019-12-02 11:10:16 +0200101
def main(args):
    '''Run the copyright check over a list of files obtained from git.

    In patch mode (args.patch) the files are those reported by
    'git diff --diff-filter=ACRT --name-only' between args.from_ref and
    args.to_ref; otherwise they are the output of 'git ls-files'.

    Each file not rejected by utils.file_is_ignored() is passed to
    check_copyright(), and a summary is printed at the end.

    Returns COPYRIGHT_OK when no file reported an error, COPYRIGHT_ERROR
    when at least one did or when the git command failed.
    '''
    print("Checking the copyrights in the code...")

    if args.verbose:
        print("Copyright regexp: " + COPYRIGHT_LINE)
        print("License regexp: " + LICENSE_ID_LINE)

    # Pick the git command that produces the list of files to inspect.
    if args.patch:
        print("Checking files added between patches " + args.from_ref
              + " and " + args.to_ref + "...")
        git_cmd = ['git', 'diff', '--diff-filter=ACRT', '--name-only',
                   args.from_ref, args.to_ref]
    else:
        print("Checking all files tracked by git...")
        git_cmd = ['git', 'ls-files']

    git_status, listing, _ = utils.shell_command(git_cmd)
    if git_status:
        return COPYRIGHT_ERROR

    passed = 0
    failed = 0

    for name in listing.splitlines():
        skip = utils.file_is_ignored(name, VALID_FILE_EXTENSIONS,
                                     IGNORED_FILES, IGNORED_FOLDERS)
        if args.verbose:
            print(("Ignoring file " if skip else "Checking file ") + name)
        if skip:
            continue

        outcome = check_copyright(name, args)
        if outcome == COPYRIGHT_OK:
            passed += 1
        elif outcome == COPYRIGHT_ERROR:
            failed += 1

    print("\nSummary:")
    print("\t{} files analyzed".format(passed + failed))

    if failed:
        print("\t{} errors found".format(failed))
        return COPYRIGHT_ERROR

    print("\tNo errors found")
    return COPYRIGHT_OK
Fathi Boudra422bf772019-12-02 11:10:16 +0200159
def _default_from_ref(tree):
    '''Return the merge base of HEAD and origin/master in the given tree.

    Exits the process if git fails or reports no merge base.
    '''
    # '-C tree' makes git operate on the tree being checked; the process has
    # not changed into that directory yet when arguments are parsed.
    (rc, stdout, stderr) = utils.shell_command(
        ['git', '-C', tree, 'merge-base', 'HEAD',
         'refs/remotes/origin/master'])
    if rc:
        print("Git merge-base command failed. Cannot determine base commit.")
        sys.exit(rc)

    merge_bases = stdout.splitlines()
    if not merge_bases:
        print("Git merge-base returned no commit. Cannot determine base commit.")
        sys.exit(1)

    # This should not happen, but it's better to be safe.
    if len(merge_bases) > 1:
        print("WARNING: Multiple merge bases found. Using the first one as base commit.")

    return merge_bases[0]


def parse_cmd_line(argv, prog_name):
    '''Parse the command line and return the argparse namespace.

    argv      -- list of arguments, without the program name.
    prog_name -- program name shown in usage/help output.

    The returned namespace has the attributes tree, verbose, patch,
    from_ref and to_ref. from_ref is resolved with 'git merge-base' only
    when patch mode is requested and --from-ref was not given, so that
    full-tree checks, --help and explicit --from-ref do not depend on
    refs/remotes/origin/master being available. Outside patch mode an
    unspecified from_ref is None.
    '''
    parser = argparse.ArgumentParser(
        prog=prog_name,
        formatter_class=argparse.RawTextHelpFormatter,
        description="Check copyright of all files of codebase",
        epilog="""
For each source file in the tree, checks that the copyright header
has the correct format.
""")

    parser.add_argument("--tree", "-t",
                        help="Path to the source tree to check (default: %(default)s)",
                        default=os.curdir)

    parser.add_argument("--verbose", "-v",
                        help="Increase verbosity (default: %(default)s)",
                        action='store_true', default=False)

    parser.add_argument("--patch", "-p",
                        help="""
Patch mode.
Instead of checking all files in the source tree, the script will consider
only files that are modified by the latest patch(es).""",
                        action="store_true")

    parser.add_argument("--from-ref",
                        help="Base commit in patch mode (default: merge base of HEAD\n"
                             "and refs/remotes/origin/master)",
                        default=None)
    parser.add_argument("--to-ref",
                        help="Final commit in patch mode (default: %(default)s)",
                        default="HEAD")

    args = parser.parse_args(argv)

    # Only touch git when the base commit is actually needed.
    if args.patch and args.from_ref is None:
        args.from_ref = _default_from_ref(args.tree)

    return args
204
205
if __name__ == "__main__":
    # Script entry point: parse options, move into the tree to check so that
    # relative paths resolve there, then exit with the result code of main().
    cli_args = parse_cmd_line(sys.argv[1:], sys.argv[0])
    os.chdir(cli_args.tree)
    sys.exit(main(cli_args))
213 sys.exit(rc)