Check copyright statements and SPDX license identifier
Enforce a specific copyright statement and a specific SPDX license
identifier where they are present.
Binary files, third-party modules and a few other exceptions are not
checked.
There is currently no check that copyright statements and license
identifiers are present.
Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
diff --git a/tests/scripts/check_files.py b/tests/scripts/check_files.py
index 31edf8b..554ed47 100755
--- a/tests/scripts/check_files.py
+++ b/tests/scripts/check_files.py
@@ -12,6 +12,7 @@
import argparse
import codecs
+import inspect
import logging
import os
import re
@@ -345,6 +346,84 @@
return False
+THIS_FILE_BASE_NAME = \
+ os.path.basename(inspect.getframeinfo(inspect.currentframe()).filename)
+LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER = \
+ inspect.getframeinfo(inspect.currentframe()).lineno
+class LicenseIssueTracker(LineIssueTracker):
+ """Check copyright statements and license indications.
+
+ This class only checks that statements are correct if present. It does
+ not enforce the presence of statements in each file.
+ """
+
+ heading = "License issue:"
+
+ LICENSE_EXEMPTION_RE_LIST = [
+ # Third-party code, other than whitelisted third-party modules,
+ # may be under a different license.
+ r'3rdparty/(?!(p256-m)/.*)',
+ # Documentation explaining the license may have accidental
+ # false positives.
+ r'(ChangeLog|LICENSE|[-0-9A-Z_a-z]+\.md)\Z',
+ # Files imported from TF-M, and not used except in test builds,
+ # may be under a different license.
+ r'configs/crypto_config_profile_medium\.h\Z',
+ r'configs/tfm_mbedcrypto_config_profile_medium\.h\Z',
+ # Third-party file.
+ r'dco\.txt\Z',
+ ]
+ path_exemptions = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST +
+ LICENSE_EXEMPTION_RE_LIST))
+
+ COPYRIGHT_HOLDER = rb'The Mbed TLS Contributors'
+ # Catch "Copyright foo", "Copyright (C) foo", "Copyright © foo", etc.
+ COPYRIGHT_RE = re.compile(rb'.*\bcopyright\s+((?:\w|\s|[()]|[^ -~])*\w)', re.I)
+
+ SPDX_HEADER_KEY = b'SPDX-License-Identifier'
+ LICENSE_IDENTIFIER = b'Apache-2.0 OR GPL-2.0-or-later'
+ SPDX_RE = re.compile(br'.*?(' +
+ re.escape(SPDX_HEADER_KEY) +
+ br')(:\s*(.*?)\W*\Z|.*)', re.I)
+
+ def __init__(self):
+ super().__init__()
+ # Record what problem was caused. We can't easily report it due to
+ # the structure of the script. To be fixed after
+ # https://github.com/Mbed-TLS/mbedtls/pull/2506
+ self.problem = None
+
+ def issue_with_line(self, line, filepath, line_number):
+ # Use endswith() rather than the more correct os.path.basename()
+ # because experimentally, it makes a significant difference to
+ # the running time.
+ if filepath.endswith(THIS_FILE_BASE_NAME) and \
+ line_number > LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER:
+ # Avoid false positives from the code in this class.
+ # Also skip the rest of this file, which is highly unlikely to
+ # contain any problematic statements since we put those near the
+ # top of files.
+ return False
+
+ m = self.COPYRIGHT_RE.match(line)
+ if m and m.group(1) != self.COPYRIGHT_HOLDER:
+ self.problem = 'Invalid copyright line'
+ return True
+
+ m = self.SPDX_RE.match(line)
+ if m:
+ if m.group(1) != self.SPDX_HEADER_KEY:
+ self.problem = 'Misspelled ' + self.SPDX_HEADER_KEY.decode()
+ return True
+ if not m.group(3):
+ self.problem = 'Improperly formatted SPDX license identifier'
+ return True
+ if m.group(3) != self.LICENSE_IDENTIFIER:
+ self.problem = 'Wrong SPDX license identifier'
+ return True
+ return False
+
+
class IntegrityChecker:
"""Sanity-check files under the current directory."""
@@ -365,6 +444,7 @@
TrailingWhitespaceIssueTracker(),
TabIssueTracker(),
MergeArtifactIssueTracker(),
+ LicenseIssueTracker(),
]
def setup_logger(self, log_file, level=logging.INFO):