blob: d3fbe859e3b8ff47c6adfe254959339a67aecbc0 [file] [log] [blame]
Darryl Green10d9ce32018-02-28 10:02:55 +00001#!/usr/bin/env python3
Gilles Peskine7dfcfce2019-07-04 19:31:02 +02002
Bence Szépkúti1e148272020-08-07 13:07:28 +02003# Copyright The Mbed TLS Contributors
Dave Rodgman16799db2023-11-02 19:47:20 +00004# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
Gilles Peskine7dfcfce2019-07-04 19:31:02 +02005
Darryl Green10d9ce32018-02-28 10:02:55 +00006"""
Darryl Green10d9ce32018-02-28 10:02:55 +00007This script checks the current state of the source code for minor issues,
8including incorrect file permissions, presence of tabs, non-Unix line endings,
Gilles Peskine55b49ee2019-07-04 19:31:33 +02009trailing whitespace, and presence of UTF-8 BOM.
Darryl Green10d9ce32018-02-28 10:02:55 +000010Note: requires python 3, must be run from Mbed TLS root.
11"""
12
Darryl Green10d9ce32018-02-28 10:02:55 +000013import argparse
Darryl Green10d9ce32018-02-28 10:02:55 +000014import codecs
Gilles Peskinef2fb9f62023-11-03 14:13:55 +010015import inspect
Gilles Peskine990030b2023-11-03 13:55:00 +010016import logging
17import os
Gilles Peskine0598db82020-05-10 16:57:16 +020018import re
Gilles Peskine3e2ee3c2020-05-10 17:18:06 +020019import subprocess
Darryl Green10d9ce32018-02-28 10:02:55 +000020import sys
Gilles Peskineac9e7c02020-08-11 15:11:50 +020021try:
22 from typing import FrozenSet, Optional, Pattern # pylint: disable=unused-import
23except ImportError:
24 pass
Darryl Green10d9ce32018-02-28 10:02:55 +000025
Gilles Peskined9071e72022-09-18 21:17:09 +020026import scripts_path # pylint: disable=unused-import
David Horstmanncd84bb22024-05-03 14:36:12 +010027from mbedtls_framework import build_tree
Gilles Peskined9071e72022-09-18 21:17:09 +020028
Darryl Green10d9ce32018-02-28 10:02:55 +000029
class FileIssueTracker:
    """Base class for file-wide issue tracking.

    To implement a checker that processes a file as a whole, inherit from
    this class and implement `check_file_for_issue` and define ``heading``.

    ``suffix_exemptions``: files whose name ends with a string in this set
     will not be checked.

    ``path_exemptions``: files whose path (relative to the root of the source
     tree) matches this regular expression will not be checked. This can be
     ``None`` to match no path. Paths are normalized and converted to ``/``
     separators before matching.

    ``heading``: human-readable description of the issue
    """

    suffix_exemptions = frozenset() #type: FrozenSet[str]
    path_exemptions = None #type: Optional[Pattern[str]]
    # heading must be defined in derived classes.
    # pylint: disable=no-member

    def __init__(self):
        # Maps a file path to the list of line numbers where the issue was
        # found. Subclasses may store None instead of a list when the issue
        # concerns the file as a whole.
        self.files_with_issues = {}

    @staticmethod
    def normalize_path(filepath):
        """Normalize ``filepath`` with / as the directory separator."""
        filepath = os.path.normpath(filepath)
        # On Windows, we may have backslashes to separate directories.
        # We need slashes to match exemption lists. Replace each separator
        # individually: splitting on the concatenation of both separators
        # would look for a two-character sequence that never occurs.
        for separator in (os.path.sep, os.path.altsep):
            if separator is not None and separator != '/':
                filepath = filepath.replace(separator, '/')
        return filepath

    def should_check_file(self, filepath):
        """Whether the given file name should be checked.

        Files whose name ends with a string listed in ``self.suffix_exemptions``
        or whose path matches ``self.path_exemptions`` will not be checked.
        """
        if any(filepath.endswith(suffix) for suffix in self.suffix_exemptions):
            return False
        if self.path_exemptions and \
           re.match(self.path_exemptions, self.normalize_path(filepath)):
            return False
        return True

    def check_file_for_issue(self, filepath):
        """Check the specified file for the issue that this class is for.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def record_issue(self, filepath, line_number):
        """Record that an issue was found at the specified location."""
        self.files_with_issues.setdefault(filepath, []).append(line_number)

    def output_file_issues(self, logger):
        """Log all the locations where the issue was found.

        Nothing is logged when no issue was recorded. Otherwise the heading
        is logged first, then one line per file in sorted order (with the
        line numbers when there are any), then an empty line.
        """
        if not self.files_with_issues:
            return
        logger.info(self.heading)
        for filename, lines in sorted(self.files_with_issues.items()):
            if lines:
                logger.info("{}: {}".format(
                    filename, ", ".join(str(x) for x in lines)
                ))
            else:
                # File-wide issue: there is no line number to report.
                logger.info(filename)
        logger.info("")
105
# Regular expressions matching the paths (with '/' separators) of files
# that are treated as binary and therefore exempt from text checks.
BINARY_FILE_PATH_RE_LIST = [
    # PDF and PNG files in the documentation directories.
    r'docs/.*\.pdf\Z',
    r'docs/.*\.png\Z',
    r'tf-psa-crypto/docs/.*\.pdf\Z',
    r'tf-psa-crypto/docs/.*\.png\Z',
    # Extension-less files in the fuzzing corpuses.
    r'programs/fuzz/corpuses/[^.]+\Z',
    # Files under framework/data_files.
    r'framework/data_files/[^.]+\Z',
    r'framework/data_files/.*\.(crt|csr|db|der|key|pubkey)\Z',
    r'framework/data_files/.*\.req\.[^/]+\Z',
    r'framework/data_files/.*malformed[^/]+\Z',
    r'framework/data_files/format_pkcs12\.fmt\Z',
    r'framework/data_files/.*\.bin\Z',
]
# Single pattern that matches when any of the patterns above matches.
BINARY_FILE_PATH_RE = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST))
120
class LineIssueTracker(FileIssueTracker):
    """Base class for line-by-line issue tracking.

    To implement a checker that processes files line by line, inherit from
    this class and implement `issue_with_line`.
    """

    # Exclude binary files.
    path_exemptions = BINARY_FILE_PATH_RE

    def issue_with_line(self, line, filepath, line_number):
        """Check the specified line for the issue that this class is for.

        ``line`` is the raw line as bytes, including its line terminator
        if it has one. ``line_number`` starts at 1.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def check_file_line(self, filepath, line, line_number):
        """Record an issue at ``line_number`` if ``line`` has the issue."""
        if not self.issue_with_line(line, filepath, line_number):
            return
        self.record_issue(filepath, line_number)

    def check_file_for_issue(self, filepath):
        """Check the lines of the specified file.

        The file is read in binary mode, one line at a time.
        Subclasses must implement the ``issue_with_line`` method.
        """
        with open(filepath, "rb") as binary_file:
            for line_number, line in enumerate(binary_file, start=1):
                self.check_file_line(filepath, line, line_number)
150
Gilles Peskine2c618732020-03-24 22:26:01 +0100151
def is_windows_file(filepath):
    """Whether ``filepath`` has one of the Windows-specific extensions.

    The comparison is case-sensitive and only looks at the last extension.
    """
    extension = os.path.splitext(filepath)[1]
    return extension in ('.bat', '.dsp', '.dsw', '.sln', '.vcxproj')
Gilles Peskine2c618732020-03-24 22:26:01 +0100155
156
class ShebangIssueTracker(FileIssueTracker):
    """Track files with a bad, missing or extraneous shebang line.

    Executable scripts must start with a valid shebang (#!) line.
    """

    heading = "Invalid shebang line:"

    # Allow either /bin/sh, /bin/bash, or /usr/bin/env.
    # Allow at most one argument (this is a Linux limitation).
    # For sh and bash, the argument if present must be options.
    # For env, the argument must be the base name of the interpreter.
    _shebang_re = re.compile(rb'^#! ?(?:/bin/(bash|sh)(?: -[^\n ]*)?'
                             rb'|/usr/bin/env ([^\n /]+))$')
    # Interpreter name -> file extension expected for scripts using it.
    _extensions = {
        b'bash': 'sh',
        b'perl': 'pl',
        b'python3': 'py',
        b'sh': 'sh',
    }

    path_exemptions = re.compile(r'framework/scripts/quiet/.*')

    def is_valid_shebang(self, first_line, filepath):
        """Whether ``first_line`` is an accepted shebang line for ``filepath``.

        The line must match the accepted shebang syntax, name a known
        interpreter, and the file name must end with the extension
        associated with that interpreter.
        """
        found = self._shebang_re.match(first_line)
        if found is None:
            return False
        # Exactly one of the two groups is set, depending on which
        # alternative of the pattern matched.
        interpreter = found.group(1) or found.group(2)
        expected_extension = self._extensions.get(interpreter)
        if expected_extension is None:
            return False
        return filepath.endswith('.' + expected_extension)

    def check_file_for_issue(self, filepath):
        """Check consistency between the executable bit and the shebang line."""
        is_executable = os.access(filepath, os.X_OK)
        with open(filepath, "rb") as script:
            first_line = script.readline()
        has_shebang = first_line.startswith(b'#!')
        if has_shebang != is_executable:
            # Either a shebang on a non-executable file, or an executable
            # without a shebang. This is a file-wide issue: no line number.
            self.files_with_issues[filepath] = None
        elif has_shebang and not self.is_valid_shebang(first_line, filepath):
            self.files_with_issues[filepath] = [1]
204
205
class EndOfFileNewlineIssueTracker(FileIssueTracker):
    """Track files that end with an incomplete line
    (no newline character at the end of the last line)."""

    heading = "Missing newline at end of file:"

    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Flag ``filepath`` if its last byte is not a newline character."""
        with open(filepath, "rb") as binary_file:
            try:
                # Position the file on its last byte.
                binary_file.seek(-1, os.SEEK_END)
            except OSError:
                # This script only works on regular files. If we can't seek
                # 1 before the end, it means that this position is before
                # the beginning of the file, i.e. that the file is empty.
                return
            last_byte = binary_file.read(1)
            if last_byte != b"\n":
                self.files_with_issues[filepath] = None
225
226
class Utf8BomIssueTracker(FileIssueTracker):
    """Track files that start with a UTF-8 BOM.
    Files should be ASCII or UTF-8. Valid UTF-8 does not start with a BOM."""

    heading = "UTF-8 BOM present:"

    suffix_exemptions = frozenset([".vcxproj", ".sln"])
    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Flag ``filepath`` if its first bytes are the UTF-8 BOM."""
        with open(filepath, "rb") as f:
            # Only the first few bytes matter: don't load the whole file
            # in memory just to look at its beginning.
            if f.read(len(codecs.BOM_UTF8)) == codecs.BOM_UTF8:
                self.files_with_issues[filepath] = None
240
241
class UnicodeIssueTracker(LineIssueTracker):
    """Track lines with invalid characters or invalid text encoding."""

    heading = "Invalid UTF-8 or forbidden character:"

    # Only allow valid UTF-8, and only other explicitly allowed characters.
    # We deliberately exclude all characters that aren't a simple non-blank,
    # non-zero-width glyph, apart from a very small set (tab, ordinary space,
    # line breaks, "basic" no-break space and soft hyphen). In particular,
    # non-ASCII control characters, combining characters, and Unicode state
    # changes (e.g. right-to-left text) are forbidden.
    # Note that we do allow some characters with a risk of visual confusion,
    # for example U+002D HYPHEN-MINUS vs U+00AD SOFT HYPHEN vs U+2010 HYPHEN,
    # or U+0041 LATIN CAPITAL LETTER A vs U+0391 GREEK CAPITAL LETTER ALPHA.
    GOOD_CHARACTERS = ''.join([
        '\t\n\r -~', # ASCII (tabs and line endings are checked separately)
        '\u00A0-\u00FF', # Latin-1 Supplement (for NO-BREAK SPACE and punctuation)
        '\u2010-\u2027\u2030-\u205E', # General Punctuation (printable)
        '\u2070\u2071\u2074-\u208E\u2090-\u209C', # Superscripts and Subscripts
        '\u2190-\u21FF', # Arrows
        '\u2200-\u22FF', # Mathematical Symbols
        '\u2500-\u257F' # Box Drawings characters used in markdown trees
    ])
    # Allow any of the characters and ranges above, and anything classified
    # as a word constituent.
    GOOD_CHARACTERS_RE = re.compile(r'[\w{}]+\Z'.format(GOOD_CHARACTERS))

    def issue_with_line(self, line, _filepath, line_number):
        """Whether ``line`` is not valid UTF-8 or has a forbidden character."""
        try:
            decoded = line.decode('utf-8')
        except UnicodeDecodeError:
            return True
        starts_with_bom = line_number == 1 and decoded.startswith('\uFEFF')
        if starts_with_bom:
            # Strip BOM (U+FEFF ZERO WIDTH NO-BREAK SPACE) at the beginning.
            # Which files are allowed to have a BOM is handled in
            # Utf8BomIssueTracker.
            decoded = decoded[1:]
        return self.GOOD_CHARACTERS_RE.match(decoded) is None
281
class UnixLineEndingIssueTracker(LineIssueTracker):
    """Track files with non-Unix line endings (i.e. files with CR)."""

    heading = "Non-Unix line endings:"

    def should_check_file(self, filepath):
        """Check every non-exempt file that is not a Windows file."""
        return (super().should_check_file(filepath) and
                not is_windows_file(filepath))

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` contains a carriage return anywhere."""
        return line.find(b"\r") != -1
294
295
class WindowsLineEndingIssueTracker(LineIssueTracker):
    """Track files with non-Windows line endings (i.e. CR or LF not in CRLF)."""

    heading = "Non-Windows line endings:"

    def should_check_file(self, filepath):
        """Check only the non-exempt files that are Windows files."""
        return (super().should_check_file(filepath) and
                is_windows_file(filepath))

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` lacks a CRLF terminator or has a stray CR."""
        if not line.endswith(b"\r\n"):
            return True
        # The line ends with CRLF: any other CR before it is an issue.
        return b"\r" in line[:-2]
Gilles Peskine545e13f2020-03-24 22:29:11 +0100308
309
class TrailingWhitespaceIssueTracker(LineIssueTracker):
    """Track lines with trailing whitespace."""

    heading = "Trailing whitespace:"
    suffix_exemptions = frozenset([".dsp", ".md"])

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` has whitespace before its line terminator."""
        without_terminator = line.rstrip(b"\r\n")
        # If stripping all whitespace removes more than just the line
        # terminator, there was whitespace at the end of the content.
        return without_terminator != without_terminator.rstrip()
318
319
class TabIssueTracker(LineIssueTracker):
    """Track lines with tabs."""

    heading = "Tabs present:"
    suffix_exemptions = frozenset([
        ".make",
        ".pem", # some openssl dumps have tabs
        ".sln",
        "/.gitmodules",
        "/Makefile",
        "/Makefile.inc",
        "/generate_visualc_files.pl",
    ])

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` contains a tab character anywhere."""
        return line.find(b"\t") != -1
336
337
class MergeArtifactIssueTracker(LineIssueTracker):
    """Track lines with merge artifacts.
    These are leftovers from a ``git merge`` that wasn't fully edited."""

    heading = "Merge artifact:"

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` looks like a leftover git conflict marker."""
        # '<<<<<<< ' and '>>>>>>> ' delimit a conflict;
        # '||||||| ' comes from merge.conflictStyle=diff3.
        if line.startswith((b'<<<<<<< ', b'>>>>>>> ', b'||||||| ')):
            return True
        # A line consisting of '=======' separates the two sides of a
        # conflict, except that it is not reported in .md files.
        is_separator = line.rstrip(b'\r\n') == b'======='
        return is_separator and not _filepath.endswith('.md')
354
Darryl Green10d9ce32018-02-28 10:02:55 +0000355
def this_location():
    """Return the base name of this source file and a line number in it.

    The line number is that of a line inside this function, so it is
    located before anything defined later in the file.
    """
    current_frame = inspect.currentframe()
    assert current_frame is not None
    frame_info = inspect.getframeinfo(current_frame)
    base_name = os.path.basename(frame_info.filename)
    return base_name, frame_info.lineno
# Evaluated here so that the line number precedes LicenseIssueTracker.
THIS_FILE_BASE_NAME, LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER = this_location()
362
class LicenseIssueTracker(LineIssueTracker):
    """Check copyright statements and license indications.

    This class only checks that statements are correct if present. It does
    not enforce the presence of statements in each file.
    """

    heading = "License issue:"

    LICENSE_EXEMPTION_RE_LIST = [
        # Exempt third-party drivers which may be under a different license
        r'tf-psa-crypto/drivers/(?=(everest)/.*)',
        # Documentation explaining the license may have accidental
        # false positives.
        r'(ChangeLog|LICENSE|framework\/LICENSE|[-0-9A-Z_a-z]+\.md)\Z',
        # Files imported from TF-M, and not used except in test builds,
        # may be under a different license.
        r'configs/ext/crypto_config_profile_medium\.h\Z',
        r'configs/ext/tfm_mbedcrypto_config_profile_medium\.h\Z',
        r'configs/ext/README\.md\Z',
        # Third-party file.
        r'dco\.txt\Z',
        r'framework\/dco\.txt\Z',
    ]
    # Binary files are exempt as well as the license-specific exemptions.
    path_exemptions = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST +
                                          LICENSE_EXEMPTION_RE_LIST))

    COPYRIGHT_HOLDER = rb'The Mbed TLS Contributors'
    # Catch "Copyright foo", "Copyright (C) foo", "Copyright © foo", etc.
    COPYRIGHT_RE = re.compile(rb'.*\bcopyright\s+((?:\w|\s|[()]|[^ -~])*\w)', re.I)

    SPDX_HEADER_KEY = b'SPDX-License-Identifier'
    LICENSE_IDENTIFIER = b'Apache-2.0 OR GPL-2.0-or-later'
    SPDX_RE = re.compile(br'.*?(' +
                         re.escape(SPDX_HEADER_KEY) +
                         br')(:\s*(.*?)\W*\Z|.*)', re.I)

    LICENSE_MENTION_RE = re.compile(rb'.*(?:' + rb'|'.join([
        rb'Apache License',
        rb'General Public License',
    ]) + rb')', re.I)

    def __init__(self):
        super().__init__()
        # Record what problem was caused. We can't easily report it due to
        # the structure of the script. To be fixed after
        # https://github.com/Mbed-TLS/mbedtls/pull/2506
        self.problem = None

    def _diagnose(self, line):
        """Return a description of the license problem in ``line``, or None."""
        #pylint: disable=too-many-return-statements
        copyright_match = self.COPYRIGHT_RE.match(line)
        if copyright_match and \
           copyright_match.group(1) != self.COPYRIGHT_HOLDER:
            return 'Invalid copyright line'

        spdx_match = self.SPDX_RE.match(line)
        if spdx_match:
            # The pattern is case-insensitive, so the key may have matched
            # with the wrong capitalization.
            if spdx_match.group(1) != self.SPDX_HEADER_KEY:
                return 'Misspelled ' + self.SPDX_HEADER_KEY.decode()
            identifier = spdx_match.group(3)
            if not identifier:
                return 'Improperly formatted SPDX license identifier'
            if identifier != self.LICENSE_IDENTIFIER:
                return 'Wrong SPDX license identifier'

        if self.LICENSE_MENTION_RE.match(line):
            return 'Suspicious license mention'

        return None

    def issue_with_line(self, line, filepath, line_number):
        """Whether ``line`` has an incorrect copyright or license statement.

        When an issue is found, its description is stored in ``self.problem``.
        """
        # Use endswith() rather than the more correct os.path.basename()
        # because experimentally, it makes a significant difference to
        # the running time.
        in_rest_of_this_file = (
            filepath.endswith(THIS_FILE_BASE_NAME) and
            line_number > LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER
        )
        if in_rest_of_this_file:
            # Avoid false positives from the code in this class.
            # Also skip the rest of this file, which is highly unlikely to
            # contain any problematic statements since we put those near the
            # top of files.
            return False

        problem = self._diagnose(line)
        if problem is None:
            return False
        self.problem = problem
        return True
449
450
class ErrorAddIssueTracker(LineIssueTracker):
    """Signal direct additions of error codes.

    Adding a low-level error code with a high-level error code is deprecated
    and should use MBEDTLS_ERROR_ADD.
    """

    heading = "Direct addition of error codes"

    # An MBEDTLS_ERR_xxx name directly before or after a '+' sign.
    _ERR_PLUS_RE = re.compile(br'MBEDTLS_ERR_\w+ *\+|'
                              br'\+ *MBEDTLS_ERR_')
    # Lines starting with optional spaces then 'case ' are not reported.
    _EXCLUDE_RE = re.compile(br' *case ')

    def issue_with_line(self, line, filepath, line_number):
        """Whether ``line`` adds an error code with '+' outside a case label."""
        if self._EXCLUDE_RE.match(line):
            return False
        return self._ERR_PLUS_RE.search(line) is not None
468
469
class IntegrityChecker:
    """Sanity-check files under the current directory."""

    def __init__(self, log_file):
        """Instantiate the sanity checker.
        Check files under the current directory.
        Write a report of issues to log_file."""
        build_tree.check_repo_path()
        self.logger = None
        self.setup_logger(log_file)
        self.issues_to_check = [
            ShebangIssueTracker(),
            EndOfFileNewlineIssueTracker(),
            Utf8BomIssueTracker(),
            UnicodeIssueTracker(),
            UnixLineEndingIssueTracker(),
            WindowsLineEndingIssueTracker(),
            TrailingWhitespaceIssueTracker(),
            TabIssueTracker(),
            MergeArtifactIssueTracker(),
            LicenseIssueTracker(),
            ErrorAddIssueTracker(),
        ]

    def setup_logger(self, log_file, level=logging.INFO):
        """Log to log_file if provided, or to stderr if None."""
        self.logger = logging.getLogger()
        self.logger.setLevel(level)
        if log_file:
            handler = logging.FileHandler(log_file)
            self.logger.addHandler(handler)
        else:
            console = logging.StreamHandler()
            self.logger.addHandler(console)

    @staticmethod
    def collect_files():
        """Return the list of files to check.

        These are the regular files committed into Git, both in the
        repository in the current directory and in the ``framework``
        repository. Paths of files in ``framework`` are prefixed with
        ``framework/``.
        """
        bytes_output = subprocess.check_output(['git', '-C', 'framework',
                                                'ls-files', '-z'])
        # With -z, each path is followed by a null byte, so the last
        # element of the split is an empty string that we drop.
        bytes_framework_filepaths = [b'framework/' + filepath
                                     for filepath in
                                     bytes_output.split(b'\0')[:-1]]

        bytes_output = subprocess.check_output(['git', 'ls-files', '-z'])
        bytes_filepaths = bytes_output.split(b'\0')[:-1] + \
                          bytes_framework_filepaths
        ascii_filepaths = [fp.decode('ascii') for fp in bytes_filepaths]

        # Filter out directories. Normally Git doesn't list directories
        # (it only knows about the files inside them), but there is
        # at least one case where 'git ls-files' includes a directory:
        # submodules. Just skip submodules (and any other directories).
        ascii_filepaths = [fp for fp in ascii_filepaths
                           if os.path.isfile(fp)]
        # Prepend './' to files in the top-level directory so that
        # something like `'/Makefile' in fp` matches in the top-level
        # directory as well as in subdirectories.
        return [fp if os.path.dirname(fp) else os.path.join(os.curdir, fp)
                for fp in ascii_filepaths]

    def check_files(self):
        """Check all files for all issues."""
        # Collect the list of files only once: it requires running Git
        # and querying the file system, and it is the same for every
        # issue tracker.
        filepaths = self.collect_files()
        for issue_to_check in self.issues_to_check:
            for filepath in filepaths:
                if issue_to_check.should_check_file(filepath):
                    issue_to_check.check_file_for_issue(filepath)

    def output_issues(self):
        """Log the issues found and their locations.

        Return 1 if there were issues, 0 otherwise.
        """
        integrity_return_code = 0
        for issue_to_check in self.issues_to_check:
            if issue_to_check.files_with_issues:
                integrity_return_code = 1
            issue_to_check.output_file_issues(self.logger)
        return integrity_return_code
552
553
def run_main():
    """Parse the command line, check the files and exit.

    The exit status is the value returned by
    ``IntegrityChecker.output_issues``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-l", "--log_file", type=str, help="path to optional output log",
    )
    options = parser.parse_args()
    checker = IntegrityChecker(options.log_file)
    checker.check_files()
    sys.exit(checker.output_issues())


if __name__ == "__main__":
    run_main()