blob: 837905eada9dc54c5d8cc99a0dab89239c301252 [file] [log] [blame]
Darryl Green10d9ce32018-02-28 10:02:55 +00001#!/usr/bin/env python3
Gilles Peskine7dfcfce2019-07-04 19:31:02 +02002
Bence Szépkúti1e148272020-08-07 13:07:28 +02003# Copyright The Mbed TLS Contributors
Dave Rodgman7ff79652023-11-03 12:04:52 +00004# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
Gilles Peskine7dfcfce2019-07-04 19:31:02 +02005
Darryl Green10d9ce32018-02-28 10:02:55 +00006"""
Darryl Green10d9ce32018-02-28 10:02:55 +00007This script checks the current state of the source code for minor issues,
8including incorrect file permissions, presence of tabs, non-Unix line endings,
Gilles Peskine55b49ee2019-07-04 19:31:33 +02009trailing whitespace, and presence of UTF-8 BOM.
Darryl Green10d9ce32018-02-28 10:02:55 +000010Note: requires python 3, must be run from Mbed TLS root.
11"""
12
Darryl Green10d9ce32018-02-28 10:02:55 +000013import argparse
Darryl Green10d9ce32018-02-28 10:02:55 +000014import codecs
Gilles Peskine15db6822023-11-03 14:13:55 +010015import inspect
Gilles Peskine51361562023-11-03 13:55:00 +010016import logging
17import os
Gilles Peskine0598db82020-05-10 16:57:16 +020018import re
Gilles Peskine3e2ee3c2020-05-10 17:18:06 +020019import subprocess
Darryl Green10d9ce32018-02-28 10:02:55 +000020import sys
Gilles Peskineac9e7c02020-08-11 15:11:50 +020021try:
22 from typing import FrozenSet, Optional, Pattern # pylint: disable=unused-import
23except ImportError:
24 pass
Darryl Green10d9ce32018-02-28 10:02:55 +000025
Gilles Peskine7ff47662022-09-18 21:17:09 +020026import scripts_path # pylint: disable=unused-import
27from mbedtls_dev import build_tree
28
Darryl Green10d9ce32018-02-28 10:02:55 +000029
class FileIssueTracker:
    """Base class for file-wide issue tracking.

    To implement a checker that processes a file as a whole, inherit from
    this class, implement ``check_file_for_issue`` and define ``heading``.

    ``suffix_exemptions``: files whose name ends with a string in this set
    will not be checked.

    ``path_exemptions``: files whose path (relative to the root of the source
    tree) matches this regular expression will not be checked. This can be
    ``None`` to match no path. Paths are normalized and converted to ``/``
    separators before matching.

    ``heading``: human-readable description of the issue
    """

    suffix_exemptions = frozenset() #type: FrozenSet[str]
    path_exemptions = None #type: Optional[Pattern[str]]
    # heading must be defined in derived classes.
    # pylint: disable=no-member

    def __init__(self):
        # Maps a file path to the list of line numbers where the issue was
        # found. Subclasses that flag a file as a whole store ``None``.
        self.files_with_issues = {}

    @staticmethod
    def normalize_path(filepath):
        """Normalize ``filepath`` with / as the directory separator."""
        filepath = os.path.normpath(filepath)
        # On Windows, we may have backslashes to separate directories.
        # We need slashes to match exemption lists.
        # Replace each separator character on its own: passing the
        # concatenated separators to str.split() would only split on that
        # exact multi-character sequence, which never occurs in a path.
        for sep in (os.path.sep, os.path.altsep):
            if sep is not None and sep != '/':
                filepath = filepath.replace(sep, '/')
        return filepath

    def should_check_file(self, filepath):
        """Whether the given file name should be checked.

        Files whose name ends with a string listed in ``self.suffix_exemptions``
        or whose path matches ``self.path_exemptions`` will not be checked.
        """
        # str.endswith() accepts a tuple of candidates; an empty tuple
        # never matches.
        if filepath.endswith(tuple(self.suffix_exemptions)):
            return False
        if self.path_exemptions and \
           re.match(self.path_exemptions, self.normalize_path(filepath)):
            return False
        return True

    def check_file_for_issue(self, filepath):
        """Check the specified file for the issue that this class is for.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def record_issue(self, filepath, line_number):
        """Record that an issue was found at the specified location."""
        self.files_with_issues.setdefault(filepath, []).append(line_number)

    def output_file_issues(self, logger):
        """Log all the locations where the issue was found.

        Nothing is logged if no issue was recorded. Otherwise log the
        heading, then one line per file (with the line numbers if any),
        then an empty line.
        """
        if not self.files_with_issues:
            return
        logger.info(self.heading)
        for filename, lines in sorted(self.files_with_issues.items()):
            if lines:
                logger.info("{}: {}".format(
                    filename, ", ".join(str(x) for x in lines)
                ))
            else:
                logger.info(filename)
        logger.info("")
105
# Regular expressions for the paths of files that are treated as binary.
# They are combined into one pattern, BINARY_FILE_PATH_RE, which the
# trackers below use as their ``path_exemptions``.
BINARY_FILE_PATH_RE_LIST = [
    # PDF documents.
    r'docs/.*\.pdf\Z',
    # Fuzzing corpus entries (paths without a dot).
    r'programs/fuzz/corpuses/[^.]+\Z',
    # Test data: paths without a dot, then a set of known extensions
    # and naming patterns.
    r'tests/data_files/[^.]+\Z',
    r'tests/data_files/.*\.(crt|csr|db|der|key|pubkey)\Z',
    r'tests/data_files/.*\.req\.[^/]+\Z',
    r'tests/data_files/.*malformed[^/]+\Z',
    r'tests/data_files/format_pkcs12\.fmt\Z',
    r'tests/data_files/.*\.bin\Z',
]
_BINARY_FILE_PATH_ALTERNATION = '|'.join(BINARY_FILE_PATH_RE_LIST)
BINARY_FILE_PATH_RE = re.compile(_BINARY_FILE_PATH_ALTERNATION)
117
class LineIssueTracker(FileIssueTracker):
    """Base class for line-by-line issue tracking.

    To implement a checker that processes files line by line, inherit from
    this class and implement ``issue_with_line``.
    """

    # Exclude binary files.
    path_exemptions = BINARY_FILE_PATH_RE

    def issue_with_line(self, line, filepath, line_number):
        """Tell whether ``line`` has the issue that this class tracks.

        ``line`` is a bytes object holding one line of ``filepath`` and
        ``line_number`` is its 1-based position in the file.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def check_file_line(self, filepath, line, line_number):
        """Record an issue at this location if ``issue_with_line`` says so."""
        has_issue = self.issue_with_line(line, filepath, line_number)
        if has_issue:
            self.record_issue(filepath, line_number)

    def check_file_for_issue(self, filepath):
        """Check every line of the specified file.

        The file is read in binary mode. Subclasses must implement the
        ``issue_with_line`` method.
        """
        with open(filepath, "rb") as stream:
            for line_number, line in enumerate(stream, 1):
                self.check_file_line(filepath, line, line_number)
147
Gilles Peskine2c618732020-03-24 22:26:01 +0100148
def is_windows_file(filepath):
    """Whether the extension of ``filepath`` is one of the Windows-specific
    extensions listed below (the comparison is case-sensitive)."""
    windows_extensions = ('.bat', '.dsp', '.dsw', '.sln', '.vcxproj')
    extension = os.path.splitext(filepath)[1]
    return extension in windows_extensions
Gilles Peskine2c618732020-03-24 22:26:01 +0100152
153
class ShebangIssueTracker(FileIssueTracker):
    """Track files with a bad, missing or extraneous shebang line.

    Executable scripts must start with a valid shebang (#!) line.
    """

    heading = "Invalid shebang line:"

    # Allow either /bin/sh, /bin/bash, or /usr/bin/env.
    # Allow at most one argument (this is a Linux limitation).
    # For sh and bash, the argument if present must be options.
    # For env, the argument must be the base name of the interpreter.
    _shebang_re = re.compile(rb'^#! ?(?:/bin/(bash|sh)(?: -[^\n ]*)?'
                             rb'|/usr/bin/env ([^\n /]+))$')
    # Map each accepted interpreter to the file extension it requires.
    _extensions = {
        b'bash': 'sh',
        b'perl': 'pl',
        b'python3': 'py',
        b'sh': 'sh',
    }

    path_exemptions = re.compile(r'tests/scripts/quiet/.*')

    def is_valid_shebang(self, first_line, filepath):
        """Whether ``first_line`` is an accepted shebang line for ``filepath``.

        The line must match ``_shebang_re``, name a known interpreter, and
        the file name must end with the extension for that interpreter.
        """
        match = self._shebang_re.match(first_line)
        if match is None:
            return False
        interpreter = match.group(1) or match.group(2)
        extension = self._extensions.get(interpreter)
        if extension is None:
            return False
        return filepath.endswith('.' + extension)

    def check_file_for_issue(self, filepath):
        """Flag a mismatch between executability and shebang, or a bad shebang.

        The file as a whole is flagged when it has a shebang but is not
        executable, or is executable but has no shebang. Line 1 is flagged
        when an executable file has an invalid shebang.
        """
        executable = os.access(filepath, os.X_OK)
        with open(filepath, "rb") as stream:
            first_line = stream.readline()
        has_shebang = first_line.startswith(b'#!')
        if has_shebang != executable:
            # Shebang on a non-executable file, or executable without
            # a shebang.
            self.files_with_issues[filepath] = None
        elif has_shebang and not self.is_valid_shebang(first_line, filepath):
            self.files_with_issues[filepath] = [1]
201
202
class EndOfFileNewlineIssueTracker(FileIssueTracker):
    """Track files that end with an incomplete line
    (no newline character at the end of the last line)."""

    heading = "Missing newline at end of file:"

    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Flag the file if its last byte is not a newline.

        Empty files are not flagged.
        """
        with open(filepath, "rb") as stream:
            try:
                stream.seek(-1, os.SEEK_END)
            except OSError:
                # This script only works on regular files. If we can't seek
                # 1 before the end, it means that this position is before
                # the beginning of the file, i.e. that the file is empty.
                return
            last_byte = stream.read(1)
        if last_byte != b"\n":
            self.files_with_issues[filepath] = None
222
223
class Utf8BomIssueTracker(FileIssueTracker):
    """Track files that start with a UTF-8 BOM.
    Files should be ASCII or UTF-8. Valid UTF-8 does not start with a BOM."""

    heading = "UTF-8 BOM present:"

    suffix_exemptions = frozenset([".vcxproj", ".sln"])
    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Flag the file if it starts with the UTF-8 byte order mark."""
        with open(filepath, "rb") as f:
            # Only the first few bytes matter: do not read the whole file.
            start = f.read(len(codecs.BOM_UTF8))
        if start == codecs.BOM_UTF8:
            self.files_with_issues[filepath] = None
237
238
class UnicodeIssueTracker(LineIssueTracker):
    """Track lines with invalid characters or invalid text encoding."""

    heading = "Invalid UTF-8 or forbidden character:"

    # Only allow valid UTF-8, and only other explicitly allowed characters.
    # We deliberately exclude all characters that aren't a simple non-blank,
    # non-zero-width glyph, apart from a very small set (tab, ordinary space,
    # line breaks, "basic" no-break space and soft hyphen). In particular,
    # non-ASCII control characters, combining characters, and Unicode state
    # changes (e.g. right-to-left text) are forbidden.
    # Note that we do allow some characters with a risk of visual confusion,
    # for example U+002D HYPHEN-MINUS vs U+00AD SOFT HYPHEN vs U+2010 HYPHEN,
    # or U+0041 LATIN CAPITAL LETTER A vs U+0391 GREEK CAPITAL LETTER ALPHA.
    GOOD_CHARACTERS = ''.join([
        '\t\n\r -~', # ASCII (tabs and line endings are checked separately)
        '\u00A0-\u00FF', # Latin-1 Supplement (for NO-BREAK SPACE and punctuation)
        '\u2010-\u2027\u2030-\u205E', # General Punctuation (printable)
        '\u2070\u2071\u2074-\u208E\u2090-\u209C', # Superscripts and Subscripts
        '\u2190-\u21FF', # Arrows
        '\u2200-\u22FF', # Mathematical Symbols
        '\u2500-\u257F' # Box Drawings characters used in markdown trees
    ])
    # Allow any of the characters and ranges above, and anything classified
    # as a word constituent.
    GOOD_CHARACTERS_RE = re.compile(r'[\w{}]+\Z'.format(GOOD_CHARACTERS))

    def issue_with_line(self, line, _filepath, line_number):
        """Whether ``line`` is not valid UTF-8 or has a forbidden character."""
        try:
            text = line.decode('utf-8')
        except UnicodeDecodeError:
            return True
        if line_number == 1:
            # Strip BOM (U+FEFF ZERO WIDTH NO-BREAK SPACE) at the beginning.
            # Which files are allowed to have a BOM is handled in
            # Utf8BomIssueTracker.
            if text.startswith('\uFEFF'):
                text = text[1:]
        return self.GOOD_CHARACTERS_RE.match(text) is None
278
class UnixLineEndingIssueTracker(LineIssueTracker):
    """Track files with non-Unix line endings (i.e. files with CR)."""

    heading = "Non-Unix line endings:"

    def should_check_file(self, filepath):
        """Check every non-exempt file except Windows files."""
        if is_windows_file(filepath):
            return False
        return super().should_check_file(filepath)

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether the line contains a carriage return."""
        return line.find(b"\r") != -1
291
292
class WindowsLineEndingIssueTracker(LineIssueTracker):
    """Track files with non-Windows line endings (i.e. CR or LF not in CRLF)."""

    heading = "Non-Windows line endings:"

    def should_check_file(self, filepath):
        """Check only non-exempt Windows files."""
        if not is_windows_file(filepath):
            return False
        return super().should_check_file(filepath)

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether the line lacks a final CRLF or has a CR elsewhere."""
        if not line.endswith(b"\r\n"):
            return True
        # The line ends with CRLF: look for a stray CR before it.
        return b"\r" in line[:-2]
Gilles Peskine545e13f2020-03-24 22:29:11 +0100305
306
class TrailingWhitespaceIssueTracker(LineIssueTracker):
    """Track lines with trailing whitespace."""

    heading = "Trailing whitespace:"
    suffix_exemptions = frozenset([".dsp", ".md"])

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether there is whitespace before the line ending."""
        # Drop the line ending first, then see whether stripping the
        # remaining trailing whitespace removes anything more.
        without_eol = line.rstrip(b"\r\n")
        return without_eol != without_eol.rstrip()
315
316
class TabIssueTracker(LineIssueTracker):
    """Track lines with tabs."""

    heading = "Tabs present:"
    suffix_exemptions = frozenset([
        ".pem", # some openssl dumps have tabs
        ".sln",
        "/Makefile",
        "/Makefile.inc",
        "/generate_visualc_files.pl",
    ])

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether the line contains a tab character."""
        return line.find(b"\t") != -1
331
332
class MergeArtifactIssueTracker(LineIssueTracker):
    """Track lines with merge artifacts.
    These are leftovers from a ``git merge`` that wasn't fully edited."""

    heading = "Merge artifact:"

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether the line looks like a leftover git conflict marker."""
        # Start and end markers, plus the common-ancestor marker that
        # appears with merge.conflictStyle=diff3.
        conflict_prefixes = (b'<<<<<<< ', b'>>>>>>> ', b'||||||| ')
        if line.startswith(conflict_prefixes):
            return True
        # A line of seven equal signs separates the two sides of a
        # conflict. It is not reported in .md files.
        is_separator = line.rstrip(b'\r\n') == b'======='
        return is_separator and not _filepath.endswith('.md')
349
Darryl Green10d9ce32018-02-28 10:02:55 +0000350
def this_location():
    """Return the base name of this file and a line number inside this function."""
    current_frame = inspect.currentframe()
    assert current_frame is not None
    location = inspect.getframeinfo(current_frame)
    return os.path.basename(location.filename), location.lineno
THIS_FILE_BASE_NAME, LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER = this_location()
357
class LicenseIssueTracker(LineIssueTracker):
    """Check copyright statements and license indications.

    This class only checks that statements are correct if present. It does
    not enforce the presence of statements in each file.
    """

    heading = "License issue:"

    LICENSE_EXEMPTION_RE_LIST = [
        # Third-party code, other than whitelisted third-party modules,
        # may be under a different license.
        r'3rdparty/(?!(p256-m)/.*)',
        # Documentation explaining the license may have accidental
        # false positives.
        r'(ChangeLog|LICENSE|[-0-9A-Z_a-z]+\.md)\Z',
        # Files imported from TF-M, and not used except in test builds,
        # may be under a different license.
        r'configs/crypto_config_profile_medium\.h\Z',
        r'configs/tfm_mbedcrypto_config_profile_medium\.h\Z',
        # Third-party file.
        r'dco\.txt\Z',
    ]
    # Skip binary files as well as the license-specific exemptions.
    path_exemptions = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST +
                                          LICENSE_EXEMPTION_RE_LIST))

    COPYRIGHT_HOLDER = rb'The Mbed TLS Contributors'
    # Catch "Copyright foo", "Copyright (C) foo", "Copyright (sign) foo", etc.
    COPYRIGHT_RE = re.compile(rb'.*\bcopyright\s+((?:\w|\s|[()]|[^ -~])*\w)', re.I)

    SPDX_HEADER_KEY = b'SPDX-License-Identifier'
    LICENSE_IDENTIFIER = b'Apache-2.0 OR GPL-2.0-or-later'
    SPDX_RE = re.compile(br'.*?(' +
                         re.escape(SPDX_HEADER_KEY) +
                         br')(:\s*(.*?)\W*\Z|.*)', re.I)

    LICENSE_MENTION_RE = re.compile(rb'.*(?:' + rb'|'.join([
        rb'Apache License',
        rb'General Public License',
    ]) + rb')', re.I)

    def __init__(self):
        super().__init__()
        # Record what problem was caused. We can't easily report it due to
        # the structure of the script. To be fixed after
        # https://github.com/Mbed-TLS/mbedtls/pull/2506
        self.problem = None

    def _describe_problem(self, line):
        """Return a description of the license problem in ``line``, or None."""
        copyright_match = self.COPYRIGHT_RE.match(line)
        if copyright_match and \
           copyright_match.group(1) != self.COPYRIGHT_HOLDER:
            return 'Invalid copyright line'

        spdx_match = self.SPDX_RE.match(line)
        if spdx_match:
            key = spdx_match.group(1)
            identifier = spdx_match.group(3)
            # The regex is case-insensitive, so an exact comparison
            # catches a key written with the wrong case.
            if key != self.SPDX_HEADER_KEY:
                return 'Misspelled ' + self.SPDX_HEADER_KEY.decode()
            if not identifier:
                return 'Improperly formatted SPDX license identifier'
            if identifier != self.LICENSE_IDENTIFIER:
                return 'Wrong SPDX license identifier'

        if self.LICENSE_MENTION_RE.match(line):
            return 'Suspicious license mention'

        return None

    def issue_with_line(self, line, filepath, line_number):
        """Whether ``line`` has an incorrect copyright or license statement.

        When a problem is found, its description is stored in
        ``self.problem``.
        """
        # Use endswith() rather than the more correct os.path.basename()
        # because experimentally, it makes a significant difference to
        # the running time.
        in_this_file = filepath.endswith(THIS_FILE_BASE_NAME)
        if in_this_file and \
           line_number > LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER:
            # Avoid false positives from the code in this class.
            # Also skip the rest of this file, which is highly unlikely to
            # contain any problematic statements since we put those near the
            # top of files.
            return False

        problem = self._describe_problem(line)
        if problem is None:
            return False
        self.problem = problem
        return True
443
444
class IntegrityChecker:
    """Sanity-check files under the current directory."""

    def __init__(self, log_file):
        """Instantiate the sanity checker.
        Check files under the current directory.
        Write a report of issues to log_file."""
        build_tree.check_repo_path()
        self.logger = None
        self.setup_logger(log_file)
        self.issues_to_check = [
            ShebangIssueTracker(),
            EndOfFileNewlineIssueTracker(),
            Utf8BomIssueTracker(),
            UnicodeIssueTracker(),
            UnixLineEndingIssueTracker(),
            WindowsLineEndingIssueTracker(),
            TrailingWhitespaceIssueTracker(),
            TabIssueTracker(),
            MergeArtifactIssueTracker(),
            LicenseIssueTracker(),
        ]

    def setup_logger(self, log_file, level=logging.INFO):
        """Set up the root logger to write to ``log_file``.

        If ``log_file`` is empty or ``None``, log through a
        ``logging.StreamHandler`` instead.
        """
        self.logger = logging.getLogger()
        self.logger.setLevel(level)
        if log_file:
            handler = logging.FileHandler(log_file)
            self.logger.addHandler(handler)
        else:
            console = logging.StreamHandler()
            self.logger.addHandler(console)

    @staticmethod
    def collect_files():
        """Return the list of files reported by ``git ls-files``.

        File names are decoded as ASCII.
        """
        bytes_output = subprocess.check_output(['git', 'ls-files', '-z'])
        # The output is a sequence of null-terminated names, so the last
        # element of the split is always empty.
        bytes_filepaths = bytes_output.split(b'\0')[:-1]
        ascii_filepaths = (fp.decode('ascii') for fp in bytes_filepaths)
        # Prepend './' to files in the top-level directory so that
        # something like `'/Makefile' in fp` matches in the top-level
        # directory as well as in subdirectories.
        return [fp if os.path.dirname(fp) else os.path.join(os.curdir, fp)
                for fp in ascii_filepaths]

    def check_files(self):
        """Run every issue tracker on every file that it should check."""
        # List the files once rather than once per tracker: the list is
        # the same for all trackers and obtaining it runs a subprocess.
        filepaths = self.collect_files()
        for issue_to_check in self.issues_to_check:
            for filepath in filepaths:
                if issue_to_check.should_check_file(filepath):
                    issue_to_check.check_file_for_issue(filepath)

    def output_issues(self):
        """Log the issues found and return 1 if there were any, otherwise 0."""
        integrity_return_code = 0
        for issue_to_check in self.issues_to_check:
            if issue_to_check.files_with_issues:
                integrity_return_code = 1
            issue_to_check.output_file_issues(self.logger)
        return integrity_return_code
502
503
def run_main():
    """Parse the command line, run all the checks and exit.

    The exit status is the value returned by
    ``IntegrityChecker.output_issues``.
    """
    arg_parser = argparse.ArgumentParser(description=__doc__)
    arg_parser.add_argument(
        "-l", "--log_file", type=str, help="path to optional output log",
    )
    options = arg_parser.parse_args()
    checker = IntegrityChecker(options.log_file)
    checker.check_files()
    sys.exit(checker.output_issues())


if __name__ == "__main__":
    run_main()