blob: 3f19870309b33e73bc86eb1436a40fe79e244896 [file] [log] [blame]
Pengyu Lv7f6933a2023-04-04 16:05:54 +08001#!/usr/bin/env python3
#
# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
17
"""Audit validity date of X509 crt/crl/csr.

This script is used to audit the validity date of crt/crl/csr used for testing.
It checks the files in tests/data_files/ as well as the data embedded in the
test suite data files tests/suites/*.data.
"""
24
25import os
26import sys
27import re
28import typing
Pengyu Lv7f6933a2023-04-04 16:05:54 +080029import argparse
30import datetime
Pengyu Lv45e32032023-04-06 14:33:41 +080031import glob
Pengyu Lv7f6933a2023-04-04 16:05:54 +080032from enum import Enum
33
# The script requires cryptography >= 35.0.0 which is only available
# for Python >= 3.6. Disable the pylint error here until we are
# using a modern system on our CI.
37from cryptography import x509 #pylint: disable=import-error
Pengyu Lv7f6933a2023-04-04 16:05:54 +080038
Pengyu Lv30f26832023-04-07 18:04:07 +080039# reuse the function to parse *.data file in tests/suites/
40from generate_test_code import parse_test_data as parse_suite_data
41
class DataType(Enum):
    """Kind of X.509 object handled by the audit."""
    CRT = 1  # Certificate
    CRL = 2  # Certificate Revocation List
    CSR = 3  # Certificate Signing Request
46
class DataFormat(Enum):
    """Serialization format of an X.509 object."""
    PEM = 1  # Privacy-Enhanced Mail
    DER = 2  # Distinguished Encoding Rules
50
class AuditData:
    """Validity window of one audited X.509 object.

    Instances carry:
      * data_type: the DataType given to the constructor.
      * filename: initialised to "" by the constructor.
      * not_valid_before / not_valid_after: bounds of the validity window,
        filled by fill_validity_duration().
    """
    #pylint: disable=too-few-public-methods
    def __init__(self, data_type: DataType, x509_obj):
        self.data_type = data_type
        self.filename = ""
        self.fill_validity_duration(x509_obj)

    def fill_validity_duration(self, x509_obj):
        """Set not_valid_after and not_valid_before from a x509 object.

        :param x509_obj: parsed object matching self.data_type.
        :raises ValueError: when self.data_type is not a known DataType.
        """
        kind = self.data_type

        if kind == DataType.CRT:
            # A certificate expires after "not_valid_after" and is
            # invalid before "not_valid_before".
            self.not_valid_after = x509_obj.not_valid_after
            self.not_valid_before = x509_obj.not_valid_before
            return

        if kind == DataType.CRL:
            # A revocation list expires after "next_update" and is
            # invalid before "last_update".
            self.not_valid_after = x509_obj.next_update
            self.not_valid_before = x509_obj.last_update
            return

        if kind == DataType.CSR:
            # A signing request has no validity period: treat it as
            # always valid by using the widest representable window.
            self.not_valid_after = datetime.datetime.max
            self.not_valid_before = datetime.datetime.min
            return

        raise ValueError("Unsupported file_type: {}".format(kind))
77
class X509Parser:
    """Parse crt/crl/csr data given in either PEM or DER format.

    The object is indexed by DataType: ``parser[DataType.CRT]`` is a callable
    taking bytes and returning the loaded object, or None when the data is
    not of that type.
    """
    # A complete PEM block: BEGIN line, payload, matching END line.
    PEM_REGEX = br'-{5}BEGIN (?P<type>.*?)-{5}\n(?P<data>.*?)-{5}END (?P=type)-{5}\n'
    # Only the BEGIN line, used to read the PEM tag.
    PEM_TAG_REGEX = br'-{5}BEGIN (?P<type>.*?)-{5}\n'
    # PEM tag expected for each supported DataType.
    PEM_TAGS = {
        DataType.CRT: 'CERTIFICATE',
        DataType.CRL: 'X509 CRL',
        DataType.CSR: 'CERTIFICATE REQUEST'
    }

    def __init__(self, backends: dict):
        """
        :param backends: mapping DataType -> {DataFormat -> loader callable}.
        """
        self.backends = backends
        self.__generate_parsers()

    def __generate_parser(self, data_type: DataType):
        """Build the parsing function for a single DataType."""
        tag = self.PEM_TAGS[data_type]
        loaders = self.backends[data_type]
        pem_loader = loaders[DataFormat.PEM]
        der_loader = loaders[DataFormat.DER]

        def wrapper(data: bytes):
            detected_tag = X509Parser.pem_data_type(data)
            if detected_tag == tag:
                # PEM data carrying the tag we are looking for.
                return pem_loader(data)
            if detected_tag:
                # PEM data of some other type: not ours.
                return None
            # No PEM tag at all, so the data might be DER.
            try:
                return der_loader(data)
            except ValueError:
                return None

        wrapper.__name__ = "{}.parser[{}]".format(type(self).__name__, tag)
        return wrapper

    def __generate_parsers(self):
        """Build the parsing functions for every supported DataType."""
        self.parsers = {
            data_type: self.__generate_parser(data_type)
            for data_type in self.PEM_TAGS
        }

    def __getitem__(self, item):
        return self.parsers[item]

    @staticmethod
    def pem_data_type(data: bytes) -> str:
        """Extract the PEM tag from data.

        :param data: data to be checked in binary mode.
        :return: PEM tag or "" when no tag detected.
        """
        found = re.search(X509Parser.PEM_TAG_REGEX, data)
        return "" if found is None else found.group('type').decode('UTF-8')

    @staticmethod
    def check_hex_string(hex_str: str) -> bool:
        """Tell whether a hex string could be a DER encoded SEQUENCE.

        The string must consist of exactly one SEQUENCE whose encoded
        length accounts for every remaining hex character.
        """
        total_len = len(hex_str)
        # Need at least 3 bytes (6 hex chars): Type + Length + Content,
        # and the Type byte must be SEQUENCE (0x30).
        if total_len < 6 or hex_str[0:2] != '30':
            return False

        first_length_byte = int(hex_str[2:4], base=16)
        header_len = 4
        if first_length_byte in (128, 255):
            # Indefinite or Reserved
            return False

        if first_length_byte > 127:
            # Definite, Long: the low 7 bits give the number of length bytes.
            length_field_len = (first_length_byte - 128) * 2
            content_len = int(hex_str[header_len:header_len + length_field_len],
                              base=16)
            header_len += length_field_len
        else:
            # Definite, Short: the byte is the content length itself.
            content_len = first_length_byte

        # The header plus the announced content must cover the whole string.
        return total_len == content_len * 2 + header_len
161
class Auditor:
    """Common machinery for the auditors: parsing and result collection."""
    def __init__(self, verbose):
        """
        :param verbose: when true, warn() prints its messages.
        """
        self.verbose = verbose
        self.default_files = []
        self.audit_data = []
        # Loaders from the cryptography x509 module, per type and format.
        backends = {
            DataType.CRT: {
                DataFormat.PEM: x509.load_pem_x509_certificate,
                DataFormat.DER: x509.load_der_x509_certificate
            },
            DataType.CRL: {
                DataFormat.PEM: x509.load_pem_x509_crl,
                DataFormat.DER: x509.load_der_x509_crl
            },
            DataType.CSR: {
                DataFormat.PEM: x509.load_pem_x509_csr,
                DataFormat.DER: x509.load_der_x509_csr
            },
        }
        self.parser = X509Parser(backends)

    def error(self, *args):
        """Print an error message to stderr."""
        #pylint: disable=no-self-use
        print("Error: ", *args, file=sys.stderr)

    def warn(self, *args):
        """Print a warning to stderr, but only in verbose mode."""
        if self.verbose:
            print("Warn: ", *args, file=sys.stderr)

    def parse_file(self, filename: str) -> typing.List[AuditData]:
        """
        Read a file and parse its whole content as one X.509 object.

        :param filename: name of the file to parse.
        :return list of AuditData parsed from the file (empty or one item).
        """
        with open(filename, 'rb') as f:
            data = f.read()
        parsed = self.parse_bytes(data)
        if parsed is None:
            return []
        parsed.filename = filename
        return [parsed]

    def parse_bytes(self, data: bytes):
        """Parse AuditData from bytes.

        Each DataType is tried in declaration order; the first parser that
        yields an object wins.

        :return: AuditData, or None when no parser recognises the data.
        """
        for data_type in DataType:
            try:
                x509_obj = self.parser[data_type](data)
            except ValueError as val_error:
                # Not fatal: report it and try the next type.
                self.warn(val_error)
                continue
            if x509_obj is not None:
                return AuditData(data_type, x509_obj)
        return None

    def walk_all(self, file_list):
        """
        Parse every file of the list and accumulate the audit data.

        :param file_list: files to parse; self.default_files is used when
                          it is empty or None.
        """
        for filename in (file_list or self.default_files):
            self.audit_data.extend(self.parse_file(filename))

    @staticmethod
    def find_test_dir():
        """Get the relative path for the MbedTLS test directory."""
        # (directory whose presence is checked, resulting tests directory)
        candidates = (
            ('tests', 'tests'),
            ('suites', '.'),
            ('../suites', '..'),
        )
        for marker, tests_dir in candidates:
            if os.path.isdir(marker):
                return tests_dir
        raise Exception("Mbed TLS source tree not found")
242
class TestDataAuditor(Auditor):
    """Auditor for the files stored under tests/data_files/."""
    def __init__(self, verbose):
        super().__init__(verbose)
        self.default_files = self.collect_default_files()

    def collect_default_files(self):
        """Return the path of every file found below tests/data_files/."""
        test_data_folder = os.path.join(self.find_test_dir(), 'data_files')
        return [
            os.path.join(dir_path, file_name)
            for dir_path, _, file_names in os.walk(test_data_folder)
            for file_name in file_names
        ]
258
class FileWrapper:
    """
    This is a stub class of generate_test_code.FileWrapper.

    The whole file is loaded into memory by the constructor; iterating
    then yields its lines one by one while counting them.
    """

    def __init__(self, file_name):
        """
        Read the file and initialize the line number to 0.

        :param file_name: File path to open.
        """
        with open(file_name, 'rb') as f:
            self.buf = f.read()
        self.buf_len = len(self.buf)
        self._line_no = 0
        self._line_start = 0

    def __iter__(self):
        """Make the class iterable."""
        return self

    def __next__(self):
        """
        Return the next line of the file.

        :return: Line read from file, right-stripped and terminated by
                 a single LF.
        """
        start = self._line_start
        if start >= self.buf_len:
            # Everything has been consumed.
            raise StopIteration

        lf_pos = self.buf.find(b'\n', start)
        # The line ends just after the next LF, or at the end of the
        # buffer when the last line has no LF.
        stop = self.buf_len if lf_pos < 0 else lf_pos + 1
        raw_line = self.buf[start:stop]
        self._line_start = stop
        self._line_no += 1

        # Decode with the default encoding and normalise the line ending.
        text = raw_line.decode(sys.getdefaultencoding())
        return text.rstrip() + '\n'

    def get_line_no(self):
        """
        Return the number of lines read so far.
        """
        return self._line_no

    line_no = property(get_line_no)
316
class SuiteDataAuditor(Auditor):
    """Auditor for the data embedded in tests/suites/*.data files."""
    def __init__(self, options):
        super().__init__(options)
        self.default_files = self.collect_default_files()

    def collect_default_files(self):
        """Return the list of tests/suites/*.data files."""
        suites_data_folder = os.path.join(self.find_test_dir(), 'suites')
        return glob.glob(os.path.join(suites_data_folder, '*.data'))

    def parse_file(self, filename: str):
        """
        Extract the audit data from every test argument of a .data file.

        Only arguments that start with a quoted hex string which passes
        X509Parser.check_hex_string() are handed to the parsers.

        :param filename: name of the file to parse.
        :return list of AuditData parsed from the file.
        """
        audit_data_list = []
        for _, _, _, test_args in parse_suite_data(FileWrapper(filename)):
            for test_arg in test_args:
                match = re.match(r'"(?P<data>[0-9a-fA-F]+)"', test_arg)
                hex_data = match.group('data') if match else None
                # Skip anything that is not a plausible DER blob.
                if hex_data is None or \
                        not X509Parser.check_hex_string(hex_data):
                    continue
                audit_data = self.parse_bytes(bytes.fromhex(hex_data))
                if audit_data is not None:
                    audit_data.filename = filename
                    audit_data_list.append(audit_data)

        return audit_data_list
Pengyu Lv7f6933a2023-04-04 16:05:54 +0800353
def list_all(audit_data: AuditData):
    """Print one tab-separated line describing an AuditData.

    The columns are: start of validity, end of validity, type name and
    file name. Dates are printed in ISO format with second resolution.
    """
    columns = (
        audit_data.not_valid_before.isoformat(timespec='seconds'),
        audit_data.not_valid_after.isoformat(timespec='seconds'),
        audit_data.data_type.name,
        audit_data.filename,
    )
    print("{}\t{}\t{}\t{}".format(*columns))
360
def main():
    """
    Parse the command line, run both auditors and print the findings.

    Without --all, only the entries whose validity period does not cover
    the whole [--not-before, --not-after] interval are listed. When
    --not-before is omitted it defaults to the current time in UTC, and
    --not-after defaults to the --not-before date.
    """
    parser = argparse.ArgumentParser(
        description='Audit script for X509 crt/crl/csr files.'
    )

    parser.add_argument('-a', '--all',
                        action='store_true',
                        help='list the information of all files')
    parser.add_argument('-v', '--verbose',
                        action='store_true', dest='verbose',
                        help='Show warnings')
    parser.add_argument('--not-before', dest='not_before',
                        help='not valid before this date(UTC), YYYY-MM-DD',
                        metavar='DATE')
    parser.add_argument('--not-after', dest='not_after',
                        help='not valid after this date(UTC), YYYY-MM-DD',
                        metavar='DATE')
    parser.add_argument('-f', '--file', dest='file',
                        help='file to audit (Debug only)',
                        metavar='FILE')

    args = parser.parse_args()

    # start main routine
    td_auditor = TestDataAuditor(args.verbose)
    sd_auditor = SuiteDataAuditor(args.verbose)

    if args.file:
        data_files = [args.file]
        suite_data_files = [args.file]
    else:
        data_files = td_auditor.default_files
        suite_data_files = sd_auditor.default_files

    if args.not_before:
        not_before_date = datetime.datetime.fromisoformat(args.not_before)
    else:
        # The dates are documented as UTC and are compared with the naive
        # UTC datetimes reported for the parsed objects, so the default
        # must be a naive UTC "now" rather than local wall-clock time.
        not_before_date = datetime.datetime.now(
            datetime.timezone.utc).replace(tzinfo=None)
    if args.not_after:
        not_after_date = datetime.datetime.fromisoformat(args.not_after)
    else:
        not_after_date = not_before_date

    td_auditor.walk_all(data_files)
    sd_auditor.walk_all(suite_data_files)
    audit_results = td_auditor.audit_data + sd_auditor.audit_data

    def outside_requested_period(audit_data):
        """True when the validity period fails to cover the requested one."""
        return (not_before_date < audit_data.not_valid_before) or \
               (audit_data.not_valid_after < not_after_date)

    # Entries whose validity period covers the requested period are
    # filtered out, unless everything was asked for.
    if args.all:
        selected = audit_results
    else:
        selected = [d for d in audit_results if outside_requested_period(d)]

    for audit_data in selected:
        list_all(audit_data)

    print("\nDone!\n")
423
# Run the audit only when executed as a script, not when imported.
if __name__ == "__main__":
    main()