blob: 5e22bfca908b80f5efb61d55596f14061fe4e1dc [file] [log] [blame]
Pengyu Lv7f6933a2023-04-04 16:05:54 +08001#!/usr/bin/env python3
2#
3# Copyright The Mbed TLS Contributors
4# SPDX-License-Identifier: Apache-2.0
5#
6# Licensed under the Apache License, Version 2.0 (the "License"); you may
7# not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
18"""Audit validity date of X509 crt/crl/csr
19
20This script is used to audit the validity date of crt/crl/csr used for testing.
21The files are in tests/data_files/ while some data are in test suites data in
22tests/suites/*.data files.
23"""
24
25import os
26import sys
27import re
28import typing
29import types
30import argparse
31import datetime
Pengyu Lv45e32032023-04-06 14:33:41 +080032import glob
Pengyu Lv7f6933a2023-04-04 16:05:54 +080033from enum import Enum
34
35from cryptography import x509
36
Pengyu Lv30f26832023-04-07 18:04:07 +080037# reuse the function to parse *.data file in tests/suites/
38from generate_test_code import parse_test_data as parse_suite_data
39
class DataType(Enum):
    """Kinds of X.509 objects handled by the audit script."""
    # Certificate
    CRT = 1
    # Certificate Revocation List
    CRL = 2
    # Certificate Signing Request
    CSR = 3
44
class DataFormat(Enum):
    """Encodings in which an X.509 object may be supplied."""
    # Privacy-Enhanced Mail (base64 text wrapped in BEGIN/END markers)
    PEM = 1
    # Distinguished Encoding Rules (raw binary ASN.1)
    DER = 2
48
class AuditData:
    """Validity window of one audited X.509 object and the file it came from."""
    #pylint: disable=too-few-public-methods
    def __init__(self, data_type: DataType):
        self.data_type = data_type
        self.filename = ""
        # Only declared here; both are assigned by fill_validity_duration().
        self.not_valid_after: datetime.datetime
        self.not_valid_before: datetime.datetime

    def fill_validity_duration(self, x509_obj):
        """Set not_valid_after / not_valid_before from a parsed x509 object.

        :param x509_obj: parsed object matching self.data_type.
        :raises ValueError: if self.data_type is not a known DataType.
        """
        kind = self.data_type
        if kind == DataType.CRT:
            # A certificate expires after "not_valid_after" and is not yet
            # valid before "not_valid_before".
            end, start = x509_obj.not_valid_after, x509_obj.not_valid_before
        elif kind == DataType.CRL:
            # A revocation list expires after "next_update" and is not yet
            # valid before "last_update".
            end, start = x509_obj.next_update, x509_obj.last_update
        elif kind == DataType.CSR:
            # A signing request has no validity period: treat it as always
            # valid by using the widest representable window.
            end, start = datetime.datetime.max, datetime.datetime.min
        else:
            raise ValueError("Unsupported file_type: {}".format(kind))
        self.not_valid_after = end
        self.not_valid_before = start
76
class X509Parser():
    """Parse crt/crl/csr data given in either PEM or DER format.

    Indexing an instance with a DataType returns a callable that takes the
    raw bytes and returns the parsed object, or None when the data is not
    of that type.
    """
    PEM_REGEX = br'-{5}BEGIN (?P<type>.*?)-{5}\n(?P<data>.*?)-{5}END (?P=type)-{5}\n'
    PEM_TAG_REGEX = br'-{5}BEGIN (?P<type>.*?)-{5}\n'
    PEM_TAGS = {
        DataType.CRT: 'CERTIFICATE',
        DataType.CRL: 'X509 CRL',
        DataType.CSR: 'CERTIFICATE REQUEST'
    }

    def __init__(self, backends: dict):
        # backends maps DataType -> {DataFormat -> loader callable}.
        self.backends = backends
        self.__generate_parsers()

    def __generate_parser(self, data_type: DataType):
        """Build the parsing callable for one DataType."""
        expected_tag = self.PEM_TAGS[data_type]
        load_pem = self.backends[data_type][DataFormat.PEM]
        load_der = self.backends[data_type][DataFormat.DER]

        def wrapper(data: bytes):
            found_tag = X509Parser.pem_data_type(data)
            if found_tag:
                # PEM input: only the loader for the matching tag may take
                # it; PEM data of another type is not ours to parse.
                return load_pem(data) if found_tag == expected_tag else None
            # No PEM tag, so the data might be DER. A ValueError from the
            # loader means it is not DER of this type.
            try:
                return load_der(data)
            except ValueError:
                return None

        wrapper.__name__ = "{}.parser[{}]".format(type(self).__name__,
                                                  expected_tag)
        return wrapper

    def __generate_parsers(self):
        """Build one parser per supported DataType."""
        self.parsers = {data_type: self.__generate_parser(data_type)
                        for data_type in self.PEM_TAGS}

    def __getitem__(self, item):
        return self.parsers[item]

    @staticmethod
    def pem_data_type(data: bytes) -> str:
        """Extract the tag of the first PEM "BEGIN" line found in data.

        :param data: data to be checked in binary mode.
        :return: PEM tag or "" when no tag detected.
        """
        found = re.search(X509Parser.PEM_TAG_REGEX, data)
        return "" if found is None else found.group('type').decode('UTF-8')

    @staticmethod
    def check_hex_string(hex_str: str) -> bool:
        """Tell whether a hex string could be one complete DER SEQUENCE.

        The string must start with the SEQUENCE type byte (0x30) and the
        length it encodes must account for exactly the rest of the string.
        """
        total = len(hex_str)
        # Type + Length + at least one Content byte = 3 bytes = 6 hex chars,
        # and the Type byte must be SEQUENCE.
        if total < 6 or hex_str[0:2] != '30':
            return False
        first_length_byte = int(hex_str[2:4], base=16)
        header = 4
        if first_length_byte in (0x80, 0xFF):
            # 0x80 is the indefinite form, 0xFF is reserved.
            return False
        if first_length_byte > 0x7F:
            # Long definite form: the low 7 bits give the number of length
            # bytes that follow (two hex chars each).
            width = (first_length_byte - 0x80) * 2
            payload = int(hex_str[header:header + width], base=16)
            header += width
        else:
            # Short definite form: the byte is the length itself.
            payload = first_length_byte
        # The header plus the announced content must cover the whole string.
        return total == payload * 2 + header
160
class Auditor:
    """Base class for auditors.

    An auditor parses files into AuditData entries, accumulates them in
    self.audit_data and lets callers visit them with for_each().
    Subclasses set self.default_files and may override parse_file().
    """
    def __init__(self, verbose):
        self.verbose = verbose
        self.default_files = []
        self.audit_data = []
        # One PEM loader and one DER loader per supported data type.
        self.parser = X509Parser({
            DataType.CRT: {
                DataFormat.PEM: x509.load_pem_x509_certificate,
                DataFormat.DER: x509.load_der_x509_certificate
            },
            DataType.CRL: {
                DataFormat.PEM: x509.load_pem_x509_crl,
                DataFormat.DER: x509.load_der_x509_crl
            },
            DataType.CSR: {
                DataFormat.PEM: x509.load_pem_x509_csr,
                DataFormat.DER: x509.load_der_x509_csr
            },
        })

    def error(self, *args):
        """Print an error message to stderr."""
        #pylint: disable=no-self-use
        print("Error: ", *args, file=sys.stderr)

    def warn(self, *args):
        """Print a warning to stderr, but only in verbose mode."""
        if self.verbose:
            print("Warn: ", *args, file=sys.stderr)

    def parse_file(self, filename: str) -> "typing.List[AuditData]":
        """
        Parse a list of AuditData from file.

        The whole file is treated as a single crt/crl/csr, so the returned
        list holds at most one entry.

        :param filename: name of the file to parse.
        :return list of AuditData parsed from the file.
        """
        with open(filename, 'rb') as f:
            data = f.read()
        result_list = []
        result = self.parse_bytes(data)
        if result is not None:
            result.filename = filename
            result_list.append(result)
        return result_list

    def parse_bytes(self, data: bytes):
        """Parse AuditData from bytes.

        Each DataType is tried in turn; the first parser that accepts the
        data wins.

        :param data: raw PEM or DER bytes.
        :return: a filled AuditData, or None if no parser accepts the data.
        """
        for data_type in DataType:
            try:
                result = self.parser[data_type](data)
            except ValueError as val_error:
                # Not parseable as this type: report it (verbose only) and
                # move on to the next type.
                result = None
                self.warn(val_error)
            if result is not None:
                audit_data = AuditData(data_type)
                audit_data.fill_validity_duration(result)
                return audit_data
        return None

    def walk_all(self, file_list):
        """
        Iterate over all the files in the list and get audit data.

        :param file_list: files to parse; when empty or None,
                          self.default_files is used instead.
        """
        if not file_list:
            file_list = self.default_files
        for filename in file_list:
            data_list = self.parse_file(filename)
            self.audit_data.extend(data_list)

    def for_each(self, do, *args, **kwargs):
        """
        Call do(audit_data, *args, **kwargs) for every collected entry.

        Entries are visited in the order they were collected; no sorting
        is performed.

        :param do: any callable (plain function, lambda, bound method,
                   functools.partial, ...). A non-callable is ignored.
        """
        # callable() rather than a types.FunctionType check, so that bound
        # methods, builtins and partials are not silently skipped.
        if not callable(do):
            return
        for d in self.audit_data:
            do(d, *args, **kwargs)

    @staticmethod
    def find_test_dir():
        """Get the relative path for the MbedTLS test directory.

        :raises Exception: if the current directory is neither the source
                           root, the tests directory nor a direct
                           subdirectory of it.
        """
        if os.path.isdir('tests'):
            tests_dir = 'tests'
        elif os.path.isdir('suites'):
            tests_dir = '.'
        elif os.path.isdir('../suites'):
            tests_dir = '..'
        else:
            raise Exception("Mbed TLS source tree not found")
        return tests_dir
251
class TestDataAuditor(Auditor):
    """Auditor for the files under tests/data_files/."""
    def __init__(self, verbose):
        super().__init__(verbose)
        self.default_files = self.collect_default_files()

    def collect_default_files(self):
        """Return the path of every file below tests/data_files/, recursively."""
        root = os.path.join(self.find_test_dir(), 'data_files')
        return [os.path.join(folder, name)
                for folder, _, names in os.walk(root)
                for name in names]
267
class FileWrapper():
    """
    This is a stub class of generate_test_code.FileWrapper.

    The file is loaded into memory in one go; iteration then hands the
    lines out one at a time while counting them.
    """

    def __init__(self, file_name):
        """
        Load the file content and start counting lines from 0.

        :param file_name: File path to open.
        """
        with open(file_name, 'rb') as f:
            self.buf = f.read()
        self.buf_len = len(self.buf)
        self._line_no = 0
        self._line_start = 0

    def __iter__(self):
        """The wrapper is its own iterator."""
        return self

    def __next__(self):
        """
        Hand out the next line of the file.

        :return: Line read from file, with trailing whitespace replaced
                 by a single newline.
        """
        start = self._line_start
        # Nothing left to read.
        if start >= self.buf_len:
            raise StopIteration

        newline_at = self.buf.find(b'\n', start)
        # A line runs up to and including its LF; a final line without LF
        # runs to the end of the buffer.
        stop = self.buf_len if newline_at < 0 else newline_at + 1
        raw_line = self.buf[start:stop]
        self._line_start = stop
        self._line_no += 1

        # Decode, drop trailing whitespace (including any CR), and put
        # back exactly one newline.
        return raw_line.decode(sys.getdefaultencoding()).rstrip() + '\n'

    def get_line_no(self):
        """
        Number of lines handed out so far.
        """
        return self._line_no

    line_no = property(get_line_no)
325
class SuiteDataAuditor(Auditor):
    """Auditor for the X.509 data embedded in tests/suites/*.data"""
    def __init__(self, options):
        super().__init__(options)
        self.default_files = self.collect_default_files()

    def collect_default_files(self):
        """Return the paths matching tests/suites/*.data"""
        pattern = os.path.join(self.find_test_dir(), 'suites', '*.data')
        return glob.glob(pattern)

    def parse_file(self, filename: str):
        """
        Extract AuditData from the test arguments of a .data file.

        Every argument that is a quoted hex string and looks like DER data
        is decoded and offered to the X.509 parsers.

        :param filename: name of the file to parse.
        :return list of AuditData parsed from the file.
        """
        found = []
        for _, _, _, test_args in parse_suite_data(FileWrapper(filename)):
            for arg in test_args:
                quoted_hex = re.match(r'"(?P<data>[0-9a-fA-F]+)"', arg)
                if quoted_hex is None:
                    continue
                hex_data = quoted_hex.group('data')
                # Cheap structural check before attempting a real parse.
                if not X509Parser.check_hex_string(hex_data):
                    continue
                entry = self.parse_bytes(bytes.fromhex(hex_data))
                if entry is not None:
                    entry.filename = filename
                    found.append(entry)
        return found
Pengyu Lv7f6933a2023-04-04 16:05:54 +0800362
def list_all(audit_data: AuditData):
    """Print one tab-separated row for an audit entry.

    Columns: not-valid-before, not-valid-after (both ISO 8601, to the
    second), data type name, file name.
    """
    valid_from = audit_data.not_valid_before.isoformat(timespec='seconds')
    valid_until = audit_data.not_valid_after.isoformat(timespec='seconds')
    row = "{}\t{}\t{}\t{}".format(valid_from,
                                  valid_until,
                                  audit_data.data_type.name,
                                  audit_data.filename)
    print(row)
369
def main():
    """
    Parse the command line and run the audit.

    Without --file, every default file of both auditors is audited.
    With --file, the single file is given to exactly one auditor, chosen
    by its name: "*.data" goes to the suite-data auditor, anything else to
    the test-data auditor.
    """
    parser = argparse.ArgumentParser(
        description='Audit script for X509 crt/crl/csr files.'
    )

    parser.add_argument('-a', '--all',
                        action='store_true',
                        help='list the information of all files')
    parser.add_argument('-v', '--verbose',
                        action='store_true', dest='verbose',
                        help='Show warnings')
    parser.add_argument('-f', '--file', dest='file',
                        help='file to audit (Debug only)',
                        metavar='FILE')

    args = parser.parse_args()

    # start main routine
    td_auditor = TestDataAuditor(args.verbose)
    sd_auditor = SuiteDataAuditor(args.verbose)

    if args.file:
        # Hand the file to one auditor only. The suite-data auditor reads
        # its input as text lines, so a binary (DER) file must not reach
        # it, and probing a .data file as a certificate is pointless.
        # The other auditor's walk_all() is skipped entirely rather than
        # called with an empty list, because walk_all() treats an empty
        # list as "use the default files".
        if args.file.endswith('.data'):
            sd_auditor.walk_all([args.file])
        else:
            td_auditor.walk_all([args.file])
    else:
        td_auditor.walk_all(td_auditor.default_files)
        sd_auditor.walk_all(sd_auditor.default_files)

    if args.all:
        td_auditor.for_each(list_all)
        sd_auditor.for_each(list_all)

    print("\nDone!\n")
409
410if __name__ == "__main__":
411 main()