blob: ba4a01d56c85edd7c67f2613a024941b4aab140c [file] [log] [blame]
Gilles Peskine40b3f412019-10-13 21:44:25 +02001#!/usr/bin/env python3
2
Gilles Peskine42f384c2020-03-27 09:23:38 +01003"""Assemble Mbed TLS change log entries into the change log file.
Gilles Peskinea2607962020-01-28 19:58:17 +01004
5Add changelog entries to the first level-2 section.
6Create a new level-2 section for unreleased changes if needed.
7Remove the input files unless --keep-entries is specified.
Gilles Peskine28af9582020-03-26 22:39:18 +01008
9In each level-3 section, entries are sorted in chronological order
10(oldest first). From oldest to newest:
11* Merged entry files are sorted according to their merge date (date of
12 the merge commit that brought the commit that created the file into
13 the target branch).
14* Committed but unmerged entry files are sorted according to the date
15 of the commit that adds them.
16* Uncommitted entry files are sorted according to their modification time.
17
18You must run this program from within a git working directory.
Gilles Peskine40b3f412019-10-13 21:44:25 +020019"""
20
21# Copyright (C) 2019, Arm Limited, All Rights Reserved
22# SPDX-License-Identifier: Apache-2.0
23#
24# Licensed under the Apache License, Version 2.0 (the "License"); you may
25# not use this file except in compliance with the License.
26# You may obtain a copy of the License at
27#
28# http://www.apache.org/licenses/LICENSE-2.0
29#
30# Unless required by applicable law or agreed to in writing, software
31# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
32# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
33# See the License for the specific language governing permissions and
34# limitations under the License.
35#
Gilles Peskine42f384c2020-03-27 09:23:38 +010036# This file is part of Mbed TLS (https://tls.mbed.org)
Gilles Peskine40b3f412019-10-13 21:44:25 +020037
38import argparse
Gilles Peskine6e97c432020-03-27 19:05:18 +010039from collections import OrderedDict, namedtuple
Gilles Peskine8f46bbf2020-03-25 16:34:43 +010040import datetime
41import functools
Gilles Peskine40b3f412019-10-13 21:44:25 +020042import glob
43import os
44import re
Gilles Peskine8f46bbf2020-03-25 16:34:43 +010045import subprocess
Gilles Peskine40b3f412019-10-13 21:44:25 +020046import sys
47
class InputFormatError(Exception):
    """Error in the content of an input file, reported with its location.

    The exception text has the form ``FILENAME:LINE_NUMBER: MESSAGE``,
    where MESSAGE is `message` formatted with the extra positional and
    keyword arguments via `str.format`.
    """

    def __init__(self, filename, line_number, message, *args, **kwargs):
        detail = message.format(*args, **kwargs)
        located = '{}:{}: {}'.format(filename, line_number, detail)
        super().__init__(located)
Gilles Peskine40b3f412019-10-13 21:44:25 +020053
class LostContent(Exception):
    """A line from an input file is missing from the generated output.

    The exception text names the input (`filename`) and quotes the
    missing `line`.
    """

    def __init__(self, filename, line):
        super().__init__('Lost content from {}: "{}"'.format(filename, line))
58
# Category titles that are accepted in changelog entry files.
# The changelog always contains these categories in this order; categories
# that end up with an empty body are omitted when the changelog is written.
STANDARD_CATEGORIES = (
    b'API changes',
    b'Default behavior changes',
    b'Requirement changes',
    b'New deprecations',
    b'Removals',
    b'Features',
    b'Security',
    b'Bugfix',
    b'Changes',
)
70
# One category subsection as returned by `split_categories`:
#   name       -- title text of the category
#   title_line -- line number of the title
#   body       -- body text of the category
#   body_line  -- starting line number of the body
CategoryContent = namedtuple(
    'CategoryContent',
    ('name', 'title_line', 'body', 'body_line'),
)
75
class ChangelogFormat:
    """Virtual class documenting how to write a changelog format class.

    Every method here raises `NotImplementedError`; a concrete format
    class provides the actual implementations.
    """

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """Split out the top version section.

        Return a tuple
        ``(header, top_version_title, top_version_body, trailer)``
        whose concatenation, in that order, is equal to
        ``changelog_file_content``.
        """
        raise NotImplementedError

    @classmethod
    def version_title_text(cls, version_title):
        """Return the text of a formatted version section title."""
        raise NotImplementedError

    @classmethod
    def split_categories(cls, version_body):
        """Split a changelog version section body into categories.

        Return a list of `CategoryContent`. In each element, the name is
        the category title without any formatting.
        """
        raise NotImplementedError

    @classmethod
    def format_category(cls, title, body):
        """Construct the text of a category section from its title and body."""
        raise NotImplementedError
107
class TextChangelogFormat(ChangelogFormat):
    """The traditional Mbed TLS changelog format."""

    # Group 1: the first line that starts with '=' (at the start of the file
    # or right after a newline), together with the newlines that follow it.
    # Group 2: everything after that, as little as possible, up to a newline
    # that is followed by the next line starting with '=' or by the end of
    # the content.
    _top_version_re = re.compile(br'(?:\A|\n)(=[^\n]*\n+)(.*?\n)(?:=|$)',
                                 re.DOTALL)

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """A version section starts with a line starting with '='.

        Return ``(header, top_version_title, top_version_body, trailer)``,
        four byte strings whose concatenation is `changelog_file_content`.

        Raise `ValueError` if the content has no version section.
        """
        m = cls._top_version_re.search(changelog_file_content)
        if m is None:
            # Without this check the code below would fail with an obscure
            # AttributeError on None.
            raise ValueError('No version section found in the changelog')
        top_version_start = m.start(1)
        top_version_end = m.end(2)
        return (changelog_file_content[:top_version_start],
                m.group(1), m.group(2),
                changelog_file_content[top_version_end:])

    @classmethod
    def version_title_text(cls, version_title):
        """Return the first line of `version_title`, without its newline."""
        # Remove everything from the first newline onwards. The arguments of
        # re.sub are (pattern, replacement, string); the flags must be passed
        # by keyword, otherwise they land in the wrong positional slot.
        return re.sub(br'\n.*', b'', version_title, flags=re.DOTALL)

    # A category title: a line whose first character is a word character,
    # followed by one or more newlines.
    _category_title_re = re.compile(br'(^\w.*)\n+', re.MULTILINE)

    @classmethod
    def split_categories(cls, version_body):
        """A category title is a line with the title in column 0.

        Return a list of `CategoryContent`, one per title found, in order
        of appearance. The body of a category runs from the end of its
        title (and the newlines that follow) to the start of the next title
        or the end of `version_body`; trailing newlines are normalized to
        exactly one. ``title_line`` and ``body_line`` are the number of
        newlines that precede the title and the body in `version_body`.

        Return an empty list if there is no category title.
        """
        title_matches = list(cls._category_title_re.finditer(version_body))
        if not title_matches:
            return []
        title_starts = [m.start(1) for m in title_matches]
        body_starts = [m.end(0) for m in title_matches]
        # Each body ends where the next title starts; the last one ends
        # at the end of the text.
        body_ends = title_starts[1:] + [len(version_body)]
        bodies = [version_body[body_start:body_end].rstrip(b'\n') + b'\n'
                  for (body_start, body_end) in zip(body_starts, body_ends)]
        title_lines = [version_body[:pos].count(b'\n') for pos in title_starts]
        body_lines = [version_body[:pos].count(b'\n') for pos in body_starts]
        return [CategoryContent(title_match.group(1), title_line,
                                body, body_line)
                for title_match, title_line, body, body_line
                in zip(title_matches, title_lines, bodies, body_lines)]

    @classmethod
    def format_category(cls, title, body):
        """Return the title on its own line followed by the body.

        The result ends with a blank line.
        """
        # `split_categories` ensures that each body ends with a newline.
        # Make sure that there is additionally a blank line between categories.
        if not body.endswith(b'\n\n'):
            body += b'\n'
        return title + b'\n' + body
153
class ChangeLog:
    """An Mbed TLS changelog.

    A changelog file consists of some header text followed by one or
    more version sections. The version sections are in reverse
    chronological order. Each version section consists of a title and a body.

    The body of a version section consists of zero or more category
    subsections. Each category subsection consists of a title and a body.

    A changelog entry file has the same format as the body of a version section.

    A `ChangelogFormat` object defines the concrete syntax of the changelog.
    Entry files must have the same format as the changelog file.
    """

    # Only accept dotted version numbers (e.g. "3.1", not "3").
    # Refuse ".x" in a version number where x is a letter: this indicates
    # a version that is not yet released. Something like "3.1a" is accepted.
    _version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
    _incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')

    def section_is_released_version(self, title):
        """Whether this section is for a released version.

        True if the given level-2 section title indicates that this section
        contains released changes, otherwise False.
        """
        # Assume that a released version has a numerical version number
        # that follows a particular pattern. These criteria may be revised
        # as needed in future versions of this script.
        version_number = self._version_number_re.search(title)
        if version_number is None:
            return False
        return not self._incomplete_version_number_re.search(
            version_number.group(0))

    def unreleased_version_title(self):
        """The title to use if creating a new section for an unreleased version."""
        # pylint: disable=no-self-use; this method may be overridden
        return b'Unreleased changes'

    def add_categories_from_text(self, filename, line_offset,
                                 text, allow_unknown_category):
        """Parse a version section or entry file.

        Split `text` into categories using the changelog format and append
        each category body to the body already stored under the same name.

        `filename` and `line_offset` are only used to report the location
        of an error: `line_offset` is added to the line number of the
        offending title inside `text`.

        If `allow_unknown_category` is false, raise `InputFormatError` for
        a category that is not already known. If it is true, an unknown
        category is added after the categories that are already present.
        """
        categories = self.format.split_categories(text)
        for category in categories:
            if not allow_unknown_category and \
               category.name not in self.categories:
                raise InputFormatError(filename,
                                       line_offset + category.title_line,
                                       'Unknown category: "{}"',
                                       category.name.decode('utf8'))
            # Use get() rather than `+=` on the item: a category that was
            # allowed despite being unknown has no entry yet, and `+=` would
            # raise KeyError for it.
            previous_body = self.categories.get(category.name, b'')
            self.categories[category.name] = previous_body + category.body

    def __init__(self, input_stream, changelog_format):
        """Create a changelog object.

        Populate the changelog object from the content of the file
        input_stream.

        `input_stream` must provide ``read()`` and a ``name`` attribute
        (used in error messages). `changelog_format` provides the
        `ChangelogFormat` methods used to parse and format the content.
        """
        self.format = changelog_format
        whole_file = input_stream.read()
        (self.header,
         self.top_version_title, top_version_body,
         self.trailer) = self.format.extract_top_version(whole_file)
        # Split the top version section into categories.
        # Start with the standard categories, in their standard order,
        # so that they come first in the output.
        self.categories = OrderedDict()
        for category in STANDARD_CATEGORIES:
            self.categories[category] = b''
        # Number of lines that precede the body of the top version section.
        offset = (self.header + self.top_version_title).count(b'\n')
        # The existing changelog may contain non-standard categories.
        self.add_categories_from_text(input_stream.name, offset,
                                      top_version_body, True)

    def add_file(self, input_stream):
        """Add changelog entries from a file.

        Raise `InputFormatError` if the file contains an unknown category.
        """
        self.add_categories_from_text(input_stream.name, 0,
                                      input_stream.read(), False)

    def write(self, filename):
        """Write the changelog to the specified file.

        The output consists of the header, the title of the top version
        section, each category that has a non-empty body, and the trailer.
        """
        with open(filename, 'wb') as out:
            out.write(self.header)
            out.write(self.top_version_title)
            for title, body in self.categories.items():
                if not body:
                    continue
                out.write(self.format.format_category(title, body))
            out.write(self.trailer)
Gilles Peskine40b3f412019-10-13 21:44:25 +0200246
Gilles Peskine8f46bbf2020-03-25 16:34:43 +0100247
@functools.total_ordering
class EntryFileSortKey:
    """This class defines an ordering on changelog entry files: older < newer.

    * Merged entry files are sorted according to their merge date (date of
      the merge commit that brought the commit that created the file into
      the target branch).
    * Committed but unmerged entry files are sorted according to the date
      of the commit that adds them.
    * Uncommitted entry files are sorted according to their modification time.

    This class assumes that the file is in a git working directory with
    the target branch checked out.
    """

    # Categories of files. A lower number is considered older.
    MERGED = 0
    COMMITTED = 1
    LOCAL = 2

    @staticmethod
    def creation_hash(filename):
        """Return the git commit id at which the given file was created.

        Return None if the file was never checked into git.
        """
        hashes = subprocess.check_output(['git', 'log', '--format=%H',
                                          '--follow',
                                          '--', filename])
        # Without re.MULTILINE, `$` only matches at the end of the output
        # (before the final newline), so this picks the last line.
        m = re.search(b'(.+)$', hashes)
        if not m:
            # The git output is empty. This means that the file was
            # never checked in.
            return None
        # The last commit in the log is the oldest one, which is when the
        # file was created.
        return m.group(0)

    @staticmethod
    def list_merges(some_hash, target, *options):
        """List merge commits from some_hash to target.

        Pass options to git to select which commits are included.

        Return a list of commit ids (byte strings), in the order in which
        git prints them. The list is empty if git prints nothing.
        """
        text = subprocess.check_output(['git', 'rev-list',
                                        '--merges', *options,
                                        b'..'.join([some_hash, target])])
        # splitlines() yields [] for empty output, whereas splitting on
        # b'\n' would yield [b''], i.e. a bogus empty commit id.
        return text.splitlines()

    @classmethod
    def merge_hash(cls, some_hash):
        """Return the git commit id at which the given commit was merged.

        Return None if the given commit was never merged.
        """
        target = b'HEAD'
        # List the merges from some_hash to the target in two ways.
        # The ancestry list is the ones that are both descendants of
        # some_hash and ancestors of the target.
        ancestry = frozenset(cls.list_merges(some_hash, target,
                                             '--ancestry-path'))
        # The first_parents list only contains merges that are directly
        # on the target branch. We want it in reverse order (oldest first).
        first_parents = cls.list_merges(some_hash, target,
                                        '--first-parent', '--reverse')
        # Look for the oldest merge commit that's both on the direct path
        # and directly on the target branch. That's the place where some_hash
        # was merged on the target branch. See
        # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
        for commit in first_parents:
            if commit in ancestry:
                return commit
        return None

    @staticmethod
    def commit_timestamp(commit_id):
        """Return the timestamp of the given commit.

        The result is a naive `datetime.datetime` expressed in UTC.
        """
        text = subprocess.check_output(['git', 'show', '-s',
                                        '--format=%ct',
                                        commit_id])
        # Same value as the deprecated datetime.utcfromtimestamp():
        # convert in UTC, then drop the timezone information.
        utc_time = datetime.datetime.fromtimestamp(int(text),
                                                   datetime.timezone.utc)
        return utc_time.replace(tzinfo=None)

    @staticmethod
    def file_timestamp(filename):
        """Return the modification timestamp of the given file.

        The result is a naive `datetime.datetime` in local time. It is only
        compared with other file timestamps, never with commit timestamps,
        because the category comes first in the sort key.
        """
        mtime = os.stat(filename).st_mtime
        return datetime.datetime.fromtimestamp(mtime)

    def __init__(self, filename):
        """Determine position of the file in the changelog entry order.

        This constructor returns an object that can be used with comparison
        operators, with `sort` and `sorted`, etc. Older entries are sorted
        before newer entries.

        This runs git commands to find out whether and when the file was
        committed and merged.
        """
        self.filename = filename
        creation_hash = self.creation_hash(filename)
        if not creation_hash:
            # Never checked into git.
            self.category = self.LOCAL
            self.datetime = self.file_timestamp(filename)
            return
        merge_hash = self.merge_hash(creation_hash)
        if not merge_hash:
            # Checked in, but no merge commit leads to HEAD.
            self.category = self.COMMITTED
            self.datetime = self.commit_timestamp(creation_hash)
            return
        self.category = self.MERGED
        self.datetime = self.commit_timestamp(merge_hash)

    def sort_key(self):
        """Return a concrete sort key for this entry file sort key object.

        ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
        """
        return (self.category, self.datetime, self.filename)

    def __eq__(self, other):
        if not isinstance(other, EntryFileSortKey):
            # Let Python try the reflected operation or fall back to
            # its default instead of failing on a missing sort_key().
            return NotImplemented
        return self.sort_key() == other.sort_key()

    def __lt__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() < other.sort_key()
369
370
def check_output(generated_output_file, main_input_file, merged_files):
    """Make sanity checks on the generated output.

    The intent of these sanity checks is to have reasonable confidence
    that no content has been lost.

    The sanity check is that every line that is present in an input file
    is also present in an output file. This is not perfect but good enough
    for now.

    Raise `LostContent` for the first input line that is not found in
    the generated output.
    """
    # Open each file in a `with` block so that it is closed promptly,
    # including when LostContent is raised.
    with open(generated_output_file, 'rb') as generated:
        generated_output = set(generated)
    with open(main_input_file, 'rb') as main_input:
        for line in main_input:
            if line not in generated_output:
                raise LostContent('original file', line)
    for merged_file in merged_files:
        with open(merged_file, 'rb') as merged_input:
            for line in merged_input:
                if line not in generated_output:
                    raise LostContent(merged_file, line)
389
def finish_output(changelog, output_file, input_file, merged_files):
    """Write the changelog to the output file.

    The input file and the list of merged files are used only for sanity
    checks on the output.

    If the sanity checks fail, the exception from `check_output` propagates
    and, when the output is a regular file, `output_file` is left untouched.
    """
    if os.path.exists(output_file) and not os.path.isfile(output_file):
        # The output is a non-regular file (e.g. pipe). Write to it directly.
        output_temp = output_file
    else:
        # The output is a regular file. Write to a temporary file,
        # then move it into place atomically.
        output_temp = output_file + '.tmp'
    changelog.write(output_temp)
    check_output(output_temp, input_file, merged_files)
    if output_temp != output_file:
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename fails on Windows if the destination exists
        # (which is the normal case when the input is updated in place).
        os.replace(output_temp, output_file)
407
def remove_merged_entries(files_to_remove):
    """Delete each of the given files, in the given order."""
    for entry_path in files_to_remove:
        os.unlink(entry_path)
411
def list_files_to_merge(options):
    """List the entry files to merge, oldest first.

    The entry files are the ``*.txt`` files in the directory `options.dir`.
    "Oldest" is defined by `EntryFileSortKey`.
    """
    entry_pattern = os.path.join(options.dir, '*.txt')
    return sorted(glob.glob(entry_pattern), key=EntryFileSortKey)
420
def merge_entries(options):
    """Merge changelog entries into the changelog file.

    Read the changelog file from options.input.
    Read entries to merge from the directory options.dir.
    Write the new changelog to options.output.
    Remove the merged entries if options.keep_entries is false.

    If there are no entries to merge, print a message on standard error
    and do nothing else.
    """
    with open(options.input, 'rb') as changelog_stream:
        changelog = ChangeLog(changelog_stream, TextChangelogFormat)
    entry_files = list_files_to_merge(options)
    if not entry_files:
        sys.stderr.write('There are no pending changelog entries.\n')
        return
    for entry_path in entry_files:
        with open(entry_path, 'rb') as entry_stream:
            changelog.add_file(entry_stream)
    finish_output(changelog, options.output, options.input, entry_files)
    if options.keep_entries:
        return
    remove_merged_entries(entry_files)
Gilles Peskine40b3f412019-10-13 21:44:25 +0200441
def show_file_timestamps(options):
    """List the files to merge and their timestamp.

    Print one line per file, in merge order, with the category and the
    timestamp from its `EntryFileSortKey` followed by the file name.

    This is only intended for debugging purposes.
    """
    for filename in list_files_to_merge(options):
        key = EntryFileSortKey(filename)
        print(key.category, key.datetime, filename)
451
def set_defaults(options):
    """Add default values for missing options.

    * If no output file is given, the output is the input file.
    * If `keep_entries` is not set, entries are kept exactly when an
      output file was given explicitly.
    """
    output_was_given = getattr(options, 'output', None) is not None
    if not output_was_given:
        options.output = options.input
    keep_entries = getattr(options, 'keep_entries', None)
    if keep_entries is None:
        options.keep_entries = output_was_given
459
def main():
    """Command line entry point.

    Parse the command line, fill in the defaults, then either list the
    entry files (--list-files-only) or merge them into the changelog.
    """
    arg_parser = argparse.ArgumentParser(description=__doc__)
    arg_parser.add_argument('--dir', '-d', metavar='DIR',
                            default='ChangeLog.d',
                            help='Directory to read entries from'
                                 ' (default: ChangeLog.d)')
    arg_parser.add_argument('--input', '-i', metavar='FILE',
                            default='ChangeLog',
                            help='Existing changelog file to read from and augment'
                                 ' (default: ChangeLog)')
    # --keep-entries and --no-keep-entries share one destination; it stays
    # None when neither is given so that set_defaults can pick the default.
    arg_parser.add_argument('--keep-entries',
                            action='store_true', dest='keep_entries', default=None,
                            help='Keep the files containing entries'
                                 ' (default: remove them if --output/-o is not specified)')
    arg_parser.add_argument('--no-keep-entries',
                            action='store_false', dest='keep_entries',
                            help='Remove the files containing entries after they are merged'
                                 ' (default: remove them if --output/-o is not specified)')
    arg_parser.add_argument('--output', '-o', metavar='FILE',
                            help='Output changelog file'
                                 ' (default: overwrite the input)')
    arg_parser.add_argument('--list-files-only',
                            action='store_true',
                            help=('Only list the files that would be processed '
                                  '(with some debugging information)'))
    options = arg_parser.parse_args()
    set_defaults(options)
    if options.list_files_only:
        show_file_timestamps(options)
    else:
        merge_entries(options)

if __name__ == '__main__':
    main()