blob: ea91578c938258e60fd4b2f90796736aaf265cc8 [file] [log] [blame]
Gilles Peskine40b3f412019-10-13 21:44:25 +02001#!/usr/bin/env python3
2
3"""Assemble Mbed Crypto change log entries into the change log file.
Gilles Peskinea2607962020-01-28 19:58:17 +01004
5Add changelog entries to the first level-2 section.
6Create a new level-2 section for unreleased changes if needed.
7Remove the input files unless --keep-entries is specified.
Gilles Peskine28af9582020-03-26 22:39:18 +01008
9In each level-3 section, entries are sorted in chronological order
10(oldest first). From oldest to newest:
11* Merged entry files are sorted according to their merge date (date of
12 the merge commit that brought the commit that created the file into
13 the target branch).
14* Committed but unmerged entry files are sorted according to the date
15 of the commit that adds them.
16* Uncommitted entry files are sorted according to their modification time.
17
18You must run this program from within a git working directory.
Gilles Peskine40b3f412019-10-13 21:44:25 +020019"""
20
21# Copyright (C) 2019, Arm Limited, All Rights Reserved
22# SPDX-License-Identifier: Apache-2.0
23#
24# Licensed under the Apache License, Version 2.0 (the "License"); you may
25# not use this file except in compliance with the License.
26# You may obtain a copy of the License at
27#
28# http://www.apache.org/licenses/LICENSE-2.0
29#
30# Unless required by applicable law or agreed to in writing, software
31# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
32# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
33# See the License for the specific language governing permissions and
34# limitations under the License.
35#
36# This file is part of Mbed Crypto (https://tls.mbed.org)
37
38import argparse
Gilles Peskined8b6c772020-01-28 18:57:47 +010039from collections import OrderedDict
Gilles Peskine8f46bbf2020-03-25 16:34:43 +010040import datetime
41import functools
Gilles Peskine40b3f412019-10-13 21:44:25 +020042import glob
43import os
44import re
Gilles Peskine8f46bbf2020-03-25 16:34:43 +010045import subprocess
Gilles Peskine40b3f412019-10-13 21:44:25 +020046import sys
47
class InputFormatError(Exception):
    """Error raised when an input file does not have the expected format.

    The exception text has the form ``filename:line_number: details``,
    where ``details`` is ``message`` expanded with ``str.format`` using
    the remaining positional and keyword arguments.
    """

    def __init__(self, filename, line_number, message, *args, **kwargs):
        details = message.format(*args, **kwargs)
        location = '{}:{}'.format(filename, line_number)
        super().__init__(location + ': ' + details)
Gilles Peskine40b3f412019-10-13 21:44:25 +020053
class LostContent(Exception):
    """Error raised when a line of input is missing from the output.

    ``filename`` names the input the line came from and ``line`` is the
    missing line; both are embedded in the exception text.
    """

    def __init__(self, filename, line):
        super().__init__('Lost content from {}: "{}"'.format(filename, line))
58
# Titles of the level-3 sections that are always recognized in changelog
# entry files. ChangeLog.__init__ registers them in this order, which is
# also the order in which ChangeLog.write emits the non-empty sections.
# The titles are bytes because the changelog is read and written in
# binary mode.
STANDARD_SECTIONS = (
    b'Interface changes',
    b'Default behavior changes',
    b'Requirement changes',
    b'New deprecations',
    b'Removals',
    b'New features',
    b'Security',
    b'Bug fixes',
    b'Performance improvements',
    b'Other changes',
)
71
class ChangeLog:
    """In-memory representation of an Mbed Crypto changelog.

    The changelog is a Markdown file. Every level-2 title ('##') opens a
    version; versions appear newest first. Inside a version, level-3
    titles ('###') open one section per kind of change, and their titles
    should be entries of STANDARD_SECTIONS. Each change inside a section
    is a '*' bullet. Blank lines may surround titles but should not
    appear inside a section.

    All content is handled as bytes.
    """

    # Matches the (possibly empty) run of '#' at the start of a line.
    _title_re = re.compile(br'#*')

    def title_level(self, line):
        """Split a line into its Markdown title level and its text.

        Return (level, content): level is the number of leading '#'
        characters (0 for a line that is not a title) and content is the
        rest of the line stripped of surrounding whitespace.
        """
        depth = self._title_re.match(line).end()
        return depth, line[depth:].strip()

    # A version number must be dotted ("3.1" qualifies, "3" does not).
    # A letter directly after a dot (".x") marks a version that has not
    # been released yet. A letter elsewhere, as in "3.1a", is fine.
    _version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
    _incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')

    def section_is_released_version(self, title):
        """Tell whether a level-2 title denotes a released version.

        Return True if the title contains a version number that matches
        the expected pattern and is not marked incomplete, else False.
        The criteria are heuristic and may be revised.
        """
        found = self._version_number_re.search(title)
        if not found:
            return False
        return not self._incomplete_version_number_re.search(found.group(0))

    def unreleased_version_title(self):
        """Return the title of a newly created unreleased-changes section."""
        # pylint: disable=no-self-use; this method may be overridden
        return b'Unreleased changes'

    def __init__(self, input_stream):
        """Build a changelog object from existing changelog content.

        input_stream is any iterable yielding the lines (bytes) of the
        changelog, typically a file opened for reading in binary mode.
        """
        # Everything that precedes the level-3 sections receiving the
        # new entries.
        self.header = []
        # Lines of each level-3 section receiving the new entries, keyed
        # by section title, in output order.
        self.section_content = OrderedDict(
            (title, []) for title in STANDARD_SECTIONS)
        # The level-2 sections of versions that are already released.
        self.trailer = []
        self.read_main_file(input_stream)

    def read_main_file(self, input_stream):
        """Fill in header, section_content and trailer from the file.

        This is meant to be called from the constructor only; calling it
        on an object that already holds content may not act sensibly.
        """
        # Two cases, decided by the first level-2 title:
        # * It is for unreleased changes: that section is expanded.
        #   Lines up to its first level-3 title form the header, its
        #   level-3 sections are split out, and everything from the
        #   second level-2 title onwards forms the trailer.
        # * It is for a released version: a new unreleased section title
        #   is appended to the header and the released section, along
        #   with all that follows, forms the trailer.
        versions_seen = 0
        active_section = None
        for line in input_stream:
            depth, title = self.title_level(line)
            if depth == 2:
                versions_seen += 1
                if (versions_seen == 1 and
                        self.section_is_released_version(title)):
                    new_title = (b'## ' +
                                 self.unreleased_version_title() +
                                 b'\n\n')
                    self.header.append(new_title)
                    # Pretend the unreleased section has already ended.
                    versions_seen = 2
            elif depth == 3 and versions_seen == 1:
                active_section = title
                self.section_content.setdefault(title, [])

            if versions_seen > 1:
                self.trailer.append(line)
            elif versions_seen < 1 or active_section is None:
                self.header.append(line)
            elif depth != 3 and line.strip():
                # Level-3 titles and blank lines are regenerated by
                # write(), so only the actual content is stored.
                self.section_content[active_section].append(line)

    def add_file(self, input_stream):
        """Merge the entries of one changelog entry file.

        input_stream is typically a file opened for reading in binary
        mode; its ``name`` attribute is used in error messages. The
        content must be a sequence of level-3 Markdown sections whose
        titles are already known, either from STANDARD_SECTIONS or from
        the changelog that was read. Titles are compared byte-for-byte
        after stripping surrounding whitespace. Non-blank lines under a
        title are appended to the matching section.

        Raise InputFormatError if a title is unknown, if content comes
        before the first title, or if a title is not at level 3.
        """
        source_name = input_stream.name
        target = None
        for number, line in enumerate(input_stream, 1):
            if not line.strip():
                continue
            depth, title = self.title_level(line)
            if depth not in (0, 3):
                raise InputFormatError(source_name, number,
                                       'Only level 3 headers (###) are permitted')
            if depth == 3:
                if title not in self.section_content:
                    raise InputFormatError(source_name, number,
                                           'Section {} is not recognized',
                                           str(title)[1:])
                target = title
                continue
            if target is None:
                raise InputFormatError(source_name, number,
                                       'Missing section title at the beginning of the file')
            self.section_content[target].append(line)

    def write(self, filename):
        """Write the changelog to the file called filename.

        The header comes first, then every non-empty level-3 section
        (title, blank line, content, blank line), then the trailer.
        """
        with open(filename, 'wb') as out:
            out.writelines(self.header)
            for title, body in self.section_content.items():
                if body:
                    out.write(b'### ' + title + b'\n\n')
                    out.writelines(body)
                    out.write(b'\n')
            out.writelines(self.trailer)
235
Gilles Peskine8f46bbf2020-03-25 16:34:43 +0100236
@functools.total_ordering
class EntryFileSortKey:
    """This class defines an ordering on changelog entry files: older < newer.

    * Merged entry files are sorted according to their merge date (date of
      the merge commit that brought the commit that created the file into
      the target branch).
    * Committed but unmerged entry files are sorted according to the date
      of the commit that adds them.
    * Uncommitted entry files are sorted according to their modification time.

    This class assumes that the file is in a git working directory with
    the target branch checked out.

    Instances compare and hash according to `sort_key()`. Comparing with
    an object of another type is left to Python's default handling
    (``==`` is false, ordering raises TypeError).
    """

    # Categories of files. A lower number is considered older.
    MERGED = 0
    COMMITTED = 1
    LOCAL = 2

    @staticmethod
    def creation_hash(filename):
        """Return the git commit id at which the given file was created.

        Return None if the file was never checked into git.
        Raise subprocess.CalledProcessError if git fails.
        """
        output = subprocess.check_output(['git', 'log', '--format=%H',
                                          '--', filename])
        hashes = output.split()
        if not hashes:
            # The file was never checked in.
            return None
        # The last line of the log is taken as the creation commit.
        return hashes[-1]

    @staticmethod
    def list_merges(some_hash, target, *options):
        """List merge commits from some_hash to target.

        Pass options to git to select which commits are included.
        Return a list of commit ids (bytes), which is empty if git
        prints nothing. Raise subprocess.CalledProcessError if git fails.
        """
        text = subprocess.check_output(['git', 'rev-list',
                                        '--merges', *options,
                                        b'..'.join([some_hash, target])])
        # Commit ids contain no whitespace, so split() yields one item
        # per line and, unlike split(b'\n'), no empty item when there is
        # no output at all.
        return text.split()

    @classmethod
    def merge_hash(cls, some_hash):
        """Return the git commit id at which the given commit was merged.

        Return None if the given commit was never merged.
        """
        target = b'HEAD'
        # List the merges from some_hash to the target in two ways.
        # The ancestry list is the ones that are both descendants of
        # some_hash and ancestors of the target.
        ancestry = frozenset(cls.list_merges(some_hash, target,
                                             '--ancestry-path'))
        # The first_parents list only contains merges that are directly
        # on the target branch. We want it in reverse order (oldest first).
        first_parents = cls.list_merges(some_hash, target,
                                        '--first-parent', '--reverse')
        # Look for the oldest merge commit that's both on the direct path
        # and directly on the target branch. That's the place where some_hash
        # was merged on the target branch. See
        # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
        for commit in first_parents:
            if commit in ancestry:
                return commit
        return None

    @staticmethod
    def commit_timestamp(commit_id):
        """Return the timestamp of the given commit.

        The result is a naive datetime expressed in UTC.
        """
        text = subprocess.check_output(['git', 'show', '-s',
                                        '--format=%ct',
                                        commit_id])
        return datetime.datetime.utcfromtimestamp(int(text))

    @staticmethod
    def file_timestamp(filename):
        """Return the modification timestamp of the given file.

        The result is a naive datetime expressed in local time.
        """
        # NOTE(review): this is local time whereas commit_timestamp() is
        # UTC. The two are never compared with each other because files
        # dated by mtime are in a category of their own (LOCAL).
        mtime = os.stat(filename).st_mtime
        return datetime.datetime.fromtimestamp(mtime)

    def __init__(self, filename):
        """Determine position of the file in the changelog entry order.

        This constructor returns an object that can be used with comparison
        operators, with `sort` and `sorted`, etc. Older entries are sorted
        before newer entries.

        This runs git commands, and reads the file's modification time if
        the file is not committed.
        """
        self.filename = filename
        creation_hash = self.creation_hash(filename)
        if not creation_hash:
            self.category = self.LOCAL
            self.datetime = self.file_timestamp(filename)
            return
        merge_hash = self.merge_hash(creation_hash)
        if not merge_hash:
            self.category = self.COMMITTED
            self.datetime = self.commit_timestamp(creation_hash)
            return
        self.category = self.MERGED
        self.datetime = self.commit_timestamp(merge_hash)

    def sort_key(self):
        """Return a concrete sort key for this entry file sort key object.

        ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
        The key is the tuple (category, datetime, filename).
        """
        return (self.category, self.datetime, self.filename)

    def __eq__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() == other.sort_key()

    def __lt__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() < other.sort_key()

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable.
        # Hash the same tuple that __eq__ compares.
        return hash(self.sort_key())
354
355
def check_output(generated_output_file, main_input_file, merged_files):
    """Make sanity checks on the generated output.

    The intent of these sanity checks is to have reasonable confidence
    that no content has been lost.

    The sanity check is that every line that is present in an input file
    is also present in an output file. This is not perfect but good enough
    for now.

    generated_output_file is the name of the file that was written,
    main_input_file is the name of the original changelog and
    merged_files is an iterable of entry file names.

    Raise LostContent on the first input line that is missing from the
    output.
    """
    # Close each file as soon as it has been read instead of leaving
    # that to the garbage collector.
    with open(generated_output_file, 'rb') as generated:
        generated_output = set(generated)
    with open(main_input_file, 'rb') as main_input:
        for line in main_input:
            if line not in generated_output:
                raise LostContent('original file', line)
    for merged_file in merged_files:
        with open(merged_file, 'rb') as merged:
            for line in merged:
                if line not in generated_output:
                    raise LostContent(merged_file, line)
374
def finish_output(changelog, output_file, input_file, merged_files):
    """Write the changelog to the output file.

    The input file and the list of merged files are used only for sanity
    checks on the output.

    If the sanity checks fail, the exception from check_output propagates
    and a regular output file is left untouched.
    """
    if os.path.exists(output_file) and not os.path.isfile(output_file):
        # The output is a non-regular file (e.g. pipe). Write to it directly.
        output_temp = output_file
    else:
        # The output is a regular file. Write to a temporary file,
        # then move it into place atomically.
        output_temp = output_file + '.tmp'
    changelog.write(output_temp)
    check_output(output_temp, input_file, merged_files)
    if output_temp != output_file:
        # os.replace overwrites an existing destination on every
        # platform, whereas os.rename fails on Windows if the output
        # file already exists (always the case when updating in place).
        os.replace(output_temp, output_file)
392
def remove_merged_entries(files_to_remove):
    """Delete every file named in files_to_remove."""
    for entry_path in files_to_remove:
        os.remove(entry_path)
396
def list_files_to_merge(options):
    """Return the entry files to merge, sorted from oldest to newest.

    The candidates are the '*.md' files directly inside options.dir.
    The meaning of "oldest" is given by `EntryFileSortKey`.
    """
    pattern = os.path.join(options.dir, '*.md')
    return sorted(glob.glob(pattern),
                  key=lambda path: EntryFileSortKey(path).sort_key())
405
def merge_entries(options):
    """Merge the pending changelog entries into the changelog file.

    The existing changelog is read from options.input and the entries
    from the files in the directory options.dir. The result is written
    to options.output. The entry files are removed afterwards unless
    options.keep_entries is true.

    If there are no entries, print a message on standard error and
    write nothing.
    """
    with open(options.input, 'rb') as main_file:
        changelog = ChangeLog(main_file)
    entries = list_files_to_merge(options)
    if entries:
        for entry in entries:
            with open(entry, 'rb') as entry_file:
                changelog.add_file(entry_file)
        finish_output(changelog, options.output, options.input, entries)
        if not options.keep_entries:
            remove_merged_entries(entries)
    else:
        sys.stderr.write('There are no pending changelog entries.\n')
Gilles Peskine40b3f412019-10-13 21:44:25 +0200426
def show_file_timestamps(options):
    """Print each file to merge with its sort category and timestamp.

    This exists for debugging only.
    """
    for entry in list_files_to_merge(options):
        key = EntryFileSortKey(entry)
        print(key.category, key.datetime, entry)
436
def set_defaults(options):
    """Fill in the options that were not given explicitly.

    Without an output file, the input file is overwritten. Unless
    keep_entries was set, entries are kept only when an output file
    was given.
    """
    explicit_output = getattr(options, 'output', None) is not None
    if not explicit_output:
        options.output = options.input
    keep = getattr(options, 'keep_entries', None)
    if keep is None:
        options.keep_entries = explicit_output
444
def main():
    """Command line entry point.

    Parse the command line, fill in default option values, then either
    list the files that would be processed or merge the entries.
    """
    # One (flags, keyword arguments) pair per command line option, in
    # registration order. --keep-entries must come before
    # --no-keep-entries so that its default of None is the one applied
    # to their shared destination.
    arguments = (
        (('--dir', '-d'),
         dict(metavar='DIR',
              default='ChangeLog.d',
              help='Directory to read entries from'
                   ' (default: ChangeLog.d)')),
        (('--input', '-i'),
         dict(metavar='FILE',
              default='ChangeLog.md',
              help='Existing changelog file to read from and augment'
                   ' (default: ChangeLog.md)')),
        (('--keep-entries',),
         dict(action='store_true', dest='keep_entries', default=None,
              help='Keep the files containing entries'
                   ' (default: remove them if --output/-o is not specified)')),
        (('--no-keep-entries',),
         dict(action='store_false', dest='keep_entries',
              help='Remove the files containing entries after they are merged'
                   ' (default: remove them if --output/-o is not specified)')),
        (('--output', '-o'),
         dict(metavar='FILE',
              help='Output changelog file'
                   ' (default: overwrite the input)')),
        (('--list-files-only',),
         dict(action='store_true',
              help='Only list the files that would be processed (with some debugging information)')),
    )
    parser = argparse.ArgumentParser(description=__doc__)
    for flags, settings in arguments:
        parser.add_argument(*flags, **settings)
    options = parser.parse_args()
    set_defaults(options)
    if options.list_files_only:
        show_file_timestamps(options)
    else:
        merge_entries(options)
476
# Run the command line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()