blob: b7dee475365d035cf77f7d9c07276cc9fb184119 [file] [log] [blame]
#!/usr/bin/env python3

"""Assemble Mbed TLS change log entries into the change log file.

Add changelog entries to the first level-2 section.
Create a new level-2 section for unreleased changes if needed.
Remove the input files unless --keep-entries is specified.

In each level-3 section, entries are sorted in chronological order
(oldest first). From oldest to newest:
* Merged entry files are sorted according to their merge date (date of
  the merge commit that brought the commit that created the file into
  the target branch).
* Committed but unmerged entry files are sorted according to the date
  of the commit that adds them.
* Uncommitted entry files are sorted according to their modification time.

You must run this program from within a git working directory.
"""
20
# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
#
# This file is provided under the Apache License 2.0, or the
# GNU General Public License v2.0 or later.
#
# **********
# Apache License 2.0:
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# **********
#
# **********
# GNU General Public License v2.0 or later:
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# **********
Gilles Peskinecff94e32020-04-21 18:33:12 +020062
63import argparse
64from collections import OrderedDict, namedtuple
65import datetime
66import functools
67import glob
68import os
69import re
70import subprocess
71import sys
72
class InputFormatError(Exception):
    """Error in an input file, reported together with its location."""

    def __init__(self, filename, line_number, message, *args, **kwargs):
        """Build the message ``<filename>:<line_number>: <text>``.

        ``message`` is a ``str.format`` template; it is expanded with the
        remaining positional and keyword arguments to produce ``<text>``.
        """
        detail = message.format(*args, **kwargs)
        located = '{}:{}: {}'.format(filename, line_number, detail)
        super().__init__(located)
78
class CategoryParseError(Exception):
    """Failure to split a piece of changelog text into categories.

    The line offset (relative to the parsed text) and the bare error
    message are kept as attributes so that callers can re-report the
    problem with an absolute file position.
    """

    def __init__(self, line_offset, error_message):
        super().__init__('{}: {}'.format(line_offset, error_message))
        self.line_offset = line_offset
        self.error_message = error_message
84
class LostContent(Exception):
    """A line from an input file is missing from the generated output."""

    def __init__(self, filename, line):
        """Record which input (``filename``) lost which ``line``."""
        super().__init__('Lost content from {}: "{}"'.format(filename, line))
89
# Category titles recognized in the changelog, in the order in which
# they are written out.
# If you edit this, update ChangeLog.d/README.md.
STANDARD_CATEGORIES = (
    b'API changes',
    b'Default behavior changes',
    b'Requirement changes',
    b'New deprecations',
    b'Removals',
    b'Features',
    b'Security',
    b'Bugfix',
    b'Changes',
)
103
# Longest line (in bytes) permitted inside a changelog entry body.
MAX_LINE_LENGTH = 80
106
# One category subsection as produced by a format's split_categories().
CategoryContent = namedtuple('CategoryContent', [
    'name',        # Title text of the category
    'title_line',  # Line number of the title
    'body',        # Body text of the category
    'body_line',   # Starting line number of the body
])
111
class ChangelogFormat:
    """Interface that every concrete changelog format class implements.

    All methods are class methods; this base class only documents them
    and raises ``NotImplementedError``.
    """

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """Isolate the topmost version section of a changelog.

        When the topmost version has already been released, a new top
        section for an unreleased version is created instead.

        The result is ``(header, top_version_title, top_version_body,
        trailer)``. The "top version" is the existing top section if it
        holds unreleased changes, and a freshly created one otherwise.
        Concatenating the four pieces (after editing top_version_body)
        yields the assembled changelog.
        """
        raise NotImplementedError

    @classmethod
    def version_title_text(cls, version_title):
        """Give the text of a formatted version section title."""
        raise NotImplementedError

    @classmethod
    def split_categories(cls, version_body):
        """Break the body of a version section into its categories.

        The result is a list of `CategoryContent`, where each name is
        the category title stripped of any formatting.
        """
        raise NotImplementedError

    @classmethod
    def format_category(cls, title, body):
        """Build the text of a category section out of title and body."""
        raise NotImplementedError
148
class TextChangelogFormat(ChangelogFormat):
    """The traditional Mbed TLS changelog format.

    A version section starts with a line beginning with ``=``. Inside a
    version section, a category title is a line that starts in column 0
    with a word character. All text is handled as ``bytes``.
    """

    # Title given to a newly created section for unreleased changes.
    _unreleased_version_text = b'= mbed TLS x.x.x branch released xxxx-xx-xx'

    @classmethod
    def is_released_version(cls, title):
        """Return True unless the title contains a placeholder release date.

        A placeholder date is a date-shaped token made of digits and ``x``
        whose last character is ``x`` (e.g. ``xxxx-xx-xx``).
        """
        # Look for an incomplete release date
        return not re.search(br'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title)

    # Group 1: title line plus following blank lines. Group 2: section body,
    # up to (not including) the next line starting with '=' or end of text.
    _top_version_re = re.compile(br'(?:\A|\n)(=[^\n]*\n+)(.*?\n)(?:=|$)',
                                 re.DOTALL)

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """A version section starts with a line starting with '='.

        Return ``(header, top_version_title, top_version_body, trailer)``.
        If the first version section found is already released, the title
        is the unreleased-version placeholder, the body is empty, and the
        existing section is left in the trailer.
        """
        m = re.search(cls._top_version_re, changelog_file_content)
        top_version_start = m.start(1)
        top_version_end = m.end(2)
        top_version_title = m.group(1)
        top_version_body = m.group(2)
        if cls.is_released_version(top_version_title):
            # Insert a brand new, empty section in front of the released one.
            top_version_end = top_version_start
            top_version_title = cls._unreleased_version_text + b'\n\n'
            top_version_body = b''
        return (changelog_file_content[:top_version_start],
                top_version_title, top_version_body,
                changelog_file_content[top_version_end:])

    @classmethod
    def version_title_text(cls, version_title):
        """Return the first line of ``version_title`` without its newline.

        Everything from the first newline onwards (the trailing blank
        lines kept by `extract_top_version`) is removed.
        """
        # re.sub(pattern, repl, string, count, flags): the replacement must
        # be given explicitly and the flag passed by keyword, otherwise the
        # flag value is taken as the subject string and the call fails.
        return re.sub(br'\n.*', b'', version_title, flags=re.DOTALL)

    # Group 1: the title line. The match also swallows the blank lines
    # that follow it, so that m.end(0) is the start of the category body.
    _category_title_re = re.compile(br'(^\w.*)\n+', re.MULTILINE)

    @classmethod
    def split_categories(cls, version_body):
        """A category title is a line with the title in column 0.

        Return a list of `CategoryContent`; line numbers are 0-based
        offsets within ``version_body``. An empty body yields an empty
        list.

        Raise `CategoryParseError` if the body does not start with a
        category title.
        """
        if not version_body:
            return []
        title_matches = list(re.finditer(cls._category_title_re, version_body))
        if not title_matches or title_matches[0].start() != 0:
            # There is junk before the first category.
            raise CategoryParseError(0, 'Junk found where category expected')
        title_starts = [m.start(1) for m in title_matches]
        body_starts = [m.end(0) for m in title_matches]
        # Each body runs up to the next title, the last one to the end.
        body_ends = title_starts[1:] + [len(version_body)]
        # Normalize every body to end with exactly one newline.
        bodies = [version_body[body_start:body_end].rstrip(b'\n') + b'\n'
                  for (body_start, body_end) in zip(body_starts, body_ends)]
        title_lines = [version_body[:pos].count(b'\n') for pos in title_starts]
        body_lines = [version_body[:pos].count(b'\n') for pos in body_starts]
        return [CategoryContent(title_match.group(1), title_line,
                                body, body_line)
                for title_match, title_line, body, body_line
                in zip(title_matches, title_lines, bodies, body_lines)]

    @classmethod
    def format_category(cls, title, body):
        """Return ``title``, a newline, then ``body`` ending in a blank line."""
        # `split_categories` ensures that each body ends with a newline.
        # Make sure that there is additionally a blank line between categories.
        if not body.endswith(b'\n\n'):
            body += b'\n'
        return title + b'\n' + body
209
class ChangeLog:
    """An Mbed TLS changelog.

    A changelog file consists of some header text followed by one or
    more version sections. The version sections are in reverse
    chronological order. Each version section consists of a title and a body.

    The body of a version section consists of zero or more category
    subsections. Each category subsection consists of a title and a body.

    A changelog entry file has the same format as the body of a version section.

    A `ChangelogFormat` object defines the concrete syntax of the changelog.
    Entry files must have the same format as the changelog file.
    """

    # Only accept dotted version numbers (e.g. "3.1", not "3").
    # Refuse ".x" in a version number where x is a letter: this indicates
    # a version that is not yet released. Something like "3.1a" is accepted.
    _version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
    _incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')
    # A line holding nothing but a URL is exempt from the length limit.
    _only_url_re = re.compile(br'^\s*\w+://\S+\s*$')
    _has_url_re = re.compile(br'.*://.*')

    def add_categories_from_text(self, filename, line_offset,
                                 text, allow_unknown_category):
        """Parse a version section or entry file.

        Append the body of every category found in ``text`` to the
        matching entry of ``self.categories``.

        ``filename`` and ``line_offset`` (the 1-based line number in the
        file of the first line of ``text``) are only used to locate errors.
        If ``allow_unknown_category`` is false, a category title that is
        not already a key of ``self.categories`` is an error; otherwise
        it is added after the existing categories.

        Raise `InputFormatError` on malformed text, on a disallowed
        category, or on a body line longer than ``MAX_LINE_LENGTH`` that
        is not a lone URL.
        """
        try:
            categories = self.format.split_categories(text)
        except CategoryParseError as e:
            raise InputFormatError(filename, line_offset + e.line_offset,
                                   e.error_message) from e
        for category in categories:
            if not allow_unknown_category and \
               category.name not in self.categories:
                raise InputFormatError(filename,
                                       line_offset + category.title_line,
                                       'Unknown category: "{}"',
                                       category.name.decode('utf8'))

            for line_number, line in enumerate(category.body.splitlines(), 1):
                if not self._only_url_re.match(line) and \
                   len(line) > MAX_LINE_LENGTH:
                    long_url_msg = '. URL exceeding length limit must be alone in its line.' \
                        if self._has_url_re.match(line) else ""
                    # body_line is 0-based within text and line_number is
                    # 1-based within the body; line_offset is the 1-based
                    # file line of the first line of text.
                    raise InputFormatError(filename,
                                           line_offset + category.body_line +
                                           line_number - 1,
                                           'Line is longer than allowed: '
                                           'Length {} (Max {}){}',
                                           len(line), MAX_LINE_LENGTH,
                                           long_url_msg)

            # An allowed unknown category has no entry yet: start it empty
            # instead of failing with KeyError.
            self.categories[category.name] = \
                self.categories.get(category.name, b'') + category.body

    def __init__(self, input_stream, changelog_format):
        """Create a changelog object.

        Populate the changelog object from the content of the file
        input_stream. ``changelog_format`` provides the parsing and
        formatting methods documented by `ChangelogFormat`.
        """
        self.format = changelog_format
        whole_file = input_stream.read()
        (self.header,
         self.top_version_title, top_version_body,
         self.trailer) = self.format.extract_top_version(whole_file)
        # Split the top version section into categories.
        self.categories = OrderedDict()
        for category in STANDARD_CATEGORIES:
            self.categories[category] = b''
        # 1-based line number of the first line of the top version body.
        offset = (self.header + self.top_version_title).count(b'\n') + 1
        self.add_categories_from_text(input_stream.name, offset,
                                      top_version_body, True)

    def add_file(self, input_stream):
        """Add changelog entries from a file.

        Unlike the existing changelog content, entry files may only use
        categories that are already known.
        """
        self.add_categories_from_text(input_stream.name, 1,
                                      input_stream.read(), False)

    def write(self, filename):
        """Write the changelog to the specified file.

        The output is the header, the top version title, every non-empty
        category in order, then the trailer.
        """
        with open(filename, 'wb') as out:
            out.write(self.header)
            out.write(self.top_version_title)
            for title, body in self.categories.items():
                if not body:
                    continue
                out.write(self.format.format_category(title, body))
            out.write(self.trailer)
302
303
@functools.total_ordering
class EntryFileSortKey:
    """This class defines an ordering on changelog entry files: older < newer.

    * Merged entry files are sorted according to their merge date (date of
      the merge commit that brought the commit that created the file into
      the target branch).
    * Committed but unmerged entry files are sorted according to the date
      of the commit that adds them.
    * Uncommitted entry files are sorted according to their modification time.

    This class assumes that the file is in a git working directory with
    the target branch checked out.
    """

    # Categories of files. A lower number is considered older.
    MERGED = 0
    COMMITTED = 1
    LOCAL = 2

    @staticmethod
    def creation_hash(filename):
        """Return the git commit id at which the given file was created.

        Return None if the file was never checked into git.
        """
        hashes = subprocess.check_output(['git', 'log', '--format=%H',
                                          '--follow',
                                          '--', filename])
        # Without re.MULTILINE, '$' only matches at the end of the output
        # (or just before its final newline), so this picks the last line.
        m = re.search(b'(.+)$', hashes)
        if not m:
            # The git output is empty. This means that the file was
            # never checked in.
            return None
        # The last commit in the log is the oldest one, which is when the
        # file was created.
        return m.group(0)

    @staticmethod
    def list_merges(some_hash, target, *options):
        """List merge commits from some_hash to target.

        Pass options to git to select which commits are included.

        Return a list of commit ids (bytes), empty if git prints nothing.
        """
        text = subprocess.check_output(['git', 'rev-list',
                                        '--merges', *options,
                                        b'..'.join([some_hash, target])])
        # splitlines() gives [] for empty output, whereas splitting the
        # stripped text on b'\n' would give [b''], a bogus empty commit id.
        return text.splitlines()

    @classmethod
    def merge_hash(cls, some_hash):
        """Return the git commit id at which the given commit was merged.

        Return None if the given commit was never merged.
        """
        target = b'HEAD'
        # List the merges from some_hash to the target in two ways.
        # The ancestry list is the ones that are both descendants of
        # some_hash and ancestors of the target.
        ancestry = frozenset(cls.list_merges(some_hash, target,
                                             '--ancestry-path'))
        # The first_parents list only contains merges that are directly
        # on the target branch. We want it in reverse order (oldest first).
        first_parents = cls.list_merges(some_hash, target,
                                        '--first-parent', '--reverse')
        # Look for the oldest merge commit that's both on the direct path
        # and directly on the target branch. That's the place where some_hash
        # was merged on the target branch. See
        # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
        for commit in first_parents:
            if commit in ancestry:
                return commit
        return None

    @staticmethod
    def commit_timestamp(commit_id):
        """Return the timestamp of the given commit.

        The result is a naive `datetime.datetime` expressed in UTC.
        """
        text = subprocess.check_output(['git', 'show', '-s',
                                        '--format=%ct',
                                        commit_id])
        # Same value as the deprecated datetime.utcfromtimestamp().
        aware = datetime.datetime.fromtimestamp(int(text),
                                                datetime.timezone.utc)
        return aware.replace(tzinfo=None)

    @staticmethod
    def file_timestamp(filename):
        """Return the modification timestamp of the given file.

        The result is a naive `datetime.datetime` in local time.
        """
        mtime = os.stat(filename).st_mtime
        return datetime.datetime.fromtimestamp(mtime)

    def __init__(self, filename):
        """Determine position of the file in the changelog entry order.

        This constructor returns an object that can be used with comparison
        operators, with `sort` and `sorted`, etc. Older entries are sorted
        before newer entries.
        """
        self.filename = filename
        creation_hash = self.creation_hash(filename)
        if not creation_hash:
            # Never committed: fall back to the file's modification time.
            self.category = self.LOCAL
            self.datetime = self.file_timestamp(filename)
            return
        merge_hash = self.merge_hash(creation_hash)
        if not merge_hash:
            # Committed, but not reachable through a merge on HEAD.
            self.category = self.COMMITTED
            self.datetime = self.commit_timestamp(creation_hash)
            return
        self.category = self.MERGED
        self.datetime = self.commit_timestamp(merge_hash)

    def sort_key(self):
        """Return a concrete sort key for this entry file sort key object.

        ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
        The category comes first, so timestamps are only ever compared
        between files of the same category.
        """
        return (self.category, self.datetime, self.filename)

    def __eq__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() == other.sort_key()

    def __lt__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() < other.sort_key()
425
426
def check_output(generated_output_file, main_input_file, merged_files):
    """Make sanity checks on the generated output.

    The intent of these sanity checks is to have reasonable confidence
    that no content has been lost.

    The sanity check is that every line that is present in an input file
    is also present in an output file. This is not perfect but good enough
    for now.

    ``main_input_file`` is checked first, then each of ``merged_files``
    in order. Raise `LostContent` for the first missing line; for the
    main input it is reported under the name 'original file'.
    """
    # Open every file in a `with` block so that it is closed promptly,
    # including when LostContent is raised part-way through.
    with open(generated_output_file, 'rb') as generated:
        generated_output = set(generated)
    with open(main_input_file, 'rb') as main_input:
        for line in main_input:
            if line not in generated_output:
                raise LostContent('original file', line)
    for merged_file in merged_files:
        with open(merged_file, 'rb') as merged:
            for line in merged:
                if line not in generated_output:
                    raise LostContent(merged_file, line)
445
def finish_output(changelog, output_file, input_file, merged_files):
    """Write the changelog to the output file.

    The input file and the list of merged files are used only for sanity
    checks on the output.

    If the output is (or will be) a regular file, the content is first
    written to ``output_file + '.tmp'`` and only moved into place once
    `check_output` has passed. Exceptions from writing or from the check
    propagate to the caller.
    """
    if os.path.exists(output_file) and not os.path.isfile(output_file):
        # The output is a non-regular file (e.g. pipe). Write to it directly.
        output_temp = output_file
    else:
        # The output is a regular file. Write to a temporary file,
        # then move it into place atomically.
        output_temp = output_file + '.tmp'
    changelog.write(output_temp)
    check_output(output_temp, input_file, merged_files)
    if output_temp != output_file:
        # os.replace overwrites an existing destination on every platform;
        # os.rename raises FileExistsError on Windows in that case, which
        # is the normal situation when the changelog is updated in place.
        os.replace(output_temp, output_file)
463
def remove_merged_entries(files_to_remove):
    """Delete every file named in ``files_to_remove``, in order."""
    for entry_path in files_to_remove:
        os.remove(entry_path)
467
def list_files_to_merge(options):
    """Return the ``*.txt`` entry files found in options.dir, oldest first.

    The age of a file is the one defined by `EntryFileSortKey`.
    """
    pattern = os.path.join(options.dir, '*.txt')
    return sorted(glob.glob(pattern), key=EntryFileSortKey)
476
def merge_entries(options):
    """Fold the pending changelog entries into the changelog file.

    The changelog is read from options.input and the entries from the
    directory options.dir; the result goes to options.output.
    Unless options.keep_entries is true, the merged entry files are
    removed afterwards. When there is nothing to merge, a notice is
    printed on stderr and nothing is written.
    """
    with open(options.input, 'rb') as changelog_stream:
        changelog = ChangeLog(changelog_stream, TextChangelogFormat)
    entry_files = list_files_to_merge(options)
    if not entry_files:
        sys.stderr.write('There are no pending changelog entries.\n')
        return
    for entry_path in entry_files:
        with open(entry_path, 'rb') as entry_stream:
            changelog.add_file(entry_stream)
    finish_output(changelog, options.output, options.input, entry_files)
    if options.keep_entries:
        return
    remove_merged_entries(entry_files)
497
def show_file_timestamps(options):
    """Print category, timestamp and name of each file to merge.

    This exists for debugging purposes only.
    """
    for entry_path in list_files_to_merge(options):
        key = EntryFileSortKey(entry_path)
        print(key.category, key.datetime, entry_path)
507
def set_defaults(options):
    """Fill in the options that were not given on the command line.

    * Without an output file, the input file is overwritten.
    * Unless stated explicitly, entries are kept exactly when an output
      file was given.
    """
    output_given = getattr(options, 'output', None) is not None
    if not output_given:
        options.output = options.input
    if getattr(options, 'keep_entries', None) is None:
        options.keep_entries = output_given
515
def main():
    """Command line entry point.

    Parse the command line, fill in defaults, then either list the
    pending entry files (--list-files-only) or merge them.
    """
    arg_parser = argparse.ArgumentParser(description=__doc__)
    arg_parser.add_argument(
        '--dir', '-d', metavar='DIR',
        default='ChangeLog.d',
        help='Directory to read entries from'
             ' (default: ChangeLog.d)')
    arg_parser.add_argument(
        '--input', '-i', metavar='FILE',
        default='ChangeLog',
        help='Existing changelog file to read from and augment'
             ' (default: ChangeLog)')
    # --keep-entries and --no-keep-entries share one destination; None
    # means that neither was given, and set_defaults() decides.
    arg_parser.add_argument(
        '--keep-entries',
        action='store_true', dest='keep_entries', default=None,
        help='Keep the files containing entries'
             ' (default: remove them if --output/-o is not specified)')
    arg_parser.add_argument(
        '--no-keep-entries',
        action='store_false', dest='keep_entries',
        help='Remove the files containing entries after they are merged'
             ' (default: remove them if --output/-o is not specified)')
    arg_parser.add_argument(
        '--output', '-o', metavar='FILE',
        help='Output changelog file'
             ' (default: overwrite the input)')
    arg_parser.add_argument(
        '--list-files-only',
        action='store_true',
        help=('Only list the files that would be processed '
              '(with some debugging information)'))
    options = arg_parser.parse_args()
    set_defaults(options)
    action = show_file_timestamps if options.list_files_only else merge_entries
    action(options)
548
# Run the command line interface only when executed as a script.
if __name__ == '__main__':
    main()