Blame - scripts/assemble_changelog.py - mirror/mbed-tls

blob: b7dee475365d035cf77f7d9c07276cc9fb184119 [file] [log] [blame]

Gilles Peskine	cff94e3	2020-04-21 18:33:12 +0200	[diff] [blame]	1	#!/usr/bin/env python3
				2
				3	"""Assemble Mbed TLS change log entries into the change log file.
				4
				5	Add changelog entries to the first level-2 section.
				6	Create a new level-2 section for unreleased changes if needed.
				7	Remove the input files unless --keep-entries is specified.
				8
				9	In each level-3 section, entries are sorted in chronological order
				10	(oldest first). From oldest to newest:
				11	* Merged entry files are sorted according to their merge date (date of
				12	the merge commit that brought the commit that created the file into
				13	the target branch).
				14	* Committed but unmerged entry files are sorted according to the date
				15	of the commit that adds them.
				16	* Uncommitted entry files are sorted according to their modification time.
				17
				18	You must run this program from within a git working directory.
				19	"""
				20
Bence Szépkúti	a2947ac	2020-08-19 16:37:36 +0200	[diff] [blame]	21	# Copyright The Mbed TLS Contributors
Bence Szépkúti	f744bd7	2020-06-05 13:02:18 +0200	[diff] [blame]	22	# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
				23	#
				24	# This file is provided under the Apache License 2.0, or the
				25	# GNU General Public License v2.0 or later.
				26	#
				27	# **********
				28	# Apache License 2.0:
Gilles Peskine	cff94e3	2020-04-21 18:33:12 +0200	[diff] [blame]	29	#
				30	# Licensed under the Apache License, Version 2.0 (the "License"); you may
				31	# not use this file except in compliance with the License.
				32	# You may obtain a copy of the License at
				33	#
				34	# http://www.apache.org/licenses/LICENSE-2.0
				35	#
				36	# Unless required by applicable law or agreed to in writing, software
				37	# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
				38	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				39	# See the License for the specific language governing permissions and
				40	# limitations under the License.
				41	#
Bence Szépkúti	f744bd7	2020-06-05 13:02:18 +0200	[diff] [blame]	42	# **********
				43	#
				44	# **********
				45	# GNU General Public License v2.0 or later:
				46	#
				47	# This program is free software; you can redistribute it and/or modify
				48	# it under the terms of the GNU General Public License as published by
				49	# the Free Software Foundation; either version 2 of the License, or
				50	# (at your option) any later version.
				51	#
				52	# This program is distributed in the hope that it will be useful,
				53	# but WITHOUT ANY WARRANTY; without even the implied warranty of
				54	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				55	# GNU General Public License for more details.
				56	#
				57	# You should have received a copy of the GNU General Public License along
				58	# with this program; if not, write to the Free Software Foundation, Inc.,
				59	# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
				60	#
				61	# **********
Gilles Peskine	cff94e3	2020-04-21 18:33:12 +0200	[diff] [blame]	62
				63	import argparse
				64	from collections import OrderedDict, namedtuple
				65	import datetime
				66	import functools
				67	import glob
				68	import os
				69	import re
				70	import subprocess
				71	import sys
				72
				73	class InputFormatError(Exception):
				74	def __init__(self, filename, line_number, message, args, *kwargs):
				75	message = '{}:{}: {}'.format(filename, line_number,
				76	message.format(args, *kwargs))
				77	super().__init__(message)
				78
				79	class CategoryParseError(Exception):
				80	def __init__(self, line_offset, error_message):
				81	self.line_offset = line_offset
				82	self.error_message = error_message
				83	super().__init__('{}: {}'.format(line_offset, error_message))
				84
				85	class LostContent(Exception):
				86	def __init__(self, filename, line):
				87	message = ('Lost content from {}: "{}"'.format(filename, line))
				88	super().__init__(message)
				89
				90	# The category names we use in the changelog.
				91	# If you edit this, update ChangeLog.d/README.md.
				92	STANDARD_CATEGORIES = (
				93	b'API changes',
				94	b'Default behavior changes',
				95	b'Requirement changes',
				96	b'New deprecations',
				97	b'Removals',
				98	b'Features',
				99	b'Security',
				100	b'Bugfix',
				101	b'Changes',
				102	)
				103
Paul Elliott	c24a1e8	2021-03-05 12:22:51 +0000	[diff] [blame]	104	# The maximum line length for an entry
				105	MAX_LINE_LENGTH = 80
				106
Gilles Peskine	cff94e3	2020-04-21 18:33:12 +0200	[diff] [blame]	107	CategoryContent = namedtuple('CategoryContent', [
				108	'name', 'title_line', # Title text and line number of the title
				109	'body', 'body_line', # Body text and starting line number of the body
				110	])
				111
				112	class ChangelogFormat:
				113	"""Virtual class documenting how to write a changelog format class."""
				114
				115	@classmethod
				116	def extract_top_version(cls, changelog_file_content):
				117	"""Split out the top version section.
				118
				119	If the top version is already released, create a new top
				120	version section for an unreleased version.
				121
				122	Return ``(header, top_version_title, top_version_body, trailer)``
				123	where the "top version" is the existing top version section if it's
				124	for unreleased changes, and a newly created section otherwise.
				125	To assemble the changelog after modifying top_version_body,
				126	concatenate the four pieces.
				127	"""
				128	raise NotImplementedError
				129
				130	@classmethod
				131	def version_title_text(cls, version_title):
				132	"""Return the text of a formatted version section title."""
				133	raise NotImplementedError
				134
				135	@classmethod
				136	def split_categories(cls, version_body):
				137	"""Split a changelog version section body into categories.
				138
				139	Return a list of `CategoryContent` the name is category title
				140	without any formatting.
				141	"""
				142	raise NotImplementedError
				143
				144	@classmethod
				145	def format_category(cls, title, body):
				146	"""Construct the text of a category section from its title and body."""
				147	raise NotImplementedError
				148
				149	class TextChangelogFormat(ChangelogFormat):
				150	"""The traditional Mbed TLS changelog format."""
				151
				152	_unreleased_version_text = b'= mbed TLS x.x.x branch released xxxx-xx-xx'
				153	@classmethod
				154	def is_released_version(cls, title):
				155	# Look for an incomplete release date
				156	return not re.search(br'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title)
				157
				158	_top_version_re = re.compile(br'(?:\A\|\n)(=[^\n]\n+)(.?\n)(?:=\|$)',
				159	re.DOTALL)
				160	@classmethod
				161	def extract_top_version(cls, changelog_file_content):
				162	"""A version section starts with a line starting with '='."""
				163	m = re.search(cls._top_version_re, changelog_file_content)
				164	top_version_start = m.start(1)
				165	top_version_end = m.end(2)
				166	top_version_title = m.group(1)
				167	top_version_body = m.group(2)
				168	if cls.is_released_version(top_version_title):
				169	top_version_end = top_version_start
				170	top_version_title = cls._unreleased_version_text + b'\n\n'
				171	top_version_body = b''
				172	return (changelog_file_content[:top_version_start],
				173	top_version_title, top_version_body,
				174	changelog_file_content[top_version_end:])
				175
				176	@classmethod
				177	def version_title_text(cls, version_title):
				178	return re.sub(br'\n.*', version_title, re.DOTALL)
				179
				180	_category_title_re = re.compile(br'(^\w.*)\n+', re.MULTILINE)
				181	@classmethod
				182	def split_categories(cls, version_body):
				183	"""A category title is a line with the title in column 0."""
				184	if not version_body:
				185	return []
				186	title_matches = list(re.finditer(cls._category_title_re, version_body))
				187	if not title_matches or title_matches[0].start() != 0:
				188	# There is junk before the first category.
				189	raise CategoryParseError(0, 'Junk found where category expected')
				190	title_starts = [m.start(1) for m in title_matches]
				191	body_starts = [m.end(0) for m in title_matches]
				192	body_ends = title_starts[1:] + [len(version_body)]
				193	bodies = [version_body[body_start:body_end].rstrip(b'\n') + b'\n'
				194	for (body_start, body_end) in zip(body_starts, body_ends)]
				195	title_lines = [version_body[:pos].count(b'\n') for pos in title_starts]
				196	body_lines = [version_body[:pos].count(b'\n') for pos in body_starts]
				197	return [CategoryContent(title_match.group(1), title_line,
				198	body, body_line)
				199	for title_match, title_line, body, body_line
				200	in zip(title_matches, title_lines, bodies, body_lines)]
				201
				202	@classmethod
				203	def format_category(cls, title, body):
				204	# `split_categories` ensures that each body ends with a newline.
				205	# Make sure that there is additionally a blank line between categories.
				206	if not body.endswith(b'\n\n'):
				207	body += b'\n'
				208	return title + b'\n' + body
				209
				210	class ChangeLog:
				211	"""An Mbed TLS changelog.
				212
				213	A changelog file consists of some header text followed by one or
				214	more version sections. The version sections are in reverse
				215	chronological order. Each version section consists of a title and a body.
				216
				217	The body of a version section consists of zero or more category
				218	subsections. Each category subsection consists of a title and a body.
				219
				220	A changelog entry file has the same format as the body of a version section.
				221
				222	A `ChangelogFormat` object defines the concrete syntax of the changelog.
				223	Entry files must have the same format as the changelog file.
				224	"""
				225
				226	# Only accept dotted version numbers (e.g. "3.1", not "3").
				227	# Refuse ".x" in a version number where x is a letter: this indicates
				228	# a version that is not yet released. Something like "3.1a" is accepted.
				229	_version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
				230	_incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')
Mateusz Starzyk	270626e	2021-04-15 10:03:59 +0200	[diff] [blame^]	231	_only_url_re = re.compile(br'^\s\w+://\S+\s$')
				232	_has_url_re = re.compile(br'.://.')
Gilles Peskine	cff94e3	2020-04-21 18:33:12 +0200	[diff] [blame]	233
				234	def add_categories_from_text(self, filename, line_offset,
				235	text, allow_unknown_category):
				236	"""Parse a version section or entry file."""
				237	try:
				238	categories = self.format.split_categories(text)
				239	except CategoryParseError as e:
				240	raise InputFormatError(filename, line_offset + e.line_offset,
				241	e.error_message)
				242	for category in categories:
				243	if not allow_unknown_category and \
				244	category.name not in self.categories:
				245	raise InputFormatError(filename,
				246	line_offset + category.title_line,
				247	'Unknown category: "{}"',
				248	category.name.decode('utf8'))
Paul Elliott	c24a1e8	2021-03-05 12:22:51 +0000	[diff] [blame]	249
				250	body_split = category.body.splitlines()
Mateusz Starzyk	270626e	2021-04-15 10:03:59 +0200	[diff] [blame^]	251
Paul Elliott	0ec5979	2021-03-18 18:07:46 +0000	[diff] [blame]	252	for line_number, line in enumerate(body_split, 1):
Mateusz Starzyk	270626e	2021-04-15 10:03:59 +0200	[diff] [blame^]	253	if not self._only_url_re.match(line) and \
				254	len(line) > MAX_LINE_LENGTH:
				255	long_url_msg = '. URL exceeding length limit must be alone in its line.' \
				256	if self._has_url_re.match(line) else ""
Paul Elliott	c24a1e8	2021-03-05 12:22:51 +0000	[diff] [blame]	257	raise InputFormatError(filename,
Paul Elliott	0ec5979	2021-03-18 18:07:46 +0000	[diff] [blame]	258	category.body_line + line_number,
Mateusz Starzyk	270626e	2021-04-15 10:03:59 +0200	[diff] [blame^]	259	'Line is longer than allowed: '
				260	'Length {} (Max {}){}',
				261	len(line), MAX_LINE_LENGTH,
				262	long_url_msg)
Paul Elliott	c24a1e8	2021-03-05 12:22:51 +0000	[diff] [blame]	263
Gilles Peskine	cff94e3	2020-04-21 18:33:12 +0200	[diff] [blame]	264	self.categories[category.name] += category.body
				265
				266	def __init__(self, input_stream, changelog_format):
				267	"""Create a changelog object.
				268
				269	Populate the changelog object from the content of the file
				270	input_stream.
				271	"""
				272	self.format = changelog_format
				273	whole_file = input_stream.read()
				274	(self.header,
				275	self.top_version_title, top_version_body,
				276	self.trailer) = self.format.extract_top_version(whole_file)
				277	# Split the top version section into categories.
				278	self.categories = OrderedDict()
				279	for category in STANDARD_CATEGORIES:
				280	self.categories[category] = b''
				281	offset = (self.header + self.top_version_title).count(b'\n') + 1
				282	self.add_categories_from_text(input_stream.name, offset,
				283	top_version_body, True)
				284
				285	def add_file(self, input_stream):
				286	"""Add changelog entries from a file.
				287	"""
				288	self.add_categories_from_text(input_stream.name, 1,
				289	input_stream.read(), False)
				290
				291	def write(self, filename):
				292	"""Write the changelog to the specified file.
				293	"""
				294	with open(filename, 'wb') as out:
				295	out.write(self.header)
				296	out.write(self.top_version_title)
				297	for title, body in self.categories.items():
				298	if not body:
				299	continue
				300	out.write(self.format.format_category(title, body))
				301	out.write(self.trailer)
				302
				303
				304	@functools.total_ordering
				305	class EntryFileSortKey:
				306	"""This classes defines an ordering on changelog entry files: older < newer.
				307
				308	* Merged entry files are sorted according to their merge date (date of
				309	the merge commit that brought the commit that created the file into
				310	the target branch).
				311	* Committed but unmerged entry files are sorted according to the date
				312	of the commit that adds them.
				313	* Uncommitted entry files are sorted according to their modification time.
				314
				315	This class assumes that the file is in a git working directory with
				316	the target branch checked out.
				317	"""
				318
				319	# Categories of files. A lower number is considered older.
				320	MERGED = 0
				321	COMMITTED = 1
				322	LOCAL = 2
				323
				324	@staticmethod
				325	def creation_hash(filename):
				326	"""Return the git commit id at which the given file was created.
				327
				328	Return None if the file was never checked into git.
				329	"""
				330	hashes = subprocess.check_output(['git', 'log', '--format=%H',
				331	'--follow',
				332	'--', filename])
				333	m = re.search(b'(.+)$', hashes)
				334	if not m:
				335	# The git output is empty. This means that the file was
				336	# never checked in.
				337	return None
				338	# The last commit in the log is the oldest one, which is when the
				339	# file was created.
				340	return m.group(0)
				341
				342	@staticmethod
				343	def list_merges(some_hash, target, *options):
				344	"""List merge commits from some_hash to target.
				345
				346	Pass options to git to select which commits are included.
				347	"""
				348	text = subprocess.check_output(['git', 'rev-list',
				349	'--merges', *options,
				350	b'..'.join([some_hash, target])])
				351	return text.rstrip(b'\n').split(b'\n')
				352
				353	@classmethod
				354	def merge_hash(cls, some_hash):
				355	"""Return the git commit id at which the given commit was merged.
				356
				357	Return None if the given commit was never merged.
				358	"""
				359	target = b'HEAD'
				360	# List the merges from some_hash to the target in two ways.
				361	# The ancestry list is the ones that are both descendants of
				362	# some_hash and ancestors of the target.
				363	ancestry = frozenset(cls.list_merges(some_hash, target,
				364	'--ancestry-path'))
				365	# The first_parents list only contains merges that are directly
				366	# on the target branch. We want it in reverse order (oldest first).
				367	first_parents = cls.list_merges(some_hash, target,
				368	'--first-parent', '--reverse')
				369	# Look for the oldest merge commit that's both on the direct path
				370	# and directly on the target branch. That's the place where some_hash
				371	# was merged on the target branch. See
				372	# https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
				373	for commit in first_parents:
				374	if commit in ancestry:
				375	return commit
				376	return None
				377
				378	@staticmethod
				379	def commit_timestamp(commit_id):
				380	"""Return the timestamp of the given commit."""
				381	text = subprocess.check_output(['git', 'show', '-s',
				382	'--format=%ct',
				383	commit_id])
				384	return datetime.datetime.utcfromtimestamp(int(text))
				385
				386	@staticmethod
				387	def file_timestamp(filename):
				388	"""Return the modification timestamp of the given file."""
				389	mtime = os.stat(filename).st_mtime
				390	return datetime.datetime.fromtimestamp(mtime)
				391
				392	def __init__(self, filename):
				393	"""Determine position of the file in the changelog entry order.
				394
				395	This constructor returns an object that can be used with comparison
				396	operators, with `sort` and `sorted`, etc. Older entries are sorted
				397	before newer entries.
				398	"""
				399	self.filename = filename
				400	creation_hash = self.creation_hash(filename)
				401	if not creation_hash:
				402	self.category = self.LOCAL
				403	self.datetime = self.file_timestamp(filename)
				404	return
				405	merge_hash = self.merge_hash(creation_hash)
				406	if not merge_hash:
				407	self.category = self.COMMITTED
				408	self.datetime = self.commit_timestamp(creation_hash)
				409	return
				410	self.category = self.MERGED
				411	self.datetime = self.commit_timestamp(merge_hash)
				412
				413	def sort_key(self):
				414	""""Return a concrete sort key for this entry file sort key object.
				415
				416	``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
				417	"""
				418	return (self.category, self.datetime, self.filename)
				419
				420	def __eq__(self, other):
				421	return self.sort_key() == other.sort_key()
				422
				423	def __lt__(self, other):
				424	return self.sort_key() < other.sort_key()
				425
				426
				427	def check_output(generated_output_file, main_input_file, merged_files):
				428	"""Make sanity checks on the generated output.
				429
				430	The intent of these sanity checks is to have reasonable confidence
				431	that no content has been lost.
				432
				433	The sanity check is that every line that is present in an input file
				434	is also present in an output file. This is not perfect but good enough
				435	for now.
				436	"""
				437	generated_output = set(open(generated_output_file, 'rb'))
				438	for line in open(main_input_file, 'rb'):
				439	if line not in generated_output:
				440	raise LostContent('original file', line)
				441	for merged_file in merged_files:
				442	for line in open(merged_file, 'rb'):
				443	if line not in generated_output:
				444	raise LostContent(merged_file, line)
				445
				446	def finish_output(changelog, output_file, input_file, merged_files):
				447	"""Write the changelog to the output file.
				448
				449	The input file and the list of merged files are used only for sanity
				450	checks on the output.
				451	"""
				452	if os.path.exists(output_file) and not os.path.isfile(output_file):
				453	# The output is a non-regular file (e.g. pipe). Write to it directly.
				454	output_temp = output_file
				455	else:
				456	# The output is a regular file. Write to a temporary file,
				457	# then move it into place atomically.
				458	output_temp = output_file + '.tmp'
				459	changelog.write(output_temp)
				460	check_output(output_temp, input_file, merged_files)
				461	if output_temp != output_file:
				462	os.rename(output_temp, output_file)
				463
				464	def remove_merged_entries(files_to_remove):
				465	for filename in files_to_remove:
				466	os.remove(filename)
				467
				468	def list_files_to_merge(options):
				469	"""List the entry files to merge, oldest first.
				470
				471	"Oldest" is defined by `EntryFileSortKey`.
				472	"""
				473	files_to_merge = glob.glob(os.path.join(options.dir, '*.txt'))
				474	files_to_merge.sort(key=EntryFileSortKey)
				475	return files_to_merge
				476
				477	def merge_entries(options):
				478	"""Merge changelog entries into the changelog file.
				479
				480	Read the changelog file from options.input.
				481	Read entries to merge from the directory options.dir.
				482	Write the new changelog to options.output.
				483	Remove the merged entries if options.keep_entries is false.
				484	"""
				485	with open(options.input, 'rb') as input_file:
				486	changelog = ChangeLog(input_file, TextChangelogFormat)
				487	files_to_merge = list_files_to_merge(options)
				488	if not files_to_merge:
				489	sys.stderr.write('There are no pending changelog entries.\n')
				490	return
				491	for filename in files_to_merge:
				492	with open(filename, 'rb') as input_file:
				493	changelog.add_file(input_file)
				494	finish_output(changelog, options.output, options.input, files_to_merge)
				495	if not options.keep_entries:
				496	remove_merged_entries(files_to_merge)
				497
				498	def show_file_timestamps(options):
				499	"""List the files to merge and their timestamp.
				500
				501	This is only intended for debugging purposes.
				502	"""
				503	files = list_files_to_merge(options)
				504	for filename in files:
				505	ts = EntryFileSortKey(filename)
				506	print(ts.category, ts.datetime, filename)
				507
				508	def set_defaults(options):
				509	"""Add default values for missing options."""
				510	output_file = getattr(options, 'output', None)
				511	if output_file is None:
				512	options.output = options.input
				513	if getattr(options, 'keep_entries', None) is None:
				514	options.keep_entries = (output_file is not None)
				515
				516	def main():
				517	"""Command line entry point."""
				518	parser = argparse.ArgumentParser(description=__doc__)
				519	parser.add_argument('--dir', '-d', metavar='DIR',
				520	default='ChangeLog.d',
				521	help='Directory to read entries from'
				522	' (default: ChangeLog.d)')
				523	parser.add_argument('--input', '-i', metavar='FILE',
				524	default='ChangeLog',
				525	help='Existing changelog file to read from and augment'
				526	' (default: ChangeLog)')
				527	parser.add_argument('--keep-entries',
				528	action='store_true', dest='keep_entries', default=None,
				529	help='Keep the files containing entries'
				530	' (default: remove them if --output/-o is not specified)')
				531	parser.add_argument('--no-keep-entries',
				532	action='store_false', dest='keep_entries',
				533	help='Remove the files containing entries after they are merged'
				534	' (default: remove them if --output/-o is not specified)')
				535	parser.add_argument('--output', '-o', metavar='FILE',
				536	help='Output changelog file'
				537	' (default: overwrite the input)')
				538	parser.add_argument('--list-files-only',
				539	action='store_true',
				540	help=('Only list the files that would be processed '
				541	'(with some debugging information)'))
				542	options = parser.parse_args()
				543	set_defaults(options)
				544	if options.list_files_only:
				545	show_file_timestamps(options)
				546	return
				547	merge_entries(options)
				548
				549	if __name__ == '__main__':
				550	main()