Merge pull request #3135 from gilles-peskine-arm/changelog-assemble-text
Switch to the classic Mbed TLS ChangeLog format
diff --git a/ChangeLog.d/00README.md b/ChangeLog.d/00README.md
new file mode 100644
index 0000000..b559e23
--- /dev/null
+++ b/ChangeLog.d/00README.md
@@ -0,0 +1,67 @@
+# Pending changelog entry directory
+
+This directory contains changelog entries that have not yet been merged
+to the changelog file ([`../ChangeLog`](../ChangeLog)).
+
+## Changelog entry file format
+
+A changelog entry file must have the extension `*.txt` and must have the
+following format:
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Security
+ * Change description.
+ * Another change description.
+
+Features
+ * Yet another change description. This is a long change description that
+ spans multiple lines.
+ * Yet again another change description.
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The permitted changelog entry categories are as follows:
+<!-- Keep this synchronized with STANDARD_CATEGORIES in assemble_changelog.py! -->
+
+ API changes
+ Default behavior changes
+ Requirement changes
+ New deprecations
+ Removals
+ Features
+ Security
+ Bugfix
+ Changes
+
+Use “Changes” for anything that doesn't fit in the other categories, such as
+performance, documentation and test improvements.
+
+## How to write a changelog entry
+
+Each entry starts with three spaces, an asterisk and a space. Continuation
+lines start with 5 spaces. Lines wrap at 79 characters.
+
+Write full English sentences with proper capitalization and punctuation. Use
+the present tense. Use the imperative where applicable. For example: “Fix a
+bug in mbedtls_xxx() ….”
+
+Include GitHub issue numbers where relevant. Use the format “#1234” for an
+Mbed TLS issue. Add other external references such as CVE numbers where
+applicable.
+
+Credit the author of the contribution if the contribution is not a member of
+the Mbed TLS development team. Also credit bug reporters where applicable.
+
+**Explain why, not how**. Remember that the audience is the users of the
+library, not its developers. In particular, for a bug fix, explain the
+consequences of the bug, not how the bug was fixed. For a new feature, explain
+why one might be interested in the feature. For an API change or a deprecation,
+explain how to update existing applications.
+
+See [existing entries](../ChangeLog) for examples.
+
+## How `ChangeLog` is updated
+
+Run [`../scripts/assemble_changelog.py`](../scripts/assemble_changelog.py)
+from a Git working copy
+to move the entries from files in `ChangeLog.d` to the main `ChangeLog` file.
diff --git a/ChangeLog.d/README b/ChangeLog.d/README
deleted file mode 100644
index 2f9f049..0000000
--- a/ChangeLog.d/README
+++ /dev/null
@@ -1,21 +0,0 @@
-This directory contains changelog entries that have not yet been merged
-to the changelog file (../ChangeLog.md).
-
-A changelog entry file must have the extension *.md and must have the
-following format:
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-### Section title
-
-* Change descritpion.
-* Another change description.
-
-### Another section title
-
-* Yet another change description.
-* Yet again another change description.
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-See STANDARD_SECTIONS in ../scripts/assemble_changelog.py for
-recognized section titles.
diff --git a/scripts/assemble_changelog.py b/scripts/assemble_changelog.py
index c868a6c..ffa3f16 100755
--- a/scripts/assemble_changelog.py
+++ b/scripts/assemble_changelog.py
@@ -36,7 +36,7 @@
# This file is part of Mbed TLS (https://tls.mbed.org)
import argparse
-from collections import OrderedDict
+from collections import OrderedDict, namedtuple
import datetime
import functools
import glob
@@ -51,187 +51,209 @@
message.format(*args, **kwargs))
super().__init__(message)
+class CategoryParseError(Exception):
+ def __init__(self, line_offset, error_message):
+ self.line_offset = line_offset
+ self.error_message = error_message
+ super().__init__('{}: {}'.format(line_offset, error_message))
+
class LostContent(Exception):
def __init__(self, filename, line):
message = ('Lost content from {}: "{}"'.format(filename, line))
super().__init__(message)
-STANDARD_SECTIONS = (
- b'Interface changes',
+# The category names we use in the changelog.
+# If you edit this, update ChangeLog.d/README.md.
+STANDARD_CATEGORIES = (
+ b'API changes',
b'Default behavior changes',
b'Requirement changes',
b'New deprecations',
b'Removals',
- b'New features',
+ b'Features',
b'Security',
- b'Bug fixes',
- b'Performance improvements',
- b'Other changes',
+ b'Bugfix',
+ b'Changes',
)
+CategoryContent = namedtuple('CategoryContent', [
+ 'name', 'title_line', # Title text and line number of the title
+ 'body', 'body_line', # Body text and starting line number of the body
+])
+
+class ChangelogFormat:
+ """Virtual class documenting how to write a changelog format class."""
+
+ @classmethod
+ def extract_top_version(cls, changelog_file_content):
+ """Split out the top version section.
+
+ If the top version is already released, create a new top
+ version section for an unreleased version.
+
+ Return ``(header, top_version_title, top_version_body, trailer)``
+ where the "top version" is the existing top version section if it's
+ for unreleased changes, and a newly created section otherwise.
+ To assemble the changelog after modifying top_version_body,
+ concatenate the four pieces.
+ """
+ raise NotImplementedError
+
+ @classmethod
+ def version_title_text(cls, version_title):
+ """Return the text of a formatted version section title."""
+ raise NotImplementedError
+
+ @classmethod
+ def split_categories(cls, version_body):
+ """Split a changelog version section body into categories.
+
+ Return a list of `CategoryContent` the name is category title
+ without any formatting.
+ """
+ raise NotImplementedError
+
+ @classmethod
+ def format_category(cls, title, body):
+ """Construct the text of a category section from its title and body."""
+ raise NotImplementedError
+
+class TextChangelogFormat(ChangelogFormat):
+ """The traditional Mbed TLS changelog format."""
+
+ _unreleased_version_text = b'= mbed TLS x.x.x branch released xxxx-xx-xx'
+ @classmethod
+ def is_released_version(cls, title):
+ # Look for an incomplete release date
+ return not re.search(br'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title)
+
+ _top_version_re = re.compile(br'(?:\A|\n)(=[^\n]*\n+)(.*?\n)(?:=|$)',
+ re.DOTALL)
+ @classmethod
+ def extract_top_version(cls, changelog_file_content):
+ """A version section starts with a line starting with '='."""
+ m = re.search(cls._top_version_re, changelog_file_content)
+ top_version_start = m.start(1)
+ top_version_end = m.end(2)
+ top_version_title = m.group(1)
+ top_version_body = m.group(2)
+ if cls.is_released_version(top_version_title):
+ top_version_end = top_version_start
+ top_version_title = cls._unreleased_version_text + b'\n\n'
+ top_version_body = b''
+ return (changelog_file_content[:top_version_start],
+ top_version_title, top_version_body,
+ changelog_file_content[top_version_end:])
+
+ @classmethod
+ def version_title_text(cls, version_title):
+ return re.sub(br'\n.*', version_title, re.DOTALL)
+
+ _category_title_re = re.compile(br'(^\w.*)\n+', re.MULTILINE)
+ @classmethod
+ def split_categories(cls, version_body):
+ """A category title is a line with the title in column 0."""
+ if not version_body:
+ return []
+ title_matches = list(re.finditer(cls._category_title_re, version_body))
+ if not title_matches or title_matches[0].start() != 0:
+ # There is junk before the first category.
+ raise CategoryParseError(0, 'Junk found where category expected')
+ title_starts = [m.start(1) for m in title_matches]
+ body_starts = [m.end(0) for m in title_matches]
+ body_ends = title_starts[1:] + [len(version_body)]
+ bodies = [version_body[body_start:body_end].rstrip(b'\n') + b'\n'
+ for (body_start, body_end) in zip(body_starts, body_ends)]
+ title_lines = [version_body[:pos].count(b'\n') for pos in title_starts]
+ body_lines = [version_body[:pos].count(b'\n') for pos in body_starts]
+ return [CategoryContent(title_match.group(1), title_line,
+ body, body_line)
+ for title_match, title_line, body, body_line
+ in zip(title_matches, title_lines, bodies, body_lines)]
+
+ @classmethod
+ def format_category(cls, title, body):
+ # `split_categories` ensures that each body ends with a newline.
+ # Make sure that there is additionally a blank line between categories.
+ if not body.endswith(b'\n\n'):
+ body += b'\n'
+ return title + b'\n' + body
+
class ChangeLog:
"""An Mbed TLS changelog.
- A changelog is a file in Markdown format. Each level 2 section title
- starts a version, and versions are sorted in reverse chronological
- order. Lines with a level 2 section title must start with '##'.
+ A changelog file consists of some header text followed by one or
+ more version sections. The version sections are in reverse
+ chronological order. Each version section consists of a title and a body.
- Within a version, there are multiple sections, each devoted to a kind
- of change: bug fix, feature request, etc. Section titles should match
- entries in STANDARD_SECTIONS exactly.
+ The body of a version section consists of zero or more category
+ subsections. Each category subsection consists of a title and a body.
- Within each section, each separate change should be on a line starting
- with a '*' bullet. There may be blank lines surrounding titles, but
- there should not be any blank line inside a section.
+ A changelog entry file has the same format as the body of a version section.
+
+ A `ChangelogFormat` object defines the concrete syntax of the changelog.
+ Entry files must have the same format as the changelog file.
"""
- _title_re = re.compile(br'#*')
- def title_level(self, line):
- """Determine whether the line is a title.
-
- Return (level, content) where level is the Markdown section level
- (1 for '#', 2 for '##', etc.) and content is the section title
- without leading or trailing whitespace. For a non-title line,
- the level is 0.
- """
- level = re.match(self._title_re, line).end()
- return level, line[level:].strip()
-
# Only accept dotted version numbers (e.g. "3.1", not "3").
# Refuse ".x" in a version number where x is a letter: this indicates
# a version that is not yet released. Something like "3.1a" is accepted.
_version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
_incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')
- def section_is_released_version(self, title):
- """Whether this section is for a released version.
+ def add_categories_from_text(self, filename, line_offset,
+ text, allow_unknown_category):
+ """Parse a version section or entry file."""
+ try:
+ categories = self.format.split_categories(text)
+ except CategoryParseError as e:
+ raise InputFormatError(filename, line_offset + e.line_offset,
+ e.error_message)
+ for category in categories:
+ if not allow_unknown_category and \
+ category.name not in self.categories:
+ raise InputFormatError(filename,
+ line_offset + category.title_line,
+ 'Unknown category: "{}"',
+ category.name.decode('utf8'))
+ self.categories[category.name] += category.body
- True if the given level-2 section title indicates that this section
- contains released changes, otherwise False.
- """
- # Assume that a released version has a numerical version number
- # that follows a particular pattern. These criteria may be revised
- # as needed in future versions of this script.
- version_number = re.search(self._version_number_re, title)
- if version_number:
- return not re.search(self._incomplete_version_number_re,
- version_number.group(0))
- else:
- return False
-
- def unreleased_version_title(self):
- """The title to use if creating a new section for an unreleased version."""
- # pylint: disable=no-self-use; this method may be overridden
- return b'Unreleased changes'
-
- def __init__(self, input_stream):
+ def __init__(self, input_stream, changelog_format):
"""Create a changelog object.
Populate the changelog object from the content of the file
- input_stream. This is typically a file opened for reading, but
- can be any generator returning the lines to read.
+ input_stream.
"""
- # Content before the level-2 section where the new entries are to be
- # added.
- self.header = []
- # Content of the level-3 sections of where the new entries are to
- # be added.
- self.section_content = OrderedDict()
- for section in STANDARD_SECTIONS:
- self.section_content[section] = []
- # Content of level-2 sections for already-released versions.
- self.trailer = []
- self.read_main_file(input_stream)
-
- def read_main_file(self, input_stream):
- """Populate the changelog object from the content of the file.
-
- This method is only intended to be called as part of the constructor
- of the class and may not act sensibly on an object that is already
- partially populated.
- """
- # Parse the first level-2 section, containing changelog entries
- # for unreleased changes.
- # If we'll be expanding this section, everything before the first
- # level-3 section title ("###...") following the first level-2
- # section title ("##...") is passed through as the header
- # and everything after the second level-2 section title is passed
- # through as the trailer. Inside the first level-2 section,
- # split out the level-3 sections.
- # If we'll be creating a new version, the header is everything
- # before the point where we want to add the level-2 section
- # for this version, and the trailer is what follows.
- level_2_seen = 0
- current_section = None
- for line in input_stream:
- level, content = self.title_level(line)
- if level == 2:
- level_2_seen += 1
- if level_2_seen == 1:
- if self.section_is_released_version(content):
- self.header.append(b'## ' +
- self.unreleased_version_title() +
- b'\n\n')
- level_2_seen = 2
- elif level == 3 and level_2_seen == 1:
- current_section = content
- self.section_content.setdefault(content, [])
- if level_2_seen == 1 and current_section is not None:
- if level != 3 and line.strip():
- self.section_content[current_section].append(line)
- elif level_2_seen <= 1:
- self.header.append(line)
- else:
- self.trailer.append(line)
+ self.format = changelog_format
+ whole_file = input_stream.read()
+ (self.header,
+ self.top_version_title, top_version_body,
+ self.trailer) = self.format.extract_top_version(whole_file)
+ # Split the top version section into categories.
+ self.categories = OrderedDict()
+ for category in STANDARD_CATEGORIES:
+ self.categories[category] = b''
+ offset = (self.header + self.top_version_title).count(b'\n') + 1
+ self.add_categories_from_text(input_stream.name, offset,
+ top_version_body, True)
def add_file(self, input_stream):
"""Add changelog entries from a file.
-
- Read lines from input_stream, which is typically a file opened
- for reading. These lines must contain a series of level 3
- Markdown sections with recognized titles. The corresponding
- content is injected into the respective sections in the changelog.
- The section titles must be either one of the hard-coded values
- in STANDARD_SECTIONS in assemble_changelog.py or already present
- in ChangeLog.md. Section titles must match byte-for-byte except that
- leading or trailing whitespace is ignored.
"""
- filename = input_stream.name
- current_section = None
- for line_number, line in enumerate(input_stream, 1):
- if not line.strip():
- continue
- level, content = self.title_level(line)
- if level == 3:
- current_section = content
- if current_section not in self.section_content:
- raise InputFormatError(filename, line_number,
- 'Section {} is not recognized',
- str(current_section)[1:])
- elif level == 0:
- if current_section is None:
- raise InputFormatError(filename, line_number,
- 'Missing section title at the beginning of the file')
- self.section_content[current_section].append(line)
- else:
- raise InputFormatError(filename, line_number,
- 'Only level 3 headers (###) are permitted')
+ self.add_categories_from_text(input_stream.name, 1,
+ input_stream.read(), False)
def write(self, filename):
"""Write the changelog to the specified file.
"""
with open(filename, 'wb') as out:
- for line in self.header:
- out.write(line)
- for section, lines in self.section_content.items():
- if not lines:
+ out.write(self.header)
+ out.write(self.top_version_title)
+ for title, body in self.categories.items():
+ if not body:
continue
- out.write(b'### ' + section + b'\n\n')
- for line in lines:
- out.write(line)
- out.write(b'\n')
- for line in self.trailer:
- out.write(line)
+ out.write(self.format.format_category(title, body))
+ out.write(self.trailer)
@functools.total_ordering
@@ -403,7 +425,7 @@
"Oldest" is defined by `EntryFileSortKey`.
"""
- files_to_merge = glob.glob(os.path.join(options.dir, '*.md'))
+ files_to_merge = glob.glob(os.path.join(options.dir, '*.txt'))
files_to_merge.sort(key=EntryFileSortKey)
return files_to_merge
@@ -416,7 +438,7 @@
Remove the merged entries if options.keep_entries is false.
"""
with open(options.input, 'rb') as input_file:
- changelog = ChangeLog(input_file)
+ changelog = ChangeLog(input_file, TextChangelogFormat)
files_to_merge = list_files_to_merge(options)
if not files_to_merge:
sys.stderr.write('There are no pending changelog entries.\n')
@@ -454,9 +476,9 @@
help='Directory to read entries from'
' (default: ChangeLog.d)')
parser.add_argument('--input', '-i', metavar='FILE',
- default='ChangeLog.md',
+ default='ChangeLog',
help='Existing changelog file to read from and augment'
- ' (default: ChangeLog.md)')
+ ' (default: ChangeLog)')
parser.add_argument('--keep-entries',
action='store_true', dest='keep_entries', default=None,
help='Keep the files containing entries'
@@ -470,7 +492,7 @@
' (default: overwrite the input)')
parser.add_argument('--list-files-only',
action='store_true',
- help=('Only list the files that would be processed'
+ help=('Only list the files that would be processed '
'(with some debugging information)'))
options = parser.parse_args()
set_defaults(options)