|  | #!/usr/bin/env python3 | 
|  |  | 
|  | """Assemble Mbed TLS change log entries into the change log file. | 
|  |  | 
|  | Add changelog entries to the first level-2 section. | 
|  | Create a new level-2 section for unreleased changes if needed. | 
|  | Remove the input files unless --keep-entries is specified. | 
|  |  | 
|  | In each level-3 section, entries are sorted in chronological order | 
|  | (oldest first). From oldest to newest: | 
|  | * Merged entry files are sorted according to their merge date (date of | 
|  | the merge commit that brought the commit that created the file into | 
|  | the target branch). | 
|  | * Committed but unmerged entry files are sorted according to the date | 
|  | of the commit that adds them. | 
|  | * Uncommitted entry files are sorted according to their modification time. | 
|  |  | 
|  | You must run this program from within a git working directory. | 
|  | """ | 
|  |  | 
|  | # Copyright (C) 2019, Arm Limited, All Rights Reserved | 
|  | # SPDX-License-Identifier: Apache-2.0 | 
|  | # | 
|  | # Licensed under the Apache License, Version 2.0 (the "License"); you may | 
|  | # not use this file except in compliance with the License. | 
|  | # You may obtain a copy of the License at | 
|  | # | 
|  | # http://www.apache.org/licenses/LICENSE-2.0 | 
|  | # | 
|  | # Unless required by applicable law or agreed to in writing, software | 
|  | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | 
|  | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | # See the License for the specific language governing permissions and | 
|  | # limitations under the License. | 
|  | # | 
|  | # This file is part of Mbed TLS (https://tls.mbed.org) | 
|  |  | 
|  | import argparse | 
|  | from collections import OrderedDict, namedtuple | 
|  | import datetime | 
|  | import functools | 
|  | import glob | 
|  | import os | 
|  | import re | 
|  | import subprocess | 
|  | import sys | 
|  |  | 
class InputFormatError(Exception):
    """Report a problem found at a given line of an input file.

    The exception text has the form ``FILENAME:LINE: MESSAGE`` where
    MESSAGE is ``message`` expanded with `str.format` using the remaining
    positional and keyword arguments.
    """

    def __init__(self, filename, line_number, message, *args, **kwargs):
        detail = message.format(*args, **kwargs)
        located = '{}:{}: {}'.format(filename, line_number, detail)
        super().__init__(located)
|  |  | 
class LostContent(Exception):
    """Signal that a line of an input file is missing from the output.

    ``filename`` identifies the input that contained the line and ``line``
    is the line itself; both are interpolated into the exception text.
    """

    def __init__(self, filename, line):
        super().__init__('Lost content from {}: "{}"'.format(filename, line))
|  |  | 
# Category titles (as bytes, since all files are processed in binary mode)
# that a ChangeLog object is pre-populated with, in this order.
STANDARD_CATEGORIES = (
    b'API changes',
    b'Default behavior changes',
    b'Requirement changes',
    b'New deprecations',
    b'Removals',
    b'Features',
    b'Security',
    b'Bugfix',
    b'Changes',
)
|  |  | 
# One category subsection as produced by a format's split_categories().
CategoryContent = namedtuple(
    'CategoryContent',
    [
        'name',        # Title text of the category
        'title_line',  # Line number of the title
        'body',        # Body text of the category
        'body_line',   # Starting line number of the body
    ],
)
|  |  | 
class ChangelogFormat:
    """Interface that every changelog format class must implement.

    All methods are class methods and raise `NotImplementedError` here;
    a concrete format overrides each of them.
    """

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """Isolate the topmost version section of a changelog file.

        Return a tuple ``(header, top_version_title, top_version_body,
        trailer)`` whose four parts, concatenated in that order, are equal
        to ``changelog_file_content``.
        """
        raise NotImplementedError()

    @classmethod
    def version_title_text(cls, version_title):
        """Return the text of a formatted version section title."""
        raise NotImplementedError()

    @classmethod
    def split_categories(cls, version_body):
        """Break the body of a version section into its categories.

        Return a list of `CategoryContent`. The ``name`` of each element is
        the category title without any formatting.
        """
        raise NotImplementedError()

    @classmethod
    def format_category(cls, title, body):
        """Build the text of a category section from its title and body."""
        raise NotImplementedError()
|  |  | 
class TextChangelogFormat(ChangelogFormat):
    """The traditional Mbed TLS changelog format.

    All content is handled as bytes. A version section starts with a line
    beginning with ``=``; a category title is a line whose first character
    is a word character (i.e. the title is in column 0).
    """

    # Group 1: the version title line plus any blank lines that follow it.
    # Group 2: the section body, up to the next line starting with '=' or
    # the end of the content.
    _top_version_re = re.compile(br'(?:\A|\n)(=[^\n]*\n+)(.*?\n)(?:=|$)',
                                 re.DOTALL)

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """Split out the first section whose title line starts with '='.

        Return ``(header, top_version_title, top_version_body, trailer)``;
        concatenating the four parts gives back ``changelog_file_content``.

        Raise ValueError if the content has no version section.
        """
        m = cls._top_version_re.search(changelog_file_content)
        if m is None:
            raise ValueError('No version section (line starting with "=") '
                             'found in changelog')
        top_version_start = m.start(1)
        top_version_end = m.end(2)
        return (changelog_file_content[:top_version_start],
                m.group(1), m.group(2),
                changelog_file_content[top_version_end:])

    @classmethod
    def version_title_text(cls, version_title):
        """Return the title's first line, without the line terminator."""
        # Drop everything from the first newline onwards. The subject
        # string and the flags must be passed in their proper positions:
        # re.sub(pattern, repl, string, count=0, flags=0).
        return re.sub(br'\n.*', b'', version_title, flags=re.DOTALL)

    _category_title_re = re.compile(br'(^\w.*)\n+', re.MULTILINE)

    @classmethod
    def split_categories(cls, version_body):
        """Split a version section body into categories.

        A category title is a line with the title in column 0. Return a
        list of `CategoryContent` in order of appearance. Line numbers are
        counted from 0 at the start of ``version_body``. Each body is
        normalized to end with exactly one newline.
        """
        title_matches = list(cls._category_title_re.finditer(version_body))
        categories = []
        for index, title_match in enumerate(title_matches):
            body_start = title_match.end(0)
            # A body extends to the next title, or to the end of the text
            # for the last category.
            if index + 1 < len(title_matches):
                body_end = title_matches[index + 1].start(1)
            else:
                body_end = len(version_body)
            body = version_body[body_start:body_end].rstrip(b'\n') + b'\n'
            title_line = version_body.count(b'\n', 0, title_match.start(1))
            body_line = version_body.count(b'\n', 0, body_start)
            categories.append(CategoryContent(title_match.group(1),
                                              title_line,
                                              body, body_line))
        return categories

    @classmethod
    def format_category(cls, title, body):
        """Return the title line followed by the body and a blank line."""
        # `split_categories` ensures that each body ends with a newline.
        # Make sure that there is additionally a blank line between categories.
        if not body.endswith(b'\n\n'):
            body += b'\n'
        return title + b'\n' + body
|  |  | 
class ChangeLog:
    """An Mbed TLS changelog.

    A changelog file consists of some header text followed by one or
    more version sections. The version sections are in reverse
    chronological order. Each version section consists of a title and a body.

    The body of a version section consists of zero or more category
    subsections. Each category subsection consists of a title and a body.

    A changelog entry file has the same format as the body of a version section.

    A `ChangelogFormat` object defines the concrete syntax of the changelog.
    Entry files must have the same format as the changelog file.
    """

    # Only accept dotted version numbers (e.g. "3.1", not "3").
    # Refuse ".x" in a version number where x is a letter: this indicates
    # a version that is not yet released. Something like "3.1a" is accepted.
    _version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
    _incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')

    def section_is_released_version(self, title):
        """Whether this section is for a released version.

        True if the given level-2 section title indicates that this section
        contains released changes, otherwise False.
        """
        # Assume that a released version has a numerical version number
        # that follows a particular pattern. These criteria may be revised
        # as needed in future versions of this script.
        version_number = self._version_number_re.search(title)
        if version_number is None:
            return False
        incomplete = self._incomplete_version_number_re.search(
            version_number.group(0))
        return incomplete is None

    def unreleased_version_title(self):
        """The title to use if creating a new section for an unreleased version."""
        # pylint: disable=no-self-use; this method may be overridden
        return b'Unreleased changes'

    def add_categories_from_text(self, filename, line_offset,
                                 text, allow_unknown_category):
        """Parse a version section or entry file and accumulate its content.

        ``filename`` is only used in error messages. ``line_offset`` is the
        line number, in ``filename``, of the first line of ``text``; it is
        added to the position of a category title within ``text`` when
        reporting an error.

        Each category body found in ``text`` is appended to the body already
        stored under the same title in ``self.categories``. A title that is
        not yet in ``self.categories`` raises `InputFormatError` unless
        ``allow_unknown_category`` is true, in which case a new category is
        created after the existing ones.
        """
        for category in self.format.split_categories(text):
            if category.name not in self.categories:
                if not allow_unknown_category:
                    raise InputFormatError(filename,
                                           line_offset + category.title_line,
                                           'Unknown category: "{}"',
                                           category.name.decode('utf8'))
                # Start from an empty body: `+=` on a missing key would
                # raise KeyError.
                self.categories[category.name] = b''
            self.categories[category.name] += category.body

    def __init__(self, input_stream, changelog_format):
        """Create a changelog object.

        Populate the changelog object from the content of the file
        input_stream. The stream is read with ``read()`` and its ``name``
        attribute is used in error messages. ``changelog_format`` supplies
        the parsing and formatting methods.
        """
        self.format = changelog_format
        whole_file = input_stream.read()
        (self.header,
         self.top_version_title, top_version_body,
         self.trailer) = self.format.extract_top_version(whole_file)
        # Split the top version section into categories. Standard
        # categories come first, in their canonical order.
        self.categories = OrderedDict()
        for category in STANDARD_CATEGORIES:
            self.categories[category] = b''
        # Line number (counting from 1) of the first line of the body:
        # one more than the number of lines that precede it.
        offset = (self.header + self.top_version_title).count(b'\n') + 1
        self.add_categories_from_text(input_stream.name, offset,
                                      top_version_body, True)

    def add_file(self, input_stream):
        """Add changelog entries from a file.

        Unlike the changelog itself, an entry file may only use categories
        that are already known; otherwise `InputFormatError` is raised.
        """
        # The text starts at line 1 of the entry file.
        self.add_categories_from_text(input_stream.name, 1,
                                      input_stream.read(), False)

    def write(self, filename):
        """Write the changelog to the specified file.

        Categories with an empty body are omitted.
        """
        with open(filename, 'wb') as out:
            out.write(self.header)
            out.write(self.top_version_title)
            for title, body in self.categories.items():
                if not body:
                    continue
                out.write(self.format.format_category(title, body))
            out.write(self.trailer)
|  |  | 
|  |  | 
@functools.total_ordering
class EntryFileSortKey:
    """Ordering on changelog entry files, such that older < newer.

    * Merged entry files are sorted according to their merge date (date of
      the merge commit that brought the commit that created the file into
      the target branch).
    * Committed but unmerged entry files are sorted according to the date
      of the commit that adds them.
    * Uncommitted entry files are sorted according to their modification time.

    This class assumes that the file is in a git working directory with
    the target branch checked out.
    """

    # Categories of files. A lower number is considered older.
    MERGED = 0
    COMMITTED = 1
    LOCAL = 2

    @staticmethod
    def creation_hash(filename):
        """Return the id of the git commit that created the given file.

        Return None if the file was never checked into git.
        """
        log_output = subprocess.check_output(
            ['git', 'log', '--format=%H', '--follow', '--', filename])
        # `git log` lists the newest commit first, so the last line of the
        # output is the oldest commit, i.e. the one that created the file.
        # No match at all means that the output is empty, i.e. that the
        # file was never checked in.
        last_line = re.search(b'(.+)$', log_output)
        return last_line.group(0) if last_line else None

    @staticmethod
    def list_merges(some_hash, target, *options):
        """List merge commits from some_hash to target.

        Pass options to git to select which commits are included.
        """
        revision_range = b'..'.join([some_hash, target])
        command = ['git', 'rev-list', '--merges', *options, revision_range]
        output = subprocess.check_output(command)
        return output.rstrip(b'\n').split(b'\n')

    @classmethod
    def merge_hash(cls, some_hash):
        """Return the id of the commit that merged the given commit.

        Return None if the given commit was never merged.
        """
        target = b'HEAD'
        # List the merges from some_hash to the target in two ways.
        # The ancestry set holds the ones that are both descendants of
        # some_hash and ancestors of the target.
        on_ancestry_path = frozenset(
            cls.list_merges(some_hash, target, '--ancestry-path'))
        # The first-parent list only contains merges that are directly
        # on the target branch, in reverse order (oldest first).
        on_target_branch = cls.list_merges(some_hash, target,
                                           '--first-parent', '--reverse')
        # The oldest merge commit that's both on the direct path and
        # directly on the target branch is the place where some_hash
        # was merged on the target branch. See
        # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
        return next((commit for commit in on_target_branch
                     if commit in on_ancestry_path),
                    None)

    @staticmethod
    def commit_timestamp(commit_id):
        """Return the timestamp of the given commit."""
        command = ['git', 'show', '-s', '--format=%ct', commit_id]
        seconds = int(subprocess.check_output(command))
        return datetime.datetime.utcfromtimestamp(seconds)

    @staticmethod
    def file_timestamp(filename):
        """Return the modification timestamp of the given file."""
        return datetime.datetime.fromtimestamp(os.stat(filename).st_mtime)

    def __init__(self, filename):
        """Determine position of the file in the changelog entry order.

        The resulting object can be used with comparison operators, with
        `sort` and `sorted`, etc. Older entries are sorted before newer
        entries.
        """
        self.filename = filename
        created_in = self.creation_hash(filename)
        if not created_in:
            # Not known to git: fall back to the file's modification time.
            self.category = self.LOCAL
            self.datetime = self.file_timestamp(filename)
            return
        merged_in = self.merge_hash(created_in)
        if merged_in:
            self.category = self.MERGED
            self.datetime = self.commit_timestamp(merged_in)
        else:
            self.category = self.COMMITTED
            self.datetime = self.commit_timestamp(created_in)

    def sort_key(self):
        """Return a concrete sort key for this entry file sort key object.

        ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
        """
        return (self.category, self.datetime, self.filename)

    def __eq__(self, other):
        return self.sort_key() == other.sort_key()

    def __lt__(self, other):
        return self.sort_key() < other.sort_key()
|  |  | 
|  |  | 
def check_output(generated_output_file, main_input_file, merged_files):
    """Make sanity checks on the generated output.

    The intent of these sanity checks is to have reasonable confidence
    that no content has been lost.

    The sanity check is that every line that is present in an input file
    is also present in an output file. This is not perfect but good enough
    for now.

    ``generated_output_file`` and ``main_input_file`` are file names and
    ``merged_files`` is an iterable of file names. All files are read in
    binary mode and closed before this function returns.

    Raise `LostContent` for the first input line that is missing from the
    output.
    """
    with open(generated_output_file, 'rb') as output:
        generated_output = set(output)
    # The main input is reported under a fixed label rather than its name.
    inputs = [(main_input_file, 'original file')]
    inputs.extend((merged_file, merged_file) for merged_file in merged_files)
    for path, label in inputs:
        with open(path, 'rb') as input_file:
            for line in input_file:
                if line not in generated_output:
                    raise LostContent(label, line)
|  |  | 
def finish_output(changelog, output_file, input_file, merged_files):
    """Write the changelog to the output file.

    The input file and the list of merged files are used only for sanity
    checks on the output.

    If ``output_file`` exists and is not a regular file, the changelog is
    written to it directly. Otherwise it is written to
    ``output_file + '.tmp'``, checked, and then moved over ``output_file``.
    If the check raises, ``output_file`` is left untouched.
    """
    if os.path.exists(output_file) and not os.path.isfile(output_file):
        # The output is a non-regular file (e.g. pipe). Write to it directly.
        output_temp = output_file
    else:
        # The output is a regular file. Write to a temporary file,
        # then move it into place atomically.
        output_temp = output_file + '.tmp'
    changelog.write(output_temp)
    check_output(output_temp, input_file, merged_files)
    if output_temp != output_file:
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename fails on Windows if the destination exists
        # (which is the normal case when updating the changelog in place).
        os.replace(output_temp, output_file)
|  |  | 
def remove_merged_entries(files_to_remove):
    """Delete each of the given files from the file system."""
    for entry_path in files_to_remove:
        os.remove(entry_path)
|  |  | 
def list_files_to_merge(options):
    """Return the entry files to merge, oldest first.

    The candidates are the ``*.txt`` files directly inside ``options.dir``.
    "Oldest" is defined by `EntryFileSortKey`.
    """
    pattern = os.path.join(options.dir, '*.txt')
    return sorted(glob.glob(pattern), key=EntryFileSortKey)
|  |  | 
def merge_entries(options):
    """Merge changelog entries into the changelog file.

    Read the changelog file from options.input.
    Read entries to merge from the directory options.dir.
    Write the new changelog to options.output.
    Remove the merged entries if options.keep_entries is false.

    If there is no entry to merge, print a message on standard error and
    return without writing anything.
    """
    with open(options.input, 'rb') as changelog_stream:
        changelog = ChangeLog(changelog_stream, TextChangelogFormat)
    pending = list_files_to_merge(options)
    if not pending:
        sys.stderr.write('There are no pending changelog entries.\n')
        return
    for entry_path in pending:
        with open(entry_path, 'rb') as entry_stream:
            changelog.add_file(entry_stream)
    finish_output(changelog, options.output, options.input, pending)
    if options.keep_entries:
        return
    remove_merged_entries(pending)
|  |  | 
def show_file_timestamps(options):
    """Print each file to merge with its sort category and timestamp.

    This is only intended for debugging purposes.
    """
    for entry_path in list_files_to_merge(options):
        key = EntryFileSortKey(entry_path)
        print(key.category, key.datetime, entry_path)
|  |  | 
def set_defaults(options):
    """Fill in the options that were not given on the command line.

    * ``output`` defaults to ``input`` (update the changelog in place).
    * ``keep_entries`` defaults to true exactly when an output file was
      given explicitly.
    """
    output_was_given = getattr(options, 'output', None) is not None
    if not output_was_given:
        options.output = options.input
    if getattr(options, 'keep_entries', None) is None:
        options.keep_entries = output_was_given
|  |  | 
def main():
    """Command line entry point.

    Parse the command line, fill in defaults, then either list the entry
    files (``--list-files-only``) or merge them into the changelog.
    """
    arg_parser = argparse.ArgumentParser(description=__doc__)
    arg_parser.add_argument('--dir', '-d', metavar='DIR',
                            default='ChangeLog.d',
                            help='Directory to read entries from'
                                 ' (default: ChangeLog.d)')
    arg_parser.add_argument('--input', '-i', metavar='FILE',
                            default='ChangeLog',
                            help='Existing changelog file to read from and augment'
                                 ' (default: ChangeLog)')
    # --keep-entries and --no-keep-entries share one destination whose
    # default is None, so that set_defaults() can tell "not specified"
    # apart from an explicit choice.
    arg_parser.add_argument('--keep-entries',
                            action='store_true', dest='keep_entries', default=None,
                            help='Keep the files containing entries'
                                 ' (default: remove them if --output/-o is not specified)')
    arg_parser.add_argument('--no-keep-entries',
                            action='store_false', dest='keep_entries',
                            help='Remove the files containing entries after they are merged'
                                 ' (default: remove them if --output/-o is not specified)')
    arg_parser.add_argument('--output', '-o', metavar='FILE',
                            help='Output changelog file'
                                 ' (default: overwrite the input)')
    arg_parser.add_argument('--list-files-only',
                            action='store_true',
                            help=('Only list the files that would be processed '
                                  '(with some debugging information)'))
    args = arg_parser.parse_args()
    set_defaults(args)
    if args.list_files_only:
        show_file_timestamps(args)
    else:
        merge_entries(args)
|  |  | 
# Run the command line interface only when executed as a script,
# not when imported as a module.
if __name__ == '__main__':
    main()