| Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 1 | #!/usr/bin/env python3 | 
|  | 2 |  | 
| Gilles Peskine | 42f384c | 2020-03-27 09:23:38 +0100 | [diff] [blame] | 3 | """Assemble Mbed TLS change log entries into the change log file. | 
| Gilles Peskine | a260796 | 2020-01-28 19:58:17 +0100 | [diff] [blame] | 4 |  | 
|  | 5 | Add changelog entries to the first level-2 section. | 
|  | 6 | Create a new level-2 section for unreleased changes if needed. | 
|  | 7 | Remove the input files unless --keep-entries is specified. | 
| Gilles Peskine | 28af958 | 2020-03-26 22:39:18 +0100 | [diff] [blame] | 8 |  | 
|  | 9 | In each level-3 section, entries are sorted in chronological order | 
|  | 10 | (oldest first). From oldest to newest: | 
|  | 11 | * Merged entry files are sorted according to their merge date (date of | 
|  | 12 | the merge commit that brought the commit that created the file into | 
|  | 13 | the target branch). | 
|  | 14 | * Committed but unmerged entry files are sorted according to the date | 
|  | 15 | of the commit that adds them. | 
|  | 16 | * Uncommitted entry files are sorted according to their modification time. | 
|  | 17 |  | 
|  | 18 | You must run this program from within a git working directory. | 
| Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 19 | """ | 
|  | 20 |  | 
| Bence Szépkúti | 1e14827 | 2020-08-07 13:07:28 +0200 | [diff] [blame] | 21 | # Copyright The Mbed TLS Contributors | 
| Dave Rodgman | 16799db | 2023-11-02 19:47:20 +0000 | [diff] [blame] | 22 | # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later | 
| Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 23 |  | 
|  | 24 | import argparse | 
| Gilles Peskine | 6e97c43 | 2020-03-27 19:05:18 +0100 | [diff] [blame] | 25 | from collections import OrderedDict, namedtuple | 
| Gilles Peskine | 8f46bbf | 2020-03-25 16:34:43 +0100 | [diff] [blame] | 26 | import datetime | 
|  | 27 | import functools | 
| Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 28 | import glob | 
|  | 29 | import os | 
|  | 30 | import re | 
| Gilles Peskine | 8f46bbf | 2020-03-25 16:34:43 +0100 | [diff] [blame] | 31 | import subprocess | 
| Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 32 | import sys | 
|  | 33 |  | 
class InputFormatError(Exception):
    """Error in the content of an input file, located at a given line.

    The exception text has the form ``FILENAME:LINE: MESSAGE`` where
    MESSAGE is ``message`` expanded with `str.format` using the remaining
    positional and keyword arguments.
    """

    def __init__(self, filename, line_number, message, *args, **kwargs):
        details = message.format(*args, **kwargs)
        location = '{}:{}'.format(filename, line_number)
        super().__init__(location + ': ' + details)
| Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 39 |  | 
class CategoryParseError(Exception):
    """Failure to split a text into categories.

    Attributes:
        line_offset: line position of the problem, relative to the start
            of the text being parsed.
        error_message: human-readable description of the problem.
    """

    def __init__(self, line_offset, error_message):
        super().__init__('{}: {}'.format(line_offset, error_message))
        self.error_message = error_message
        self.line_offset = line_offset
|  | 45 |  | 
class LostContent(Exception):
    """A line of an input file is missing from the generated output."""

    def __init__(self, filename, line):
        super().__init__('Lost content from {}: "{}"'.format(filename, line))
|  | 50 |  | 
class FilePathError(Exception):
    """Some changelog entry files lack the required ``.txt`` extension."""

    def __init__(self, filenames):
        listing = ", ".join(filenames)
        super().__init__(
            'Changelog filenames do not end with .txt: {}'.format(listing))
|  | 55 |  | 
# The category names we use in the changelog.
# If you edit this, update ChangeLog.d/README.md.
STANDARD_CATEGORIES = (
    'API changes',
    'Default behavior changes',
    'Requirement changes',
    'New deprecations',
    'Removals',
    'Features',
    'Security',
    'Bugfix',
    'Changes',
)

# The maximum line length for an entry
MAX_LINE_LENGTH = 80

# One category subsection:
#   name, title_line -- title text and line number of the title
#   body, body_line  -- body text and starting line number of the body
CategoryContent = namedtuple(
    'CategoryContent',
    'name title_line body body_line',
)
|  | 77 |  | 
class ChangelogFormat:
    """Virtual class documenting how to write a changelog format class.

    Every method here raises NotImplementedError; a concrete format class
    overrides all of them.
    """

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """Isolate the top version section of a changelog.

        When the top version is already released, a new top version
        section for an unreleased version is created instead.

        The result is a tuple
        ``(header, top_version_title, top_version_body, trailer)``.
        The "top version" is the existing top section if it is for
        unreleased changes, and a freshly created section otherwise.
        Concatenating the four pieces (after modifying top_version_body)
        yields the assembled changelog.
        """
        raise NotImplementedError

    @classmethod
    def version_title_text(cls, version_title):
        """Return the text of a formatted version section title."""
        raise NotImplementedError

    @classmethod
    def split_categories(cls, version_body):
        """Split the body of a version section into its categories.

        Return a list of `CategoryContent`, where each name is the
        category title without any formatting.
        """
        raise NotImplementedError

    @classmethod
    def format_category(cls, title, body):
        """Build the text of a category section from its title and body."""
        raise NotImplementedError
|  | 114 |  | 
class TextChangelogFormat(ChangelogFormat):
    """The traditional Mbed TLS changelog format."""

    # Title used when a new section for unreleased changes must be created.
    _unreleased_version_text = '= Mbed TLS x.x.x branch released xxxx-xx-xx'

    @classmethod
    def is_released_version(cls, title):
        """Return True unless the title contains an incomplete release date.

        A date is incomplete when it still contains an ``x`` placeholder
        in its last position, as in ``xxxx-xx-xx``.
        """
        # Look for an incomplete release date
        return not re.search(r'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title)

    # Group 1: the title line (starting with '=') and following blank lines.
    # Group 2: the body, up to the next line starting with '=' or the end.
    _top_version_re = re.compile(r'(?:\A|\n)(=[^\n]*\n+)(.*?\n)(?:=|$)',
                                 re.DOTALL)

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """A version section starts with a line starting with '='.

        Return ``(header, top_version_title, top_version_body, trailer)``.
        If the first version section is already released, the returned
        title is a new "unreleased" title, the body is empty and the
        trailer starts at the existing first section.
        """
        # NOTE: if the content has no version section, the search returns
        # None and the attribute accesses below fail.
        m = cls._top_version_re.search(changelog_file_content)
        top_version_start = m.start(1)
        top_version_end = m.end(2)
        top_version_title = m.group(1)
        top_version_body = m.group(2)
        if cls.is_released_version(top_version_title):
            # Insert a brand new section in front of the released one.
            top_version_end = top_version_start
            top_version_title = cls._unreleased_version_text + '\n\n'
            top_version_body = ''
        return (changelog_file_content[:top_version_start],
                top_version_title, top_version_body,
                changelog_file_content[top_version_end:])

    @classmethod
    def version_title_text(cls, version_title):
        """Return the first line of the title, without any newline."""
        # Drop everything from the first newline onwards. The subject string
        # is the third argument of re.sub and flags must be passed by
        # keyword (the fourth positional argument is the count).
        return re.sub(r'\n.*', '', version_title, flags=re.DOTALL)

    # A category title: a line starting with a word character, followed
    # by one or more newlines.
    _category_title_re = re.compile(r'(^\w.*)\n+', re.MULTILINE)

    @classmethod
    def split_categories(cls, version_body):
        """A category title is a line with the title in column 0.

        Return a list of `CategoryContent`. Line numbers in the result
        count the newlines preceding the position in ``version_body``.

        Raise CategoryParseError if the body is non-empty and does not
        start with a category title.
        """
        if not version_body:
            return []
        title_matches = list(cls._category_title_re.finditer(version_body))
        if not title_matches or title_matches[0].start() != 0:
            # There is junk before the first category.
            raise CategoryParseError(0, 'Junk found where category expected')
        title_starts = [m.start(1) for m in title_matches]
        body_starts = [m.end(0) for m in title_matches]
        # Each body runs until the next title, the last one until the end.
        body_ends = title_starts[1:] + [len(version_body)]
        # Normalize each body to end with exactly one newline.
        bodies = [version_body[body_start:body_end].rstrip('\n') + '\n'
                  for (body_start, body_end) in zip(body_starts, body_ends)]
        title_lines = [version_body[:pos].count('\n') for pos in title_starts]
        body_lines = [version_body[:pos].count('\n') for pos in body_starts]
        return [CategoryContent(title_match.group(1), title_line,
                                body, body_line)
                for title_match, title_line, body, body_line
                in zip(title_matches, title_lines, bodies, body_lines)]

    @classmethod
    def format_category(cls, title, body):
        """Return the title line followed by the body and a blank line."""
        # `split_categories` ensures that each body ends with a newline.
        # Make sure that there is additionally a blank line between categories.
        if not body.endswith('\n\n'):
            body += '\n'
        return title + '\n' + body
| Gilles Peskine | 6e97c43 | 2020-03-27 19:05:18 +0100 | [diff] [blame] | 175 |  | 
class ChangeLog:
    """An Mbed TLS changelog.

    A changelog file consists of some header text followed by one or
    more version sections. The version sections are in reverse
    chronological order. Each version section consists of a title and a body.

    The body of a version section consists of zero or more category
    subsections. Each category subsection consists of a title and a body.

    A changelog entry file has the same format as the body of a version section.

    A `ChangelogFormat` object defines the concrete syntax of the changelog.
    Entry files must have the same format as the changelog file.
    """

    # Only accept dotted version numbers (e.g. "3.1", not "3").
    # Refuse ".x" in a version number where x is a letter: this indicates
    # a version that is not yet released. Something like "3.1a" is accepted.
    _version_number_re = re.compile(r'[0-9]+\.[0-9A-Za-z.]+')
    _incomplete_version_number_re = re.compile(r'.*\.[A-Za-z]')
    # A line consisting solely of a URL (exempt from the length limit).
    _only_url_re = re.compile(r'^\s*\w+://\S+\s*$')
    # A line containing a URL somewhere.
    _has_url_re = re.compile(r'.*://.*')

    def add_categories_from_text(self, filename, line_offset,
                                 text, allow_unknown_category):
        """Parse a version section or entry file.

        Append the body of each category found in ``text`` to the
        corresponding entry of ``self.categories``.

        filename: name used in error messages.
        line_offset: line number in ``filename`` of the first line of
            ``text``; used to report positions in error messages.
        text: the content to parse.
        allow_unknown_category: if false, a category that is not already
            in ``self.categories`` is an error; if true, it is added.

        Raise InputFormatError if the text cannot be split into categories,
        if a disallowed category is found, or if a body line that is not
        just a URL is longer than MAX_LINE_LENGTH.
        """
        try:
            categories = self.format.split_categories(text)
        except CategoryParseError as e:
            raise InputFormatError(filename, line_offset + e.line_offset,
                                   e.error_message) from e
        for category in categories:
            if not allow_unknown_category and \
               category.name not in self.categories:
                raise InputFormatError(filename,
                                       line_offset + category.title_line,
                                       'Unknown category: "{}"',
                                       category.name)

            body_split = category.body.splitlines()

            for index, line in enumerate(body_split):
                if not self._only_url_re.match(line) and \
                   len(line) > MAX_LINE_LENGTH:
                    long_url_msg = '. URL exceeding length limit must be alone in its line.' \
                        if self._has_url_re.match(line) else ""
                    # body_line and index are both relative to the start of
                    # the text, so line_offset must be added, as is done for
                    # title_line above.
                    raise InputFormatError(filename,
                                           line_offset + category.body_line + index,
                                           'Line is longer than allowed: '
                                           'Length {} (Max {}){}',
                                           len(line), MAX_LINE_LENGTH,
                                           long_url_msg)

            # Use get() so that a permitted unknown category creates a new
            # entry instead of raising KeyError.
            self.categories[category.name] = \
                self.categories.get(category.name, '') + category.body

    def __init__(self, input_stream, changelog_format):
        """Create a changelog object.

        Populate the changelog object from the content of the file
        input_stream.

        input_stream: a readable text stream with a ``name`` attribute.
        changelog_format: a `ChangelogFormat` class describing the syntax.
        """
        self.format = changelog_format
        whole_file = input_stream.read()
        (self.header,
         self.top_version_title, top_version_body,
         self.trailer) = self.format.extract_top_version(whole_file)
        # Split the top version section into categories.
        self.categories = OrderedDict()
        for category in STANDARD_CATEGORIES:
            self.categories[category] = ''
        # Line number of the first line of the top version body.
        offset = (self.header + self.top_version_title).count('\n') + 1
        self.add_categories_from_text(input_stream.name, offset,
                                      top_version_body, True)

    def add_file(self, input_stream):
        """Add changelog entries from a file.

        Only categories already present in the changelog are accepted.
        """
        self.add_categories_from_text(input_stream.name, 1,
                                      input_stream.read(), False)

    def write(self, filename):
        """Write the changelog to the specified file.

        Categories with an empty body are omitted.
        """
        with open(filename, 'w', encoding='utf-8') as out:
            out.write(self.header)
            out.write(self.top_version_title)
            for title, body in self.categories.items():
                if not body:
                    continue
                out.write(self.format.format_category(title, body))
            out.write(self.trailer)
| Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 268 |  | 
| Gilles Peskine | 8f46bbf | 2020-03-25 16:34:43 +0100 | [diff] [blame] | 269 |  | 
@functools.total_ordering
class EntryFileSortKey:
    """This class defines an ordering on changelog entry files: older < newer.

    * Merged entry files are sorted according to their merge date (date of
      the merge commit that brought the commit that created the file into
      the target branch).
    * Committed but unmerged entry files are sorted according to the date
      of the commit that adds them.
    * Uncommitted entry files are sorted according to their modification time.

    This class assumes that the file is in a git working directory with
    the target branch checked out.
    """

    # Categories of files. A lower number is considered older.
    MERGED = 0
    COMMITTED = 1
    LOCAL = 2

    @staticmethod
    def creation_hash(filename):
        """Return the git commit id at which the given file was created.

        Return None if the file was never checked into git.
        """
        hashes = subprocess.check_output(['git', 'log', '--format=%H',
                                          '--follow',
                                          '--', filename])
        # Without re.MULTILINE, '$' only matches at the end of the output
        # (or just before a final newline), so this selects the last line.
        m = re.search('(.+)$', hashes.decode('ascii'))
        if not m:
            # The git output is empty. This means that the file was
            # never checked in.
            return None
        # The last commit in the log is the oldest one, which is when the
        # file was created.
        return m.group(0)

    @staticmethod
    def list_merges(some_hash, target, *options):
        """List merge commits from some_hash to target.

        Pass options to git to select which commits are included.

        Return a list of commit ids, empty if git prints nothing.
        """
        text = subprocess.check_output(['git', 'rev-list',
                                        '--merges', *options,
                                        '..'.join([some_hash, target])])
        # splitlines() yields [] for empty output, whereas
        # rstrip('\n').split('\n') would yield [''].
        return text.decode('ascii').splitlines()

    @classmethod
    def merge_hash(cls, some_hash):
        """Return the git commit id at which the given commit was merged.

        Return None if the given commit was never merged.
        """
        target = 'HEAD'
        # List the merges from some_hash to the target in two ways.
        # The ancestry list is the ones that are both descendants of
        # some_hash and ancestors of the target.
        ancestry = frozenset(cls.list_merges(some_hash, target,
                                             '--ancestry-path'))
        # The first_parents list only contains merges that are directly
        # on the target branch. We want it in reverse order (oldest first).
        first_parents = cls.list_merges(some_hash, target,
                                        '--first-parent', '--reverse')
        # Look for the oldest merge commit that's both on the direct path
        # and directly on the target branch. That's the place where some_hash
        # was merged on the target branch. See
        # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
        for commit in first_parents:
            if commit in ancestry:
                return commit
        return None

    @staticmethod
    def commit_timestamp(commit_id):
        """Return the timestamp of the given commit.

        The result is a naive `datetime.datetime` expressed in UTC.
        """
        text = subprocess.check_output(['git', 'show', '-s',
                                        '--format=%ct',
                                        commit_id])
        # Same value as the deprecated datetime.utcfromtimestamp().
        aware = datetime.datetime.fromtimestamp(int(text),
                                                datetime.timezone.utc)
        return aware.replace(tzinfo=None)

    @staticmethod
    def file_timestamp(filename):
        """Return the modification timestamp of the given file.

        The result is a naive `datetime.datetime` in local time.
        """
        mtime = os.stat(filename).st_mtime
        return datetime.datetime.fromtimestamp(mtime)

    def __init__(self, filename):
        """Determine position of the file in the changelog entry order.

        This constructor returns an object that can be used with comparison
        operators, with `sort` and `sorted`, etc. Older entries are sorted
        before newer entries.
        """
        self.filename = filename
        creation_hash = self.creation_hash(filename)
        if not creation_hash:
            # Never committed: fall back to the file's modification time.
            self.category = self.LOCAL
            self.datetime = self.file_timestamp(filename)
            return
        merge_hash = self.merge_hash(creation_hash)
        if not merge_hash:
            # Committed but not merged: use the date of the creating commit.
            self.category = self.COMMITTED
            self.datetime = self.commit_timestamp(creation_hash)
            return
        self.category = self.MERGED
        self.datetime = self.commit_timestamp(merge_hash)

    def sort_key(self):
        """Return a concrete sort key for this entry file sort key object.

        ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
        """
        return (self.category, self.datetime, self.filename)

    def __eq__(self, other):
        if not isinstance(other, EntryFileSortKey):
            # Let Python try the reflected operation or fall back to identity.
            return NotImplemented
        return self.sort_key() == other.sort_key()

    def __lt__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() < other.sort_key()
|  | 391 |  | 
|  | 392 |  | 
def check_output(generated_output_file, main_input_file, merged_files):
    """Make sanity checks on the generated output.

    The intent of these sanity checks is to have reasonable confidence
    that no content has been lost.

    The sanity check is that every line that is present in an input file
    is also present in an output file. This is not perfect but good enough
    for now.

    generated_output_file: path of the file that was written.
    main_input_file: path of the original changelog file.
    merged_files: iterable of paths of the entry files that were merged.

    Raise LostContent for the first input line missing from the output.
    """
    with open(generated_output_file, 'r', encoding='utf-8') as fd:
        generated_output = set(fd)
    # Pair each input path with the name used to report it in errors.
    sources = [('original file', main_input_file)]
    sources.extend((merged_file, merged_file) for merged_file in merged_files)
    for label, path in sources:
        # Open each input in a context manager so that it is closed
        # promptly, including when LostContent is raised.
        with open(path, 'r', encoding='utf-8') as fd:
            for line in fd:
                if line not in generated_output:
                    raise LostContent(label, line)
| Gilles Peskine | 2b24249 | 2020-01-22 15:41:50 +0100 | [diff] [blame] | 412 |  | 
def finish_output(changelog, output_file, input_file, merged_files):
    """Write the changelog to the output file.

    The input file and the list of merged files are used only for sanity
    checks on the output.

    changelog: object whose ``write(filename)`` method writes the changelog.
    output_file: path of the destination.
    input_file: path of the original changelog file.
    merged_files: paths of the entry files that were merged.

    If the sanity check fails, the exception from `check_output` propagates
    and a regular output file is left untouched.
    """
    if os.path.exists(output_file) and not os.path.isfile(output_file):
        # The output is a non-regular file (e.g. pipe). Write to it directly.
        output_temp = output_file
    else:
        # The output is a regular file. Write to a temporary file,
        # then move it into place atomically.
        output_temp = output_file + '.tmp'
    changelog.write(output_temp)
    check_output(output_temp, input_file, merged_files)
    if output_temp != output_file:
        # os.replace overwrites an existing destination on every platform;
        # os.rename fails on Windows when the destination already exists.
        os.replace(output_temp, output_file)
|  | 430 |  | 
def remove_merged_entries(files_to_remove):
    """Delete each of the given files."""
    for entry_path in files_to_remove:
        os.unlink(entry_path)
|  | 434 |  | 
def list_files_to_merge(options):
    """Return the entry files found in options.dir, oldest first.

    "Oldest" is defined by `EntryFileSortKey`.

    The file 00README.md is ignored. Every other file must have a .txt
    extension, otherwise FilePathError is raised.
    """
    # Ignore 00README.md
    readme = os.path.join(options.dir, "00README.md")
    candidates = [path
                  for path in glob.glob(os.path.join(options.dir, '*'))
                  if path != readme]

    # Identify files without the required .txt extension
    missing_extension = [path for path in candidates
                         if not path.endswith(".txt")]
    if missing_extension:
        raise FilePathError(missing_extension)

    return sorted(candidates, key=EntryFileSortKey)
|  | 456 |  | 
def merge_entries(options):
    """Merge changelog entries into the changelog file.

    Steps:
    * Read the changelog file from options.input.
    * List the entries in the directory options.dir, checking that they
      all have a .txt extension.
    * Add the entries to the changelog and write it to options.output.
    * Remove the merged entries unless options.keep_entries is true.

    If there are no entries, print a message on stderr and do nothing else.
    """
    with open(options.input, 'r', encoding='utf-8') as changelog_stream:
        changelog = ChangeLog(changelog_stream, TextChangelogFormat)
    entry_paths = list_files_to_merge(options)
    if not entry_paths:
        sys.stderr.write('There are no pending changelog entries.\n')
        return
    for entry_path in entry_paths:
        with open(entry_path, 'r', encoding='utf-8') as entry_stream:
            changelog.add_file(entry_stream)
    finish_output(changelog, options.output, options.input, entry_paths)
    if options.keep_entries:
        return
    remove_merged_entries(entry_paths)
| Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 478 |  | 
def show_file_timestamps(options):
    """Print each file to merge with its sort category and timestamp.

    One line is printed per file, in the order returned by
    list_files_to_merge(). This is a debugging aid only.
    """
    for entry_path in list_files_to_merge(options):
        sort_key = EntryFileSortKey(entry_path)
        print(sort_key.category, sort_key.datetime, entry_path)
|  | 488 |  | 
def set_defaults(options):
    """Fill in the options that were left unspecified.

    * If no output file was given, the input file is overwritten.
    * If keep_entries was not given, entry files are kept only when an
      explicit output file was requested.
    """
    # Capture this before options.output is defaulted: the keep_entries
    # default depends on whether the caller asked for a separate output.
    output_was_given = getattr(options, 'output', None) is not None
    if not output_was_given:
        options.output = options.input
    if getattr(options, 'keep_entries', None) is None:
        options.keep_entries = output_was_given
|  | 496 |  | 
def main():
    """Command line entry point.

    Parse the command line, fill in defaults, then either list the
    files that would be processed (--list-files-only) or merge the
    changelog entries.
    """
    # Both --keep-entries and --no-keep-entries document the same default.
    removal_default = ' (default: remove them if --output/-o is not specified)'

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '--dir', '-d',
        metavar='DIR',
        default='ChangeLog.d',
        help='Directory to read entries from (default: ChangeLog.d)',
    )
    parser.add_argument(
        '--input', '-i',
        metavar='FILE',
        default='ChangeLog',
        help=('Existing changelog file to read from and augment'
              ' (default: ChangeLog)'),
    )
    # default=None lets set_defaults() tell "not specified" apart from an
    # explicit --keep-entries / --no-keep-entries.
    parser.add_argument(
        '--keep-entries',
        action='store_true',
        dest='keep_entries',
        default=None,
        help='Keep the files containing entries' + removal_default,
    )
    parser.add_argument(
        '--no-keep-entries',
        action='store_false',
        dest='keep_entries',
        help=('Remove the files containing entries after they are merged'
              + removal_default),
    )
    parser.add_argument(
        '--output', '-o',
        metavar='FILE',
        help='Output changelog file (default: overwrite the input)',
    )
    parser.add_argument(
        '--list-files-only',
        action='store_true',
        help=('Only list the files that would be processed'
              ' (with some debugging information)'),
    )

    options = parser.parse_args()
    set_defaults(options)
    if options.list_files_only:
        show_file_timestamps(options)
    else:
        merge_entries(options)
|  | 529 |  | 
# Run the command line interface only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main()