]>
Commit | Line | Data |
---|---|---|
d400e261 SS |
1 | from __future__ import annotations |
2 | ||
392389b7 | 3 | # Allow direct execution |
4 | import os | |
5 | import sys | |
6 | ||
7 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
8 | ||
d400e261 SS |
9 | import enum |
10 | import itertools | |
11 | import json | |
12 | import logging | |
13 | import re | |
d400e261 SS |
14 | from collections import defaultdict |
15 | from dataclasses import dataclass | |
16 | from functools import lru_cache | |
17 | from pathlib import Path | |
18 | ||
392389b7 | 19 | from devscripts.utils import read_file, run_process, write_file |
20 | ||
d400e261 SS |
# Root URL used when building links to commits, issues and author profiles
BASE_URL = 'https://github.com'
# Directory that contains this script
LOCATION_PATH = Path(__file__).parent
# Number of leading characters kept when a commit hash is abbreviated
HASH_LENGTH = 7

logger = logging.getLogger(__name__)
26 | ||
27 | ||
class CommitGroup(enum.Enum):
    """Changelog sections; each member's value is the section's display name."""

    PRIORITY = 'Important'
    CORE = 'Core'
    EXTRACTOR = 'Extractor'
    DOWNLOADER = 'Downloader'
    POSTPROCESSOR = 'Postprocessor'
    NETWORKING = 'Networking'
    MISC = 'Misc.'

    @classmethod
    @lru_cache
    def subgroup_lookup(cls):
        """Return a mapping from bare subgroup name to the group that owns it."""
        return {
            name: group
            for group, names in {
                cls.MISC: {
                    'build',
                    'ci',
                    'cleanup',
                    'devscripts',
                    'docs',
                    'test',
                },
                cls.NETWORKING: {
                    'rh',
                },
            }.items()
            for name in names
        }

    @classmethod
    @lru_cache
    def group_lookup(cls):
        """Return a mapping from group name or alias (lowercase) to its member."""
        result = {
            'fd': cls.DOWNLOADER,
            'ie': cls.EXTRACTOR,
            'pp': cls.POSTPROCESSOR,
            'upstream': cls.CORE,
        }
        # Every member is also reachable through its own lowercased name
        result.update({item.name.lower(): item for item in iter(cls)})
        return result

    @classmethod
    def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
        """Resolve a ``group`` or ``group/subgroup`` string.

        Returns a ``(group, subgroup)`` pair. Both parts are stripped and
        lowercased before lookup. If the group part is unknown and a subgroup
        was given, ``(None, value)`` is returned with ``value`` untouched.
        If the group part is unknown and no subgroup was given, the text is
        treated as a subgroup and looked up in ``subgroup_lookup``.
        """
        group, _, subgroup = (group.strip().lower() for group in value.partition('/'))

        result = cls.group_lookup().get(group)
        if not result:
            if subgroup:
                return None, value
            # A lone name may be a known subgroup such as `cleanup` or `rh`
            subgroup = group
            result = cls.subgroup_lookup().get(subgroup)

        return result, subgroup or None
82 | ||
d400e261 SS |
83 | |
@dataclass
class Commit:
    """A single commit: full hash (if known), subject line and authors."""

    hash: str | None
    short: str
    authors: list[str]

    def __str__(self):
        """Return ``'<short>' (<abbreviated hash>) by <authors>``, omitting absent parts."""
        result = f'{self.short!r}'

        if self.hash:
            result += f' ({self.hash[:HASH_LENGTH]})'

        if self.authors:
            authors = ', '.join(self.authors)
            result += f' by {authors}'

        return result
101 | ||
102 | ||
@dataclass
class CommitInfo:
    """Parsed changelog entry for one commit."""

    details: str | None
    sub_details: tuple[str, ...]
    message: str
    issues: list[str]
    commit: Commit
    fixes: list[Commit]

    def key(self):
        """Return the sort key: lowercased details, then sub_details, then message."""
        return ((self.details or '').lower(), self.sub_details, self.message)
114 | ||
115 | ||
23c39a4b SS |
def unique(items):
    """Return the sorted truthy items, de-duplicated ignoring case and surrounding whitespace.

    When several items collapse to the same key, the last one seen is kept
    (in its original spelling).
    """
    return sorted({item.strip().lower(): item for item in items if item}.values())
118 | ||
119 | ||
d400e261 SS |
class Changelog:
    """Render grouped `CommitInfo` entries as a Markdown changelog."""

    # Matches messages that are treated as miscellaneous cleanup
    MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)
    # Groups that stay outside the collapsible <details> section
    ALWAYS_SHOWN = (CommitGroup.PRIORITY,)

    def __init__(self, groups, repo, collapsible=False):
        """Store the inputs used for rendering.

        groups:      mapping indexed by `CommitGroup`, yielding lists of `CommitInfo`
        repo:        repository path appended to `BASE_URL` to build links
        collapsible: wrap everything not in `ALWAYS_SHOWN` in a <details> element
        """
        self._groups = groups
        self._repo = repo
        self._collapsible = collapsible

    def __str__(self):
        # Tabs mark list nesting internally. Markdown needs at least two columns
        # of indentation per level for a nested list item, so expand each tab
        # to four spaces.
        return '\n'.join(self._format_groups(self._groups)).replace('\t', '    ')

    def _format_groups(self, groups):
        """Yield the rendered sections in `CommitGroup` definition order."""
        first = True
        for item in CommitGroup:
            # Open the collapsible part right before the first group that is not always shown
            if self._collapsible and item not in self.ALWAYS_SHOWN and first:
                first = False
                yield '\n<details><summary><h3>Changelog</h3></summary>\n'

            group = groups[item]
            if group:
                yield self.format_module(item.value, group)

        if self._collapsible:
            yield '\n</details>'

    def format_module(self, name, group):
        """Return one section: an optional `#### <name> changes` header plus its entries."""
        result = f'\n#### {name} changes\n' if name else '\n'
        return result + '\n'.join(self._format_group(group))

    def _format_group(self, group):
        """Yield the list lines for one section, nested by details and then sub_details."""
        sorted_group = sorted(group, key=CommitInfo.key)
        detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower())
        for _, items in detail_groups:
            items = list(items)
            details = items[0].details

            if details == 'cleanup':
                items = self._prepare_cleanup_misc_items(items)

            prefix = '-'
            if details:
                if len(items) == 1:
                    # A single entry is written inline after the details label
                    prefix = f'- **{details}**:'
                else:
                    yield f'- **{details}**'
                    prefix = '\t-'

            sub_detail_groups = itertools.groupby(items, lambda item: tuple(map(str.lower, item.sub_details)))
            for sub_details, entries in sub_detail_groups:
                if not sub_details:
                    for entry in entries:
                        yield f'{prefix} {self.format_single_change(entry)}'
                    continue

                entries = list(entries)
                sub_prefix = f'{prefix} {", ".join(entries[0].sub_details)}'
                if len(entries) == 1:
                    yield f'{sub_prefix}: {self.format_single_change(entries[0])}'
                    continue

                yield sub_prefix
                for entry in entries:
                    yield f'\t{prefix} {self.format_single_change(entry)}'

    def _prepare_cleanup_misc_items(self, items):
        """Merge miscellaneous cleanup entries into one `Miscellaneous` entry per author set.

        Entries whose message matches `MISC_RE` are grouped by their authors;
        each group becomes a single `CommitInfo` whose message is a list of
        commit links. All other entries are returned first, in their given order.
        """
        cleanup_misc_items = defaultdict(list)
        sorted_items = []
        for item in items:
            if self.MISC_RE.search(item.message):
                cleanup_misc_items[tuple(item.commit.authors)].append(item)
            else:
                sorted_items.append(item)

        for commit_infos in cleanup_misc_items.values():
            sorted_items.append(CommitInfo(
                'cleanup', ('Miscellaneous',), ', '.join(
                    self._format_message_link(None, info.commit.hash)
                    for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
                [], Commit(None, '', commit_infos[0].commit.authors), []))

        return sorted_items

    def format_single_change(self, info: CommitInfo):
        """Return the Markdown text for one entry: message, issues, authors and fixes."""
        # Only the first line is decorated; any remaining lines are appended unchanged
        message, sep, rest = info.message.partition('\n')
        if '[' not in message:
            # If the message doesn't already contain markdown links, try to add a link to the commit
            message = self._format_message_link(message, info.commit.hash)

        if info.issues:
            message = f'{message} ({self._format_issues(info.issues)})'

        if info.commit.authors:
            message = f'{message} by {self._format_authors(info.commit.authors)}'

        if info.fixes:
            fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)

            authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold)
            # Fix authors are only listed when they differ from the commit's own authors
            if authors != info.commit.authors:
                fix_message = f'{fix_message} by {self._format_authors(authors)}'

            message = f'{message} (With fixes in {fix_message})'

        return message if not sep else f'{message}{sep}{rest}'

    def _format_message_link(self, message, hash):
        """Return `message` linked to the commit, or plain `message` when there is no hash.

        When `message` is empty the abbreviated hash is used as the link text.
        At least one of `message` and `hash` must be given.
        """
        assert message or hash, 'Improperly defined commit message or override'
        message = message if message else hash[:HASH_LENGTH]
        return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message

    def _format_issues(self, issues):
        """Return comma-separated Markdown links to the given issue numbers."""
        return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)

    @staticmethod
    def _format_authors(authors):
        """Return comma-separated Markdown links to the given author names under `BASE_URL`."""
        return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors)

    @property
    def repo_url(self):
        """URL of the repository: `BASE_URL` joined with the configured repo path."""
        return f'{BASE_URL}/{self._repo}'
241 | ||
242 | ||
243 | class CommitRange: | |
244 | COMMAND = 'git' | |
245 | COMMIT_SEPARATOR = '-----' | |
246 | ||
247 | AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE) | |
248 | MESSAGE_RE = re.compile(r''' | |
23c39a4b | 249 | (?:\[(?P<prefix>[^\]]+)\]\ )? |
5ca095cb | 250 | (?:(?P<sub_details>`?[\w.-]+`?): )? |
d400e261 SS |
251 | (?P<message>.+?) |
252 | (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))? | |
253 | ''', re.VERBOSE | re.DOTALL) | |
254 | EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE) | |
62b5c94c | 255 | REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})') |
93449642 | 256 | FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert)\s+([\da-f]{40})') |
d400e261 SS |
257 | UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)') |
258 | ||
392389b7 | 259 | def __init__(self, start, end, default_author=None): |
260 | self._start, self._end = start, end | |
d400e261 SS |
261 | self._commits, self._fixes = self._get_commits_and_fixes(default_author) |
262 | self._commits_added = [] | |
263 | ||
d400e261 SS |
264 | def __iter__(self): |
265 | return iter(itertools.chain(self._commits.values(), self._commits_added)) | |
266 | ||
267 | def __len__(self): | |
268 | return len(self._commits) + len(self._commits_added) | |
269 | ||
270 | def __contains__(self, commit): | |
271 | if isinstance(commit, Commit): | |
272 | if not commit.hash: | |
273 | return False | |
274 | commit = commit.hash | |
275 | ||
276 | return commit in self._commits | |
277 | ||
d400e261 | 278 | def _get_commits_and_fixes(self, default_author): |
392389b7 | 279 | result = run_process( |
d400e261 | 280 | self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}', |
392389b7 | 281 | f'{self._start}..{self._end}' if self._start else self._end).stdout |
d400e261 | 282 | |
fa448028 | 283 | commits, reverts = {}, {} |
d400e261 SS |
284 | fixes = defaultdict(list) |
285 | lines = iter(result.splitlines(False)) | |
7accdd98 | 286 | for i, commit_hash in enumerate(lines): |
d400e261 SS |
287 | short = next(lines) |
288 | skip = short.startswith('Release ') or short == '[version] update' | |
289 | ||
290 | authors = [default_author] if default_author else [] | |
291 | for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR): | |
292 | match = self.AUTHOR_INDICATOR_RE.match(line) | |
293 | if match: | |
294 | authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold) | |
295 | ||
296 | commit = Commit(commit_hash, short, authors) | |
7accdd98 | 297 | if skip and (self._start or not i): |
d400e261 SS |
298 | logger.debug(f'Skipped commit: {commit}') |
299 | continue | |
7accdd98 | 300 | elif skip: |
301 | logger.debug(f'Reached Release commit, breaking: {commit}') | |
302 | break | |
d400e261 | 303 | |
fa448028 | 304 | revert_match = self.REVERT_RE.fullmatch(commit.short) |
305 | if revert_match: | |
306 | reverts[revert_match.group(1)] = commit | |
307 | continue | |
308 | ||
d400e261 SS |
309 | fix_match = self.FIXES_RE.search(commit.short) |
310 | if fix_match: | |
311 | commitish = fix_match.group(1) | |
312 | fixes[commitish].append(commit) | |
313 | ||
314 | commits[commit.hash] = commit | |
315 | ||
fa448028 | 316 | for commitish, revert_commit in reverts.items(): |
317 | reverted = commits.pop(commitish, None) | |
318 | if reverted: | |
30ba233d | 319 | logger.debug(f'{commitish} fully reverted {reverted}') |
fa448028 | 320 | else: |
321 | commits[revert_commit.hash] = revert_commit | |
322 | ||
d400e261 SS |
323 | for commitish, fix_commits in fixes.items(): |
324 | if commitish in commits: | |
392389b7 | 325 | hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits) |
326 | logger.info(f'Found fix(es) for {commitish[:HASH_LENGTH]}: {hashes}') | |
d400e261 SS |
327 | for fix_commit in fix_commits: |
328 | del commits[fix_commit.hash] | |
329 | else: | |
392389b7 | 330 | logger.debug(f'Commit with fixes not in changes: {commitish[:HASH_LENGTH]}') |
d400e261 SS |
331 | |
332 | return commits, fixes | |
333 | ||
334 | def apply_overrides(self, overrides): | |
335 | for override in overrides: | |
336 | when = override.get('when') | |
337 | if when and when not in self and when != self._start: | |
30ba233d | 338 | logger.debug(f'Ignored {when!r} override') |
d400e261 SS |
339 | continue |
340 | ||
ad54c913 | 341 | override_hash = override.get('hash') or when |
d400e261 SS |
342 | if override['action'] == 'add': |
343 | commit = Commit(override.get('hash'), override['short'], override.get('authors') or []) | |
344 | logger.info(f'ADD {commit}') | |
345 | self._commits_added.append(commit) | |
346 | ||
347 | elif override['action'] == 'remove': | |
348 | if override_hash in self._commits: | |
349 | logger.info(f'REMOVE {self._commits[override_hash]}') | |
350 | del self._commits[override_hash] | |
351 | ||
352 | elif override['action'] == 'change': | |
353 | if override_hash not in self._commits: | |
354 | continue | |
ad54c913 | 355 | commit = Commit(override_hash, override['short'], override.get('authors') or []) |
d400e261 SS |
356 | logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}') |
357 | self._commits[commit.hash] = commit | |
358 | ||
359 | self._commits = {key: value for key, value in reversed(self._commits.items())} | |
360 | ||
361 | def groups(self): | |
23c39a4b | 362 | group_dict = defaultdict(list) |
d400e261 | 363 | for commit in self: |
23c39a4b | 364 | upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short) |
d400e261 | 365 | if upstream_re: |
30ba233d | 366 | commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}' |
d400e261 SS |
367 | |
368 | match = self.MESSAGE_RE.fullmatch(commit.short) | |
369 | if not match: | |
370 | logger.error(f'Error parsing short commit message: {commit.short!r}') | |
371 | continue | |
372 | ||
23c39a4b SS |
373 | prefix, sub_details_alt, message, issues = match.groups() |
374 | issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else [] | |
d400e261 | 375 | |
23c39a4b SS |
376 | if prefix: |
377 | groups, details, sub_details = zip(*map(self.details_from_prefix, prefix.split(','))) | |
378 | group = next(iter(filter(None, groups)), None) | |
379 | details = ', '.join(unique(details)) | |
380 | sub_details = list(itertools.chain.from_iterable(sub_details)) | |
d400e261 SS |
381 | else: |
382 | group = CommitGroup.CORE | |
23c39a4b SS |
383 | details = None |
384 | sub_details = [] | |
d400e261 | 385 | |
23c39a4b SS |
386 | if sub_details_alt: |
387 | sub_details.append(sub_details_alt) | |
388 | sub_details = tuple(unique(sub_details)) | |
d400e261 SS |
389 | |
390 | if not group: | |
23c39a4b SS |
391 | if self.EXTRACTOR_INDICATOR_RE.search(commit.short): |
392 | group = CommitGroup.EXTRACTOR | |
f9fb3ce8 | 393 | logger.error(f'Assuming [ie] group for {commit.short!r}') |
23c39a4b | 394 | else: |
f9fb3ce8 | 395 | group = CommitGroup.CORE |
d400e261 SS |
396 | |
397 | commit_info = CommitInfo( | |
398 | details, sub_details, message.strip(), | |
399 | issues, commit, self._fixes[commit.hash]) | |
23c39a4b | 400 | |
d400e261 | 401 | logger.debug(f'Resolved {commit.short!r} to {commit_info!r}') |
23c39a4b SS |
402 | group_dict[group].append(commit_info) |
403 | ||
404 | return group_dict | |
405 | ||
406 | @staticmethod | |
407 | def details_from_prefix(prefix): | |
408 | if not prefix: | |
409 | return CommitGroup.CORE, None, () | |
d400e261 | 410 | |
30ba233d | 411 | prefix, *sub_details = prefix.split(':') |
23c39a4b | 412 | |
30ba233d SS |
413 | group, details = CommitGroup.get(prefix) |
414 | if group is CommitGroup.PRIORITY and details: | |
415 | details = details.partition('/')[2].strip() | |
23c39a4b | 416 | |
30ba233d SS |
417 | if details and '/' in details: |
418 | logger.error(f'Prefix is overnested, using first part: {prefix}') | |
419 | details = details.partition('/')[0].strip() | |
23c39a4b SS |
420 | |
421 | if details == 'common': | |
422 | details = None | |
30ba233d SS |
423 | elif group is CommitGroup.NETWORKING and details == 'rh': |
424 | details = 'Request Handler' | |
23c39a4b SS |
425 | |
426 | return group, details, sub_details | |
d400e261 SS |
427 | |
428 | ||
def get_new_contributors(contributors_path, commits):
    """Return the commit authors not yet listed in the contributors file.

    Each line of the file is cut at the first ` (`; the remaining text is split
    on `/` into names. Names are compared case-insensitively. The result keeps
    the first spelling seen for each new author and is sorted case-insensitively.
    A missing file is treated as an empty contributor list.
    """
    contributors = set()
    if contributors_path.exists():
        for line in read_file(contributors_path).splitlines():
            author, _, _ = line.strip().partition(' (')
            authors = author.split('/')
            contributors.update(map(str.casefold, authors))

    new_contributors = set()
    for commit in commits:
        for author in commit.authors:
            author_folded = author.casefold()
            if author_folded not in contributors:
                # Remember the name so later spellings of it are not reported again
                contributors.add(author_folded)
                new_contributors.add(author)

    return sorted(new_contributors, key=str.casefold)
446 | ||
447 | ||
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='Create a changelog markdown from a git commit range')
    parser.add_argument(
        'commitish', default='HEAD', nargs='?',
        help='The commitish to create the range from (default: %(default)s)')
    parser.add_argument(
        '-v', '--verbosity', action='count', default=0,
        help='increase verbosity (can be used twice)')
    parser.add_argument(
        '-c', '--contributors', action='store_true',
        help='update CONTRIBUTORS file (default: %(default)s)')
    parser.add_argument(
        '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS',
        help='path to the CONTRIBUTORS file')
    parser.add_argument(
        '--no-override', action='store_true',
        help='skip override json in commit generation (default: %(default)s)')
    parser.add_argument(
        '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json',
        help='path to the changelog_override.json file')
    parser.add_argument(
        '--default-author', default='pukkandan',
        help='the author to use without a author indicator (default: %(default)s)')
    parser.add_argument(
        '--repo', default='yt-dlp/yt-dlp',
        help='the github repository to use for the operations (default: %(default)s)')
    parser.add_argument(
        '--collapsible', action='store_true',
        help='make changelog collapsible (default: %(default)s)')
    args = parser.parse_args()

    # Each -v lowers the threshold by one level: WARNING -> INFO -> DEBUG
    logging.basicConfig(
        datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}',
        level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr)

    # No explicit start: the range ends at the first release commit found
    commits = CommitRange(None, args.commitish, args.default_author)

    if not args.no_override:
        if args.override_path.exists():
            overrides = json.loads(read_file(args.override_path))
            commits.apply_overrides(overrides)
        else:
            logger.warning(f'File {args.override_path.as_posix()} does not exist')

    logger.info(f'Loaded {len(commits)} commits')

    new_contributors = get_new_contributors(args.contributors_path, commits)
    if new_contributors:
        if args.contributors:
            # Append the new names to the existing file
            write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
        logger.info(f'New contributors: {", ".join(new_contributors)}')

    # The changelog goes to stdout; log output goes to stderr
    print(Changelog(commits.groups(), args.repo, args.collapsible))