]> jfr.im git - yt-dlp.git/blame - devscripts/make_changelog.py
[cleanup] Misc
[yt-dlp.git] / devscripts / make_changelog.py
CommitLineData
d400e261
SS
1from __future__ import annotations
2
392389b7 3# Allow direct execution
4import os
5import sys
6
7sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
d400e261
SS
9import enum
10import itertools
11import json
12import logging
13import re
d400e261
SS
14from collections import defaultdict
15from dataclasses import dataclass
16from functools import lru_cache
17from pathlib import Path
18
392389b7 19from devscripts.utils import read_file, run_process, write_file
20
d400e261
SS
# Root URL used when building commit, issue and author links
BASE_URL = 'https://github.com'
# Directory that contains this script
LOCATION_PATH = Path(__file__).parent
# Number of leading characters kept when a commit hash is abbreviated
HASH_LENGTH = 7

logger = logging.getLogger(__name__)
26
27
class _ClassProperty:
    """Read-only descriptor that evaluates its getter against the owning class.

    Stacking `@classmethod` on top of `@property` was deprecated in
    Python 3.11 and no longer works from 3.13 on, so a class-level computed
    attribute needs its own descriptor.
    """

    def __init__(self, getter):
        self._getter = getter

    def __get__(self, instance, owner=None):
        return self._getter(type(instance) if owner is None else owner)


class CommitGroup(enum.Enum):
    """Changelog sections, listed in the order they are rendered.

    The value of each member is the human readable section name.
    """

    PRIORITY = 'Important'
    CORE = 'Core'
    EXTRACTOR = 'Extractor'
    DOWNLOADER = 'Downloader'
    POSTPROCESSOR = 'Postprocessor'
    MISC = 'Misc.'

    # Accessed as a plain attribute: `CommitGroup.ignorable_prefixes`
    @_ClassProperty
    def ignorable_prefixes(cls):
        """Prefixes that are not reused as the details of a commit."""
        return ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream')

    @classmethod
    @lru_cache
    def commit_lookup(cls):
        """Return the mapping of commit prefix to group (computed once, then cached)."""
        return {
            name: group
            for group, names in {
                cls.PRIORITY: {'priority'},
                cls.CORE: {
                    'aes',
                    'cache',
                    'compat_utils',
                    'compat',
                    'cookies',
                    'core',
                    'dependencies',
                    'jsinterp',
                    'outtmpl',
                    'plugins',
                    'update',
                    'upstream',
                    'utils',
                },
                cls.MISC: {
                    'build',
                    'cleanup',
                    'devscripts',
                    'docs',
                    'misc',
                    'test',
                },
                cls.EXTRACTOR: {'extractor'},
                cls.DOWNLOADER: {'downloader'},
                cls.POSTPROCESSOR: {'postprocessor'},
            }.items()
            for name in names
        }

    @classmethod
    def get(cls, value):
        """Return the group registered for the prefix `value`, or None if it is unknown."""
        result = cls.commit_lookup().get(value)
        if result:
            logger.debug(f'Mapped {value!r} => {result.name}')
        return result
84
85
@dataclass
class Commit:
    """A single commit as used by the changelog generator."""

    # Full commit hash; may be None (e.g. for an entry that has no real commit)
    hash: str | None
    # Subject line of the commit
    short: str
    # Names of the commit authors
    authors: list[str]

    def __str__(self):
        """Return `'<short>' (<abbreviated hash>) by <authors>`, omitting absent parts."""
        result = f'{self.short!r}'

        if self.hash:
            result += f' ({self.hash[:HASH_LENGTH]})'

        if self.authors:
            authors = ', '.join(self.authors)
            result += f' by {authors}'

        return result
103
104
@dataclass
class CommitInfo:
    """A commit together with the changelog information parsed from its subject line."""

    # Primary topic of the change; None when there is none
    details: str | None
    # Secondary topics of the change
    sub_details: tuple[str, ...]
    # The change description itself
    message: str
    # Referenced issue numbers, stored without the leading `#`
    issues: list[str]
    # The commit this information was derived from
    commit: Commit
    # Commits that fix this commit
    fixes: list[Commit]

    def key(self):
        """Return the sort key: case-insensitive details, then sub details, then message."""
        return ((self.details or '').lower(), self.sub_details, self.message)
116
117
23c39a4b
SS
def unique(items):
    """Return the sorted, case-insensitively deduplicated truthy entries of `items`.

    Entries are compared after stripping and lowercasing; when several entries
    collide, the one that occurs last is kept in its original spelling.
    """
    return sorted({item.strip().lower(): item for item in items if item}.values())
120
121
d400e261
SS
class Changelog:
    """Render grouped `CommitInfo` entries as a markdown changelog.

    `str(changelog)` produces the final markdown text.
    """

    # Messages matching this are merged into one "Miscellaneous" cleanup entry per author set
    MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)
    # Groups that are rendered before the collapsible section is opened
    ALWAYS_SHOWN = (CommitGroup.PRIORITY,)

    def __init__(self, groups, repo, collapsible=False):
        """
        @param groups       Mapping of `CommitGroup` to a list of `CommitInfo`
        @param repo         Repository in `owner/name` form, used to build links
        @param collapsible  Whether to wrap the changelog in a `<details>` element
        """
        self._groups = groups
        self._repo = repo
        self._collapsible = collapsible

    def __str__(self):
        # Tabs are only used internally to mark nesting; emit four spaces per level
        # so that nested bullet points are rendered as sub-lists
        return '\n'.join(self._format_groups(self._groups)).replace('\t', '    ')

    def _format_groups(self, groups):
        """Yield the markdown of every non-empty group, in `CommitGroup` order."""
        first = True
        for item in CommitGroup:
            if self._collapsible and item not in self.ALWAYS_SHOWN and first:
                first = False
                yield '\n<details><summary><h3>Changelog</h3></summary>\n'

            # `.get` accepts plain dicts and does not add keys to a defaultdict
            group = groups.get(item)
            if group:
                yield self.format_module(item.value, group)

        if self._collapsible:
            yield '\n</details>'

    def format_module(self, name, group):
        """Return the markdown for one group, preceded by a heading if `name` is given."""
        result = f'\n#### {name} changes\n' if name else '\n'
        return result + '\n'.join(self._format_group(group))

    @staticmethod
    def _sort_key(info):
        """Sort key that uses the same case-insensitive keys `_format_group` groups by."""
        return (info.details or '').lower(), tuple(map(str.lower, info.sub_details)), info.message

    def _format_group(self, group):
        """Yield the bullet point lines for the entries of one group."""
        # groupby only merges adjacent items, so sort on exactly the keys used for grouping
        sorted_group = sorted(group, key=self._sort_key)
        detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower())
        for _, items in detail_groups:
            items = list(items)
            details = items[0].details

            if details == 'cleanup':
                items = self._prepare_cleanup_misc_items(items)

            prefix = '-'
            if details:
                if len(items) == 1:
                    prefix = f'- **{details}**:'
                else:
                    yield f'- **{details}**'
                    prefix = '\t-'

            sub_detail_groups = itertools.groupby(items, lambda item: tuple(map(str.lower, item.sub_details)))
            for sub_details, entries in sub_detail_groups:
                if not sub_details:
                    for entry in entries:
                        yield f'{prefix} {self.format_single_change(entry)}'
                    continue

                entries = list(entries)
                sub_prefix = f'{prefix} {", ".join(entries[0].sub_details)}'
                if len(entries) == 1:
                    yield f'{sub_prefix}: {self.format_single_change(entries[0])}'
                    continue

                yield sub_prefix
                for entry in entries:
                    yield f'\t{prefix} {self.format_single_change(entry)}'

    def _prepare_cleanup_misc_items(self, items):
        """Collapse cleanup entries matching `MISC_RE` into one entry per set of authors.

        Entries that do not match are kept in order; the merged entries are appended
        after them and list the commit links of the entries they replace.
        """
        cleanup_misc_items = defaultdict(list)
        sorted_items = []
        for item in items:
            if self.MISC_RE.search(item.message):
                cleanup_misc_items[tuple(item.commit.authors)].append(item)
            else:
                sorted_items.append(item)

        for commit_infos in cleanup_misc_items.values():
            sorted_items.append(CommitInfo(
                'cleanup', ('Miscellaneous',), ', '.join(
                    # A commit without hash cannot be linked; show its message instead
                    self._format_message_link(
                        None if info.commit.hash else info.message, info.commit.hash).strip()
                    for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
                [], Commit(None, '', commit_infos[0].commit.authors), []))

        return sorted_items

    def format_single_change(self, info):
        """Return the markdown for one entry: linked message, issues, authors and fixes.

        Every addition is inserted before the first newline, so it ends up on the
        first line of the message. The trailing newline is removed from the result.
        """
        message = self._format_message_link(info.message, info.commit.hash)
        if info.issues:
            message = message.replace('\n', f' ({self._format_issues(info.issues)})\n', 1)

        if info.commit.authors:
            message = message.replace('\n', f' by {self._format_authors(info.commit.authors)}\n', 1)

        if info.fixes:
            # Each link ends with a newline; strip it so that none ends up inside the parentheses
            fix_message = ', '.join(self._format_message_link(None, fix.hash).strip() for fix in info.fixes)

            authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold)
            if authors != info.commit.authors:
                fix_message = f'{fix_message} by {self._format_authors(authors)}'

            message = message.replace('\n', f' (With fixes in {fix_message})\n', 1)

        return message[:-1]

    def _format_message_link(self, message, hash):
        """Return `message` followed by a newline, with its first line linked to the commit.

        Without `message` the abbreviated hash is used as link text;
        without `hash` the message is returned unlinked.
        """
        assert message or hash, 'Improperly defined commit message or override'
        message = message if message else hash[:HASH_LENGTH]
        if not hash:
            return f'{message}\n'
        # Close the link at the first newline so only the first line is the link text
        return f'[{message}\n'.replace('\n', f']({self.repo_url}/commit/{hash})\n', 1)

    def _format_issues(self, issues):
        """Return comma separated markdown links to the given issue numbers."""
        return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)

    @staticmethod
    def _format_authors(authors):
        """Return comma separated markdown links to the given authors."""
        return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors)

    @property
    def repo_url(self):
        """URL of the repository that commit and issue links point to."""
        return f'{BASE_URL}/{self._repo}'
241
242
class CommitRange:
    """The commits of a git revision range, plus commits added through overrides.

    Iterating yields `Commit` objects: first those read from git, then the added ones.
    """

    COMMAND = 'git'
    COMMIT_SEPARATOR = '-----'

    AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
    # Subject line layout: `[prefix] sub_details: message (#issue, #issue)`.
    # The pattern is compiled with re.VERBOSE, so every literal space has to be escaped
    MESSAGE_RE = re.compile(r'''
        (?:\[(?P<prefix>[^\]]+)\]\ )?
        (?:(?P<sub_details>`?[^:`]+`?):\ )?
        (?P<message>.+?)
        (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
        ''', re.VERBOSE | re.DOTALL)
    EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
    FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert)\s+([\da-f]{40})')
    UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')

    def __init__(self, start, end, default_author=None):
        """
        @param start           Revision the range starts after; if falsy, everything
                               reachable from `end` is read up to the previous release commit
        @param end             Revision the range ends at
        @param default_author  Author assigned to commits without an author indicator
        """
        self._start, self._end = start, end
        self._commits, self._fixes = self._get_commits_and_fixes(default_author)
        self._commits_added = []

    def __iter__(self):
        return iter(itertools.chain(self._commits.values(), self._commits_added))

    def __len__(self):
        return len(self._commits) + len(self._commits_added)

    def __contains__(self, commit):
        """Check by hash whether a commit was read from git; added commits are not considered."""
        if isinstance(commit, Commit):
            if not commit.hash:
                return False
            commit = commit.hash

        return commit in self._commits

    def _get_commits_and_fixes(self, default_author):
        """Read the range from `git log`.

        @returns  Tuple of (mapping of hash to `Commit`,
                  mapping of a fixed commit's hash to the commits fixing it)
        """
        # One record per commit: hash, subject, body lines and finally the separator line
        result = run_process(
            self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
            f'{self._start}..{self._end}' if self._start else self._end).stdout

        commits = {}
        fixes = defaultdict(list)
        lines = iter(result.splitlines(False))
        for i, commit_hash in enumerate(lines):
            short = next(lines)
            skip = short.startswith('Release ') or short == '[version] update'

            authors = [default_author] if default_author else []
            # Consume the body up to the separator; only author indicator lines are used
            for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
                match = self.AUTHOR_INDICATOR_RE.match(line)
                if match:
                    authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)

            commit = Commit(commit_hash, short, authors)
            # With an explicit start, or for the very first commit, release commits are
            # only skipped; otherwise the first one encountered ends the range
            if skip and (self._start or not i):
                logger.debug(f'Skipped commit: {commit}')
                continue
            elif skip:
                logger.debug(f'Reached Release commit, breaking: {commit}')
                break

            fix_match = self.FIXES_RE.search(commit.short)
            if fix_match:
                commitish = fix_match.group(1)
                fixes[commitish].append(commit)

            commits[commit.hash] = commit

        # Fix commits are dropped from the list if the commit they fix is part of the range
        for commitish, fix_commits in fixes.items():
            if commitish in commits:
                hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits)
                logger.info(f'Found fix(es) for {commitish[:HASH_LENGTH]}: {hashes}')
                for fix_commit in fix_commits:
                    del commits[fix_commit.hash]
            else:
                logger.debug(f'Commit with fixes not in changes: {commitish[:HASH_LENGTH]}')

        return commits, fixes

    def apply_overrides(self, overrides):
        """Apply `add`, `remove` and `change` overrides, then reverse the commit order.

        @param overrides  Iterable of dicts with an `action` key and, depending on the
                          action, `when`, `hash`, `short` and `authors` keys
        """
        for override in overrides:
            when = override.get('when')
            # `when` restricts an override to ranges containing (or starting at) that commit
            if when and when not in self and when != self._start:
                logger.debug(f'Ignored {when!r}, not in commits {self._start!r}')
                continue

            override_hash = override.get('hash') or when
            if override['action'] == 'add':
                commit = Commit(override.get('hash'), override['short'], override.get('authors') or [])
                logger.info(f'ADD {commit}')
                self._commits_added.append(commit)

            elif override['action'] == 'remove':
                if override_hash in self._commits:
                    logger.info(f'REMOVE {self._commits[override_hash]}')
                    del self._commits[override_hash]

            elif override['action'] == 'change':
                if override_hash not in self._commits:
                    continue
                commit = Commit(override_hash, override['short'], override.get('authors') or [])
                logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')
                self._commits[commit.hash] = commit

        self._commits = {key: value for key, value in reversed(self._commits.items())}

    def groups(self):
        """Parse every commit subject and return a mapping of `CommitGroup` to `CommitInfo` list.

        Commits whose subject cannot be parsed are logged as errors and left out.
        """
        group_dict = defaultdict(list)
        for commit in self:
            upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
            if upstream_re:
                commit.short = f'[core/upstream] Merged with youtube-dl {upstream_re.group(1)}'

            match = self.MESSAGE_RE.fullmatch(commit.short)
            if not match:
                logger.error(f'Error parsing short commit message: {commit.short!r}')
                continue

            prefix, sub_details_alt, message, issues = match.groups()
            # Drop the leading `#` of every issue reference
            issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else []

            if prefix:
                # A prefix may name several comma separated targets; the first known group wins
                groups, details, sub_details = zip(*map(self.details_from_prefix, prefix.split(',')))
                group = next(iter(filter(None, groups)), None)
                details = ', '.join(unique(details))
                sub_details = list(itertools.chain.from_iterable(sub_details))
            else:
                group = CommitGroup.CORE
                details = None
                sub_details = []

            if sub_details_alt:
                sub_details.append(sub_details_alt)
            sub_details = tuple(unique(sub_details))

            if not group:
                if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
                    group = CommitGroup.EXTRACTOR
                else:
                    group = CommitGroup.POSTPROCESSOR
                    logger.warning(f'Failed to map {commit.short!r}, selected {group.name.lower()}')

            commit_info = CommitInfo(
                details, sub_details, message.strip(),
                issues, commit, self._fixes[commit.hash])

            logger.debug(f'Resolved {commit.short!r} to {commit_info!r}')
            group_dict[group].append(commit_info)

        return group_dict

    @staticmethod
    def details_from_prefix(prefix):
        """Split a single prefix of the form `group/details:sub:sub` into its parts.

        @returns  Tuple of (group or None, details or None, sub details)
        """
        if not prefix:
            return CommitGroup.CORE, None, ()

        prefix, _, details = prefix.partition('/')
        prefix = prefix.strip()
        details = details.strip()

        group = CommitGroup.get(prefix.lower())
        if group is CommitGroup.PRIORITY:
            # The actual target follows the priority marker
            prefix, _, details = details.partition('/')

        if not details and prefix and prefix not in CommitGroup.ignorable_prefixes:
            logger.debug(f'Replaced details with {prefix!r}')
            details = prefix or None

        if details == 'common':
            details = None

        if details:
            details, *sub_details = details.split(':')
        else:
            sub_details = []

        return group, details, sub_details
d400e261
SS
419
420
def get_new_contributors(contributors_path, commits):
    """Return the commit authors that are not yet listed in the contributors file.

    Names are compared case-insensitively. If the file does not exist,
    every author counts as new.

    @param contributors_path  Path of the contributors file
    @param commits            Iterable of objects that have an `authors` list
    @returns                  The new authors, sorted case-insensitively
    """
    contributors = set()
    if contributors_path.exists():
        for line in read_file(contributors_path).splitlines():
            # Anything from the first ' (' on is ignored; '/' separates several names on a line
            author, _, _ = line.strip().partition(' (')
            authors = author.split('/')
            contributors.update(map(str.casefold, authors))

    new_contributors = set()
    for commit in commits:
        for author in commit.authors:
            author_folded = author.casefold()
            if author_folded not in contributors:
                # Remember the name, so a different spelling of it is not reported again
                contributors.add(author_folded)
                new_contributors.add(author)

    return sorted(new_contributors, key=str.casefold)
438
439
# Command line entry point: prints the markdown changelog for a commitish to stdout
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='Create a changelog markdown from a git commit range')
    parser.add_argument(
        'commitish', default='HEAD', nargs='?',
        help='The commitish to create the range from (default: %(default)s)')
    parser.add_argument(
        '-v', '--verbosity', action='count', default=0,
        help='increase verbosity (can be used twice)')
    parser.add_argument(
        '-c', '--contributors', action='store_true',
        help='update CONTRIBUTORS file (default: %(default)s)')
    parser.add_argument(
        '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS',
        help='path to the CONTRIBUTORS file')
    parser.add_argument(
        '--no-override', action='store_true',
        help='skip override json in commit generation (default: %(default)s)')
    parser.add_argument(
        '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json',
        help='path to the changelog_override.json file')
    parser.add_argument(
        '--default-author', default='pukkandan',
        help='the author to use without a author indicator (default: %(default)s)')
    parser.add_argument(
        '--repo', default='yt-dlp/yt-dlp',
        help='the github repository to use for the operations (default: %(default)s)')
    parser.add_argument(
        '--collapsible', action='store_true',
        help='make changelog collapsible (default: %(default)s)')
    args = parser.parse_args()

    # Every `-v` lowers the threshold by one level: WARNING -> INFO -> DEBUG
    logging.basicConfig(
        datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}',
        level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr)

    # No explicit start: the range is read back to the previous release commit
    commits = CommitRange(None, args.commitish, args.default_author)

    if not args.no_override:
        if args.override_path.exists():
            overrides = json.loads(read_file(args.override_path))
            commits.apply_overrides(overrides)
        else:
            logger.warning(f'File {args.override_path.as_posix()} does not exist')

    logger.info(f'Loaded {len(commits)} commits')

    new_contributors = get_new_contributors(args.contributors_path, commits)
    if new_contributors:
        if args.contributors:
            # Append the new names to the existing file
            write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
        logger.info(f'New contributors: {", ".join(new_contributors)}')

    print(Changelog(commits.groups(), args.repo, args.collapsible))