]> jfr.im git - yt-dlp.git/blame - devscripts/make_changelog.py
[ie/ARD] Overhaul extractors (#8878)
[yt-dlp.git] / devscripts / make_changelog.py
CommitLineData
d400e261
SS
1from __future__ import annotations
2
392389b7 3# Allow direct execution
4import os
5import sys
6
7sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
d400e261
SS
9import enum
10import itertools
11import json
12import logging
13import re
d400e261
SS
14from collections import defaultdict
15from dataclasses import dataclass
16from functools import lru_cache
17from pathlib import Path
18
392389b7 19from devscripts.utils import read_file, run_process, write_file
20
d400e261
SS
# Root of every commit, issue and author link written into the changelog.
BASE_URL = 'https://github.com'
# Directory that contains this script; default data-file paths are derived from it.
LOCATION_PATH = Path(__file__).parent
# Number of leading hash characters kept when a commit hash is abbreviated.
HASH_LENGTH = 7

logger = logging.getLogger(__name__)
26
27
class CommitGroup(enum.Enum):
    """Changelog sections; each member's value is the heading text of its section."""

    PRIORITY = 'Important'
    CORE = 'Core'
    EXTRACTOR = 'Extractor'
    DOWNLOADER = 'Downloader'
    POSTPROCESSOR = 'Postprocessor'
    NETWORKING = 'Networking'
    MISC = 'Misc.'

    @classmethod
    @lru_cache
    def subgroup_lookup(cls):
        """Return a dict mapping each known bare subgroup name to its group."""
        names_by_group = {
            cls.CORE: {
                'aes',
                'cache',
                'compat_utils',
                'compat',
                'cookies',
                'dependencies',
                'formats',
                'jsinterp',
                'outtmpl',
                'plugins',
                'update',
                'utils',
            },
            cls.MISC: {
                'build',
                'ci',
                'cleanup',
                'devscripts',
                'docs',
                'test',
            },
            cls.NETWORKING: {
                'rh',
            },
        }

        lookup = {}
        for member, names in names_by_group.items():
            lookup.update(dict.fromkeys(names, member))
        return lookup

    @classmethod
    @lru_cache
    def group_lookup(cls):
        """Return a dict mapping group names (short aliases and lower-cased member names) to groups."""
        aliases = {
            'fd': cls.DOWNLOADER,
            'ie': cls.EXTRACTOR,
            'pp': cls.POSTPROCESSOR,
            'upstream': cls.CORE,
        }
        by_member_name = {member.name.lower(): member for member in cls}
        # Member names are applied after the aliases, so they win on a clash.
        return {**aliases, **by_member_name}

    @classmethod
    def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
        """Resolve a ``group/subgroup`` (or bare) prefix to ``(group, subgroup)``.

        Both parts are stripped and lower-cased before lookup. When the first
        part is not a known group and a subgroup was given, ``(None, value)``
        is returned with ``value`` untouched. A bare name that is not a group
        is looked up as a subgroup instead.
        """
        head, _, tail = value.partition('/')
        group_name = head.strip().lower()
        subgroup = tail.strip().lower()

        known_group = cls.group_lookup().get(group_name)
        if known_group:
            return known_group, subgroup or None

        if subgroup:
            return None, value

        # No slash was present: treat the single name as a subgroup.
        return cls.subgroup_lookup().get(group_name), group_name or None
96
d400e261
SS
97
@dataclass
class Commit:
    """A commit: its hash (may be ``None``), its subject line and its author names."""

    hash: str | None
    short: str
    authors: list[str]

    def __str__(self):
        """Return ``'<subject>' (<abbreviated hash>) by <authors>``, omitting empty parts."""
        parts = [repr(self.short)]

        if self.hash:
            parts.append(f'({self.hash[:HASH_LENGTH]})')

        if self.authors:
            parts.append('by ' + ', '.join(self.authors))

        return ' '.join(parts)
115
116
@dataclass
class CommitInfo:
    """Parsed pieces of one commit subject plus the commit itself and its fix commits."""

    details: str | None
    sub_details: tuple[str, ...]
    message: str
    issues: list[str]
    commit: Commit
    fixes: list[Commit]

    def key(self):
        """Return the sort key ``(lower-cased details, sub_details, message)``."""
        folded_details = self.details.lower() if self.details else ''
        return folded_details, self.sub_details, self.message
128
129
23c39a4b
SS
def unique(items):
    """Return the truthy items, de-duplicated case-insensitively, in sorted order.

    Items are compared after stripping and lower-casing; when several items
    collapse to the same key, the last one encountered is the one kept.
    """
    latest_by_key = {}
    for item in items:
        if not item:
            continue
        latest_by_key[item.strip().lower()] = item

    survivors = list(latest_by_key.values())
    survivors.sort()
    return survivors
132
133
d400e261
SS
class Changelog:
    """Render commit entries, already bucketed by `CommitGroup`, as Markdown.

    `groups` is indexed with every `CommitGroup` member and must return the
    (possibly empty) list of entries for that section. `repo` is the
    repository path appended to `BASE_URL` when building links. With
    `collapsible` set, every section outside `ALWAYS_SHOWN` is wrapped in a
    single ``<details>`` element.
    """

    MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)
    ALWAYS_SHOWN = (CommitGroup.PRIORITY,)

    def __init__(self, groups, repo, collapsible=False):
        self._groups = groups
        self._repo = repo
        self._collapsible = collapsible

    def __str__(self):
        """Return the complete changelog text.

        Nested bullets are produced with tab characters and expanded here.
        Four spaces are used so that sub-items fall under the content of their
        parent ``- `` item; a one-space indent would leave them as siblings.
        """
        return '\n'.join(self._format_groups(self._groups)).replace('\t', '    ')

    def _format_groups(self, groups):
        """Yield one text chunk per non-empty section, in `CommitGroup` order."""
        first = True
        for item in CommitGroup:
            # Open the collapsible wrapper just before the first section that
            # is not always shown.
            if self._collapsible and item not in self.ALWAYS_SHOWN and first:
                first = False
                yield '\n<details><summary><h3>Changelog</h3></summary>\n'

            group = groups[item]
            if group:
                yield self.format_module(item.value, group)

        if self._collapsible:
            yield '\n</details>'

    def format_module(self, name, group):
        """Return one section: an optional ``#### <name> changes`` heading plus its bullets."""
        result = f'\n#### {name} changes\n' if name else '\n'
        return result + '\n'.join(self._format_group(group))

    @staticmethod
    def _sort_key(info):
        """Return the ordering key used before grouping in `_format_group`.

        `itertools.groupby` only merges adjacent items, so the sort has to use
        the same case-insensitive details / sub-details keys that the grouping
        uses; the message breaks ties.
        """
        return (
            (info.details or '').lower(),
            tuple(map(str.lower, info.sub_details)),
            info.message,
        )

    def _format_group(self, group):
        """Yield the bullet lines for one section.

        Entries are grouped by details (case-insensitively) and then by
        sub-details. A details or sub-details group holding a single entry is
        written on one line; larger groups get a heading bullet with the
        entries nested beneath it.
        """
        sorted_group = sorted(group, key=self._sort_key)
        detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower())
        for _, items in detail_groups:
            items = list(items)
            details = items[0].details

            if details == 'cleanup':
                items = self._prepare_cleanup_misc_items(items)

            prefix = '-'
            if details:
                if len(items) == 1:
                    prefix = f'- **{details}**:'
                else:
                    yield f'- **{details}**'
                    prefix = '\t-'

            sub_detail_groups = itertools.groupby(items, lambda item: tuple(map(str.lower, item.sub_details)))
            for sub_details, entries in sub_detail_groups:
                if not sub_details:
                    for entry in entries:
                        yield f'{prefix} {self.format_single_change(entry)}'
                    continue

                entries = list(entries)
                # The heading keeps the spelling of the first entry in the group.
                sub_prefix = f'{prefix} {", ".join(entries[0].sub_details)}'
                if len(entries) == 1:
                    yield f'{sub_prefix}: {self.format_single_change(entries[0])}'
                    continue

                yield sub_prefix
                for entry in entries:
                    yield f'\t{prefix} {self.format_single_change(entry)}'

    def _prepare_cleanup_misc_items(self, items):
        """Collapse cleanup entries whose message matches `MISC_RE`.

        Matching entries are bucketed by their author tuple; each bucket is
        replaced by one synthetic ``Miscellaneous`` entry whose message is the
        list of commit links. Non-matching entries keep their order and the
        synthetic entries are appended after them.
        """
        cleanup_misc_items = defaultdict(list)
        sorted_items = []
        for item in items:
            if self.MISC_RE.search(item.message):
                cleanup_misc_items[tuple(item.commit.authors)].append(item)
            else:
                sorted_items.append(item)

        for commit_infos in cleanup_misc_items.values():
            sorted_items.append(CommitInfo(
                'cleanup', ('Miscellaneous',), ', '.join(
                    self._format_message_link(None, info.commit.hash)
                    for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
                [], Commit(None, '', commit_infos[0].commit.authors), []))

        return sorted_items

    def format_single_change(self, info: CommitInfo):
        """Return the Markdown text for one entry.

        The first line of the message gets, in order: a commit link (unless it
        already contains ``[``), the issue links, the author links and a
        ``(With fixes in ...)`` note. Any further message lines are appended
        unchanged.
        """
        message, sep, rest = info.message.partition('\n')
        if '[' not in message:
            # If the message doesn't already contain markdown links, try to add a link to the commit
            message = self._format_message_link(message, info.commit.hash)

        if info.issues:
            message = f'{message} ({self._format_issues(info.issues)})'

        if info.commit.authors:
            message = f'{message} by {self._format_authors(info.commit.authors)}'

        if info.fixes:
            fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)

            authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold)
            # Fix authors are only spelled out when they differ from the commit's own.
            if authors != info.commit.authors:
                fix_message = f'{fix_message} by {self._format_authors(authors)}'

            message = f'{message} (With fixes in {fix_message})'

        return message if not sep else f'{message}{sep}{rest}'

    def _format_message_link(self, message, hash):
        """Return `message` linked to commit `hash`, or plain `message` when there is no hash.

        When `message` is empty the abbreviated hash is used as the link text.
        At least one of the two arguments must be truthy.
        """
        assert message or hash, 'Improperly defined commit message or override'
        message = message if message else hash[:HASH_LENGTH]
        return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message

    def _format_issues(self, issues):
        """Return comma-separated ``[#N](.../issues/N)`` links for `issues`."""
        return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)

    @staticmethod
    def _format_authors(authors):
        """Return comma-separated profile links for `authors`."""
        return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors)

    @property
    def repo_url(self):
        """Base URL of the repository all commit and issue links point into."""
        return f'{BASE_URL}/{self._repo}'
255
256
class CommitRange:
    """The commits of a git range, parsed from ``git log`` and grouped for the changelog.

    Iterating yields the commits read from git followed by any commits added
    through overrides. Membership tests (by `Commit` or by hash string) only
    consider the commits read from git.
    """

    COMMAND = 'git'
    COMMIT_SEPARATOR = '-----'

    AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
    # The pattern is compiled with re.VERBOSE, which ignores unescaped
    # whitespace, so every literal space has to be written as `\ `.
    MESSAGE_RE = re.compile(r'''
        (?:\[(?P<prefix>[^\]]+)\]\ )?
        (?:(?P<sub_details>`?[\w.-]+`?):\ )?
        (?P<message>.+?)
        (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
        ''', re.VERBOSE | re.DOTALL)
    EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
    REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
    FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert)\s+([\da-f]{40})')
    UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')

    def __init__(self, start, end, default_author=None):
        """Read the commits of ``start..end`` (or everything reachable from `end` when `start` is falsy).

        `default_author` is credited on commits whose body has no author line.
        """
        self._start, self._end = start, end
        self._commits, self._fixes = self._get_commits_and_fixes(default_author)
        self._commits_added = []

    def __iter__(self):
        return iter(itertools.chain(self._commits.values(), self._commits_added))

    def __len__(self):
        return len(self._commits) + len(self._commits_added)

    def __contains__(self, commit):
        if isinstance(commit, Commit):
            if not commit.hash:
                return False
            commit = commit.hash

        return commit in self._commits

    def _get_commits_and_fixes(self, default_author):
        """Run ``git log`` and return ``(commits, fixes)``.

        `commits` maps hash to `Commit` in log order. `fixes` maps the hash
        named in a "Fix ... <hash>" subject to the commits fixing it. Commits
        that are fully reverted inside the range are dropped, and so are fix
        commits whose target is part of the range.
        """
        result = run_process(
            self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
            f'{self._start}..{self._end}' if self._start else self._end).stdout

        commits, reverts = {}, {}
        fixes = defaultdict(list)
        lines = iter(result.splitlines(False))
        # Each record is: hash line, subject line, body lines, separator line.
        # The inner reads advance `lines`, so `i` counts commits, not lines.
        for i, commit_hash in enumerate(lines):
            short = next(lines)
            skip = short.startswith('Release ') or short == '[version] update'

            authors = [default_author] if default_author else []
            for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
                match = self.AUTHOR_INDICATOR_RE.match(line)
                if match:
                    authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)

            commit = Commit(commit_hash, short, authors)
            # With an explicit start, or on the very first commit, a release
            # commit is merely skipped; otherwise it ends the range.
            if skip and (self._start or not i):
                logger.debug(f'Skipped commit: {commit}')
                continue
            elif skip:
                logger.debug(f'Reached Release commit, breaking: {commit}')
                break

            revert_match = self.REVERT_RE.fullmatch(commit.short)
            if revert_match:
                reverts[revert_match.group(1)] = commit
                continue

            fix_match = self.FIXES_RE.search(commit.short)
            if fix_match:
                commitish = fix_match.group(1)
                fixes[commitish].append(commit)

            commits[commit.hash] = commit

        for commitish, revert_commit in reverts.items():
            reverted = commits.pop(commitish, None)
            if reverted:
                logger.debug(f'{commitish} fully reverted {reverted}')
            else:
                # The reverted commit is outside the range: keep the revert itself.
                commits[revert_commit.hash] = revert_commit

        for commitish, fix_commits in fixes.items():
            if commitish in commits:
                hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits)
                logger.info(f'Found fix(es) for {commitish[:HASH_LENGTH]}: {hashes}')
                for fix_commit in fix_commits:
                    # A fix commit may already have been dropped by the revert
                    # pass above, so removal must tolerate a missing key.
                    commits.pop(fix_commit.hash, None)
            else:
                logger.debug(f'Commit with fixes not in changes: {commitish[:HASH_LENGTH]}')

        return commits, fixes

    def apply_overrides(self, overrides):
        """Apply ``add`` / ``remove`` / ``change`` override entries, then reverse the commit order.

        An entry with a ``when`` hash is ignored unless that hash is in the
        range or equals the range start. ``remove`` and ``change`` target the
        entry's ``hash``, falling back to ``when``.
        """
        for override in overrides:
            when = override.get('when')
            if when and when not in self and when != self._start:
                logger.debug(f'Ignored {when!r} override')
                continue

            override_hash = override.get('hash') or when
            if override['action'] == 'add':
                commit = Commit(override.get('hash'), override['short'], override.get('authors') or [])
                logger.info(f'ADD {commit}')
                self._commits_added.append(commit)

            elif override['action'] == 'remove':
                if override_hash in self._commits:
                    logger.info(f'REMOVE {self._commits[override_hash]}')
                    del self._commits[override_hash]

            elif override['action'] == 'change':
                if override_hash not in self._commits:
                    continue
                commit = Commit(override_hash, override['short'], override.get('authors') or [])
                logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')
                self._commits[commit.hash] = commit

        self._commits = {key: value for key, value in reversed(self._commits.items())}

    def groups(self):
        """Parse every commit subject and return a dict of `CommitGroup` -> list of `CommitInfo`.

        Subjects that do not match `MESSAGE_RE` are logged and left out.
        Subjects without a ``[prefix]`` go to the core group; a prefix that
        maps to no group falls back to extractor or postprocessor with a
        warning.
        """
        group_dict = defaultdict(list)
        for commit in self:
            upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
            if upstream_re:
                commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}'

            match = self.MESSAGE_RE.fullmatch(commit.short)
            if not match:
                logger.error(f'Error parsing short commit message: {commit.short!r}')
                continue

            prefix, sub_details_alt, message, issues = match.groups()
            # Drop the leading '#' of every issue reference.
            issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else []

            if prefix:
                groups, details, sub_details = zip(*map(self.details_from_prefix, prefix.split(',')))
                # The first prefix that resolves to a group decides the section.
                group = next(iter(filter(None, groups)), None)
                details = ', '.join(unique(details))
                sub_details = list(itertools.chain.from_iterable(sub_details))
            else:
                group = CommitGroup.CORE
                details = None
                sub_details = []

            if sub_details_alt:
                sub_details.append(sub_details_alt)
            sub_details = tuple(unique(sub_details))

            if not group:
                if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
                    group = CommitGroup.EXTRACTOR
                else:
                    group = CommitGroup.POSTPROCESSOR
                logger.warning(f'Failed to map {commit.short!r}, selected {group.name.lower()}')

            commit_info = CommitInfo(
                details, sub_details, message.strip(),
                issues, commit, self._fixes[commit.hash])

            logger.debug(f'Resolved {commit.short!r} to {commit_info!r}')
            group_dict[group].append(commit_info)

        return group_dict

    @staticmethod
    def details_from_prefix(prefix):
        """Split one ``group/details:sub:sub`` prefix into ``(group, details, sub_details)``.

        An empty prefix maps to the core group without details.
        """
        if not prefix:
            return CommitGroup.CORE, None, ()

        prefix, *sub_details = prefix.split(':')

        group, details = CommitGroup.get(prefix)
        if group is CommitGroup.PRIORITY and details:
            # For priority prefixes only the part after the next '/' is kept.
            details = details.partition('/')[2].strip()

        if details and '/' in details:
            logger.error(f'Prefix is overnested, using first part: {prefix}')
            details = details.partition('/')[0].strip()

        if details == 'common':
            details = None
        elif group is CommitGroup.NETWORKING and details == 'rh':
            details = 'Request Handler'

        return group, details, sub_details
d400e261
SS
441
442
def get_new_contributors(contributors_path, commits):
    """Return commit authors not yet listed in the contributors file.

    Every line of the file (when it exists) contributes the text before the
    first `` (``, split on ``/``, as known names. Names are compared
    case-insensitively; each new author is reported once, using the spelling
    first seen, and the result is sorted case-insensitively.
    """
    known = set()
    if contributors_path.exists():
        for entry in read_file(contributors_path).splitlines():
            names = entry.strip().partition(' (')[0]
            known |= {name.casefold() for name in names.split('/')}

    newcomers = set()
    for author in (name for commit in commits for name in commit.authors):
        folded = author.casefold()
        if folded in known:
            continue
        known.add(folded)
        newcomers.add(author)

    return sorted(newcomers, key=str.casefold)
460
461
if __name__ == '__main__':
    import argparse

    # Command-line interface; the argument order below is the order shown in --help.
    parser = argparse.ArgumentParser(
        description='Create a changelog markdown from a git commit range')
    parser.add_argument(
        'commitish', default='HEAD', nargs='?',
        help='The commitish to create the range from (default: %(default)s)')
    parser.add_argument(
        '-v', '--verbosity', action='count', default=0,
        help='increase verbosity (can be used twice)')
    parser.add_argument(
        '-c', '--contributors', action='store_true',
        help='update CONTRIBUTORS file (default: %(default)s)')
    parser.add_argument(
        '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS',
        help='path to the CONTRIBUTORS file')
    parser.add_argument(
        '--no-override', action='store_true',
        help='skip override json in commit generation (default: %(default)s)')
    parser.add_argument(
        '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json',
        help='path to the changelog_override.json file')
    parser.add_argument(
        '--default-author', default='pukkandan',
        help='the author to use without a author indicator (default: %(default)s)')
    parser.add_argument(
        '--repo', default='yt-dlp/yt-dlp',
        help='the github repository to use for the operations (default: %(default)s)')
    parser.add_argument(
        '--collapsible', action='store_true',
        help='make changelog collapsible (default: %(default)s)')
    args = parser.parse_args()

    # Each -v lowers the threshold by one level: WARNING -> INFO -> DEBUG.
    log_level = logging.WARNING - 10 * args.verbosity
    logging.basicConfig(
        datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}',
        level=log_level, style='{', stream=sys.stderr)

    commits = CommitRange(None, args.commitish, args.default_author)

    use_overrides = not args.no_override
    if use_overrides and args.override_path.exists():
        commits.apply_overrides(json.loads(read_file(args.override_path)))
    elif use_overrides:
        logger.warning(f'File {args.override_path.as_posix()} does not exist')

    logger.info(f'Loaded {len(commits)} commits')

    new_contributors = get_new_contributors(args.contributors_path, commits)
    if new_contributors:
        if args.contributors:
            # Append, one name per line, to the existing CONTRIBUTORS file.
            appended_text = '\n'.join(new_contributors) + '\n'
            write_file(args.contributors_path, appended_text, mode='a')
        logger.info(f'New contributors: {", ".join(new_contributors)}')

    changelog = Changelog(commits.groups(), args.repo, args.collapsible)
    print(changelog)