]> jfr.im git - yt-dlp.git/blob - devscripts/make_changelog.py
Release 2023.10.13
[yt-dlp.git] / devscripts / make_changelog.py
1 from __future__ import annotations
2
3 # Allow direct execution
4 import os
5 import sys
6
7 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
9 import enum
10 import itertools
11 import json
12 import logging
13 import re
14 from collections import defaultdict
15 from dataclasses import dataclass
16 from functools import lru_cache
17 from pathlib import Path
18
19 from devscripts.utils import read_file, run_process, write_file
20
# Root URL prepended to repository paths and author names when building links
BASE_URL = 'https://github.com'
# Directory that contains this script
LOCATION_PATH = Path(__file__).parent
# Number of leading characters kept when a commit hash is shown in abbreviated form
HASH_LENGTH = 7

logger = logging.getLogger(__name__)
26
27
class CommitGroup(enum.Enum):
    """Changelog sections; each member's value is the title used in the section heading."""

    PRIORITY = 'Important'
    CORE = 'Core'
    EXTRACTOR = 'Extractor'
    DOWNLOADER = 'Downloader'
    POSTPROCESSOR = 'Postprocessor'
    NETWORKING = 'Networking'
    MISC = 'Misc.'

    @classmethod
    @lru_cache
    def subgroup_lookup(cls):
        """Return the (cached) mapping from a bare subgroup name to the group owning it."""
        names_by_group = (
            (cls.CORE, (
                'aes',
                'cache',
                'compat_utils',
                'compat',
                'cookies',
                'dependencies',
                'formats',
                'jsinterp',
                'outtmpl',
                'plugins',
                'update',
                'utils',
            )),
            (cls.MISC, (
                'build',
                'cleanup',
                'devscripts',
                'docs',
                'test',
            )),
            (cls.NETWORKING, (
                'rh',
            )),
        )

        lookup = {}
        for owner, names in names_by_group:
            for name in names:
                lookup[name] = owner
        return lookup

    @classmethod
    @lru_cache
    def group_lookup(cls):
        """Return the (cached) mapping from group aliases and lower-cased member names to groups."""
        lookup = {
            'fd': cls.DOWNLOADER,
            'ie': cls.EXTRACTOR,
            'pp': cls.POSTPROCESSOR,
            'upstream': cls.CORE,
        }
        for member in cls:
            lookup[member.name.lower()] = member
        return lookup

    @classmethod
    def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
        """Resolve a `group/subgroup` prefix into `(group, subgroup)`.

        The text before the first `/` is looked up as a group; when it is not a
        known group and nothing follows the slash, it is looked up as a bare
        subgroup instead. An unknown group that does carry a subgroup yields
        `(None, value)` with `value` returned untouched.
        """
        head, _, tail = value.partition('/')
        group_key = head.strip().lower()
        subgroup = tail.strip().lower()

        found = cls.group_lookup().get(group_key)
        if found:
            return found, subgroup or None

        if subgroup:
            # Neither part can be trusted when the leading group is unknown
            return None, value

        # A lone name may still be one of the well-known subgroups
        return cls.subgroup_lookup().get(group_key), group_key or None
95
96
@dataclass
class Commit:
    """A single commit: its hash (if any), its subject line and its authors."""

    hash: str | None
    short: str
    authors: list[str]

    def __str__(self):
        # Quoted subject first, then the optional abbreviated hash and author list
        pieces = [repr(self.short)]

        if self.hash:
            pieces.append(f'({self.hash[:HASH_LENGTH]})')

        if self.authors:
            pieces.append(f'by {", ".join(self.authors)}')

        return ' '.join(pieces)
114
115
@dataclass
class CommitInfo:
    """Parsed pieces of a commit subject together with the commit and its fix commits."""

    details: str | None
    sub_details: tuple[str, ...]
    message: str
    issues: list[str]
    commit: Commit
    fixes: list[Commit]

    def key(self):
        """Return the sort key: lower-cased details, then sub-details, then message."""
        folded_details = self.details.lower() if self.details else ''
        return folded_details, self.sub_details, self.message
127
128
def unique(items):
    """Return the truthy items sorted, deduplicated by their stripped, lower-cased form.

    When several items collapse to the same form, the spelling seen last is kept.
    """
    by_folded = {}
    for item in items:
        if not item:
            continue
        by_folded[item.strip().lower()] = item

    distinct = list(by_folded.values())
    distinct.sort()
    return distinct
131
132
class Changelog:
    """Render grouped commit information as a Markdown changelog.

    `groups` maps each `CommitGroup` to a list of `CommitInfo` entries, `repo`
    is the repository path appended to `BASE_URL` when building links (e.g.
    `yt-dlp/yt-dlp`), and `collapsible` wraps every group that is not listed
    in `ALWAYS_SHOWN` in a `<details>` element.
    """

    # Messages matching this are folded into one "Miscellaneous" cleanup entry
    MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)
    # Groups that stay outside of the collapsible section
    ALWAYS_SHOWN = (CommitGroup.PRIORITY,)

    def __init__(self, groups, repo, collapsible=False):
        self._groups = groups
        self._repo = repo
        self._collapsible = collapsible

    def __str__(self):
        # Nesting is produced with tabs internally; expand each level to four spaces
        # so that nested bullets are indented far enough to render as sub-lists
        return '\n'.join(self._format_groups(self._groups)).replace('\t', '    ')

    def _format_groups(self, groups):
        """Yield the Markdown of every non-empty group in `CommitGroup` order."""
        first = True
        for item in CommitGroup:
            # Open the collapsible section right before the first group that may be hidden
            if self._collapsible and item not in self.ALWAYS_SHOWN and first:
                first = False
                yield '\n<details><summary><h3>Changelog</h3></summary>\n'

            # `.get` so that a plain dict lacking some groups is accepted as well
            group = groups.get(item)
            if group:
                yield self.format_module(item.value, group)

        if self._collapsible:
            yield '\n</details>'

    def format_module(self, name, group):
        """Return the section for `group`, headed by `name` when one is given."""
        result = f'\n#### {name} changes\n' if name else '\n'
        return result + '\n'.join(self._format_group(group))

    def _format_group(self, group):
        """Yield the bullet lines of one group, nested by details and then sub-details."""
        sorted_group = sorted(group, key=CommitInfo.key)
        detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower())
        for _, items in detail_groups:
            items = list(items)
            details = items[0].details

            if details == 'cleanup':
                items = self._prepare_cleanup_misc_items(items)

            prefix = '-'
            if details:
                if len(items) == 1:
                    prefix = f'- **{details}**:'
                else:
                    yield f'- **{details}**'
                    prefix = '\t-'

            # The items are sorted by their raw sub-details but grouped case-insensitively,
            # and the cleanup step appends entries after sorting, so equal keys are not
            # guaranteed to be adjacent. Bucket them (keeping first-seen order) instead of
            # relying on `itertools.groupby`, which would emit the same sub-group twice.
            sub_detail_groups = {}
            for item in items:
                sub_key = tuple(map(str.lower, item.sub_details))
                sub_detail_groups.setdefault(sub_key, []).append(item)

            for sub_details, entries in sub_detail_groups.items():
                if not sub_details:
                    for entry in entries:
                        yield f'{prefix} {self.format_single_change(entry)}'
                    continue

                sub_prefix = f'{prefix} {", ".join(entries[0].sub_details)}'
                if len(entries) == 1:
                    yield f'{sub_prefix}: {self.format_single_change(entries[0])}'
                    continue

                yield sub_prefix
                for entry in entries:
                    yield f'\t{prefix} {self.format_single_change(entry)}'

    def _prepare_cleanup_misc_items(self, items):
        """Collapse cleanup entries matching `MISC_RE` into one entry per author tuple.

        Entries that do not match are returned first, in their original order.
        Each collapsed entry has the sub-detail `Miscellaneous` and a message
        made of links to the collapsed commits, ordered by hash.
        """
        cleanup_misc_items = defaultdict(list)
        sorted_items = []
        for item in items:
            if self.MISC_RE.search(item.message):
                cleanup_misc_items[tuple(item.commit.authors)].append(item)
            else:
                sorted_items.append(item)

        for commit_infos in cleanup_misc_items.values():
            sorted_items.append(CommitInfo(
                'cleanup', ('Miscellaneous',), ', '.join(
                    self._format_message_link(None, info.commit.hash)
                    for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
                [], Commit(None, '', commit_infos[0].commit.authors), []))

        return sorted_items

    def format_single_change(self, info: CommitInfo):
        """Return the Markdown for one entry: message, issues, authors and fixes.

        Only the first line of the message is decorated; any further lines are
        appended unchanged.
        """
        message, sep, rest = info.message.partition('\n')
        if '[' not in message:
            # If the message doesn't already contain markdown links, try to add a link to the commit
            message = self._format_message_link(message, info.commit.hash)

        if info.issues:
            message = f'{message} ({self._format_issues(info.issues)})'

        if info.commit.authors:
            message = f'{message} by {self._format_authors(info.commit.authors)}'

        if info.fixes:
            fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)

            # Credit the fix authors only when they differ from the authors of the commit itself
            authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold)
            if authors != info.commit.authors:
                fix_message = f'{fix_message} by {self._format_authors(authors)}'

            message = f'{message} (With fixes in {fix_message})'

        return message if not sep else f'{message}{sep}{rest}'

    def _format_message_link(self, message, hash):
        """Return `message` linked to commit `hash`; the abbreviated hash is the text if `message` is empty.

        Without a hash the message is returned as is. At least one of the two must be given.
        """
        assert message or hash, 'Improperly defined commit message or override'
        message = message if message else hash[:HASH_LENGTH]
        return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message

    def _format_issues(self, issues):
        """Return comma separated links to the given issue numbers of the repository."""
        return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)

    @staticmethod
    def _format_authors(authors):
        """Return comma separated links built from `BASE_URL` and each author name."""
        return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors)

    @property
    def repo_url(self):
        """URL of the repository that commit and issue links point into."""
        return f'{BASE_URL}/{self._repo}'
254
255
class CommitRange:
    """Commits of the git range `start..end`, prepared for changelog generation.

    The commits are read from `git log` on construction. Iterating yields the
    remaining commits followed by the ones added through overrides.
    """

    COMMAND = 'git'
    # Line printed after every commit body so that the log output can be split again
    COMMIT_SEPARATOR = '-----'

    AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
    # NOTE: compiled with re.VERBOSE, so every literal space has to be escaped
    MESSAGE_RE = re.compile(r'''
        (?:\[(?P<prefix>[^\]]+)\]\ )?
        (?:(?P<sub_details>`?[\w.-]+`?):\ )?
        (?P<message>.+?)
        (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
        ''', re.VERBOSE | re.DOTALL)
    EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
    REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
    FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert)\s+([\da-f]{40})')
    UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')

    def __init__(self, start, end, default_author=None):
        self._start, self._end = start, end
        self._commits, self._fixes = self._get_commits_and_fixes(default_author)
        self._commits_added = []

    def __iter__(self):
        return iter(itertools.chain(self._commits.values(), self._commits_added))

    def __len__(self):
        return len(self._commits) + len(self._commits_added)

    def __contains__(self, commit):
        # Accepts either a `Commit` or a plain hash; commits added via overrides never match
        if isinstance(commit, Commit):
            if not commit.hash:
                return False
            commit = commit.hash

        return commit in self._commits

    def _get_commits_and_fixes(self, default_author):
        """Parse `git log` into `(commits, fixes)`.

        `commits` maps a hash to its `Commit`, in log order. `fixes` maps the
        hash named in a "Fix <hash>"-style subject to the commits fixing it.
        Fully reverted commits and fix commits whose target is part of the
        range are left out of `commits`.
        """
        result = run_process(
            self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
            f'{self._start}..{self._end}' if self._start else self._end).stdout

        commits, reverts = {}, {}
        fixes = defaultdict(list)
        lines = iter(result.splitlines(False))
        for i, commit_hash in enumerate(lines):
            short = next(lines)
            skip = short.startswith('Release ') or short == '[version] update'

            # Consume the body up to the separator; an author line replaces the default author
            authors = [default_author] if default_author else []
            for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
                match = self.AUTHOR_INDICATOR_RE.match(line)
                if match:
                    authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)

            commit = Commit(commit_hash, short, authors)
            # Release/version commits are skipped when a start was given or when they are
            # the very first commit; otherwise they mark where the open-ended range stops
            if skip and (self._start or not i):
                logger.debug(f'Skipped commit: {commit}')
                continue
            elif skip:
                logger.debug(f'Reached Release commit, breaking: {commit}')
                break

            revert_match = self.REVERT_RE.fullmatch(commit.short)
            if revert_match:
                reverts[revert_match.group(1)] = commit
                continue

            fix_match = self.FIXES_RE.search(commit.short)
            if fix_match:
                commitish = fix_match.group(1)
                fixes[commitish].append(commit)

            commits[commit.hash] = commit

        for commitish, revert_commit in reverts.items():
            reverted = commits.pop(commitish, None)
            if reverted:
                logger.debug(f'{commitish} fully reverted {reverted}')
            else:
                # The reverted commit is outside of the range, so keep the revert itself
                commits[revert_commit.hash] = revert_commit

        for commitish, fix_commits in fixes.items():
            if commitish in commits:
                hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits)
                logger.info(f'Found fix(es) for {commitish[:HASH_LENGTH]}: {hashes}')
                for fix_commit in fix_commits:
                    # The fix may already be gone (it was fully reverted above), so do not
                    # fail on a missing key
                    commits.pop(fix_commit.hash, None)
            else:
                logger.debug(f'Commit with fixes not in changes: {commitish[:HASH_LENGTH]}')

        return commits, fixes

    def apply_overrides(self, overrides):
        """Apply `add`, `remove` and `change` overrides, then reverse the commit order.

        An override carrying a `when` hash is ignored unless that hash is part
        of this range or equals the start of the range.
        """
        for override in overrides:
            when = override.get('when')
            if when and when not in self and when != self._start:
                logger.debug(f'Ignored {when!r} override')
                continue

            # `remove` and `change` fall back to `when` if no explicit hash was given
            override_hash = override.get('hash') or when
            if override['action'] == 'add':
                commit = Commit(override.get('hash'), override['short'], override.get('authors') or [])
                logger.info(f'ADD    {commit}')
                self._commits_added.append(commit)

            elif override['action'] == 'remove':
                if override_hash in self._commits:
                    logger.info(f'REMOVE {self._commits[override_hash]}')
                    del self._commits[override_hash]

            elif override['action'] == 'change':
                if override_hash not in self._commits:
                    continue
                commit = Commit(override_hash, override['short'], override.get('authors') or [])
                logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')
                self._commits[commit.hash] = commit

        self._commits = dict(reversed(self._commits.items()))

    def groups(self):
        """Return a dict mapping each `CommitGroup` to the `CommitInfo` list parsed from the commits.

        Commits whose subject cannot be parsed are logged and left out.
        """
        group_dict = defaultdict(list)
        for commit in self:
            # Upstream merges get a uniform subject (the commit object itself is updated)
            upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
            if upstream_re:
                commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}'

            match = self.MESSAGE_RE.fullmatch(commit.short)
            if not match:
                logger.error(f'Error parsing short commit message: {commit.short!r}')
                continue

            prefix, sub_details_alt, message, issues = match.groups()
            # Drop the leading `#` of every issue reference
            issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else []

            if prefix:
                # A prefix may name several comma separated targets; the first resolved group wins
                groups, details, sub_details = zip(*map(self.details_from_prefix, prefix.split(',')))
                group = next(iter(filter(None, groups)), None)
                details = ', '.join(unique(details))
                sub_details = list(itertools.chain.from_iterable(sub_details))
            else:
                group = CommitGroup.CORE
                details = None
                sub_details = []

            if sub_details_alt:
                sub_details.append(sub_details_alt)
            sub_details = tuple(unique(sub_details))

            if not group:
                if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
                    group = CommitGroup.EXTRACTOR
                else:
                    group = CommitGroup.POSTPROCESSOR
                logger.warning(f'Failed to map {commit.short!r}, selected {group.name.lower()}')

            commit_info = CommitInfo(
                details, sub_details, message.strip(),
                issues, commit, self._fixes[commit.hash])

            logger.debug(f'Resolved {commit.short!r} to {commit_info!r}')
            group_dict[group].append(commit_info)

        return group_dict

    @staticmethod
    def details_from_prefix(prefix):
        """Split one `group/details:sub:...` prefix into `(group, details, sub_details)`."""
        if not prefix:
            return CommitGroup.CORE, None, ()

        prefix, *sub_details = prefix.split(':')

        group, details = CommitGroup.get(prefix)
        if group is CommitGroup.PRIORITY and details:
            # For priority commits only the part after the nested group is kept as details
            details = details.partition('/')[2].strip()

        if details and '/' in details:
            logger.error(f'Prefix is overnested, using first part: {prefix}')
            details = details.partition('/')[0].strip()

        if details == 'common':
            details = None
        elif group is CommitGroup.NETWORKING and details == 'rh':
            details = 'Request Handler'

        return group, details, sub_details
440
441
def get_new_contributors(contributors_path, commits):
    """Return the commit authors not yet listed in the contributors file.

    Every line of the file is cut at the first ` (` and split on `/` to get
    the known names. Names are compared case-insensitively; the result is
    sorted case-insensitively and holds each new author once.
    """
    known = set()
    if contributors_path.exists():
        for entry in read_file(contributors_path).splitlines():
            names = entry.strip().partition(' (')[0].split('/')
            known.update(name.casefold() for name in names)

    newcomers = set()
    for author in itertools.chain.from_iterable(commit.authors for commit in commits):
        folded = author.casefold()
        if folded in known:
            continue
        # Remember the name so that another spelling of it is not reported again
        known.add(folded)
        newcomers.add(author)

    return sorted(newcomers, key=str.casefold)
459
460
if __name__ == '__main__':
    import argparse

    # Command line interface
    cli = argparse.ArgumentParser(
        description='Create a changelog markdown from a git commit range')
    cli.add_argument(
        'commitish', default='HEAD', nargs='?',
        help='The commitish to create the range from (default: %(default)s)')
    cli.add_argument(
        '-v', '--verbosity', action='count', default=0,
        help='increase verbosity (can be used twice)')
    cli.add_argument(
        '-c', '--contributors', action='store_true',
        help='update CONTRIBUTORS file (default: %(default)s)')
    cli.add_argument(
        '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS',
        help='path to the CONTRIBUTORS file')
    cli.add_argument(
        '--no-override', action='store_true',
        help='skip override json in commit generation (default: %(default)s)')
    cli.add_argument(
        '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json',
        help='path to the changelog_override.json file')
    cli.add_argument(
        '--default-author', default='pukkandan',
        help='the author to use without a author indicator (default: %(default)s)')
    cli.add_argument(
        '--repo', default='yt-dlp/yt-dlp',
        help='the github repository to use for the operations (default: %(default)s)')
    cli.add_argument(
        '--collapsible', action='store_true',
        help='make changelog collapsible (default: %(default)s)')
    args = cli.parse_args()

    # Each -v lowers the threshold by one level, starting from WARNING
    log_level = logging.WARNING - 10 * args.verbosity
    logging.basicConfig(
        datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}',
        level=log_level, style='{', stream=sys.stderr)

    commits = CommitRange(None, args.commitish, args.default_author)

    # Overrides are applied unless disabled; a missing override file only produces a warning
    use_overrides = not args.no_override
    if use_overrides and args.override_path.exists():
        commits.apply_overrides(json.loads(read_file(args.override_path)))
    elif use_overrides:
        logger.warning(f'File {args.override_path.as_posix()} does not exist')

    logger.info(f'Loaded {len(commits)} commits')

    added_contributors = get_new_contributors(args.contributors_path, commits)
    if added_contributors:
        # The file is only appended to when -c/--contributors was passed
        if args.contributors:
            write_file(args.contributors_path, '\n'.join(added_contributors) + '\n', mode='a')
        logger.info(f'New contributors: {", ".join(added_contributors)}')

    print(Changelog(commits.groups(), args.repo, args.collapsible))