]> jfr.im git - yt-dlp.git/blob - devscripts/make_changelog.py
[fd/external] Scope cookies
[yt-dlp.git] / devscripts / make_changelog.py
1 from __future__ import annotations
2
3 # Allow direct execution
4 import os
5 import sys
6
7 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
9 import enum
10 import itertools
11 import json
12 import logging
13 import re
14 from collections import defaultdict
15 from dataclasses import dataclass
16 from functools import lru_cache
17 from pathlib import Path
18
19 from devscripts.utils import read_file, run_process, write_file
20
21 BASE_URL = 'https://github.com'
22 LOCATION_PATH = Path(__file__).parent
23 HASH_LENGTH = 7
24
25 logger = logging.getLogger(__name__)
26
27
class _ClassProperty:
    """Read-only property that is evaluated against the owning class.

    Stacking ``@classmethod`` on top of ``@property`` was deprecated in
    Python 3.11 and stopped working in Python 3.13, so the class-level lookup
    is implemented here as a plain descriptor instead.
    """

    def __init__(self, func):
        self._func = func

    def __get__(self, instance, owner=None):
        # Works for both ``Class.attr`` and ``instance.attr`` access.
        return self._func(owner if owner is not None else type(instance))


class CommitGroup(enum.Enum):
    """Groups that a commit can be filed under; each value is a display title."""

    PRIORITY = 'Important'
    CORE = 'Core'
    EXTRACTOR = 'Extractor'
    DOWNLOADER = 'Downloader'
    POSTPROCESSOR = 'Postprocessor'
    MISC = 'Misc.'

    # A descriptor defined in an Enum body is not turned into a member,
    # so this stays a plain class attribute: ``CommitGroup.ignorable_prefixes``.
    @_ClassProperty
    def ignorable_prefixes(cls):
        """Prefixes that only select a group and are never used as details."""
        return ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream')

    @classmethod
    @lru_cache
    def commit_lookup(cls):
        """Return a (cached) mapping of lowercase commit prefix to its group."""
        return {
            name: group
            for group, names in {
                cls.PRIORITY: {'priority'},
                cls.CORE: {
                    'aes',
                    'cache',
                    'compat_utils',
                    'compat',
                    'cookies',
                    'core',
                    'dependencies',
                    'jsinterp',
                    'outtmpl',
                    'formats',
                    'plugins',
                    'update',
                    'upstream',
                    'utils',
                },
                cls.MISC: {
                    'build',
                    'cleanup',
                    'devscripts',
                    'docs',
                    'misc',
                    'test',
                },
                cls.EXTRACTOR: {'extractor', 'ie'},
                cls.DOWNLOADER: {'downloader', 'fd'},
                cls.POSTPROCESSOR: {'postprocessor', 'pp'},
            }.items()
            for name in names
        }

    @classmethod
    def get(cls, value):
        """Look up the group for prefix `value`; return None when it is unknown."""
        result = cls.commit_lookup().get(value)
        if result:
            logger.debug(f'Mapped {value!r} => {result.name}')
        return result
85
86
@dataclass
class Commit:
    """A single commit: its hash, its subject line and its authors."""

    # Full commit hash; None when no hash is known for the commit
    hash: str | None
    # Subject (first) line of the commit message
    short: str
    # Names of the commit's authors
    authors: list[str]

    def __str__(self):
        # Assemble the optional pieces and join them with single spaces
        pieces = [repr(self.short)]
        if self.hash:
            pieces.append(f'({self.hash[:HASH_LENGTH]})')
        if self.authors:
            pieces.append('by ' + ', '.join(self.authors))
        return ' '.join(pieces)
104
105
@dataclass
class CommitInfo:
    """The parsed pieces of a commit subject line plus the commits tied to it."""

    # Detail heading the change is listed under, if any
    details: str | None
    # Finer-grained labels within `details`
    sub_details: tuple[str, ...]
    # The change description itself
    message: str
    # Referenced issue numbers, without the leading '#'
    issues: list[str]
    # The commit this information was derived from
    commit: Commit
    # Commits recorded as fixes for `commit`
    fixes: list[Commit]

    def key(self):
        """Return the sort key: (lowercased details, sub_details, message)."""
        lowered_details = self.details.lower() if self.details else ''
        return lowered_details, self.sub_details, self.message
117
118
def unique(items):
    """Return the truthy `items`, de-duplicated case-insensitively and sorted.

    Items are compared after stripping whitespace and lowercasing; when
    several items compare equal, the last one encountered is kept.
    """
    by_normalized = {}
    for item in items:
        if not item:
            continue
        by_normalized[item.strip().lower()] = item
    return sorted(by_normalized.values())
121
122
class Changelog:
    """Render grouped `CommitInfo` entries as a markdown changelog."""

    # Matches cleanup messages that are folded into a single "Miscellaneous" entry
    MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)
    # Groups that are emitted before the collapsible section is opened
    ALWAYS_SHOWN = (CommitGroup.PRIORITY,)

    def __init__(self, groups, repo, collapsible=False):
        """
        @param groups       Mapping of `CommitGroup` to a list of `CommitInfo`.
        @param repo         Repository in `owner/name` form, used to build links.
        @param collapsible  Whether to wrap the changelog in a `<details>` element.
        """
        self._groups = groups
        self._repo = repo
        self._collapsible = collapsible

    def __str__(self):
        # Nesting is built with tab characters. Expand each to four spaces: a
        # single space is less than the parent bullet's content offset, which
        # would make markdown render nested entries as siblings.
        return '\n'.join(self._format_groups(self._groups)).replace('\t', '    ')

    def _format_groups(self, groups):
        """Yield the rendered section of every non-empty group, in enum order."""
        first = True
        for item in CommitGroup:
            # Open the collapsible section right before the first group
            # that is not in ALWAYS_SHOWN
            if self._collapsible and item not in self.ALWAYS_SHOWN and first:
                first = False
                yield '\n<details><summary><h3>Changelog</h3></summary>\n'

            group = groups[item]
            if group:
                yield self.format_module(item.value, group)

        if self._collapsible:
            yield '\n</details>'

    def format_module(self, name, group):
        """Return the section for `group`, headed by `name` when it is given."""
        result = f'\n#### {name} changes\n' if name else '\n'
        return result + '\n'.join(self._format_group(group))

    def _format_group(self, group):
        """Yield the bullet lines of one group, nested by details and sub details."""
        sorted_group = sorted(group, key=CommitInfo.key)
        detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower())
        for _, items in detail_groups:
            items = list(items)
            details = items[0].details

            if details == 'cleanup':
                items = self._prepare_cleanup_misc_items(items)

            prefix = '-'
            if details:
                if len(items) == 1:
                    # A lone entry goes on the same line as its heading
                    prefix = f'- **{details}**:'
                else:
                    yield f'- **{details}**'
                    prefix = '\t-'

            sub_detail_groups = itertools.groupby(items, lambda item: tuple(map(str.lower, item.sub_details)))
            for sub_details, entries in sub_detail_groups:
                if not sub_details:
                    for entry in entries:
                        yield f'{prefix} {self.format_single_change(entry)}'
                    continue

                entries = list(entries)
                sub_prefix = f'{prefix} {", ".join(entries[0].sub_details)}'
                if len(entries) == 1:
                    yield f'{sub_prefix}: {self.format_single_change(entries[0])}'
                    continue

                yield sub_prefix
                for entry in entries:
                    yield f'\t{prefix} {self.format_single_change(entry)}'

    def _prepare_cleanup_misc_items(self, items):
        """Merge cleanup items matching `MISC_RE` into one entry per author set."""
        cleanup_misc_items = defaultdict(list)
        sorted_items = []
        for item in items:
            if self.MISC_RE.search(item.message):
                cleanup_misc_items[tuple(item.commit.authors)].append(item)
            else:
                sorted_items.append(item)

        for commit_infos in cleanup_misc_items.values():
            # The message of the merged entry is the list of linked commit hashes
            sorted_items.append(CommitInfo(
                'cleanup', ('Miscellaneous',), ', '.join(
                    self._format_message_link(None, info.commit.hash).strip()
                    for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
                [], Commit(None, '', commit_infos[0].commit.authors), []))

        return sorted_items

    def format_single_change(self, info):
        """Return the markdown text for one `CommitInfo` entry."""
        message = self._format_message_link(info.message, info.commit.hash)
        # Each annotation is inserted in front of the first newline,
        # i.e. appended to the first line of the message
        if info.issues:
            message = message.replace('\n', f' ({self._format_issues(info.issues)})\n', 1)

        if info.commit.authors:
            message = message.replace('\n', f' by {self._format_authors(info.commit.authors)}\n', 1)

        if info.fixes:
            fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)

            authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold)
            if authors != info.commit.authors:
                fix_message = f'{fix_message} by {self._format_authors(authors)}'

            message = message.replace('\n', f' (With fixes in {fix_message})\n', 1)

        # Drop the trailing newline added by `_format_message_link`
        return message[:-1]

    def _format_message_link(self, message, hash):
        """Return `message` followed by a newline, linked to the commit if `hash` is set.

        When `message` is empty the shortened hash is used as the text.
        Only the first line of the text ends up inside the link.
        """
        assert message or hash, 'Improperly defined commit message or override'
        message = message if message else hash[:HASH_LENGTH]
        if not hash:
            return f'{message}\n'
        return f'[{message}\n'.replace('\n', f']({self.repo_url}/commit/{hash})\n', 1)

    def _format_issues(self, issues):
        """Return `issues` as a comma separated list of markdown issue links."""
        return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)

    @staticmethod
    def _format_authors(authors):
        """Return `authors` as a comma separated list of markdown profile links."""
        return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors)

    @property
    def repo_url(self):
        """Base URL of the repository the changelog links to."""
        return f'{BASE_URL}/{self._repo}'
242
243
class CommitRange:
    """The commits of a git revision range, with fixes, reverts and overrides applied."""

    COMMAND = 'git'
    COMMIT_SEPARATOR = '-----'

    AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
    # NB: The pattern is compiled with re.VERBOSE, which ignores unescaped
    # whitespace; every literal space therefore has to be written as `\ `
    MESSAGE_RE = re.compile(r'''
        (?:\[(?P<prefix>[^\]]+)\]\ )?
        (?:(?P<sub_details>`?[^:`]+`?):\ )?
        (?P<message>.+?)
        (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
        ''', re.VERBOSE | re.DOTALL)
    EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
    REVERT_RE = re.compile(r'(?i:Revert)\s+([\da-f]{40})')
    FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert)\s+([\da-f]{40})')
    UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')

    def __init__(self, start, end, default_author=None):
        """
        @param start           Commitish the range starts after, or a falsy value
                               to only pass `end` to `git log`.
        @param end             Commitish the range ends at.
        @param default_author  Author used for commits without an author indicator.
        """
        self._start, self._end = start, end
        self._commits, self._fixes = self._get_commits_and_fixes(default_author)
        self._commits_added = []

    def __iter__(self):
        return iter(itertools.chain(self._commits.values(), self._commits_added))

    def __len__(self):
        return len(self._commits) + len(self._commits_added)

    def __contains__(self, commit):
        # Accepts either a `Commit` or a commit hash
        if isinstance(commit, Commit):
            if not commit.hash:
                return False
            commit = commit.hash

        return commit in self._commits

    def _get_commits_and_fixes(self, default_author):
        """Parse `git log` output into the commits of the range and their fixes.

        @returns  A tuple `(commits, fixes)`: `commits` maps hash to `Commit`,
                  `fixes` maps the hash of a fixed commit to its fix commits.
        """
        result = run_process(
            self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
            f'{self._start}..{self._end}' if self._start else self._end).stdout

        commits, reverts = {}, {}
        fixes = defaultdict(list)
        lines = iter(result.splitlines(False))
        # Each record is: hash line, subject line, body lines, separator line
        for i, commit_hash in enumerate(lines):
            short = next(lines)
            skip = short.startswith('Release ') or short == '[version] update'

            authors = [default_author] if default_author else []
            # Consume the body up to the separator, looking for an author indicator
            for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
                match = self.AUTHOR_INDICATOR_RE.match(line)
                if match:
                    authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)

            commit = Commit(commit_hash, short, authors)
            if skip and (self._start or not i):
                logger.debug(f'Skipped commit: {commit}')
                continue
            elif skip:
                # Without an explicit start, the range ends at the previous release
                logger.debug(f'Reached Release commit, breaking: {commit}')
                break

            revert_match = self.REVERT_RE.fullmatch(commit.short)
            if revert_match:
                reverts[revert_match.group(1)] = commit
                continue

            fix_match = self.FIXES_RE.search(commit.short)
            if fix_match:
                commitish = fix_match.group(1)
                fixes[commitish].append(commit)

            commits[commit.hash] = commit

        for commitish, revert_commit in reverts.items():
            reverted = commits.pop(commitish, None)
            if reverted:
                # Both the revert and the reverted commit are left out
                logger.debug(f'{revert_commit} fully reverted {reverted}')
            else:
                commits[revert_commit.hash] = revert_commit

        for commitish, fix_commits in fixes.items():
            if commitish in commits:
                hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits)
                logger.info(f'Found fix(es) for {commitish[:HASH_LENGTH]}: {hashes}')
                for fix_commit in fix_commits:
                    # The fix may already have been removed by a revert
                    commits.pop(fix_commit.hash, None)
            else:
                logger.debug(f'Commit with fixes not in changes: {commitish[:HASH_LENGTH]}')

        return commits, fixes

    def apply_overrides(self, overrides):
        """Apply `add`/`remove`/`change` overrides, then reverse the commit order.

        An override with a `when` hash is ignored unless that hash is part of
        the range or equal to its start.
        """
        for override in overrides:
            when = override.get('when')
            if when and when not in self and when != self._start:
                logger.debug(f'Ignored {when!r}, not in commits {self._start!r}')
                continue

            override_hash = override.get('hash') or when
            if override['action'] == 'add':
                commit = Commit(override.get('hash'), override['short'], override.get('authors') or [])
                logger.info(f'ADD {commit}')
                self._commits_added.append(commit)

            elif override['action'] == 'remove':
                if override_hash in self._commits:
                    logger.info(f'REMOVE {self._commits[override_hash]}')
                    del self._commits[override_hash]

            elif override['action'] == 'change':
                if override_hash not in self._commits:
                    continue
                commit = Commit(override_hash, override['short'], override.get('authors') or [])
                logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')
                self._commits[commit.hash] = commit

        self._commits = {key: value for key, value in reversed(self._commits.items())}

    def groups(self):
        """Return a mapping of `CommitGroup` to the list of its `CommitInfo`.

        Commits whose subject line cannot be parsed are logged and left out.
        """
        group_dict = defaultdict(list)
        for commit in self:
            upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
            if upstream_re:
                commit.short = f'[core/upstream] Merged with youtube-dl {upstream_re.group(1)}'

            match = self.MESSAGE_RE.fullmatch(commit.short)
            if not match:
                logger.error(f'Error parsing short commit message: {commit.short!r}')
                continue

            prefix, sub_details_alt, message, issues = match.groups()
            issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else []

            if prefix:
                # A prefix may name several comma separated modules;
                # the first one that maps to a group decides the group
                groups, details, sub_details = zip(*map(self.details_from_prefix, prefix.split(',')))
                group = next(iter(filter(None, groups)), None)
                details = ', '.join(unique(details))
                sub_details = list(itertools.chain.from_iterable(sub_details))
            else:
                group = CommitGroup.CORE
                details = None
                sub_details = []

            if sub_details_alt:
                sub_details.append(sub_details_alt)
            sub_details = tuple(unique(sub_details))

            if not group:
                if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
                    group = CommitGroup.EXTRACTOR
                else:
                    group = CommitGroup.POSTPROCESSOR
                logger.warning(f'Failed to map {commit.short!r}, selected {group.name.lower()}')

            commit_info = CommitInfo(
                details, sub_details, message.strip(),
                issues, commit, self._fixes[commit.hash])

            logger.debug(f'Resolved {commit.short!r} to {commit_info!r}')
            group_dict[group].append(commit_info)

        return group_dict

    @staticmethod
    def details_from_prefix(prefix):
        """Split one commit prefix into `(group, details, sub_details)`.

        `group` is None when the prefix does not map to any `CommitGroup`.
        """
        if not prefix:
            return CommitGroup.CORE, None, ()

        prefix, _, details = prefix.partition('/')
        prefix = prefix.strip()
        details = details.strip()

        group = CommitGroup.get(prefix.lower())
        if group is CommitGroup.PRIORITY:
            prefix, _, details = details.partition('/')

        # Compare case-insensitively, the same way the group was looked up
        if not details and prefix and prefix.lower() not in CommitGroup.ignorable_prefixes:
            logger.debug(f'Replaced details with {prefix!r}')
            details = prefix or None

        if details == 'common':
            details = None

        if details:
            details, *sub_details = details.split(':')
        else:
            sub_details = []

        return group, details, sub_details
433
434
def get_new_contributors(contributors_path, commits):
    """Return the commit authors that are not listed in the contributors file.

    Names are compared case-insensitively. In the contributors file, anything
    from ' (' onwards on a line is ignored and '/' separates several names.
    A missing file is treated as an empty list of known contributors.

    @param contributors_path  Path of the contributors file.
    @param commits            Iterable of objects with an `authors` list.
    @returns                  New author names, sorted case-insensitively.
    """
    known = set()
    if contributors_path.exists():
        for raw_line in read_file(contributors_path).splitlines():
            names = raw_line.strip().partition(' (')[0]
            known.update(name.casefold() for name in names.split('/'))

    fresh = set()
    for author in (name for commit in commits for name in commit.authors):
        folded = author.casefold()
        if folded in known:
            continue
        # Remember the name so that other spellings of it are not added again
        known.add(folded)
        fresh.add(author)

    return sorted(fresh, key=str.casefold)
452
453
if __name__ == '__main__':
    import argparse

    # Command line interface
    parser = argparse.ArgumentParser(description='Create a changelog markdown from a git commit range')
    parser.add_argument(
        'commitish', nargs='?', default='HEAD',
        help='The commitish to create the range from (default: %(default)s)')
    parser.add_argument(
        '-v', '--verbosity', default=0, action='count',
        help='increase verbosity (can be used twice)')
    parser.add_argument(
        '-c', '--contributors', action='store_true',
        help='update CONTRIBUTORS file (default: %(default)s)')
    parser.add_argument(
        '--contributors-path', default=LOCATION_PATH.parent / 'CONTRIBUTORS', type=Path,
        help='path to the CONTRIBUTORS file')
    parser.add_argument(
        '--no-override', action='store_true',
        help='skip override json in commit generation (default: %(default)s)')
    parser.add_argument(
        '--override-path', default=LOCATION_PATH / 'changelog_override.json', type=Path,
        help='path to the changelog_override.json file')
    parser.add_argument(
        '--default-author', default='pukkandan',
        help='the author to use without a author indicator (default: %(default)s)')
    parser.add_argument(
        '--repo', default='yt-dlp/yt-dlp',
        help='the github repository to use for the operations (default: %(default)s)')
    parser.add_argument(
        '--collapsible', action='store_true',
        help='make changelog collapsible (default: %(default)s)')
    args = parser.parse_args()

    # Every `-v` lowers the threshold by one logging level, starting at WARNING
    log_level = logging.WARNING - 10 * args.verbosity
    logging.basicConfig(
        stream=sys.stderr, level=log_level, style='{',
        format='{asctime} | {levelname:<8} | {message}', datefmt='%Y-%m-%d %H-%M-%S')

    commits = CommitRange(None, args.commitish, args.default_author)

    # Apply the overrides unless disabled; a missing override file only warns
    use_overrides = not args.no_override
    if use_overrides and args.override_path.exists():
        commits.apply_overrides(json.loads(read_file(args.override_path)))
    elif use_overrides:
        logger.warning(f'File {args.override_path.as_posix()} does not exist')

    logger.info(f'Loaded {len(commits)} commits')

    new_contributors = get_new_contributors(args.contributors_path, commits)
    if new_contributors:
        if args.contributors:
            # Append the new names to the file, one per line
            appended_text = '\n'.join(new_contributors) + '\n'
            write_file(args.contributors_path, appended_text, mode='a')
        logger.info(f'New contributors: {", ".join(new_contributors)}')

    changelog = Changelog(commits.groups(), args.repo, args.collapsible)
    print(changelog)