# devscripts/make_changelog.py
# Generate a markdown changelog from a git commit range.
1 from __future__ import annotations
2
3 import enum
4 import itertools
5 import json
6 import logging
7 import re
8 import subprocess
9 import sys
10 from collections import defaultdict
11 from dataclasses import dataclass
12 from functools import lru_cache
13 from pathlib import Path
14
15 BASE_URL = 'https://github.com'
16 LOCATION_PATH = Path(__file__).parent
17
18 logger = logging.getLogger(__name__)
19
20
21 class CommitGroup(enum.Enum):
22 UPSTREAM = None
23 PRIORITY = 'Important'
24 CORE = 'Core'
25 EXTRACTOR = 'Extractor'
26 DOWNLOADER = 'Downloader'
27 POSTPROCESSOR = 'Postprocessor'
28 MISC = 'Misc.'
29
30 @classmethod
31 @lru_cache
32 def commit_lookup(cls):
33 return {
34 name: group
35 for group, names in {
36 cls.PRIORITY: {''},
37 cls.UPSTREAM: {'upstream'},
38 cls.CORE: {
39 'aes',
40 'cache',
41 'compat_utils',
42 'compat',
43 'cookies',
44 'core',
45 'dependencies',
46 'jsinterp',
47 'outtmpl',
48 'plugins',
49 'update',
50 'utils',
51 },
52 cls.MISC: {
53 'build',
54 'cleanup',
55 'devscripts',
56 'docs',
57 'misc',
58 'test',
59 },
60 cls.EXTRACTOR: {'extractor', 'extractors'},
61 cls.DOWNLOADER: {'downloader'},
62 cls.POSTPROCESSOR: {'postprocessor'},
63 }.items()
64 for name in names
65 }
66
67 @classmethod
68 def get(cls, value):
69 result = cls.commit_lookup().get(value)
70 if result:
71 logger.debug(f'Mapped {value!r} => {result.name}')
72 return result
73
74
75 @dataclass
76 class Commit:
77 hash: str | None
78 short: str
79 authors: list[str]
80
81 def __str__(self):
82 result = f'{self.short!r}'
83
84 if self.hash:
85 result += f' ({self.hash[:7]})'
86
87 if self.authors:
88 authors = ', '.join(self.authors)
89 result += f' by {authors}'
90
91 return result
92
93
94 @dataclass
95 class CommitInfo:
96 details: str | None
97 sub_details: tuple[str, ...]
98 message: str
99 issues: list[str]
100 commit: Commit
101 fixes: list[Commit]
102
103 def key(self):
104 return ((self.details or '').lower(), self.sub_details, self.message)
105
106
107 class Changelog:
108 MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)
109
110 def __init__(self, groups, repo):
111 self._groups = groups
112 self._repo = repo
113
114 def __str__(self):
115 return '\n'.join(self._format_groups(self._groups)).replace('\t', ' ')
116
117 def _format_groups(self, groups):
118 for item in CommitGroup:
119 group = groups[item]
120 if group:
121 yield self.format_module(item.value, group)
122
123 def format_module(self, name, group):
124 result = f'\n#### {name} changes\n' if name else '\n'
125 return result + '\n'.join(self._format_group(group))
126
127 def _format_group(self, group):
128 sorted_group = sorted(group, key=CommitInfo.key)
129 detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower())
130 for _, items in detail_groups:
131 items = list(items)
132 details = items[0].details
133 if not details:
134 indent = ''
135 else:
136 yield f'- {details}'
137 indent = '\t'
138
139 if details == 'cleanup':
140 items, cleanup_misc_items = self._filter_cleanup_misc_items(items)
141
142 sub_detail_groups = itertools.groupby(items, lambda item: tuple(map(str.lower, item.sub_details)))
143 for sub_details, entries in sub_detail_groups:
144 if not sub_details:
145 for entry in entries:
146 yield f'{indent}- {self.format_single_change(entry)}'
147 continue
148
149 entries = list(entries)
150 prefix = f'{indent}- {", ".join(entries[0].sub_details)}'
151 if len(entries) == 1:
152 yield f'{prefix}: {self.format_single_change(entries[0])}'
153 continue
154
155 yield prefix
156 for entry in entries:
157 yield f'{indent}\t- {self.format_single_change(entry)}'
158
159 if details == 'cleanup' and cleanup_misc_items:
160 yield from self._format_cleanup_misc_sub_group(cleanup_misc_items)
161
162 def _filter_cleanup_misc_items(self, items):
163 cleanup_misc_items = defaultdict(list)
164 non_misc_items = []
165 for item in items:
166 if self.MISC_RE.search(item.message):
167 cleanup_misc_items[tuple(item.commit.authors)].append(item)
168 else:
169 non_misc_items.append(item)
170
171 return non_misc_items, cleanup_misc_items
172
173 def _format_cleanup_misc_sub_group(self, group):
174 prefix = '\t- Miscellaneous'
175 if len(group) == 1:
176 yield f'{prefix}: {next(self._format_cleanup_misc_items(group))}'
177 return
178
179 yield prefix
180 for message in self._format_cleanup_misc_items(group):
181 yield f'\t\t- {message}'
182
183 def _format_cleanup_misc_items(self, group):
184 for authors, infos in group.items():
185 message = ', '.join(
186 self._format_message_link(None, info.commit.hash)
187 for info in sorted(infos, key=lambda item: item.commit.hash or ''))
188 yield f'{message} by {self._format_authors(authors)}'
189
190 def format_single_change(self, info):
191 message = self._format_message_link(info.message, info.commit.hash)
192 if info.issues:
193 message = f'{message} ({self._format_issues(info.issues)})'
194
195 if info.commit.authors:
196 message = f'{message} by {self._format_authors(info.commit.authors)}'
197
198 if info.fixes:
199 fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)
200
201 authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold)
202 if authors != info.commit.authors:
203 fix_message = f'{fix_message} by {self._format_authors(authors)}'
204
205 message = f'{message} (With fixes in {fix_message})'
206
207 return message
208
209 def _format_message_link(self, message, hash):
210 assert message or hash, 'Improperly defined commit message or override'
211 message = message if message else hash[:7]
212 return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message
213
214 def _format_issues(self, issues):
215 return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)
216
217 @staticmethod
218 def _format_authors(authors):
219 return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors)
220
221 @property
222 def repo_url(self):
223 return f'{BASE_URL}/{self._repo}'
224
225
226 class CommitRange:
227 COMMAND = 'git'
228 COMMIT_SEPARATOR = '-----'
229
230 AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
231 MESSAGE_RE = re.compile(r'''
232 (?:\[
233 (?P<prefix>[^\]\/:,]+)
234 (?:/(?P<details>[^\]:,]+))?
235 (?:[:,](?P<sub_details>[^\]]+))?
236 \]\ )?
237 (?:(?P<sub_details_alt>`?[^:`]+`?): )?
238 (?P<message>.+?)
239 (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
240 ''', re.VERBOSE | re.DOTALL)
241 EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
242 FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert)\s+([\da-f]{40})')
243 UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')
244
245 def __init__(self, start, end, default_author=None) -> None:
246 self._start = start
247 self._end = end
248 self._commits, self._fixes = self._get_commits_and_fixes(default_author)
249 self._commits_added = []
250
251 @classmethod
252 def from_single(cls, commitish='HEAD', default_author=None):
253 start_commitish = cls.get_prev_tag(commitish)
254 end_commitish = cls.get_next_tag(commitish)
255 if start_commitish == end_commitish:
256 start_commitish = cls.get_prev_tag(f'{commitish}~')
257 logger.info(f'Determined range from {commitish!r}: {start_commitish}..{end_commitish}')
258 return cls(start_commitish, end_commitish, default_author)
259
260 @classmethod
261 def get_prev_tag(cls, commitish):
262 command = [cls.COMMAND, 'describe', '--tags', '--abbrev=0', '--exclude=*[^0-9.]*', commitish]
263 return subprocess.check_output(command, text=True).strip()
264
265 @classmethod
266 def get_next_tag(cls, commitish):
267 result = subprocess.run(
268 [cls.COMMAND, 'describe', '--contains', '--abbrev=0', commitish],
269 stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True)
270 if result.returncode:
271 return 'HEAD'
272
273 return result.stdout.partition('~')[0].strip()
274
275 def __iter__(self):
276 return iter(itertools.chain(self._commits.values(), self._commits_added))
277
278 def __len__(self):
279 return len(self._commits) + len(self._commits_added)
280
281 def __contains__(self, commit):
282 if isinstance(commit, Commit):
283 if not commit.hash:
284 return False
285 commit = commit.hash
286
287 return commit in self._commits
288
289 def _is_ancestor(self, commitish):
290 return bool(subprocess.call(
291 [self.COMMAND, 'merge-base', '--is-ancestor', commitish, self._start]))
292
293 def _get_commits_and_fixes(self, default_author):
294 result = subprocess.check_output([
295 self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
296 f'{self._start}..{self._end}'], text=True)
297
298 commits = {}
299 fixes = defaultdict(list)
300 lines = iter(result.splitlines(False))
301 for line in lines:
302 commit_hash = line
303 short = next(lines)
304 skip = short.startswith('Release ') or short == '[version] update'
305
306 authors = [default_author] if default_author else []
307 for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
308 match = self.AUTHOR_INDICATOR_RE.match(line)
309 if match:
310 authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)
311
312 commit = Commit(commit_hash, short, authors)
313 if skip:
314 logger.debug(f'Skipped commit: {commit}')
315 continue
316
317 fix_match = self.FIXES_RE.search(commit.short)
318 if fix_match:
319 commitish = fix_match.group(1)
320 fixes[commitish].append(commit)
321
322 commits[commit.hash] = commit
323
324 for commitish, fix_commits in fixes.items():
325 if commitish in commits:
326 hashes = ', '.join(commit.hash[:7] for commit in fix_commits)
327 logger.info(f'Found fix(es) for {commitish[:7]}: {hashes}')
328 for fix_commit in fix_commits:
329 del commits[fix_commit.hash]
330 else:
331 logger.debug(f'Commit with fixes not in changes: {commitish[:7]}')
332
333 return commits, fixes
334
335 def apply_overrides(self, overrides):
336 for override in overrides:
337 when = override.get('when')
338 if when and when not in self and when != self._start:
339 logger.debug(f'Ignored {when!r}, not in commits {self._start!r}')
340 continue
341
342 override_hash = override.get('hash')
343 if override['action'] == 'add':
344 commit = Commit(override.get('hash'), override['short'], override.get('authors') or [])
345 logger.info(f'ADD {commit}')
346 self._commits_added.append(commit)
347
348 elif override['action'] == 'remove':
349 if override_hash in self._commits:
350 logger.info(f'REMOVE {self._commits[override_hash]}')
351 del self._commits[override_hash]
352
353 elif override['action'] == 'change':
354 if override_hash not in self._commits:
355 continue
356 commit = Commit(override_hash, override['short'], override['authors'])
357 logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')
358 self._commits[commit.hash] = commit
359
360 self._commits = {key: value for key, value in reversed(self._commits.items())}
361
362 def groups(self):
363 groups = defaultdict(list)
364 for commit in self:
365 upstream_re = self.UPSTREAM_MERGE_RE.match(commit.short)
366 if upstream_re:
367 commit.short = f'[upstream] Merge up to youtube-dl {upstream_re.group(1)}'
368
369 match = self.MESSAGE_RE.fullmatch(commit.short)
370 if not match:
371 logger.error(f'Error parsing short commit message: {commit.short!r}')
372 continue
373
374 prefix, details, sub_details, sub_details_alt, message, issues = match.groups()
375 group = None
376 if prefix:
377 if prefix == 'priority':
378 prefix, _, details = (details or '').partition('/')
379 logger.debug(f'Priority: {message!r}')
380 group = CommitGroup.PRIORITY
381
382 if not details and prefix:
383 if prefix not in ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream'):
384 logger.debug(f'Replaced details with {prefix!r}')
385 details = prefix or None
386
387 if details == 'common':
388 details = None
389
390 if details:
391 details = details.strip()
392
393 else:
394 group = CommitGroup.CORE
395
396 sub_details = f'{sub_details or ""},{sub_details_alt or ""}'.replace(':', ',')
397 sub_details = tuple(filter(None, map(str.strip, sub_details.split(','))))
398
399 issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else []
400
401 if not group:
402 group = CommitGroup.get(prefix.lower())
403 if not group:
404 if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
405 group = CommitGroup.EXTRACTOR
406 else:
407 group = CommitGroup.POSTPROCESSOR
408 logger.warning(f'Failed to map {commit.short!r}, selected {group.name}')
409
410 commit_info = CommitInfo(
411 details, sub_details, message.strip(),
412 issues, commit, self._fixes[commit.hash])
413 logger.debug(f'Resolved {commit.short!r} to {commit_info!r}')
414 groups[group].append(commit_info)
415
416 return groups
417
418
419 def get_new_contributors(contributors_path, commits):
420 contributors = set()
421 if contributors_path.exists():
422 with contributors_path.open() as file:
423 for line in filter(None, map(str.strip, file)):
424 author, _, _ = line.partition(' (')
425 authors = author.split('/')
426 contributors.update(map(str.casefold, authors))
427
428 new_contributors = set()
429 for commit in commits:
430 for author in commit.authors:
431 author_folded = author.casefold()
432 if author_folded not in contributors:
433 contributors.add(author_folded)
434 new_contributors.add(author)
435
436 return sorted(new_contributors, key=str.casefold)
437
438
439 if __name__ == '__main__':
440 import argparse
441
442 parser = argparse.ArgumentParser(
443 description='Create a changelog markdown from a git commit range')
444 parser.add_argument(
445 'commitish', default='HEAD', nargs='?',
446 help='The commitish to create the range from (default: %(default)s)')
447 parser.add_argument(
448 '-v', '--verbosity', action='count', default=0,
449 help='increase verbosity (can be used twice)')
450 parser.add_argument(
451 '-c', '--contributors', action='store_true',
452 help='update CONTRIBUTORS file (default: %(default)s)')
453 parser.add_argument(
454 '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS',
455 help='path to the CONTRIBUTORS file')
456 parser.add_argument(
457 '--no-override', action='store_true',
458 help='skip override json in commit generation (default: %(default)s)')
459 parser.add_argument(
460 '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json',
461 help='path to the changelog_override.json file')
462 parser.add_argument(
463 '--default-author', default='pukkandan',
464 help='the author to use without a author indicator (default: %(default)s)')
465 parser.add_argument(
466 '--repo', default='yt-dlp/yt-dlp',
467 help='the github repository to use for the operations (default: %(default)s)')
468 args = parser.parse_args()
469
470 logging.basicConfig(
471 datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}',
472 level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr)
473
474 commits = CommitRange.from_single(args.commitish, args.default_author)
475
476 if not args.no_override:
477 if args.override_path.exists():
478 with args.override_path.open() as file:
479 overrides = json.load(file)
480 commits.apply_overrides(overrides)
481 else:
482 logger.warning(f'File {args.override_path.as_posix()} does not exist')
483
484 logger.info(f'Loaded {len(commits)} commits')
485
486 new_contributors = get_new_contributors(args.contributors_path, commits)
487 if new_contributors:
488 if args.contributors:
489 with args.contributors_path.open('a') as file:
490 file.writelines(f'{contributor}\n' for contributor in new_contributors)
491 logger.info(f'New contributors: {", ".join(new_contributors)}')
492
493 print(Changelog(commits.groups(), args.repo))