]> jfr.im git - yt-dlp.git/blob - devscripts/make_changelog.py
[cleanup] Misc
[yt-dlp.git] / devscripts / make_changelog.py
1 from __future__ import annotations
2
3 # Allow direct execution
4 import os
5 import sys
6
7 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
9 import enum
10 import itertools
11 import json
12 import logging
13 import re
14 from collections import defaultdict
15 from dataclasses import dataclass
16 from functools import lru_cache
17 from pathlib import Path
18
19 from devscripts.utils import read_file, run_process, write_file
20
# Root URL that author, commit and issue links are built from
BASE_URL = 'https://github.com'
# Directory containing this script
LOCATION_PATH = Path(__file__).parent
# Number of leading characters kept when a commit hash is abbreviated
HASH_LENGTH = 7

# Module-level logger
logger = logging.getLogger(__name__)
26
27
class CommitGroup(enum.Enum):
    """Changelog sections a commit can be filed under.

    Each member's value is the title of its section; ``UPSTREAM`` carries
    ``None`` instead of a title.
    """

    UPSTREAM = None
    PRIORITY = 'Important'
    CORE = 'Core'
    EXTRACTOR = 'Extractor'
    DOWNLOADER = 'Downloader'
    POSTPROCESSOR = 'Postprocessor'
    MISC = 'Misc.'

    @classmethod
    @lru_cache
    def commit_lookup(cls):
        """Return the mapping from commit prefix to group (built once, then cached)."""
        prefixes_by_group = {
            cls.PRIORITY: ('',),
            cls.UPSTREAM: ('upstream',),
            cls.CORE: (
                'aes',
                'cache',
                'compat_utils',
                'compat',
                'cookies',
                'core',
                'dependencies',
                'jsinterp',
                'outtmpl',
                'plugins',
                'update',
                'utils',
            ),
            cls.MISC: (
                'build',
                'cleanup',
                'devscripts',
                'docs',
                'misc',
                'test',
            ),
            cls.EXTRACTOR: ('extractor', 'extractors'),
            cls.DOWNLOADER: ('downloader',),
            cls.POSTPROCESSOR: ('postprocessor',),
        }

        lookup = {}
        for group, prefixes in prefixes_by_group.items():
            for prefix in prefixes:
                lookup[prefix] = group
        return lookup

    @classmethod
    def get(cls, value):
        """Return the group registered for the prefix ``value``, or ``None`` if there is none."""
        group = cls.commit_lookup().get(value)
        if not group:
            return group

        logger.debug(f'Mapped {value!r} => {group.name}')
        return group
80
81
82 @dataclass
83 class Commit:
84 hash: str | None
85 short: str
86 authors: list[str]
87
88 def __str__(self):
89 result = f'{self.short!r}'
90
91 if self.hash:
92 result += f' ({self.hash[:HASH_LENGTH]})'
93
94 if self.authors:
95 authors = ', '.join(self.authors)
96 result += f' by {authors}'
97
98 return result
99
100
@dataclass
class CommitInfo:
    """A parsed commit message together with the commit and the commits that fix it."""

    details: str | None
    sub_details: tuple[str, ...]
    message: str
    issues: list[str]
    commit: Commit
    fixes: list[Commit]

    def key(self):
        """Return the sort key: lowercased details, then sub-details, then message."""
        lowered_details = self.details.lower() if self.details else ''
        return (lowered_details, self.sub_details, self.message)
112
113
class Changelog:
    """Render grouped commits as a markdown changelog.

    Args:
        groups: Mapping from ``CommitGroup`` member to the entries of that
            section (objects exposing the ``CommitInfo`` attributes).
        repo: Repository slug appended to ``BASE_URL`` to build commit and
            issue links.
    """

    # A ``cleanup`` entry whose message matches is folded into the "Miscellaneous" line
    MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)

    # Nesting is tracked with tabs while formatting and expanded to this on output.
    # Markdown only nests a list item that is indented past its parent's marker,
    # so a single space per level would flatten every sub-list.
    _INDENT = '    '

    def __init__(self, groups, repo):
        self._groups = groups
        self._repo = repo

    def __str__(self):
        """Return the whole changelog as markdown with tabs expanded to spaces."""
        return '\n'.join(self._format_groups(self._groups)).replace('\t', self._INDENT)

    def _format_groups(self, groups):
        """Yield one rendered section per non-empty group, in ``CommitGroup`` order."""
        for item in CommitGroup:
            # `.get` so that a plain dict lacking some groups is accepted as well
            group = groups.get(item)
            if group:
                yield self.format_module(item.value, group)

    def format_module(self, name, group):
        """Return one section: a ``#### <name> changes`` heading (omitted when
        ``name`` is falsy) followed by the formatted entries of ``group``."""
        result = f'\n#### {name} changes\n' if name else '\n'
        return result + '\n'.join(self._format_group(group))

    @staticmethod
    def _details_key(item):
        """Case-insensitive grouping key for an entry's details."""
        return (item.details or '').lower()

    @staticmethod
    def _sub_details_key(item):
        """Case-insensitive grouping key for an entry's sub-details."""
        return tuple(map(str.lower, item.sub_details))

    def _format_group(self, group):
        """Yield the markdown lines for the entries of a single section.

        Entries are grouped by details and then by sub-details, both compared
        case-insensitively. Nesting is expressed with leading tabs.
        """
        # groupby() only merges adjacent items, so the sort key has to use
        # exactly the same (lowercased) keys that the grouping uses below
        sorted_group = sorted(
            group, key=lambda item: (self._details_key(item), self._sub_details_key(item), item.message))

        for details_key, items in itertools.groupby(sorted_group, self._details_key):
            items = list(items)
            details = items[0].details
            if details:
                yield f'- {details}'
                indent = '\t'
            else:
                indent = ''

            cleanup_misc_items = {}
            if details_key == 'cleanup':
                items, cleanup_misc_items = self._filter_cleanup_misc_items(items)

            for sub_details, entries in itertools.groupby(items, self._sub_details_key):
                entries = list(entries)
                if not sub_details:
                    for entry in entries:
                        yield f'{indent}- {self.format_single_change(entry)}'
                    continue

                prefix = f'{indent}- {", ".join(entries[0].sub_details)}'
                if len(entries) == 1:
                    # A lone entry is written on the same line as its sub-details
                    yield f'{prefix}: {self.format_single_change(entries[0])}'
                    continue

                yield prefix
                for entry in entries:
                    yield f'{indent}\t- {self.format_single_change(entry)}'

            if cleanup_misc_items:
                yield from self._format_cleanup_misc_sub_group(cleanup_misc_items)

    def _filter_cleanup_misc_items(self, items):
        """Split ``items`` by whether their message matches ``MISC_RE``.

        Returns:
            ``(non_misc_items, cleanup_misc_items)`` where the latter maps a
            tuple of authors to the matching entries of those authors.
        """
        cleanup_misc_items = defaultdict(list)
        non_misc_items = []
        for item in items:
            if self.MISC_RE.search(item.message):
                cleanup_misc_items[tuple(item.commit.authors)].append(item)
            else:
                non_misc_items.append(item)

        return non_misc_items, cleanup_misc_items

    def _format_cleanup_misc_sub_group(self, group):
        """Yield the "Miscellaneous" line(s) for the folded cleanup entries."""
        prefix = '\t- Miscellaneous'
        if len(group) == 1:
            yield f'{prefix}: {next(self._format_cleanup_misc_items(group))}'
            return

        yield prefix
        for message in self._format_cleanup_misc_items(group):
            yield f'\t\t- {message}'

    def _format_cleanup_misc_items(self, group):
        """Yield, per author tuple, the commit links (ordered by hash) and the authors."""
        for authors, infos in group.items():
            message = ', '.join(
                self._format_message_link(None, info.commit.hash)
                for info in sorted(infos, key=lambda item: item.commit.hash or ''))
            yield f'{message} by {self._format_authors(authors)}'

    def format_single_change(self, info):
        """Return the markdown for one entry: linked message, issues, authors and fixes."""
        message = self._format_message_link(info.message, info.commit.hash)
        if info.issues:
            message = f'{message} ({self._format_issues(info.issues)})'

        if info.commit.authors:
            message = f'{message} by {self._format_authors(info.commit.authors)}'

        if info.fixes:
            fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)

            authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold)
            # The fix authors are only spelled out when they differ from the commit's own
            if authors != info.commit.authors:
                fix_message = f'{fix_message} by {self._format_authors(authors)}'

            message = f'{message} (With fixes in {fix_message})'

        return message

    def _format_message_link(self, message, hash):
        """Return ``message`` linked to the commit ``hash``.

        The abbreviated hash is used as the link text when ``message`` is
        empty, and the bare message is returned when there is no hash.
        At least one of the two has to be given.
        """
        assert message or hash, 'Improperly defined commit message or override'
        message = message if message else hash[:HASH_LENGTH]
        return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message

    def _format_issues(self, issues):
        """Return comma-separated markdown links to the given issue numbers."""
        return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)

    @staticmethod
    def _format_authors(authors):
        """Return comma-separated markdown links to the given authors."""
        return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors)

    @property
    def repo_url(self):
        """URL of the repository: ``BASE_URL`` joined with the repository slug."""
        return f'{BASE_URL}/{self._repo}'
231
232
class CommitRange:
    """The commits of a git revision range, plus any commits added by overrides.

    Args:
        start: First revision of the range; when falsy the log of ``end`` is
            read until a release commit is reached.
        end: Last revision of the range.
        default_author: Author assigned to commits whose body names none.
    """

    COMMAND = 'git'
    # Line emitted after every commit body by the log format, used to split commits
    COMMIT_SEPARATOR = '-----'

    AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
    # NOTE: the pattern is compiled with re.VERBOSE, which ignores unescaped
    # whitespace, so the space after the colon in the `sub_details_alt`
    # alternative is not part of the pattern
    MESSAGE_RE = re.compile(r'''
        (?:\[
            (?P<prefix>[^\]\/:,]+)
            (?:/(?P<details>[^\]:,]+))?
            (?:[:,](?P<sub_details>[^\]]+))?
        \]\ )?
        (?:(?P<sub_details_alt>`?[^:`]+`?): )?
        (?P<message>.+?)
        (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
        ''', re.VERBOSE | re.DOTALL)
    EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
    FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert)\s+([\da-f]{40})')
    UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')

    def __init__(self, start, end, default_author=None):
        self._start, self._end = start, end
        self._commits, self._fixes = self._get_commits_and_fixes(default_author)
        self._commits_added = []

    def __iter__(self):
        """Iterate over the commits of the range, then the commits added by overrides."""
        return iter(itertools.chain(self._commits.values(), self._commits_added))

    def __len__(self):
        return len(self._commits) + len(self._commits_added)

    def __contains__(self, commit):
        """Return whether a ``Commit`` (by its hash) or a hash is part of the range."""
        if isinstance(commit, Commit):
            if not commit.hash:
                return False
            commit = commit.hash

        return commit in self._commits

    def _get_commits_and_fixes(self, default_author):
        """Read the git log and return ``(commits, fixes)``.

        ``commits`` maps hash to ``Commit`` in log order; commits that fix
        another commit of the range are removed from it. ``fixes`` maps the
        hash of a fixed commit to the list of commits fixing it.
        """
        result = run_process(
            self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
            f'{self._start}..{self._end}' if self._start else self._end).stdout

        commits = {}
        fixes = defaultdict(list)
        lines = iter(result.splitlines(False))
        # Every record is: hash line, subject line, body lines, separator line
        for i, commit_hash in enumerate(lines):
            short = next(lines)
            skip = short.startswith('Release ') or short == '[version] update'

            authors = [default_author] if default_author else []
            # Consume the body up to the separator, looking for an author line
            for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
                match = self.AUTHOR_INDICATOR_RE.match(line)
                if match:
                    authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)

            commit = Commit(commit_hash, short, authors)
            if skip and (self._start or not i):
                logger.debug(f'Skipped commit: {commit}')
                continue
            elif skip:
                # Without a start revision the range ends at the previous release
                logger.debug(f'Reached Release commit, breaking: {commit}')
                break

            fix_match = self.FIXES_RE.search(commit.short)
            if fix_match:
                commitish = fix_match.group(1)
                fixes[commitish].append(commit)

            commits[commit.hash] = commit

        for commitish, fix_commits in fixes.items():
            if commitish in commits:
                hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits)
                logger.info(f'Found fix(es) for {commitish[:HASH_LENGTH]}: {hashes}')
                # The fixes are reported with the fixed commit, not on their own
                for fix_commit in fix_commits:
                    del commits[fix_commit.hash]
            else:
                logger.debug(f'Commit with fixes not in changes: {commitish[:HASH_LENGTH]}')

        return commits, fixes

    def apply_overrides(self, overrides):
        """Apply ``add``/``remove``/``change`` overrides, then reverse the commit order.

        Each override is a mapping with an ``action`` and, depending on the
        action, ``hash``, ``short`` and ``authors``. An override carrying a
        ``when`` hash is ignored unless that hash is in the range or equals
        the start of the range.
        """
        for override in overrides:
            when = override.get('when')
            if when and when not in self and when != self._start:
                logger.debug(f'Ignored {when!r}, not in commits {self._start!r}')
                continue

            override_hash = override.get('hash')
            action = override['action']
            if action == 'add':
                commit = Commit(override_hash, override['short'], override.get('authors') or [])
                logger.info(f'ADD {commit}')
                self._commits_added.append(commit)

            elif action == 'remove':
                if override_hash in self._commits:
                    logger.info(f'REMOVE {self._commits[override_hash]}')
                    del self._commits[override_hash]

            elif action == 'change':
                if override_hash not in self._commits:
                    continue
                # Authors are optional here exactly as they are for `add`
                commit = Commit(override_hash, override['short'], override.get('authors') or [])
                logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')
                self._commits[commit.hash] = commit

        self._commits = dict(reversed(self._commits.items()))

    def groups(self):
        """Parse every commit subject and return the entries per ``CommitGroup``.

        Returns:
            A ``defaultdict`` mapping ``CommitGroup`` members to lists of
            ``CommitInfo``. Subjects that cannot be parsed are logged and
            skipped. The subject of an upstream merge commit is rewritten
            in place on the commit.
        """
        groups = defaultdict(list)
        for commit in self:
            upstream_re = self.UPSTREAM_MERGE_RE.match(commit.short)
            if upstream_re:
                commit.short = f'[upstream] Merge up to youtube-dl {upstream_re.group(1)}'

            match = self.MESSAGE_RE.fullmatch(commit.short)
            if not match:
                logger.error(f'Error parsing short commit message: {commit.short!r}')
                continue

            prefix, details, sub_details, sub_details_alt, message, issues = match.groups()
            group = None
            if prefix:
                if prefix == 'priority':
                    # `[priority/<prefix>/<details>]`: shift the remaining parts up
                    prefix, _, details = (details or '').partition('/')
                    logger.debug(f'Priority: {message!r}')
                    group = CommitGroup.PRIORITY

                if not details and prefix:
                    if prefix not in ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream'):
                        logger.debug(f'Replaced details with {prefix!r}')
                        details = prefix or None

                if details == 'common':
                    details = None

                if details:
                    details = details.strip()

            else:
                group = CommitGroup.CORE

            sub_details = f'{sub_details or ""},{sub_details_alt or ""}'.replace(':', ',')
            sub_details = tuple(filter(None, map(str.strip, sub_details.split(','))))

            # Drop the leading `#` of every issue reference
            issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else []

            if not group:
                group = CommitGroup.get(prefix.lower())
                if not group:
                    if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
                        group = CommitGroup.EXTRACTOR
                    else:
                        group = CommitGroup.POSTPROCESSOR
                        logger.warning(f'Failed to map {commit.short!r}, selected {group.name}')

            commit_info = CommitInfo(
                details, sub_details, message.strip(),
                # `.get` so that looking up a commit without fixes does not add a key
                issues, commit, self._fixes.get(commit.hash, []))
            logger.debug(f'Resolved {commit.short!r} to {commit_info!r}')
            groups[group].append(commit_info)

        return groups
397
398
def get_new_contributors(contributors_path, commits):
    """Return the commit authors that are not yet listed in the contributors file.

    Args:
        contributors_path: Path of the contributors file; it may be missing.
            On each line the text before `` (`` is taken as one or more
            ``/``-separated names.
        commits: Iterable of objects with an ``authors`` list.

    Returns:
        The new authors, compared and sorted case-insensitively, each spelled
        as it first appears in ``commits``.
    """
    known = set()
    if contributors_path.exists():
        for line in read_file(contributors_path).splitlines():
            names = line.strip().partition(' (')[0]
            known.update(name.casefold() for name in names.split('/'))

    fresh = set()
    for commit in commits:
        for author in commit.authors:
            folded = author.casefold()
            if folded in known:
                continue
            known.add(folded)
            fresh.add(author)

    return sorted(fresh, key=str.casefold)
416
417
if __name__ == '__main__':
    import argparse

    # Command line interface; the arguments are registered in the order they show up in --help
    cli = argparse.ArgumentParser(description='Create a changelog markdown from a git commit range')
    cli.add_argument(
        'commitish', nargs='?', default='HEAD',
        help='The commitish to create the range from (default: %(default)s)')
    cli.add_argument(
        '-v', '--verbosity', default=0, action='count',
        help='increase verbosity (can be used twice)')
    cli.add_argument(
        '-c', '--contributors', action='store_true',
        help='update CONTRIBUTORS file (default: %(default)s)')
    cli.add_argument(
        '--contributors-path', default=LOCATION_PATH.parent / 'CONTRIBUTORS', type=Path,
        help='path to the CONTRIBUTORS file')
    cli.add_argument(
        '--no-override', action='store_true',
        help='skip override json in commit generation (default: %(default)s)')
    cli.add_argument(
        '--override-path', default=LOCATION_PATH / 'changelog_override.json', type=Path,
        help='path to the changelog_override.json file')
    cli.add_argument(
        '--default-author', default='pukkandan',
        help='the author to use without a author indicator (default: %(default)s)')
    cli.add_argument(
        '--repo', default='yt-dlp/yt-dlp',
        help='the github repository to use for the operations (default: %(default)s)')
    args = cli.parse_args()

    # Every -v lowers the threshold by one level, starting from WARNING
    log_level = logging.WARNING - 10 * args.verbosity
    logging.basicConfig(
        stream=sys.stderr, level=log_level, style='{',
        format='{asctime} | {levelname:<8} | {message}', datefmt='%Y-%m-%d %H-%M-%S')

    commits = CommitRange(None, args.commitish, args.default_author)

    if args.no_override:
        pass
    elif args.override_path.exists():
        commits.apply_overrides(json.loads(read_file(args.override_path)))
    else:
        logger.warning(f'File {args.override_path.as_posix()} does not exist')

    logger.info(f'Loaded {len(commits)} commits')

    new_contributors = get_new_contributors(args.contributors_path, commits)
    if new_contributors and args.contributors:
        # Append, so the existing entries of the file are kept
        appended = '\n'.join(new_contributors) + '\n'
        write_file(args.contributors_path, appended, mode='a')
    if new_contributors:
        logger.info(f'New contributors: {", ".join(new_contributors)}')

    print(Changelog(commits.groups(), args.repo))