]>
Commit | Line | Data |
---|---|---|
d400e261 SS |
1 | from __future__ import annotations |
2 | ||
3 | import enum | |
4 | import itertools | |
5 | import json | |
6 | import logging | |
7 | import re | |
8 | import subprocess | |
9 | import sys | |
10 | from collections import defaultdict | |
11 | from dataclasses import dataclass | |
12 | from functools import lru_cache | |
13 | from pathlib import Path | |
14 | ||
# Root URL used to build links to repositories, commits, issues and authors.
BASE_URL = 'https://github.com'
# Directory containing this script; default locations of data files are derived from it.
LOCATION_PATH = Path(__file__).parent

# Module-level logger shared by everything in this file.
logger = logging.getLogger(__name__)
19 | ||
20 | ||
class CommitGroup(enum.Enum):
    """Changelog sections that a commit can be sorted into.

    The value of each member is the text used in that section's heading;
    ``UPSTREAM`` has no heading text.
    """

    UPSTREAM = None
    PRIORITY = 'Important'
    CORE = 'Core'
    EXTRACTOR = 'Extractor'
    DOWNLOADER = 'Downloader'
    POSTPROCESSOR = 'Postprocessor'
    MISC = 'Misc.'

    @classmethod
    @lru_cache
    def commit_lookup(cls):
        """Return a dict mapping each known commit prefix to its group.

        The table is cached, so it is only built on the first call.
        """
        prefixes_by_group = {
            cls.PRIORITY: {''},
            cls.UPSTREAM: {'upstream'},
            cls.CORE: {
                'aes',
                'cache',
                'compat_utils',
                'compat',
                'cookies',
                'core',
                'dependencies',
                'jsinterp',
                'outtmpl',
                'plugins',
                'update',
                'utils',
            },
            cls.MISC: {
                'build',
                'cleanup',
                'devscripts',
                'docs',
                'misc',
                'test',
            },
            cls.EXTRACTOR: {'extractor', 'extractors'},
            cls.DOWNLOADER: {'downloader'},
            cls.POSTPROCESSOR: {'postprocessor'},
        }

        # Invert the group -> prefixes table into prefix -> group.
        lookup = {}
        for group, names in prefixes_by_group.items():
            for name in names:
                lookup[name] = group
        return lookup

    @classmethod
    def get(cls, value):
        """Return the group registered for prefix ``value``, or ``None`` if unknown."""
        group = cls.commit_lookup().get(value)
        if not group:
            return group

        logger.debug(f'Mapped {value!r} => {group.name}')
        return group
73 | ||
74 | ||
@dataclass
class Commit:
    """A commit described by its hash (may be missing), subject line and authors."""

    hash: str | None
    short: str
    authors: list[str]

    def __str__(self):
        """Format as the quoted subject, then the 7-char hash and authors when present."""
        pieces = [repr(self.short)]

        if self.hash:
            pieces.append(f' ({self.hash[:7]})')

        if self.authors:
            pieces.append(f' by {", ".join(self.authors)}')

        return ''.join(pieces)
92 | ||
93 | ||
@dataclass
class CommitInfo:
    """Parsed pieces of a commit subject together with the commit and its fix commits."""

    details: str | None
    sub_details: tuple[str, ...]
    message: str
    issues: list[str]
    commit: Commit
    fixes: list[Commit]

    def key(self):
        """Return the sort key: lower-cased details, then sub-details, then message."""
        lowered_details = self.details.lower() if self.details else ''
        return (lowered_details, self.sub_details, self.message)
105 | ||
106 | ||
class Changelog:
    """Render grouped commit information as a markdown changelog.

    ``groups`` is indexed by every ``CommitGroup`` member and yields a list of
    ``CommitInfo`` entries for it; ``repo`` is the repository path appended to
    ``BASE_URL`` when building commit and issue links.
    """

    # Cleanup commits whose message matches this are collapsed into "Miscellaneous".
    MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)

    def __init__(self, groups, repo):
        self._groups = groups
        self._repo = repo

    def __str__(self):
        """Return the whole changelog as a single markdown string."""
        # Tabs are the internal indentation marker. Expand each to four spaces:
        # a one-space indent is too small for markdown to treat the bullet as a
        # nested list item, so sub-entries would be flattened to the top level.
        return '\n'.join(self._format_groups(self._groups)).replace('\t', '    ')

    def _format_groups(self, groups):
        """Yield one formatted section per non-empty group, in ``CommitGroup`` order."""
        for item in CommitGroup:
            group = groups[item]
            if group:
                yield self.format_module(item.value, group)

    def format_module(self, name, group):
        """Format one section: an optional ``#### <name> changes`` heading plus its entries."""
        result = f'\n#### {name} changes\n' if name else '\n'
        return result + '\n'.join(self._format_group(group))

    def _format_group(self, group):
        """Yield the bullet lines for ``group``, nested by details and sub-details."""
        sorted_group = sorted(group, key=CommitInfo.key)
        # groupby only merges adjacent items; the sort key above starts with the
        # same lower-cased details value, so equal details are contiguous.
        detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower())
        for details, items in detail_groups:
            if not details:
                indent = ''
            else:
                yield f'- {details}'
                indent = '\t'

            # Reset per detail group so a stale value can never leak between groups.
            cleanup_misc_items = None
            if details == 'cleanup':
                items, cleanup_misc_items = self._filter_cleanup_misc_items(items)

            sub_detail_groups = itertools.groupby(items, lambda item: item.sub_details)
            for sub_details, entries in sub_detail_groups:
                if not sub_details:
                    for entry in entries:
                        yield f'{indent}- {self.format_single_change(entry)}'
                    continue

                prefix = f'{indent}- {", ".join(sub_details)}'
                entries = list(entries)
                if len(entries) == 1:
                    # A lone entry is written on the same line as its sub-details.
                    yield f'{prefix}: {self.format_single_change(entries[0])}'
                    continue

                yield prefix
                for entry in entries:
                    yield f'{indent}\t- {self.format_single_change(entry)}'

            if cleanup_misc_items:
                yield from self._format_cleanup_misc_sub_group(cleanup_misc_items)

    def _filter_cleanup_misc_items(self, items):
        """Split ``items`` into (non-misc list, misc items grouped by author tuple).

        An item counts as misc when ``MISC_RE`` matches anywhere in its message.
        """
        cleanup_misc_items = defaultdict(list)
        non_misc_items = []
        for item in items:
            if self.MISC_RE.search(item.message):
                cleanup_misc_items[tuple(item.commit.authors)].append(item)
            else:
                non_misc_items.append(item)

        return non_misc_items, cleanup_misc_items

    def _format_cleanup_misc_sub_group(self, group):
        """Yield the "Miscellaneous" bullet(s) for the misc cleanup items in ``group``."""
        prefix = '\t- Miscellaneous'
        if len(group) == 1:
            # Only one author set: keep everything on a single line.
            yield f'{prefix}: {next(self._format_cleanup_misc_items(group))}'
            return

        yield prefix
        for message in self._format_cleanup_misc_items(group):
            yield f'\t\t- {message}'

    def _format_cleanup_misc_items(self, group):
        """Yield one ``<commit links> by <authors>`` string per author tuple in ``group``."""
        for authors, infos in group.items():
            message = ', '.join(
                self._format_message_link(None, info.commit.hash)
                for info in sorted(infos, key=lambda item: item.commit.hash or ''))
            yield f'{message} by {self._format_authors(authors)}'

    def format_single_change(self, info):
        """Format one ``CommitInfo`` as a linked message with issues, authors and fixes."""
        message = self._format_message_link(info.message, info.commit.hash)
        if info.issues:
            message = f'{message} ({self._format_issues(info.issues)})'

        if info.commit.authors:
            message = f'{message} by {self._format_authors(info.commit.authors)}'

        if info.fixes:
            fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)

            # Credit the fix authors only when they differ from the commit's own authors.
            authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold)
            if authors != info.commit.authors:
                fix_message = f'{fix_message} by {self._format_authors(authors)}'

            message = f'{message} (With fixes in {fix_message})'

        return message

    def _format_message_link(self, message, hash):
        """Return ``message`` linked to commit ``hash``.

        Without a message the first 7 characters of the hash are used as the
        link text; without a hash the plain message is returned.
        """
        assert message or hash, 'Improperly defined commit message or override'
        message = message if message else hash[:7]
        return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message

    def _format_issues(self, issues):
        """Return comma-separated markdown links to the given issue numbers."""
        return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)

    @staticmethod
    def _format_authors(authors):
        """Return comma-separated markdown links to the authors' ``BASE_URL`` pages."""
        return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors)

    @property
    def repo_url(self):
        """Full URL of the repository: ``BASE_URL`` joined with the repo path."""
        return f'{BASE_URL}/{self._repo}'
222 | ||
223 | ||
class CommitRange:
    """The commits between two commitishes, as read from ``git log``.

    Iterating yields the commits read from git followed by any commits added
    through ``apply_overrides``.
    """

    COMMAND = 'git'
    # Line emitted after every commit body in the ``git log`` output.
    COMMIT_SEPARATOR = '-----'

    AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
    MESSAGE_RE = re.compile(r'''
        (?:\[
            (?P<prefix>[^\]\/:,]+)
            (?:/(?P<details>[^\]:,]+))?
            (?:[:,](?P<sub_details>[^\]]+))?
        \]\ )?
        (?:`?(?P<sub_details_alt>[^:`]+)`?: )?
        (?P<message>.+?)
        (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
        ''', re.VERBOSE | re.DOTALL)
    EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
    FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+for)?|Revert)\s+([\da-f]{40})')
    UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')

    def __init__(self, start, end, default_author=None) -> None:
        """Load the commits in ``start..end`` by running ``git log``.

        ``default_author`` is credited for commits without an author indicator line.
        """
        self._start = start
        self._end = end
        self._commits, self._fixes = self._get_commits_and_fixes(default_author)
        self._commits_added = []

    @classmethod
    def from_single(cls, commitish='HEAD', default_author=None):
        """Build the range of commits around ``commitish``, bounded by tags."""
        start_commitish = cls.get_prev_tag(commitish)
        end_commitish = cls.get_next_tag(commitish)
        if start_commitish == end_commitish:
            # ``commitish`` is itself the tag: start from the tag before it instead.
            start_commitish = cls.get_prev_tag(f'{commitish}~')
        logger.info(f'Determined range from {commitish!r}: {start_commitish}..{end_commitish}')
        return cls(start_commitish, end_commitish, default_author)

    @classmethod
    def get_prev_tag(cls, commitish):
        """Return the output of ``git describe --tags --abbrev=0`` for ``commitish``.

        Tags containing anything other than digits and dots are excluded.
        Raises ``subprocess.CalledProcessError`` when git fails.
        """
        command = [cls.COMMAND, 'describe', '--tags', '--abbrev=0', '--exclude=*[^0-9.]*', commitish]
        return subprocess.check_output(command, text=True).strip()

    @classmethod
    def get_next_tag(cls, commitish):
        """Return the tag from ``git describe --contains``, or ``'HEAD'`` if git fails."""
        result = subprocess.run(
            [cls.COMMAND, 'describe', '--contains', '--abbrev=0', commitish],
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True)
        if result.returncode:
            return 'HEAD'

        # Drop any ``~N`` suffix so that only the tag name remains.
        return result.stdout.partition('~')[0].strip()

    def __iter__(self):
        return iter(itertools.chain(self._commits.values(), self._commits_added))

    def __len__(self):
        return len(self._commits) + len(self._commits_added)

    def __contains__(self, commit):
        """Return whether a ``Commit`` (by its hash) or a hash string is in the git commits."""
        if isinstance(commit, Commit):
            if not commit.hash:
                return False
            commit = commit.hash

        return commit in self._commits

    def _is_ancestor(self, commitish):
        """Return ``True`` if ``commitish`` is an ancestor of the range start."""
        # ``git merge-base --is-ancestor`` exits with status 0 when the first
        # commit is an ancestor of the second, so success is a *zero* status.
        status = subprocess.call(
            [self.COMMAND, 'merge-base', '--is-ancestor', commitish, self._start])
        return status == 0

    def _get_commits_and_fixes(self, default_author):
        """Parse ``git log`` into ``(commits by hash, fix commits by fixed hash)``.

        Fix commits whose target is part of the range are removed from the
        returned commits; they remain available through the fixes mapping.
        """
        result = subprocess.check_output([
            self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
            f'{self._start}..{self._end}'], text=True)

        commits = {}
        fixes = defaultdict(list)
        lines = iter(result.splitlines(False))
        for commit_hash in lines:
            short = next(lines)
            skip = short.startswith('Release ') or short == '[version] update'

            authors = [default_author] if default_author else []
            # Consume the body up to the separator, looking for author indicator lines.
            for body_line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
                match = self.AUTHOR_INDICATOR_RE.match(body_line)
                if match:
                    authors = sorted(map(str.strip, body_line[match.end():].split(',')), key=str.casefold)

            commit = Commit(commit_hash, short, authors)
            if skip:
                logger.debug(f'Skipped commit: {commit}')
                continue

            fix_match = self.FIXES_RE.search(commit.short)
            if fix_match:
                commitish = fix_match.group(1)
                fixes[commitish].append(commit)

            commits[commit.hash] = commit

        for commitish, fix_commits in fixes.items():
            if commitish in commits:
                hashes = ', '.join(commit.hash[:7] for commit in fix_commits)
                logger.info(f'Found fix(es) for {commitish[:7]}: {hashes}')
                for fix_commit in fix_commits:
                    del commits[fix_commit.hash]
            else:
                logger.debug(f'Commit with fixes not in changes: {commitish[:7]}')

        return commits, fixes

    def apply_overrides(self, overrides):
        """Apply manual ``add`` / ``remove`` / ``change`` overrides to the commits.

        Each override is a mapping with an ``'action'`` key and, depending on the
        action, ``'hash'``, ``'short'`` and ``'authors'``. An override with a
        ``'when'`` value is ignored unless that value is a commit hash in this
        range or equals the range start.
        """
        for override in overrides:
            when = override.get('when')
            if when and when not in self and when != self._start:
                logger.debug(f'Ignored {when!r}, not in commits {self._start!r}')
                continue

            override_hash = override.get('hash')
            if override['action'] == 'add':
                commit = Commit(override_hash, override['short'], override.get('authors') or [])
                logger.info(f'ADD {commit}')
                self._commits_added.append(commit)

            elif override['action'] == 'remove':
                if override_hash in self._commits:
                    logger.info(f'REMOVE {self._commits[override_hash]}')
                    del self._commits[override_hash]

            elif override['action'] == 'change':
                if override_hash not in self._commits:
                    continue
                commit = Commit(override_hash, override['short'], override['authors'])
                logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')
                self._commits[commit.hash] = commit

        # Reverse the stored order of the commits read from git.
        self._commits = {key: value for key, value in reversed(self._commits.items())}

    def groups(self):
        """Parse every commit subject and return ``CommitInfo`` lists keyed by ``CommitGroup``.

        Commits whose subject cannot be parsed are logged as errors and left out.
        Note that upstream-merge commits have their ``short`` rewritten in place.
        """
        groups = defaultdict(list)
        for commit in self:
            upstream_re = self.UPSTREAM_MERGE_RE.match(commit.short)
            if upstream_re:
                commit.short = f'[upstream] Merge up to youtube-dl {upstream_re.group(1)}'

            match = self.MESSAGE_RE.fullmatch(commit.short)
            if not match:
                logger.error(f'Error parsing short commit message: {commit.short!r}')
                continue

            prefix, details, sub_details, sub_details_alt, message, issues = match.groups()
            group = None
            if prefix:
                if prefix == 'priority':
                    # ``[priority/<prefix>/<details>]``: shift the remaining parts left.
                    prefix, _, details = (details or '').partition('/')
                    logger.debug(f'Priority: {message!r}')
                    group = CommitGroup.PRIORITY

                if not details and prefix:
                    if prefix not in ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream'):
                        logger.debug(f'Replaced details with {prefix!r}')
                        details = prefix or None

                if details == 'common':
                    details = None

                if details:
                    details = details.strip()

            else:
                # Subjects without any ``[prefix]`` go to the core section.
                group = CommitGroup.CORE

            sub_details = f'{sub_details or ""},{sub_details_alt or ""}'.lower().replace(':', ',')
            sub_details = tuple(filter(None, map(str.strip, sub_details.split(','))))

            # Strip the leading ``#`` from every issue reference.
            issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else []

            if not group:
                group = CommitGroup.get(prefix.lower())
                if not group:
                    if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
                        group = CommitGroup.EXTRACTOR
                    else:
                        group = CommitGroup.POSTPROCESSOR
                    logger.warning(f'Failed to map {commit.short!r}, selected {group.name}')

            commit_info = CommitInfo(
                details, sub_details, message.strip(),
                issues, commit, self._fixes[commit.hash])
            logger.debug(f'Resolved {commit.short!r} to {commit_info!r}')
            groups[group].append(commit_info)

        return groups
415 | ||
416 | ||
def get_new_contributors(contributors_path, commits):
    """Return commit authors that are not yet listed in the contributors file.

    Names are compared case-insensitively. Every non-empty line of the file is
    cut at the first ``' ('`` and the remainder split on ``'/'`` to obtain the
    known names. A missing file is treated as having no contributors. The
    result is sorted case-insensitively and contains each new author once.
    """
    known = set()
    if contributors_path.exists():
        with contributors_path.open() as file:
            for raw_line in file:
                line = raw_line.strip()
                if not line:
                    continue
                names = line.partition(' (')[0].split('/')
                known.update(name.casefold() for name in names)

    added = set()
    for commit in commits:
        for author in commit.authors:
            folded = author.casefold()
            if folded in known:
                continue
            # Remember the folded name so a differently-cased repeat is not added twice.
            known.add(folded)
            added.add(author)

    return sorted(added, key=str.casefold)
435 | ||
436 | ||
if __name__ == '__main__':
    import argparse

    # Command-line interface.
    parser = argparse.ArgumentParser(
        description='Create a changelog markdown from a git commit range')
    parser.add_argument(
        'commitish', default='HEAD', nargs='?',
        help='The commitish to create the range from (default: %(default)s)')
    parser.add_argument(
        '-v', '--verbosity', action='count', default=0,
        help='increase verbosity (can be used twice)')
    parser.add_argument(
        '-c', '--contributors', action='store_true',
        help='update CONTRIBUTORS file (default: %(default)s)')
    parser.add_argument(
        '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS',
        help='path to the CONTRIBUTORS file')
    parser.add_argument(
        '--no-override', action='store_true',
        help='skip override json in commit generation (default: %(default)s)')
    parser.add_argument(
        '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json',
        help='path to the changelog_override.json file')
    parser.add_argument(
        '--default-author', default='pukkandan',
        help='the author to use without a author indicator (default: %(default)s)')
    parser.add_argument(
        '--repo', default='yt-dlp/yt-dlp',
        help='the github repository to use for the operations (default: %(default)s)')
    args = parser.parse_args()

    # Each -v lowers the threshold by one level: WARNING -> INFO -> DEBUG.
    log_level = logging.WARNING - 10 * args.verbosity
    logging.basicConfig(
        datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}',
        level=log_level, style='{', stream=sys.stderr)

    commits = CommitRange.from_single(args.commitish, args.default_author)

    # Apply the manual overrides unless they were disabled on the command line.
    use_overrides = not args.no_override
    if use_overrides and args.override_path.exists():
        with args.override_path.open() as file:
            overrides = json.load(file)
        commits.apply_overrides(overrides)
    elif use_overrides:
        logger.warning(f'File {args.override_path.as_posix()} does not exist')

    logger.info(f'Loaded {len(commits)} commits')

    new_contributors = get_new_contributors(args.contributors_path, commits)
    if new_contributors and args.contributors:
        # Append the newly seen authors, one per line.
        with args.contributors_path.open('a') as file:
            file.writelines(f'{contributor}\n' for contributor in new_contributors)
    if new_contributors:
        logger.info(f'New contributors: {", ".join(new_contributors)}')

    print(Changelog(commits.groups(), args.repo))