1 from __future__
import annotations
10 from collections
import defaultdict
11 from dataclasses
import dataclass
12 from functools
import lru_cache
13 from pathlib
import Path
# Root of every GitHub link this script builds (author profiles, repository URLs).
BASE_URL = 'https://github.com'
# Directory containing this script; used as the anchor for the default file paths of the CLI.
LOCATION_PATH = Path(__file__).parent

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
21 class CommitGroup(enum
.Enum
):
23 PRIORITY
= 'Important'
25 EXTRACTOR
= 'Extractor'
26 DOWNLOADER
= 'Downloader'
27 POSTPROCESSOR
= 'Postprocessor'
32 def commit_lookup(cls
):
37 cls
.UPSTREAM
: {'upstream'}
,
60 cls
.EXTRACTOR
: {'extractor', 'extractors'}
,
61 cls
.DOWNLOADER
: {'downloader'}
,
62 cls
.POSTPROCESSOR
: {'postprocessor'}
,
69 result
= cls
.commit_lookup().get(value
)
71 logger
.debug(f
'Mapped {value!r} => {result.name}')
82 result
= f
'{self.short!r}'
85 result
+= f
' ({self.hash[:7]})'
88 authors
= ', '.join(self
.authors
)
89 result
+= f
' by {authors}'
97 sub_details
: tuple[str, ...]
104 return ((self
.details
or '').lower(), self
.sub_details
, self
.message
)
# Case-insensitive pattern for the standalone words "lint"/"linting", "misc",
# "format"/"formatting" or "fixes"; it is searched against commit messages to
# pick out the miscellaneous cleanup items.
MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)
110 def __init__(self
, groups
, repo
):
111 self
._groups
= groups
115 return '\n'.join(self
._format
_groups
(self
._groups
)).replace('\t', ' ')
117 def _format_groups(self
, groups
):
118 for item
in CommitGroup
:
121 yield self
.format_module(item
.value
, group
)
def format_module(self, name, group):
    """Render one changelog section.

    A truthy *name* produces a ``#### <name> changes`` heading; otherwise the
    section starts with a bare newline. The lines produced by
    ``self._format_group(group)`` follow, joined by newlines.
    """
    if name:
        heading = f'\n#### {name} changes\n'
    else:
        heading = '\n'

    entry_lines = self._format_group(group)
    return heading + '\n'.join(entry_lines)
127 def _format_group(self
, group
):
128 sorted_group
= sorted(group
, key
=CommitInfo
.key
)
129 detail_groups
= itertools
.groupby(sorted_group
, lambda item
: (item
.details
or '').lower())
130 for _
, items
in detail_groups
:
132 details
= items
[0].details
139 if details
== 'cleanup':
140 items
, cleanup_misc_items
= self
._filter
_cleanup
_misc
_items
(items
)
142 sub_detail_groups
= itertools
.groupby(items
, lambda item
: tuple(map(str.lower
, item
.sub_details
)))
143 for sub_details
, entries
in sub_detail_groups
:
145 for entry
in entries
:
146 yield f
'{indent}- {self.format_single_change(entry)}'
149 entries
= list(entries
)
150 prefix
= f
'{indent}- {", ".join(entries[0].sub_details)}'
151 if len(entries
) == 1:
152 yield f
'{prefix}: {self.format_single_change(entries[0])}'
156 for entry
in entries
:
157 yield f
'{indent}\t- {self.format_single_change(entry)}'
159 if details
== 'cleanup' and cleanup_misc_items
:
160 yield from self
._format
_cleanup
_misc
_sub
_group
(cleanup_misc_items
)
162 def _filter_cleanup_misc_items(self
, items
):
163 cleanup_misc_items
= defaultdict(list)
166 if self
.MISC_RE
.search(item
.message
):
167 cleanup_misc_items
[tuple(item
.commit
.authors
)].append(item
)
169 non_misc_items
.append(item
)
171 return non_misc_items
, cleanup_misc_items
173 def _format_cleanup_misc_sub_group(self
, group
):
174 prefix
= '\t- Miscellaneous'
176 yield f
'{prefix}: {next(self._format_cleanup_misc_items(group))}'
180 for message
in self
._format
_cleanup
_misc
_items
(group
):
181 yield f
'\t\t- {message}'
183 def _format_cleanup_misc_items(self
, group
):
184 for authors
, infos
in group
.items():
186 self
._format
_message
_link
(None, info
.commit
.hash)
187 for info
in sorted(infos
, key
=lambda item
: item
.commit
.hash or ''))
188 yield f
'{message} by {self._format_authors(authors)}'
190 def format_single_change(self
, info
):
191 message
= self
._format
_message
_link
(info
.message
, info
.commit
.hash)
193 message
= f
'{message} ({self._format_issues(info.issues)})'
195 if info
.commit
.authors
:
196 message
= f
'{message} by {self._format_authors(info.commit.authors)}'
199 fix_message
= ', '.join(f
'{self._format_message_link(None, fix.hash)}' for fix
in info
.fixes
)
201 authors
= sorted({author for fix in info.fixes for author in fix.authors}
, key
=str.casefold
)
202 if authors
!= info
.commit
.authors
:
203 fix_message
= f
'{fix_message} by {self._format_authors(authors)}'
205 message
= f
'{message} (With fixes in {fix_message})'
209 def _format_message_link(self
, message
, hash):
210 assert message
or hash, 'Improperly defined commit message or override'
211 message
= message
if message
else hash[:7]
212 return f
'[{message}]({self.repo_url}/commit/{hash})' if hash else message
214 def _format_issues(self
, issues
):
215 return ', '.join(f
'[#{issue}]({self.repo_url}/issues/{issue})' for issue
in issues
)
def _format_authors(authors):
    """Return *authors* as comma-separated Markdown links to ``BASE_URL/<author>``."""
    profile_links = [f'[{author}]({BASE_URL}/{author})' for author in authors]
    return ', '.join(profile_links)
223 return f
'{BASE_URL}/{self._repo}'
228 COMMIT_SEPARATOR
= '-----'
230 AUTHOR_INDICATOR_RE
= re
.compile(r
'Authored by:? ', re
.IGNORECASE
)
231 MESSAGE_RE
= re
.compile(r
'''
233 (?P<prefix>[^\]\/:,]+)
234 (?:/(?P<details>[^\]:,]+))?
235 (?:[:,](?P<sub_details>[^\]]+))?
237 (?:(?P<sub_details_alt>`?[^:`]+`?): )?
239 (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
240 ''', re
.VERBOSE | re
.DOTALL
)
241 EXTRACTOR_INDICATOR_RE
= re
.compile(r
'(?:Fix|Add)\s+Extractors?', re
.IGNORECASE
)
242 FIXES_RE
= re
.compile(r
'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert)\s+([\da-f]{40})')
243 UPSTREAM_MERGE_RE
= re
.compile(r
'Update to ytdl-commit-([\da-f]+)')
245 def __init__(self
, start
, end
, default_author
=None) -> None:
248 self
._commits
, self
._fixes
= self
._get
_commits
_and
_fixes
(default_author
)
249 self
._commits
_added
= []
def from_single(cls, commitish='HEAD', default_author=None):
    """Build an instance covering the tag-to-tag range around *commitish*.

    The range starts at ``cls.get_prev_tag(commitish)`` and ends at
    ``cls.get_next_tag(commitish)``. When both resolve to the same value,
    the start is looked up again from the parent (``<commitish>~``).

    Returns the result of ``cls(start, end, default_author)``.
    """
    range_start = cls.get_prev_tag(commitish)
    range_end = cls.get_next_tag(commitish)

    # Identical bounds would give an empty range, so step back one commit
    # before searching for the previous tag.
    if range_start == range_end:
        parent = f'{commitish}~'
        range_start = cls.get_prev_tag(parent)

    logger.info(f'Determined range from {commitish!r}: {range_start}..{range_end}')
    return cls(range_start, range_end, default_author)
def get_prev_tag(cls, commitish):
    """Return the stripped output of ``describe --tags`` for *commitish*.

    Runs ``cls.COMMAND describe --tags --abbrev=0 --exclude=*[^0-9.]* <commitish>``.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero status.
    """
    describe_args = ['describe', '--tags', '--abbrev=0', '--exclude=*[^0-9.]*', commitish]
    output = subprocess.check_output([cls.COMMAND, *describe_args], text=True)
    return output.strip()
266 def get_next_tag(cls
, commitish
):
267 result
= subprocess
.run(
268 [cls
.COMMAND
, 'describe', '--contains', '--abbrev=0', commitish
],
269 stdout
=subprocess
.PIPE
, stderr
=subprocess
.DEVNULL
, text
=True)
270 if result
.returncode
:
273 return result
.stdout
.partition('~')[0].strip()
276 return iter(itertools
.chain(self
._commits
.values(), self
._commits
_added
))
279 return len(self
._commits
) + len(self
._commits
_added
)
281 def __contains__(self
, commit
):
282 if isinstance(commit
, Commit
):
287 return commit
in self
._commits
289 def _is_ancestor(self
, commitish
):
290 return bool(subprocess
.call(
291 [self
.COMMAND
, 'merge-base', '--is-ancestor', commitish
, self
._start
]))
293 def _get_commits_and_fixes(self
, default_author
):
294 result
= subprocess
.check_output([
295 self
.COMMAND
, 'log', f
'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
296 f
'{self._start}..{self._end}'], text
=True)
299 fixes
= defaultdict(list)
300 lines
= iter(result
.splitlines(False))
304 skip
= short
.startswith('Release ') or short
== '[version] update'
306 authors
= [default_author
] if default_author
else []
307 for line
in iter(lambda: next(lines
), self
.COMMIT_SEPARATOR
):
308 match
= self
.AUTHOR_INDICATOR_RE
.match(line
)
310 authors
= sorted(map(str.strip
, line
[match
.end():].split(',')), key
=str.casefold
)
312 commit
= Commit(commit_hash
, short
, authors
)
314 logger
.debug(f
'Skipped commit: {commit}')
317 fix_match
= self
.FIXES_RE
.search(commit
.short
)
319 commitish
= fix_match
.group(1)
320 fixes
[commitish
].append(commit
)
322 commits
[commit
.hash] = commit
324 for commitish
, fix_commits
in fixes
.items():
325 if commitish
in commits
:
326 hashes
= ', '.join(commit
.hash[:7] for commit
in fix_commits
)
327 logger
.info(f
'Found fix(es) for {commitish[:7]}: {hashes}')
328 for fix_commit
in fix_commits
:
329 del commits
[fix_commit
.hash]
331 logger
.debug(f
'Commit with fixes not in changes: {commitish[:7]}')
333 return commits
, fixes
335 def apply_overrides(self
, overrides
):
336 for override
in overrides
:
337 when
= override
.get('when')
338 if when
and when
not in self
and when
!= self
._start
:
339 logger
.debug(f
'Ignored {when!r}, not in commits {self._start!r}')
342 override_hash
= override
.get('hash')
343 if override
['action'] == 'add':
344 commit
= Commit(override
.get('hash'), override
['short'], override
.get('authors') or [])
345 logger
.info(f
'ADD {commit}')
346 self
._commits
_added
.append(commit
)
348 elif override
['action'] == 'remove':
349 if override_hash
in self
._commits
:
350 logger
.info(f
'REMOVE {self._commits[override_hash]}')
351 del self
._commits
[override_hash
]
353 elif override
['action'] == 'change':
354 if override_hash
not in self
._commits
:
356 commit
= Commit(override_hash
, override
['short'], override
['authors'])
357 logger
.info(f
'CHANGE {self._commits[commit.hash]} -> {commit}')
358 self
._commits
[commit
.hash] = commit
360 self
._commits
= {key: value for key, value in reversed(self._commits.items())}
363 groups
= defaultdict(list)
365 upstream_re
= self
.UPSTREAM_MERGE_RE
.match(commit
.short
)
367 commit
.short
= f
'[upstream] Merge up to youtube-dl {upstream_re.group(1)}'
369 match
= self
.MESSAGE_RE
.fullmatch(commit
.short
)
371 logger
.error(f
'Error parsing short commit message: {commit.short!r}')
374 prefix
, details
, sub_details
, sub_details_alt
, message
, issues
= match
.groups()
377 if prefix
== 'priority':
378 prefix
, _
, details
= (details
or '').partition('/')
379 logger
.debug(f
'Priority: {message!r}')
380 group
= CommitGroup
.PRIORITY
382 if not details
and prefix
:
383 if prefix
not in ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream'):
384 logger
.debug(f
'Replaced details with {prefix!r}')
385 details
= prefix
or None
387 if details
== 'common':
391 details
= details
.strip()
394 group
= CommitGroup
.CORE
396 sub_details
= f
'{sub_details or ""},{sub_details_alt or ""}'.replace(':', ',')
397 sub_details
= tuple(filter(None, map(str.strip
, sub_details
.split(','))))
399 issues
= [issue
.strip()[1:] for issue
in issues
.split(',')] if issues
else []
402 group
= CommitGroup
.get(prefix
.lower())
404 if self
.EXTRACTOR_INDICATOR_RE
.search(commit
.short
):
405 group
= CommitGroup
.EXTRACTOR
407 group
= CommitGroup
.POSTPROCESSOR
408 logger
.warning(f
'Failed to map {commit.short!r}, selected {group.name}')
410 commit_info
= CommitInfo(
411 details
, sub_details
, message
.strip(),
412 issues
, commit
, self
._fixes
[commit
.hash])
413 logger
.debug(f
'Resolved {commit.short!r} to {commit_info!r}')
414 groups
[group
].append(commit_info
)
def get_new_contributors(contributors_path, commits):
    """Return commit authors that are not yet listed in the contributors file.

    Args:
        contributors_path: path object of the contributors file. Each
            non-empty line may list several names separated by ``/`` and
            may carry a trailing `` (...)`` annotation, which is ignored.
            A missing file is treated as empty.
        commits: iterable of objects with an ``authors`` iterable of names.

    Returns:
        The new author names, compared case-insensitively against the known
        ones, de-duplicated and sorted case-insensitively. The spelling of
        the first occurrence of each name is kept.
    """
    contributors = set()
    if contributors_path.exists():
        # Read as UTF-8 explicitly: the platform default encoding (e.g. a
        # legacy Windows code page) can fail on non-ASCII names.
        with contributors_path.open(encoding='utf-8') as file:
            for line in filter(None, map(str.strip, file)):
                author, _, _ = line.partition(' (')
                authors = author.split('/')
                contributors.update(map(str.casefold, authors))

    new_contributors = set()
    for commit in commits:
        for author in commit.authors:
            author_folded = author.casefold()
            if author_folded not in contributors:
                # Record the folded name so later case variants are not reported again.
                contributors.add(author_folded)
                new_contributors.add(author)

    return sorted(new_contributors, key=str.casefold)
439 if __name__
== '__main__':
442 parser
= argparse
.ArgumentParser(
443 description
='Create a changelog markdown from a git commit range')
445 'commitish', default
='HEAD', nargs
='?',
446 help='The commitish to create the range from (default: %(default)s)')
448 '-v', '--verbosity', action
='count', default
=0,
449 help='increase verbosity (can be used twice)')
451 '-c', '--contributors', action
='store_true',
452 help='update CONTRIBUTORS file (default: %(default)s)')
454 '--contributors-path', type=Path
, default
=LOCATION_PATH
.parent
/ 'CONTRIBUTORS',
455 help='path to the CONTRIBUTORS file')
457 '--no-override', action
='store_true',
458 help='skip override json in commit generation (default: %(default)s)')
460 '--override-path', type=Path
, default
=LOCATION_PATH
/ 'changelog_override.json',
461 help='path to the changelog_override.json file')
463 '--default-author', default
='pukkandan',
464 help='the author to use without a author indicator (default: %(default)s)')
466 '--repo', default
='yt-dlp/yt-dlp',
467 help='the github repository to use for the operations (default: %(default)s)')
468 args
= parser
.parse_args()
471 datefmt
='%Y-%m-%d %H-%M-%S', format
='{asctime} | {levelname:<8} | {message}',
472 level
=logging
.WARNING
- 10 * args
.verbosity
, style
='{', stream
=sys
.stderr
)
474 commits
= CommitRange
.from_single(args
.commitish
, args
.default_author
)
476 if not args
.no_override
:
477 if args
.override_path
.exists():
478 with args
.override_path
.open() as file:
479 overrides
= json
.load(file)
480 commits
.apply_overrides(overrides
)
482 logger
.warning(f
'File {args.override_path.as_posix()} does not exist')
484 logger
.info(f
'Loaded {len(commits)} commits')
486 new_contributors
= get_new_contributors(args
.contributors_path
, commits
)
488 if args
.contributors
:
489 with args
.contributors_path
.open('a') as file:
490 file.writelines(f
'{contributor}\n' for contributor
in new_contributors
)
491 logger
.info(f
'New contributors: {", ".join(new_contributors)}')
493 print(Changelog(commits
.groups(), args
.repo
))