info[d_k] = parse_duration(query[k][0])
# YouTube Music auto-generated description
- if video_description:
+ if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
+ # XXX: The regex below causes catastrophic backtracking if the description contains "·"
+ # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
+ # Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
+ # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
mobj = re.search(
r'''(?xs)
- (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
- (?P<album>[^\n]+)
+ (?=(?P<track>[^\n·]+))(?P=track)·
+ (?=(?P<artist>[^\n]+))(?P=artist)\n+
+ (?=(?P<album>[^\n]+))(?P=album)\n
(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
- (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
- .+\nAuto-generated\ by\ YouTube\.\s*$
+ (.+?\nArtist\s*:\s*
+ (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
+ )?.+\nAuto-generated\ by\ YouTube\.\s*$
''', video_description)
if mobj:
release_year = mobj.group('release_year')