import base64
import calendar
+import collections
import copy
import datetime
import enum
<a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
\s[^>]*\bclass="[^"]*\blazy-load-youtube''',
]
+ _RETURN_TYPE = 'video' # While there are "multifeed" test cases, multifeed videos don't seem to actually exist anymore
_PLAYER_INFO_RE = (
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
'note': '6 channel audio',
'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
'only_matching': True,
+ }, {
+ 'note': 'Multiple HLS formats with same itag',
+ 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
+ 'info_dict': {
+ 'id': 'kX3nB4PpJko',
+ 'ext': 'mp4',
+ 'categories': ['Entertainment'],
+ 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
+ 'uploader_url': 'http://www.youtube.com/user/MrBeast6000',
+ 'live_status': 'not_live',
+ 'duration': 937,
+ 'channel_follower_count': int,
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
+ 'title': 'Last To Take Hand Off Jet, Keeps It!',
+ 'channel': 'MrBeast',
+ 'playable_in_embed': True,
+ 'view_count': int,
+ 'upload_date': '20221112',
+ 'uploader': 'MrBeast',
+ 'uploader_id': 'MrBeast6000',
+ 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
+ 'age_limit': 0,
+ 'availability': 'public',
+ 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
+ 'like_count': int,
+ 'tags': [],
+ },
+ 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
}
]
return live_status
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
- itags, stream_ids = {}, []
+ itags, stream_ids = collections.defaultdict(set), []
itag_qualities, res_qualities = {}, {0: None}
q = qualities([
# Normally tiny is the smallest video-only formats. But
video_id=video_id, only_once=True)
throttled = True
- if itag:
- itags[itag] = 'https'
- stream_ids.append(stream_id)
-
tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
language_preference = (
10 if audio_track.get('audioIsDefault') and 10
}
if dct.get('ext'):
dct['container'] = dct['ext'] + '_dash'
+
+ if itag:
+ itags[itag].add(('https', dct.get('language')))
+ stream_ids.append(stream_id)
yield dct
needs_live_processing = self._needs_live_processing(live_status, duration)
skip_manifests.add('dash')
def process_manifest_format(f, proto, itag):
- if itag in itags:
- if itags[itag] == proto or f'{itag}-{proto}' in itags:
- return False
- itag = f'{itag}-{proto}'
- if itag:
+ key = (proto, f.get('language'))
+ if key in itags[itag]:
+ return False
+ itags[itag].add(key)
+
+ if any(p != proto for p, _ in itags[itag]):
+ f['format_id'] = f'{itag}-{proto}'
+ elif itag:
f['format_id'] = itag
- itags[itag] = proto
f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
if f['quality'] == -1 and f.get('height'):