ExtractorError,
int_or_none,
PagedList,
- RegexNotFoundError,
unescapeHTML,
unified_strdate,
orderedSet,
'135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'preference': -40},
'136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'preference': -40},
'137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
- '138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40},
+ '138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'preference': -40},
'160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40},
- '264': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
+ '264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'preference': -40},
# Dash mp4 audio
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
u"format": "141",
},
},
+ # DASH manifest with encrypted signature
+ {
+ u'url': u'https://www.youtube.com/watch?v=IB3lcPjvWLA',
+ u'info_dict': {
+ u'id': u'IB3lcPjvWLA',
+ u'ext': u'm4a',
+ u'title': u'Afrojack - The Spark ft. Spree Wilson',
+ u'description': u'md5:3199ed45ee8836572865580804d7ac0f',
+ u'uploader': u'AfrojackVEVO',
+ u'uploader_id': u'AfrojackVEVO',
+ u'upload_date': u'20131011',
+ },
+ u"params": {
+ u'youtube_include_dash_manifest': True,
+ u'format': '141',
+ },
+ },
]
mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
if not mobj:
raise ValueError('Could not find vevo ID')
- info = json.loads(mobj.group(1))
- args = info['args']
+ ytplayer_config = json.loads(mobj.group(1))
+ args = ytplayer_config['args']
# Easy way to know if the 's' value is in url_encoded_fmt_stream_map
# these signatures are encrypted
if 'url_encoded_fmt_stream_map' not in args:
raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
# Look for the DASH manifest
- dash_manifest_url_lst = video_info.get('dashmpd')
- if (dash_manifest_url_lst and dash_manifest_url_lst[0] and
- self._downloader.params.get('youtube_include_dash_manifest', False)):
+ if (self._downloader.params.get('youtube_include_dash_manifest', False)):
try:
+ # The DASH manifest used needs to be the one from the original video_webpage.
+ # The one found in get_video_info seems to be using different signatures.
+ # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
+ # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
+ # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
+ if age_gate:
+ dash_manifest_url = video_info.get('dashmpd')[0]
+ else:
+ dash_manifest_url = ytplayer_config['args']['dashmpd']
+ def decrypt_sig(mobj):  # re.sub callback for the r'/s/([\w\.]+)' substitution applied to the manifest URL
+ s = mobj.group(1)  # the signature text captured after '/s/'
+ dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
+ return '/signature/%s' % dec_s  # replacement text: '/signature/' followed by the decrypted value
+ dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
dash_doc = self._download_xml(
- dash_manifest_url_lst[0], video_id,
+ dash_manifest_url, video_id,
note=u'Downloading DASH manifest',
errnote=u'Could not download DASH manifest')
for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
# the id of the playlist is just 'RD' + video_id
url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
- title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
- get_element_by_attribute('class', 'title ', webpage))
+ search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
+ title_span = (search_title('playlist-title') or
+ search_title('title long-title') or search_title('title'))
title = clean_html(title_span)
- video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s' % re.escape(playlist_id)
- ids = orderedSet(re.findall(video_re, webpage))
+ video_re = r'''(?x)data-video-username="(.*?)".*?
+ href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id)
+ matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
+ # Some of the videos may have been deleted; their username field is empty
+ ids = [video_id for (username, video_id) in matches if username]
url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, title)
class YoutubeUserIE(InfoExtractor):
IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
- _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
+ _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
_GDATA_PAGE_SIZE = 50
_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
for video_id in video_ids]
return self.playlist_result(videos, query)
+
# YoutubeSearchIE subclass that overrides only the name, description, search
# key and API URL; its _API_URL carries orderby=published.
class YoutubeSearchDateIE(YoutubeSearchIE):
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'  # parent's IE_NAME with a ':date' suffix
_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'  # %s = query, %i = start index
_SEARCH_KEY = 'ytsearchdate'
IE_DESC = u'YouTube.com searches, newest videos first'
+
+class YoutubeSearchURLIE(InfoExtractor):  # turns a youtube.com/results?search_query=... page into a playlist of its result links
+ IE_DESC = u'YouTube.com search URLs'
+ IE_NAME = u'youtube:search_url'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'  # 'query' group: the search_query value up to the next '&'
+
+ def _real_extract(self, url):  # returns a '_type': 'playlist' dict with one 'url' entry per matched result, titled with the decoded query
+ mobj = re.match(self._VALID_URL, url)
+ query = compat_urllib_parse.unquote_plus(mobj.group('query'))  # URL-decode the search terms (unquote_plus)
+
+ webpage = self._download_webpage(url, query)
+ result_code = self._search_regex(
+ r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML')  # only the search-results <ol> is scanned further
+
+ part_codes = re.findall(
+ r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)  # each match is treated as one result
+ entries = []
+ for part_code in part_codes:
+ part_title = self._html_search_regex(
+ r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False)  # title lookup is requested as non-fatal
+ part_url_snippet = self._html_search_regex(
+ r'(?s)href="([^"]+)"', part_code, 'item URL')
+ part_url = compat_urlparse.urljoin(
+ 'https://www.youtube.com/', part_url_snippet)  # resolve the href against the site root
+ entries.append({
+ '_type': 'url',
+ 'url': part_url,
+ 'title': part_title,
+ })
+
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'title': query,
+ }
+
+
class YoutubeShowIE(InfoExtractor):
IE_DESC = u'YouTube.com (multi-season) shows'
_VALID_URL = r'https?://www\.youtube\.com/show/(.*)'