X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/3014b0ae835e3f42b5f3628464ed7e4b2557ef6c..09728d5fbc93c769b3f8971c06e9ed0bfb168b37:/youtube_dl/extractor/youtube.py
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 44c1191bd..6c9f77d95 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -125,6 +125,12 @@ def _login(self):
if login_results is False:
return False
+ error_msg = self._html_search_regex(
+ r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
+ login_results, 'error message', default=None)
+ if error_msg:
+ raise ExtractorError('Unable to login: %s' % error_msg, expected=True)
+
if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
@@ -338,6 +344,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
+ '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
+ '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
# Dash webm
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
@@ -1320,9 +1328,9 @@ def add_dash_mpd(video_info):
if video_description:
video_description = re.sub(r'''(?x)
]*>
[^<]+\.{3}\s*
@@ -1818,20 +1826,32 @@ def _real_initialize(self):
def _extract_mix(self, playlist_id):
# The mixes are generated from a single video
# the id of the playlist is just 'RD' + video_id
- url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
- webpage = self._download_webpage(
- url, playlist_id, 'Downloading Youtube mix')
+ ids = []
+ last_id = playlist_id[-11:]
+ for n in itertools.count(1):
+ url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
+ webpage = self._download_webpage(
+ url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
+ new_ids = orderedSet(re.findall(
+ r'''(?xs)data-video-username=".*?".*?
+ href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
+ webpage))
+ # Fetch new pages until all the videos are repeated, it seems that
+ # there are always 51 unique videos.
+ new_ids = [_id for _id in new_ids if _id not in ids]
+ if not new_ids:
+ break
+ ids.extend(new_ids)
+ last_id = ids[-1]
+
+ url_results = self._ids_to_results(ids)
+
search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
title_span = (
search_title('playlist-title') or
search_title('title long-title') or
search_title('title'))
title = clean_html(title_span)
- ids = orderedSet(re.findall(
- r'''(?xs)data-video-username=".*?".*?
- href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
- webpage))
- url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, title)
@@ -2121,10 +2141,11 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
_EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
-class YoutubeSearchURLIE(InfoExtractor):
+class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
IE_DESC = 'YouTube.com search URLs'
IE_NAME = 'youtube:search_url'
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P[^&]+)(?:[&]|$)'
+ _VIDEO_RE = r'href="\s*/watch\?v=(?P[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P[^"]+))?'
_TESTS = [{
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
'playlist_mincount': 5,
@@ -2139,32 +2160,8 @@ class YoutubeSearchURLIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
query = compat_urllib_parse_unquote_plus(mobj.group('query'))
-
webpage = self._download_webpage(url, query)
- result_code = self._search_regex(
- r'(?s)]+class="item-section"(.*?)
', webpage, 'result HTML')
-
- part_codes = re.findall(
- r'(?s)]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)
', result_code)
- entries = []
- for part_code in part_codes:
- part_title = self._html_search_regex(
- [r'(?s)title="([^"]+)"', r'>([^<]+)'], part_code, 'item title', fatal=False)
- part_url_snippet = self._html_search_regex(
- r'(?s)href="([^"]+)"', part_code, 'item URL')
- part_url = compat_urlparse.urljoin(
- 'https://www.youtube.com/', part_url_snippet)
- entries.append({
- '_type': 'url',
- 'url': part_url,
- 'title': part_title,
- })
-
- return {
- '_type': 'playlist',
- 'entries': entries,
- 'title': query,
- }
+ return self.playlist_result(self._process_page(webpage), playlist_title=query)
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):