Release 2021.07.21

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index aa0421a72e011bbda95c7b70f9c80444d57e1fdd..7a1d39ac8524feb2c4ccec18c5a5fb0ccc378e51 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
     int_or_none,
     intlist_to_bytes,
     mimetype2ext,
+    network_exceptions,
     orderedSet,
     parse_codecs,
     parse_count,
     parse_duration,
+    parse_iso8601,
     qualities,
     remove_start,
     smuggle_url,
@@ -98,7 +100,9 @@ def warn(message):
             self.report_warning(message)
 
         # username+password login is broken
-        if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
+        if (self._LOGIN_REQUIRED
+                and self.get_param('cookiefile') is None
+                and self.get_param('cookiesfrombrowser') is None):
             self.raise_login_required(
                 'Login details are needed to download this content', method='cookies')
         username, password = self._get_login_info()
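Note: the hunk above skips raise_login_required whenever cookies are supplied either via a cookie file or via a browser profile. A minimal usage sketch of the two corresponding options on the public YoutubeDL API; the exact tuple shape of cookiesfrombrowser is an assumption here:

    # Sketch: supply cookies so extractors marked _LOGIN_REQUIRED do not abort.
    import yt_dlp

    ydl_opts = {
        'cookiefile': 'cookies.txt',             # CLI equivalent: --cookies cookies.txt
        # 'cookiesfrombrowser': ('firefox',),    # CLI equivalent: --cookies-from-browser firefox (tuple shape assumed)
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
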
@@ -517,13 +521,15 @@ def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
         yt_cookies = self._get_cookies('https://www.youtube.com')
         sapisid_cookie = dict_get(
             yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
-        if sapisid_cookie is None:
+        if sapisid_cookie is None or not sapisid_cookie.value:
             return
         time_now = round(time.time())
         # SAPISID cookie is required if not already present
         if not yt_cookies.get('SAPISID'):
+            self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie', only_once=True)
             self._set_cookie(
                 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
+        self.write_debug('Extracted SAPISID cookie', only_once=True)
         # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
         sapisidhash = hashlib.sha1(
             f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
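Note: the lines above only compute the digest; for context, a self-contained sketch of the SAPISIDHASH scheme from the referenced StackOverflow answer (https://stackoverflow.com/a/32065323). The 'SAPISIDHASH <timestamp>_<sha1>' header shape is taken from that reference, not from this hunk:

    import hashlib
    import time

    def sapisidhash_header(sapisid, origin='https://www.youtube.com'):
        # SHA-1 over "<unix time> <SAPISID value> <origin>", as in the hunk above.
        time_now = round(time.time())
        digest = hashlib.sha1(f'{time_now} {sapisid} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{digest}'  # header value shape assumed from the reference
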
@@ -755,12 +761,15 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers
                     api_hostname=api_hostname, default_client=default_client,
                     note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
             except ExtractorError as e:
-                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
+                if isinstance(e.cause, network_exceptions):
                     # Downloading page may result in intermittent 5xx HTTP error
                     # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
-                    last_error = 'HTTP Error %s' % e.cause.code
-                    if count < retries:
-                        continue
+                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
+                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
+                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
+                        last_error = error_to_compat_str(e.cause or e)
+                        if count < retries:
+                            continue
                 if fatal:
                     raise
                 else:
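Note: the new retry logic above retries on any network exception except HTTP 403/429, which usually signal rate limiting or denied access and will not succeed on retry. A simplified stand-alone sketch of that pattern (urllib exceptions stand in for yt-dlp's network_exceptions/compat_HTTPError):

    import urllib.error

    RETRIES = 3
    network_exceptions = (urllib.error.URLError, ConnectionError)  # stand-in for yt-dlp's tuple

    def fetch_with_retries(fetch):
        last_error = None
        for count in range(RETRIES + 1):
            try:
                return fetch()
            except network_exceptions as e:
                # 403/429 are not transient; do not burn retries on them.
                if isinstance(e, urllib.error.HTTPError) and e.code in (403, 429):
                    raise
                last_error = str(e)
        raise RuntimeError('Giving up after %d retries: %s' % (RETRIES, last_error))
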
@@ -1929,10 +1938,11 @@ def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=F
         return sts
 
     def _mark_watched(self, video_id, player_responses):
-        playback_url = url_or_none((traverse_obj(
-            player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
-            expected_type=str) or [None])[0])
+        playback_url = traverse_obj(
+            player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
+            expected_type=url_or_none, get_all=False)
         if not playback_url:
+            self.report_warning('Unable to mark watched')
             return
         parsed_playback_url = compat_urlparse.urlparse(playback_url)
         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
@@ -2606,8 +2616,7 @@ def _real_extract(self, url):
             self._get_requested_clients(url, smuggled_data),
             video_id, webpage, master_ytcfg, player_url, identity_token))
 
-        get_first = lambda obj, keys, **kwargs: (
-            traverse_obj(obj, (..., *variadic(keys)), **kwargs) or [None])[0]
+        get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
 
         playability_statuses = traverse_obj(
             player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
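Note: get_first above returns the first non-empty match for a key path across all player responses. A tiny illustration with plain dicts (traverse_obj and variadic are the yt_dlp.utils helpers imported at the top of this file):

    from yt_dlp.utils import traverse_obj, variadic

    get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)

    player_responses = [
        {'videoDetails': {}},                       # first client: no channelId
        {'videoDetails': {'channelId': 'UC1234'}},  # second client has one
    ]
    assert get_first(player_responses, ('videoDetails', 'channelId')) == 'UC1234'
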
@@ -2678,17 +2687,10 @@ def feed_entry(name):
             else:
                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
 
-        category = get_first(microformats, 'category') or search_meta('genre')
-        channel_id = get_first(video_details, 'channelId') \
-            or get_first(microformats, 'externalChannelId') \
-            or search_meta('channelId')
-        duration = int_or_none(
-            get_first(video_details, 'lengthSeconds')
-            or get_first(microformats, 'lengthSeconds')) \
-            or parse_duration(search_meta('duration'))
+        live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
         is_live = get_first(video_details, 'isLive')
-        is_upcoming = get_first(video_details, 'isUpcoming')
-        owner_profile_url = get_first(microformats, 'ownerProfileUrl')
+        if is_live is None:
+            is_live = get_first(live_broadcast_details, 'isLiveNow')
 
         streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
         formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
@@ -2767,6 +2769,7 @@ def feed_entry(name):
         # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
         # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
         hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
+        # TODO: Test them also? - For some videos, even these don't exist
         guaranteed_thumbnail_names = [
             'hqdefault', 'hq1', 'hq2', 'hq3', '0',
             'mqdefault', 'mq1', 'mq2', 'mq3',
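Note: the hunk above (cut mid-list by the diff) only declares thumbnail names; for orientation, a hedged sketch of how such names typically map to URLs on i.ytimg.com (the URL template is an assumption, not part of this hunk):

    def candidate_thumbnails(video_id, names=('maxresdefault', 'hqdefault', 'mqdefault', '0')):
        # Assumed templates: i.ytimg.com/vi/<id>/<name>.jpg and i.ytimg.com/vi_webp/<id>/<name>.webp
        for name in names:
            yield 'https://i.ytimg.com/vi/%s/%s.jpg' % (video_id, name)
            yield 'https://i.ytimg.com/vi_webp/%s/%s.webp' % (video_id, name)
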
@@ -2786,6 +2789,29 @@ def feed_entry(name):
             thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
         self._remove_duplicate_formats(thumbnails)
 
+        category = get_first(microformats, 'category') or search_meta('genre')
+        channel_id = str_or_none(
+            get_first(video_details, 'channelId')
+            or get_first(microformats, 'externalChannelId')
+            or search_meta('channelId'))
+        duration = int_or_none(
+            get_first(video_details, 'lengthSeconds')
+            or get_first(microformats, 'lengthSeconds')
+            or parse_duration(search_meta('duration'))) or None
+        owner_profile_url = get_first(microformats, 'ownerProfileUrl')
+
+        live_content = get_first(video_details, 'isLiveContent')
+        is_upcoming = get_first(video_details, 'isUpcoming')
+        if is_live is None:
+            if is_upcoming or live_content is False:
+                is_live = False
+        if is_upcoming is None and (live_content or is_live):
+            is_upcoming = False
+        live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
+        live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
+        if not duration and live_endtime and live_starttime:
+            duration = live_endtime - live_starttime
+
         info = {
             'id': video_id,
             'title': self._live_title(video_title) if is_live else video_title,
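Note: the duration/live-status block added above falls back to liveBroadcastDetails timestamps when lengthSeconds is missing for a finished live stream. A worked example of that arithmetic with parse_iso8601 (timestamps invented for illustration):

    from yt_dlp.utils import parse_iso8601

    live_starttime = parse_iso8601('2021-07-21T18:00:00+00:00')  # 1626890400
    live_endtime = parse_iso8601('2021-07-21T19:30:00+00:00')    # 1626895800
    duration = live_endtime - live_starttime                     # 5400 seconds, i.e. 1h30m
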
@@ -2812,9 +2838,13 @@ def feed_entry(name):
             'webpage_url': webpage_url,
             'categories': [category] if category else None,
             'tags': keywords,
-            'is_live': is_live,
             'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
-            'was_live': get_first(video_details, 'isLiveContent'),
+            'is_live': is_live,
+            'was_live': (False if is_live or is_upcoming or live_content is False
+                         else None if is_live is None or is_upcoming is None
+                         else live_content),
+            'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
+            'release_timestamp': live_starttime,
         }
 
         pctr = get_first(player_responses, ('captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)