X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/c76eb41bb9e7e0a106ce44f4afcf74b0c00a3fb2..30a074c2b666503eb1b09f06d7c7d8fcb1efd058:/youtube_dlc/extractor/youtube.py diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py index 59e5bc2ab..20657bb19 100644 --- a/youtube_dlc/extractor/youtube.py +++ b/youtube_dlc/extractor/youtube.py @@ -332,6 +332,36 @@ def _extract_ytcfg(self, video_id, webpage): r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}'), video_id, fatal=False) + def _extract_video(self, renderer): + video_id = renderer.get('videoId') + title = try_get( + renderer, + (lambda x: x['title']['runs'][0]['text'], + lambda x: x['title']['simpleText']), compat_str) + description = try_get( + renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'], + compat_str) + duration = parse_duration(try_get( + renderer, lambda x: x['lengthText']['simpleText'], compat_str)) + view_count_text = try_get( + renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or '' + view_count = str_to_int(self._search_regex( + r'^([\d,]+)', re.sub(r'\s', '', view_count_text), + 'view count', default=None)) + uploader = try_get( + renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str) + return { + '_type': 'url_transparent', + 'ie_key': YoutubeIE.ie_key(), + 'id': video_id, + 'url': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'view_count': view_count, + 'uploader': uploader, + } + class YoutubeIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube.com' @@ -1686,11 +1716,12 @@ def extract_embedded_config(embed_webpage, video_id): if embedded_config: return embedded_config + video_info = {} player_response = {} + ytplayer_config = None + embed_webpage = None # Get video info - video_info = {} - embed_webpage = None if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+' or re.search(r'player-age-gate-content">', video_webpage) is not None): cookie_keys = self._get_cookies('https://www.youtube.com').keys() @@ -1816,6 +1847,9 @@ def extract_unavailable_message(): if not isinstance(video_info, dict): video_info = {} + playable_in_embed = try_get( + player_response, lambda x: x['playabilityStatus']['playableInEmbed']) + video_details = try_get( player_response, lambda x: x['videoDetails'], dict) or {} @@ -2537,6 +2571,7 @@ def decrypt_sig(mobj): 'release_date': release_date, 'release_year': release_year, 'subscriber_count': subscriber_count, + 'playable_in_embed': playable_in_embed, } @@ -2866,36 +2901,6 @@ def _extract_grid_item_renderer(item): if renderer: return renderer - def _extract_video(self, renderer): - video_id = renderer.get('videoId') - title = try_get( - renderer, - (lambda x: x['title']['runs'][0]['text'], - lambda x: x['title']['simpleText']), compat_str) - description = try_get( - renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'], - compat_str) - duration = parse_duration(try_get( - renderer, lambda x: x['lengthText']['simpleText'], compat_str)) - view_count_text = try_get( - renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or '' - view_count = str_to_int(self._search_regex( - r'^([\d,]+)', re.sub(r'\s', '', view_count_text), - 'view count', default=None)) - uploader = try_get( - renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str) - return { - '_type': 'url_transparent', - 'ie_key': YoutubeIE.ie_key(), - 'id': video_id, - 'url': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'view_count': view_count, - 'uploader': uploader, - } - def _grid_entries(self, grid_renderer): for item in grid_renderer['items']: if not isinstance(item, dict): @@ -3578,65 +3583,38 @@ def _entries(self, query, n): if not slr_contents: break - isr_contents = [] - continuation_token = None # Youtube sometimes adds promoted content to searches, # changing the index location of videos and token. # So we search through all entries till we find them. - for index, isr in enumerate(slr_contents): + continuation_token = None + for slr_content in slr_contents: + isr_contents = try_get( + slr_content, + lambda x: x['itemSectionRenderer']['contents'], + list) if not isr_contents: - isr_contents = try_get( - slr_contents, - (lambda x: x[index]['itemSectionRenderer']['contents']), - list) - for content in isr_contents: - if content.get('videoRenderer') is not None: - break - else: - isr_contents = [] + continue + for content in isr_contents: + if not isinstance(content, dict): + continue + video = content.get('videoRenderer') + if not isinstance(video, dict): + continue + video_id = video.get('videoId') + if not video_id: + continue + + yield self._extract_video(video) + total += 1 + if total == n: + return if continuation_token is None: continuation_token = try_get( - slr_contents, - lambda x: x[index]['continuationItemRenderer']['continuationEndpoint']['continuationCommand'][ - 'token'], + slr_content, + lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'], compat_str) - if continuation_token is not None and isr_contents: - break - if not isr_contents: - break - for content in isr_contents: - if not isinstance(content, dict): - continue - video = content.get('videoRenderer') - if not isinstance(video, dict): - continue - video_id = video.get('videoId') - if not video_id: - continue - title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str) - description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str) - duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str)) - view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or '' - view_count = int_or_none(self._search_regex( - r'^(\d+)', re.sub(r'\s', '', view_count_text), - 'view count', default=None)) - uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str) - total += 1 - yield { - '_type': 'url_transparent', - 'ie_key': YoutubeIE.ie_key(), - 'id': video_id, - 'url': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'view_count': view_count, - 'uploader': uploader, - } - if total == n: - return if not continuation_token: break data['continuation'] = continuation_token