[yt-dlp.git] / yt_dlp / extractor / steam.py

import re

from .common import InfoExtractor
from ..utils import (
    extract_attributes,
    ExtractorError,
    get_element_by_class,
)


class SteamIE(InfoExtractor):
    """Extractor for Steam store app/video pages and community shared files.

    Every supported URL is turned into a playlist: one entry per
    ``highlight_movie_<id>`` element found in the page, plus one entry per
    embedded YouTube iframe.
    """

    _VALID_URL = r"""(?x)
        https?://(?:store\.steampowered|steamcommunity)\.com/
            (?:agecheck/)?
            (?P<urltype>video|app)/ #If the page is only for videos or for a game
            (?P<gameID>\d+)/?
            (?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
        |
        https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
    """
    # Page that lists the videos of a game; filled with the game ID
    _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
    # Fallback URL requested when the page still shows the birth date prompt
    _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
    _TESTS = [{
        'url': 'http://store.steampowered.com/video/105600/',
        'playlist': [
            {
                'md5': '695242613303ffa2a4c44c9374ddc067',
                'info_dict': {
                    'id': '256785003',
                    'ext': 'mp4',
                    'title': 'Terraria video 256785003',
                    'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
                    'n_entries': 2,
                }
            },
            {
                'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
                'info_dict': {
                    'id': '2040428',
                    'ext': 'mp4',
                    'title': 'Terraria video 2040428',
                    'playlist_index': 2,
                    'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
                    'n_entries': 2,
                }
            }
        ],
        'info_dict': {
            'id': '105600',
            'title': 'Terraria',
        },
        'params': {
            'playlistend': 2,
        }
    }, {
        'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/',
        'info_dict': {
            'id': '256757115',
            'title': 'Grand Theft Auto V video 256757115',
            'ext': 'mp4',
            'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
            'n_entries': 20,
        },
    }]

    def _real_extract(self, url):
        """Build a playlist from the videos found on a Steam page.

        Entries come from two places in the downloaded HTML:

        * ``<div id="highlight_movie_<id>">`` tags, whose ``data-*``
          attributes carry direct webm/mp4 source URLs and a poster image;
        * ``<iframe>`` tags embedding YouTube videos, which are handed over
          to the Youtube extractor.

        The playlist ID is the shared-file ID when the URL is a
        ``sharedfiles`` link and the game ID otherwise.

        Raises:
            ExtractorError: if the page yields no entry at all.
        """
        m = self._match_valid_url(url)
        file_id = m.group('fileID')
        if file_id:
            # Shared-file pages are fetched from the URL exactly as given
            playlist_id = file_id
            page_url = url
        else:
            # /video/ and /app/ URLs are both mapped onto the video page
            playlist_id = m.group('gameID')
            page_url = self._VIDEO_PAGE_TEMPLATE % playlist_id

        # Presumably these make the store skip its age/mature-content
        # interstitial; the prompt is still checked for below
        self._set_cookie('steampowered.com', 'wants_mature_content', '1')
        self._set_cookie('steampowered.com', 'birthtime', '944006401')
        self._set_cookie('steampowered.com', 'lastagecheckage', '1-0-2000')

        webpage = self._download_webpage(page_url, playlist_id)

        if re.search(r'<div[^>]+>Please enter your birth date to continue:</div>', webpage) is not None:
            # NOTE(review): the age-check URL is built from playlist_id even
            # for shared files, where that is a file ID - confirm intended
            page_url = self._AGECHECK_TEMPLATE % playlist_id
            self.report_age_confirmation()
            webpage = self._download_webpage(page_url, playlist_id)

        playlist_title = get_element_by_class('apphub_AppName', webpage)
        # Without the app name the entry titles would read "None video <id>"
        title_prefix = playlist_title or 'Steam'

        entries = []
        # Both groups are non-empty whenever the pattern matches, so no
        # per-match emptiness checks are needed
        for movie_tag, movie_id in re.findall(
                r'(<div[^>]+id=[\'"]highlight_movie_(\d+)[\'"][^>]+>)', webpage):
            attrs = extract_attributes(movie_tag)
            formats = []
            for quality in ('', '-hd'):
                for ext in ('webm', 'mp4'):
                    source_url = attrs.get(f'data-{ext}{quality}-source')
                    if source_url:
                        formats.append({
                            'format_id': ext + quality,
                            'url': source_url,
                        })
            self._sort_formats(formats)
            entries.append({
                'id': movie_id,
                'title': f'{title_prefix} video {movie_id}',
                'thumbnail': attrs.get('data-poster'),
                'formats': formats,
            })

        for iframe_tag in re.findall(r'(<iframe[^>]+>)', webpage):
            # An iframe may lack a src attribute; normalise to '' so the
            # regex search below always receives a string
            iframe_src = extract_attributes(iframe_tag).get('src') or ''
            video_id = self._search_regex(
                r'youtube\.com/embed/([0-9A-Za-z_-]{11})', iframe_src,
                'youtube_video_id', default=None)
            if video_id:
                entries.append({
                    '_type': 'url_transparent',
                    'id': video_id,
                    'url': video_id,
                    'ie_key': 'Youtube',
                })

        if not entries:
            raise ExtractorError('Could not find any videos')

        return self.playlist_result(entries, playlist_id, playlist_title)


class SteamCommunityBroadcastIE(InfoExtractor):
    """Extractor for live broadcasts on steamcommunity.com/broadcast/watch/."""

    _VALID_URL = r'https?://steamcommunity\.(?:com)/broadcast/watch/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://steamcommunity.com/broadcast/watch/76561199073851486',
        'info_dict': {
            'id': '76561199073851486',
            'title': r're:Steam Community :: pepperm!nt :: Broadcast 2022-06-26 \d{2}:\d{2}',
            'ext': 'mp4',
            'uploader_id': 1113585758,
            'uploader': 'pepperm!nt',
            'live_status': 'is_live',
        },
        'skip': 'Stream has ended',
    }]

    def _real_extract(self, url):
        """Return the info dict for a live Steam Community broadcast.

        The ID in the URL is sent as ``steamid`` to ``getbroadcastmpd`` to
        obtain the HLS manifest, and as ``steamids`` to ``ajaxresolveusers``
        to look up the uploader. The title is derived from the watch page.

        Raises:
            ExtractorError: if the broadcast response carries no HLS
                manifest URL.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        json_data = self._download_json(
            'https://steamcommunity.com/broadcast/getbroadcastmpd/',
            video_id, query={'steamid': video_id})

        # Fail with a readable message instead of a bare KeyError when the
        # manifest URL is absent
        hls_url = json_data.get('hls_url')
        if not hls_url:
            raise ExtractorError(
                'No HLS manifest URL in broadcast response (the stream may have ended)')

        formats, subs = self._extract_m3u8_formats_and_subtitles(hls_url, video_id)

        # We cannot download live dash atm
        # mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(json_data['url'], video_id)
        # formats.extend(mpd_formats)
        # self._merge_subtitles(mpd_subs, target=subs)

        users = self._download_json(
            'https://steamcommunity.com/actions/ajaxresolveusers',
            video_id, query={'steamids': video_id})
        # Uploader details are optional metadata: an empty or unexpected
        # response must not abort an otherwise successful extraction
        first_user = users[0] if isinstance(users, list) and users else None
        uploader_json = first_user if isinstance(first_user, dict) else {}

        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': self._generic_title('', webpage),
            'formats': formats,
            'live_status': 'is_live',
            'view_count': json_data.get('num_view'),
            'uploader': uploader_json.get('persona_name'),
            'uploader_id': uploader_json.get('accountid'),
            'subtitles': subs,
        }
Commit	Line	Data
462dc88b PH	1	import re
	2
	3	from .common import InfoExtractor
	4	from ..utils import (
7fee3377	5	extract_attributes,
462dc88b	6	ExtractorError,
7fee3377	7	get_element_by_class,
462dc88b PH	8	)
	9
	10
	11	class SteamIE(InfoExtractor):
1f27d2c0	12	_VALID_URL = r"""(?x)
070f6a85	13	https?://(?:store\.steampowered\|steamcommunity)\.com/
070f6a85	14	(?:agecheck/)?
1f27d2c0 PH	15	(?P<urltype>video\|app)/ #If the page is only for videos or for a game
	16	(?P<gameID>\d+)/?
	17	(?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
	18	\|
	19	https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
	20	"""
462dc88b PH	21	_VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
462dc88b PH	22	_AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
1f27d2c0	23	_TESTS = [{
611c1dd9 S	24	'url': 'http://store.steampowered.com/video/105600/',
611c1dd9 S	25	'playlist': [
08217714	26	{
070f6a85	27	'md5': '695242613303ffa2a4c44c9374ddc067',
611c1dd9	28	'info_dict': {
070f6a85	29	'id': '256785003',
7fee3377	30	'ext': 'mp4',
070f6a85	31	'title': 'Terraria video 256785003',
	32	'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
	33	'n_entries': 2,
08217714 PH	34	}
	35	},
	36	{
070f6a85	37	'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
611c1dd9	38	'info_dict': {
070f6a85	39	'id': '2040428',
7fee3377	40	'ext': 'mp4',
070f6a85	41	'title': 'Terraria video 2040428',
3fa6b6e2	42	'playlist_index': 2,
070f6a85	43	'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
070f6a85	44	'n_entries': 2,
08217714 PH	45	}
08217714 PH	46	}
7f9c31df	47	],
7fee3377 RA	48	'info_dict': {
	49	'id': '105600',
	50	'title': 'Terraria',
	51	},
7f9c31df PH	52	'params': {
	53	'playlistend': 2,
	54	}
1f27d2c0	55	}, {
070f6a85	56	'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/',
1f27d2c0	57	'info_dict': {
070f6a85	58	'id': '256757115',
070f6a85	59	'title': 'Grand Theft Auto V video 256757115',
1f27d2c0	60	'ext': 'mp4',
070f6a85	61	'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
	62	'n_entries': 20,
	63	},
1f27d2c0	64	}]
08217714	65
462dc88b	66	def _real_extract(self, url):
5ad28e7f	67	m = self._match_valid_url(url)
1f27d2c0 PH	68	fileID = m.group('fileID')
1f27d2c0 PH	69	if fileID:
070f6a85	70	video_url = url
1f27d2c0 PH	71	playlist_id = fileID
	72	else:
	73	gameID = m.group('gameID')
	74	playlist_id = gameID
070f6a85	75	video_url = self._VIDEO_PAGE_TEMPLATE % playlist_id
717ea4e1	76
070f6a85	77	self._set_cookie('steampowered.com', 'wants_mature_content', '1')
	78	self._set_cookie('steampowered.com', 'birthtime', '944006401')
	79	self._set_cookie('steampowered.com', 'lastagecheckage', '1-0-2000')
717ea4e1	80
070f6a85	81	webpage = self._download_webpage(video_url, playlist_id)
462dc88b	82
070f6a85	83	if re.search('<div[^>]+>Please enter your birth date to continue:</div>', webpage) is not None:
070f6a85	84	video_url = self._AGECHECK_TEMPLATE % playlist_id
462dc88b	85	self.report_age_confirmation()
070f6a85	86	webpage = self._download_webpage(video_url, playlist_id)
7fee3377	87
070f6a85	88	videos = re.findall(r'(<div[^>]+id=[\'"]highlight_movie_(\d+)[\'"][^>]+>)', webpage)
7fee3377	89	entries = []
070f6a85	90	playlist_title = get_element_by_class('apphub_AppName', webpage)
	91	for movie, movie_id in videos:
	92	if not movie:
	93	continue
	94	movie = extract_attributes(movie)
	95	if not movie_id:
	96	continue
	97	entry = {
	98	'id': movie_id,
	99	'title': f'{playlist_title} video {movie_id}',
	100	}
	101	formats = []
	102	if movie:
	103	entry['thumbnail'] = movie.get('data-poster')
	104	for quality in ('', '-hd'):
	105	for ext in ('webm', 'mp4'):
	106	video_url = movie.get('data-%s%s-source' % (ext, quality))
	107	if video_url:
	108	formats.append({
	109	'format_id': ext + quality,
	110	'url': video_url,
	111	})
	112	self._sort_formats(formats)
	113	entry['formats'] = formats
	114	entries.append(entry)
	115	embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
	116	for evideos in embedded_videos:
	117	evideos = extract_attributes(evideos).get('src')
	118	video_id = self._search_regex(r'youtube\.com/embed/([0-9A-Za-z_-]{11})', evideos, 'youtube_video_id', default=None)
	119	if video_id:
7fee3377	120	entries.append({
070f6a85	121	'_type': 'url_transparent',
	122	'id': video_id,
	123	'url': video_id,
7fee3377 RA	124	'ie_key': 'Youtube',
7fee3377 RA	125	})
7fee3377	126	if not entries:
1f27d2c0	127	raise ExtractorError('Could not find any videos')
462dc88b	128
7fee3377	129	return self.playlist_result(entries, playlist_id, playlist_title)
5fb450a6 H	130
	131
	132	class SteamCommunityBroadcastIE(InfoExtractor):
	133	_VALID_URL = r'https?://steamcommunity\.(?:com)/broadcast/watch/(?P<id>\d+)'
	134	_TESTS = [{
	135	'url': 'https://steamcommunity.com/broadcast/watch/76561199073851486',
	136	'info_dict': {
	137	'id': '76561199073851486',
	138	'title': r're:Steam Community :: pepperm!nt :: Broadcast 2022-06-26 \d{2}:\d{2}',
	139	'ext': 'mp4',
	140	'uploader_id': 1113585758,
	141	'uploader': 'pepperm!nt',
	142	'live_status': 'is_live',
	143	},
	144	'skip': 'Stream has ended',
	145	}]
	146
	147	def _real_extract(self, url):
	148	video_id = self._match_id(url)
	149	webpage = self._download_webpage(url, video_id)
	150	json_data = self._download_json(
	151	'https://steamcommunity.com/broadcast/getbroadcastmpd/',
	152	video_id, query={'steamid': f'{video_id}'})
	153
	154	formats, subs = self._extract_m3u8_formats_and_subtitles(json_data['hls_url'], video_id)
	155
	156	''' # We cannot download live dash atm
	157	mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(json_data['url'], video_id)
	158	formats.extend(mpd_formats)
	159	self._merge_subtitles(mpd_subs, target=subs)
	160	'''
	161
	162	uploader_json = self._download_json(
	163	'https://steamcommunity.com/actions/ajaxresolveusers',
	164	video_id, query={'steamids': video_id})[0]
	165
	166	self._sort_formats(formats)
	167	return {
	168	'id': video_id,
62b8dac4	169	'title': self._generic_title('', webpage),
5fb450a6 H	170	'formats': formats,
	171	'live_status': 'is_live',
	172	'view_count': json_data.get('num_view'),
	173	'uploader': uploader_json.get('persona_name'),
	174	'uploader_id': uploader_json.get('accountid'),
	175	'subtitles': subs,
	176	}