[yt-dlp.git] / yt_dlp / extractor / steam.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    extract_attributes,
    ExtractorError,
    get_element_by_class,
)


class SteamIE(InfoExtractor):
    """Extractor for Steam store / community pages that carry videos.

    The result is a playlist holding one entry per ``highlight_movie_<id>``
    element found on the downloaded page, followed by one entry per embedded
    YouTube iframe.
    """
    _VALID_URL = r"""(?x)
        https?://(?:store\.steampowered|steamcommunity)\.com/
            (?:agecheck/)?
            (?P<urltype>video|app)/ #If the page is only for videos or for a game
            (?P<gameID>\d+)/?
            (?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
        |
        https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
    """
    # Page that is downloaded for app/video URLs; filled in with the game ID.
    _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
    # Fallback page requested when the first download shows the birth date prompt.
    _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
    _TESTS = [{
        'url': 'http://store.steampowered.com/video/105600/',
        'playlist': [
            {
                'md5': '695242613303ffa2a4c44c9374ddc067',
                'info_dict': {
                    'id': '256785003',
                    'ext': 'mp4',
                    'title': 'Terraria video 256785003',
                    'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
                    'n_entries': 2,
                }
            },
            {
                'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
                'info_dict': {
                    'id': '2040428',
                    'ext': 'mp4',
                    'title': 'Terraria video 2040428',
                    'playlist_index': 2,
                    'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
                    'n_entries': 2,
                }
            }
        ],
        'info_dict': {
            'id': '105600',
            'title': 'Terraria',
        },
        'params': {
            'playlistend': 2,
        }
    }, {
        'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/',
        'info_dict': {
            'id': '256757115',
            'title': 'Grand Theft Auto V video 256757115',
            'ext': 'mp4',
            'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
            'n_entries': 20,
        },
    }]

    def _real_extract(self, url):
        """Return a playlist of all videos found on the Steam page for *url*.

        For sharedfiles URLs the given URL itself is downloaded and the file
        ID becomes the playlist ID. For app/video URLs the store video page
        of the game is downloaded and the game ID becomes the playlist ID.

        Raises ExtractorError when the page contains neither highlight movies
        nor embedded YouTube videos.
        """
        m = self._match_valid_url(url)
        fileID = m.group('fileID')
        if fileID:
            video_url = url
            playlist_id = fileID
        else:
            playlist_id = m.group('gameID')
            video_url = self._VIDEO_PAGE_TEMPLATE % playlist_id

        # Presumably these cookies mark the age check as already passed so
        # that mature pages are served directly -- TODO confirm against Steam.
        self._set_cookie('steampowered.com', 'wants_mature_content', '1')
        self._set_cookie('steampowered.com', 'birthtime', '944006401')
        self._set_cookie('steampowered.com', 'lastagecheckage', '1-0-2000')

        webpage = self._download_webpage(video_url, playlist_id)

        # If the birth date prompt is shown anyway, retry through the
        # agecheck URL, which carries a fixed date of birth.
        if re.search(r'<div[^>]+>Please enter your birth date to continue:</div>', webpage) is not None:
            video_url = self._AGECHECK_TEMPLATE % playlist_id
            self.report_age_confirmation()
            webpage = self._download_webpage(video_url, playlist_id)

        entries = []
        playlist_title = get_element_by_class('apphub_AppName', webpage)

        # Each match is (opening <div ...> tag, numeric movie id); both groups
        # are non-empty whenever the pattern matches.
        videos = re.findall(r'(<div[^>]+id=[\'"]highlight_movie_(\d+)[\'"][^>]+>)', webpage)
        for movie_tag, movie_id in videos:
            movie = extract_attributes(movie_tag)
            entry = {
                'id': movie_id,
                'title': f'{playlist_title} video {movie_id}',
            }
            formats = []
            if movie:
                entry['thumbnail'] = movie.get('data-poster')
                # Stream URLs are read from data-<ext>[-hd]-source attributes.
                for quality in ('', '-hd'):
                    for ext in ('webm', 'mp4'):
                        source_url = movie.get(f'data-{ext}{quality}-source')
                        if source_url:
                            formats.append({
                                'format_id': ext + quality,
                                'url': source_url,
                            })
            self._sort_formats(formats)
            entry['formats'] = formats
            entries.append(entry)

        for iframe_tag in re.findall(r'(<iframe[^>]+>)', webpage):
            iframe_src = extract_attributes(iframe_tag).get('src')
            if not iframe_src:
                # An iframe without a src cannot be a YouTube embed, and
                # _search_regex needs a string to search in.
                continue
            video_id = self._search_regex(
                r'youtube\.com/embed/([0-9A-Za-z_-]{11})', iframe_src,
                'youtube_video_id', default=None)
            if video_id:
                entries.append({
                    '_type': 'url_transparent',
                    'id': video_id,
                    'url': video_id,
                    'ie_key': 'Youtube',
                })

        if not entries:
            raise ExtractorError('Could not find any videos')

        return self.playlist_result(entries, playlist_id, playlist_title)
Commit	Line	Data
3fa6b6e2 PH	1	from __future__ import unicode_literals
3fa6b6e2 PH	2
462dc88b PH	3	import re
	4
	5	from .common import InfoExtractor
	6	from ..utils import (
7fee3377	7	extract_attributes,
462dc88b	8	ExtractorError,
7fee3377	9	get_element_by_class,
462dc88b PH	10	)
	11
	12
	13	class SteamIE(InfoExtractor):
1f27d2c0	14	_VALID_URL = r"""(?x)
070f6a85	15	https?://(?:store\.steampowered\|steamcommunity)\.com/
070f6a85	16	(?:agecheck/)?
1f27d2c0 PH	17	(?P<urltype>video\|app)/ #If the page is only for videos or for a game
	18	(?P<gameID>\d+)/?
	19	(?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
	20	\|
	21	https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
	22	"""
462dc88b PH	23	_VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
462dc88b PH	24	_AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
1f27d2c0	25	_TESTS = [{
611c1dd9 S	26	'url': 'http://store.steampowered.com/video/105600/',
611c1dd9 S	27	'playlist': [
08217714	28	{
070f6a85	29	'md5': '695242613303ffa2a4c44c9374ddc067',
611c1dd9	30	'info_dict': {
070f6a85	31	'id': '256785003',
7fee3377	32	'ext': 'mp4',
070f6a85	33	'title': 'Terraria video 256785003',
	34	'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
	35	'n_entries': 2,
08217714 PH	36	}
	37	},
	38	{
070f6a85	39	'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
611c1dd9	40	'info_dict': {
070f6a85	41	'id': '2040428',
7fee3377	42	'ext': 'mp4',
070f6a85	43	'title': 'Terraria video 2040428',
3fa6b6e2	44	'playlist_index': 2,
070f6a85	45	'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
070f6a85	46	'n_entries': 2,
08217714 PH	47	}
08217714 PH	48	}
7f9c31df	49	],
7fee3377 RA	50	'info_dict': {
	51	'id': '105600',
	52	'title': 'Terraria',
	53	},
7f9c31df PH	54	'params': {
	55	'playlistend': 2,
	56	}
1f27d2c0	57	}, {
070f6a85	58	'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/',
1f27d2c0	59	'info_dict': {
070f6a85	60	'id': '256757115',
070f6a85	61	'title': 'Grand Theft Auto V video 256757115',
1f27d2c0	62	'ext': 'mp4',
070f6a85	63	'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
	64	'n_entries': 20,
	65	},
1f27d2c0	66	}]
08217714	67
462dc88b	68	def _real_extract(self, url):
5ad28e7f	69	m = self._match_valid_url(url)
1f27d2c0 PH	70	fileID = m.group('fileID')
1f27d2c0 PH	71	if fileID:
070f6a85	72	video_url = url
1f27d2c0 PH	73	playlist_id = fileID
	74	else:
	75	gameID = m.group('gameID')
	76	playlist_id = gameID
070f6a85	77	video_url = self._VIDEO_PAGE_TEMPLATE % playlist_id
717ea4e1	78
070f6a85	79	self._set_cookie('steampowered.com', 'wants_mature_content', '1')
	80	self._set_cookie('steampowered.com', 'birthtime', '944006401')
	81	self._set_cookie('steampowered.com', 'lastagecheckage', '1-0-2000')
717ea4e1	82
070f6a85	83	webpage = self._download_webpage(video_url, playlist_id)
462dc88b	84
070f6a85	85	if re.search('<div[^>]+>Please enter your birth date to continue:</div>', webpage) is not None:
070f6a85	86	video_url = self._AGECHECK_TEMPLATE % playlist_id
462dc88b	87	self.report_age_confirmation()
070f6a85	88	webpage = self._download_webpage(video_url, playlist_id)
7fee3377	89
070f6a85	90	videos = re.findall(r'(<div[^>]+id=[\'"]highlight_movie_(\d+)[\'"][^>]+>)', webpage)
7fee3377	91	entries = []
070f6a85	92	playlist_title = get_element_by_class('apphub_AppName', webpage)
	93	for movie, movie_id in videos:
	94	if not movie:
	95	continue
	96	movie = extract_attributes(movie)
	97	if not movie_id:
	98	continue
	99	entry = {
	100	'id': movie_id,
	101	'title': f'{playlist_title} video {movie_id}',
	102	}
	103	formats = []
	104	if movie:
	105	entry['thumbnail'] = movie.get('data-poster')
	106	for quality in ('', '-hd'):
	107	for ext in ('webm', 'mp4'):
	108	video_url = movie.get('data-%s%s-source' % (ext, quality))
	109	if video_url:
	110	formats.append({
	111	'format_id': ext + quality,
	112	'url': video_url,
	113	})
	114	self._sort_formats(formats)
	115	entry['formats'] = formats
	116	entries.append(entry)
	117	embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
	118	for evideos in embedded_videos:
	119	evideos = extract_attributes(evideos).get('src')
	120	video_id = self._search_regex(r'youtube\.com/embed/([0-9A-Za-z_-]{11})', evideos, 'youtube_video_id', default=None)
	121	if video_id:
7fee3377	122	entries.append({
070f6a85	123	'_type': 'url_transparent',
	124	'id': video_id,
	125	'url': video_id,
7fee3377 RA	126	'ie_key': 'Youtube',
7fee3377 RA	127	})
7fee3377	128	if not entries:
1f27d2c0	129	raise ExtractorError('Could not find any videos')
462dc88b	130
7fee3377	131	return self.playlist_result(entries, playlist_id, playlist_title)