]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/steam.py
[generic] Extract subtitles from video.js (#3156)
[yt-dlp.git] / yt_dlp / extractor / steam.py
CommitLineData
3fa6b6e2
PH
1from __future__ import unicode_literals
2
462dc88b
PH
3import re
4
5from .common import InfoExtractor
6from ..utils import (
7fee3377 7 extract_attributes,
462dc88b 8 ExtractorError,
7fee3377 9 get_element_by_class,
462dc88b
PH
10)
11
12
class SteamIE(InfoExtractor):
    """Extractor for the videos listed on a Steam store or community page.

    Handles store/community ``video/<id>`` and ``app/<id>`` pages as well as
    ``steamcommunity.com/sharedfiles/filedetails/?id=<id>`` pages, and returns
    every video found on the page as one playlist.
    """

    _VALID_URL = r"""(?x)
        https?://(?:store\.steampowered|steamcommunity)\.com/
            (?:agecheck/)?
            (?P<urltype>video|app)/ #If the page is only for videos or for a game
            (?P<gameID>\d+)/?
            (?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
        |
        https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
    """
    # Page that lists the videos of a game; filled in with the game id.
    _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
    # Age-gate URL carrying a fixed birth date (1 January 1970); filled in with the id.
    _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
    _TESTS = [{
        'url': 'http://store.steampowered.com/video/105600/',
        'playlist': [
            {
                'md5': '695242613303ffa2a4c44c9374ddc067',
                'info_dict': {
                    'id': '256785003',
                    'ext': 'mp4',
                    'title': 'Terraria video 256785003',
                    'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
                    'n_entries': 2,
                }
            },
            {
                'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
                'info_dict': {
                    'id': '2040428',
                    'ext': 'mp4',
                    'title': 'Terraria video 2040428',
                    'playlist_index': 2,
                    'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
                    'n_entries': 2,
                }
            }
        ],
        'info_dict': {
            'id': '105600',
            'title': 'Terraria',
        },
        'params': {
            'playlistend': 2,
        }
    }, {
        'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/',
        'info_dict': {
            'id': '256757115',
            'title': 'Grand Theft Auto V video 256757115',
            'ext': 'mp4',
            'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
            'n_entries': 20,
        },
    }]

    def _real_extract(self, url):
        """Build a playlist from the videos found on the page behind *url*.

        Collects the ``highlight_movie_<id>`` players on the page (one entry
        per player, with its webm/mp4 sources as formats) followed by any
        YouTube videos embedded through ``<iframe>`` tags.

        Raises ExtractorError when the page yields no entry at all.
        """
        m = self._match_valid_url(url)
        fileID = m.group('fileID')
        if fileID:
            # Shared-file pages are downloaded as-is.
            video_url = url
            playlist_id = fileID
        else:
            # Store/community pages are normalised to the game's video page.
            playlist_id = m.group('gameID')
            video_url = self._VIDEO_PAGE_TEMPLATE % playlist_id

        # Presumably these pre-answer Steam's mature-content / age prompts.
        self._set_cookie('steampowered.com', 'wants_mature_content', '1')
        self._set_cookie('steampowered.com', 'birthtime', '944006401')
        self._set_cookie('steampowered.com', 'lastagecheckage', '1-0-2000')

        webpage = self._download_webpage(video_url, playlist_id)

        # The page still asks for a birth date: retry through the age-check URL.
        if re.search(r'<div[^>]+>Please enter your birth date to continue:</div>', webpage) is not None:
            video_url = self._AGECHECK_TEMPLATE % playlist_id
            self.report_age_confirmation()
            webpage = self._download_webpage(video_url, playlist_id)

        playlist_title = get_element_by_class('apphub_AppName', webpage)
        entries = []

        # Both groups are non-empty whenever the pattern matches, so no
        # per-item emptiness checks are needed on the raw matches.
        videos = re.findall(r'(<div[^>]+id=[\'"]highlight_movie_(\d+)[\'"][^>]+>)', webpage)
        for movie_tag, movie_id in videos:
            movie = extract_attributes(movie_tag)
            entry = {
                'id': movie_id,
                'title': f'{playlist_title} video {movie_id}',
            }
            formats = []
            if movie:
                entry['thumbnail'] = movie.get('data-poster')
                for quality in ('', '-hd'):
                    for ext in ('webm', 'mp4'):
                        source_url = movie.get('data-%s%s-source' % (ext, quality))
                        if source_url:
                            formats.append({
                                'format_id': ext + quality,
                                'url': source_url,
                            })
            self._sort_formats(formats)
            entry['formats'] = formats
            entries.append(entry)

        for iframe_tag in re.findall(r'(<iframe[^>]+>)', webpage):
            iframe_src = extract_attributes(iframe_tag).get('src')
            if not iframe_src:
                # An <iframe> without a src attribute yields None, which must
                # not be handed to the regex search below.
                continue
            video_id = self._search_regex(
                r'youtube\.com/embed/([0-9A-Za-z_-]{11})', iframe_src,
                'youtube_video_id', default=None)
            if video_id:
                entries.append({
                    '_type': 'url_transparent',
                    'id': video_id,
                    'url': video_id,
                    'ie_key': 'Youtube',
                })

        if not entries:
            raise ExtractorError('Could not find any videos')

        return self.playlist_result(entries, playlist_id, playlist_title)