]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/steam.py
[dplay] Add extractors for site changes (#2401)
[yt-dlp.git] / yt_dlp / extractor / steam.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7 extract_attributes,
8 ExtractorError,
9 get_element_by_class,
10 )
11
12
class SteamIE(InfoExtractor):
    """Extract the trailer/highlight videos listed on a Steam page.

    Handles store/community ``video`` and ``app`` URLs (optionally behind
    ``agecheck/``) as well as community ``sharedfiles/filedetails`` URLs.
    The result is always a playlist: one entry per ``highlight_movie_<id>``
    element found on the page, plus one ``url_transparent`` entry per embedded
    YouTube iframe.
    """

    _VALID_URL = r"""(?x)
        https?://(?:store\.steampowered|steamcommunity)\.com/
            (?:agecheck/)?
            (?P<urltype>video|app)/ #If the page is only for videos or for a game
            (?P<gameID>\d+)/?
            (?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
        |
        https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
    """
    # Page that lists every video of a game; formatted with the game ID.
    _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
    # Same page requested through the age gate with a fixed 1970 birth date.
    _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
    _TESTS = [{
        'url': 'http://store.steampowered.com/video/105600/',
        'playlist': [
            {
                'md5': '695242613303ffa2a4c44c9374ddc067',
                'info_dict': {
                    'id': '256785003',
                    'ext': 'mp4',
                    'title': 'Terraria video 256785003',
                    'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
                    'n_entries': 2,
                }
            },
            {
                'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
                'info_dict': {
                    'id': '2040428',
                    'ext': 'mp4',
                    'title': 'Terraria video 2040428',
                    'playlist_index': 2,
                    'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
                    'n_entries': 2,
                }
            }
        ],
        'info_dict': {
            'id': '105600',
            'title': 'Terraria',
        },
        'params': {
            'playlistend': 2,
        }
    }, {
        'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/',
        'info_dict': {
            'id': '256757115',
            'title': 'Grand Theft Auto V video 256757115',
            'ext': 'mp4',
            'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
            'n_entries': 20,
        },
    }]

    def _real_extract(self, url):
        """Build a playlist from the videos found on the page behind *url*.

        For ``sharedfiles`` URLs the page is fetched as-is and the file ID is
        used as the playlist ID; for every other URL the game's video page is
        fetched and the game ID is used as the playlist ID.

        Raises ExtractorError when neither a highlight movie nor an embedded
        YouTube video is found.
        """
        m = self._match_valid_url(url)
        fileID = m.group('fileID')
        if fileID:
            video_url = url
            playlist_id = fileID
        else:
            playlist_id = m.group('gameID')
            video_url = self._VIDEO_PAGE_TEMPLATE % playlist_id

        # Cookies sent up front so the store (presumably) skips its age and
        # mature-content prompts on the first request.
        self._set_cookie('steampowered.com', 'wants_mature_content', '1')
        self._set_cookie('steampowered.com', 'birthtime', '944006401')
        self._set_cookie('steampowered.com', 'lastagecheckage', '1-0-2000')

        webpage = self._download_webpage(video_url, playlist_id)

        # The cookies were not enough: retry through the age-check URL.
        # NOTE(review): the template is a store video URL, so for sharedfiles
        # pages it is formatted with a file ID -- confirm this is intended.
        if re.search('<div[^>]+>Please enter your birth date to continue:</div>', webpage) is not None:
            video_url = self._AGECHECK_TEMPLATE % playlist_id
            self.report_age_confirmation()
            webpage = self._download_webpage(video_url, playlist_id)

        playlist_title = get_element_by_class('apphub_AppName', webpage)
        entries = []

        # Both groups are non-empty whenever the pattern matches, so every
        # hit yields the opening tag plus its numeric movie ID.
        for movie_tag, movie_id in re.findall(
                r'(<div[^>]+id=[\'"]highlight_movie_(\d+)[\'"][^>]+>)', webpage):
            movie = extract_attributes(movie_tag)
            formats = []
            # Sources live in data-<ext>-source / data-<ext>-hd-source.
            for quality in ('', '-hd'):
                for ext in ('webm', 'mp4'):
                    source_url = movie.get('data-%s%s-source' % (ext, quality))
                    if source_url:
                        formats.append({
                            'format_id': ext + quality,
                            'url': source_url,
                        })
            self._sort_formats(formats)
            entries.append({
                'id': movie_id,
                'title': f'{playlist_title} video {movie_id}',
                'thumbnail': movie.get('data-poster'),
                'formats': formats,
            })

        for iframe_tag in re.findall(r'(<iframe[^>]+>)', webpage):
            iframe_src = extract_attributes(iframe_tag).get('src')
            if not iframe_src:
                # An iframe without src cannot embed a video; passing None to
                # the regex search could raise instead of being ignored.
                continue
            video_id = self._search_regex(
                r'youtube\.com/embed/([0-9A-Za-z_-]{11})', iframe_src,
                'youtube_video_id', default=None)
            if video_id:
                entries.append({
                    '_type': 'url_transparent',
                    'id': video_id,
                    'url': video_id,
                    'ie_key': 'Youtube',
                })

        if not entries:
            raise ExtractorError('Could not find any videos')

        return self.playlist_result(entries, playlist_id, playlist_title)