yt_dlp/extractor/steam.py

   1 import re
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     extract_attributes,
   6     ExtractorError,
   7     get_element_by_class,
   8 )
   9
  10
  11 class SteamIE(InfoExtractor):
  12     _VALID_URL = r"""(?x)
  13         https?://(?:store\.steampowered|steamcommunity)\.com/
  14             (?:agecheck/)?
  15             (?P<urltype>video|app)/ #If the page is only for videos or for a game
  16             (?P<gameID>\d+)/?
  17             (?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
  18         |
  19         https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
  20     """
  21     _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
  22     _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
  23     _TESTS = [{
  24         'url': 'http://store.steampowered.com/video/105600/',
  25         'playlist': [
  26             {
  27                 'md5': '695242613303ffa2a4c44c9374ddc067',
  28                 'info_dict': {
  29                     'id': '256785003',
  30                     'ext': 'mp4',
  31                     'title': 'Terraria video 256785003',
  32                     'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
  33                     'n_entries': 2,
  34                 }
  35             },
  36             {
  37                 'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
  38                 'info_dict': {
  39                     'id': '2040428',
  40                     'ext': 'mp4',
  41                     'title': 'Terraria video 2040428',
  42                     'playlist_index': 2,
  43                     'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
  44                     'n_entries': 2,
  45                 }
  46             }
  47         ],
  48         'info_dict': {
  49             'id': '105600',
  50             'title': 'Terraria',
  51         },
  52         'params': {
  53             'playlistend': 2,
  54         }
  55     }, {
  56         'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/',
  57         'info_dict': {
  58             'id': '256757115',
  59             'title': 'Grand Theft Auto V video 256757115',
  60             'ext': 'mp4',
  61             'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
  62             'n_entries': 20,
  63         },
  64     }]
  65
  66     def _real_extract(self, url):
  67         m = self._match_valid_url(url)
  68         fileID = m.group('fileID')
  69         if fileID:
  70             video_url = url
  71             playlist_id = fileID
  72         else:
  73             gameID = m.group('gameID')
  74             playlist_id = gameID
  75             video_url = self._VIDEO_PAGE_TEMPLATE % playlist_id
  76
  77         self._set_cookie('steampowered.com', 'wants_mature_content', '1')
  78         self._set_cookie('steampowered.com', 'birthtime', '944006401')
  79         self._set_cookie('steampowered.com', 'lastagecheckage', '1-0-2000')
  80
  81         webpage = self._download_webpage(video_url, playlist_id)
  82
  83         if re.search('<div[^>]+>Please enter your birth date to continue:</div>', webpage) is not None:
  84             video_url = self._AGECHECK_TEMPLATE % playlist_id
  85             self.report_age_confirmation()
  86             webpage = self._download_webpage(video_url, playlist_id)
  87
  88         videos = re.findall(r'(<div[^>]+id=[\'"]highlight_movie_(\d+)[\'"][^>]+>)', webpage)
  89         entries = []
  90         playlist_title = get_element_by_class('apphub_AppName', webpage)
  91         for movie, movie_id in videos:
  92             if not movie:
  93                 continue
  94             movie = extract_attributes(movie)
  95             if not movie_id:
  96                 continue
  97             entry = {
  98                 'id': movie_id,
  99                 'title': f'{playlist_title} video {movie_id}',
 100             }
 101             formats = []
 102             if movie:
 103                 entry['thumbnail'] = movie.get('data-poster')
 104                 for quality in ('', '-hd'):
 105                     for ext in ('webm', 'mp4'):
 106                         video_url = movie.get('data-%s%s-source' % (ext, quality))
 107                         if video_url:
 108                             formats.append({
 109                                 'format_id': ext + quality,
 110                                 'url': video_url,
 111                             })
 112             entry['formats'] = formats
 113             entries.append(entry)
 114         embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
 115         for evideos in embedded_videos:
 116             evideos = extract_attributes(evideos).get('src')
 117             video_id = self._search_regex(r'youtube\.com/embed/([0-9A-Za-z_-]{11})', evideos, 'youtube_video_id', default=None)
 118             if video_id:
 119                 entries.append({
 120                     '_type': 'url_transparent',
 121                     'id': video_id,
 122                     'url': video_id,
 123                     'ie_key': 'Youtube',
 124                 })
 125         if not entries:
 126             raise ExtractorError('Could not find any videos')
 127
 128         return self.playlist_result(entries, playlist_id, playlist_title)
 129
 130
 131 class SteamCommunityBroadcastIE(InfoExtractor):
 132     _VALID_URL = r'https?://steamcommunity\.(?:com)/broadcast/watch/(?P<id>\d+)'
 133     _TESTS = [{
 134         'url': 'https://steamcommunity.com/broadcast/watch/76561199073851486',
 135         'info_dict': {
 136             'id': '76561199073851486',
 137             'title': r're:Steam Community :: pepperm!nt :: Broadcast 2022-06-26 \d{2}:\d{2}',
 138             'ext': 'mp4',
 139             'uploader_id': 1113585758,
 140             'uploader': 'pepperm!nt',
 141             'live_status': 'is_live',
 142         },
 143         'skip': 'Stream has ended',
 144     }]
 145
 146     def _real_extract(self, url):
 147         video_id = self._match_id(url)
 148         webpage = self._download_webpage(url, video_id)
 149         json_data = self._download_json(
 150             'https://steamcommunity.com/broadcast/getbroadcastmpd/',
 151             video_id, query={'steamid': f'{video_id}'})
 152
 153         formats, subs = self._extract_m3u8_formats_and_subtitles(json_data['hls_url'], video_id)
 154
 155         ''' # We cannot download live dash atm
 156         mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(json_data['url'], video_id)
 157         formats.extend(mpd_formats)
 158         self._merge_subtitles(mpd_subs, target=subs)
 159         '''
 160
 161         uploader_json = self._download_json(
 162             'https://steamcommunity.com/actions/ajaxresolveusers',
 163             video_id, query={'steamids': video_id})[0]
 164
 165         return {
 166             'id': video_id,
 167             'title': self._generic_title('', webpage),
 168             'formats': formats,
 169             'live_status': 'is_live',
 170             'view_count': json_data.get('num_view'),
 171             'uploader': uploader_json.get('persona_name'),
 172             'uploader_id': uploader_json.get('accountid'),
 173             'subtitles': subs,
 174         }