]>
jfr.im git - yt-dlp.git/blob - youtube_dl/extractor/twitch.py
2 from __future__
import unicode_literals
8 from .common
import InfoExtractor
12 compat_urllib_request
,
20 class TwitchBaseIE(InfoExtractor
):
21 _VALID_URL_BASE
= r
'https?://(?:www\.)?twitch\.tv'
23 _API_BASE
= 'https://api.twitch.tv'
24 _USHER_BASE
= 'http://usher.twitch.tv'
25 _LOGIN_URL
= 'https://secure.twitch.tv/login'
26 _LOGIN_POST_URL
= 'https://passport.twitch.tv/authorize'
27 _NETRC_MACHINE
= 'twitch'
29 def _handle_error(self
, response
):
30 if not isinstance(response
, dict):
32 error
= response
.get('error')
35 '%s returned error: %s - %s' % (self
.IE_NAME
, error
, response
.get('message')),
38 def _download_json(self
, url
, video_id
, note
='Downloading JSON metadata'):
40 'Referer': 'http://api.twitch.tv/crossdomain/receiver.html?v=2',
41 'X-Requested-With': 'XMLHttpRequest',
43 for cookie
in self
._downloader
.cookiejar
:
44 if cookie
.name
== 'api_token':
45 headers
['Twitch-Api-Token'] = cookie
.value
46 request
= compat_urllib_request
.Request(url
, headers
=headers
)
47 response
= super(TwitchBaseIE
, self
)._download
_json
(request
, video_id
, note
)
48 self
._handle
_error
(response
)
51 def _real_initialize(self
):
55 (username
, password
) = self
._get
_login
_info
()
59 login_page
= self
._download
_webpage
(
60 self
._LOGIN
_URL
, None, 'Downloading login page')
62 login_form
= dict(re
.findall(
63 r
'<input\s+type="hidden"\s+name="([^"]+)"\s+(?:id="[^"]+"\s+)?value="([^"]*)"',
67 'login': username
.encode('utf-8'),
68 'password': password
.encode('utf-8'),
71 request
= compat_urllib_request
.Request(
72 self
._LOGIN
_POST
_URL
, compat_urllib_parse
.urlencode(login_form
).encode('utf-8'))
73 request
.add_header('Referer', self
._LOGIN
_URL
)
74 response
= self
._download
_webpage
(
75 request
, None, 'Logging in as %s' % username
)
77 error_message
= self
._search
_regex
(
78 r
'<div[^>]+class="subwindow_notice"[^>]*>([^<]+)</div>',
79 response
, 'error message', default
=None)
82 'Unable to login. Twitch said: %s' % error_message
, expected
=True)
84 if '>Reset your password<' in response
:
85 self
.report_warning('Twitch asks you to reset your password, go to https://secure.twitch.tv/reset/submit')
87 def _prefer_source(self
, formats
):
89 source
= next(f
for f
in formats
if f
['format_id'] == 'Source')
90 source
['preference'] = 10
92 pass # No Source stream present
93 self
._sort
_formats
(formats
)
96 class TwitchItemBaseIE(TwitchBaseIE
):
97 def _download_info(self
, item
, item_id
):
98 return self
._extract
_info
(self
._download
_json
(
99 '%s/kraken/videos/%s%s' % (self
._API
_BASE
, item
, item_id
), item_id
,
100 'Downloading %s info JSON' % self
._ITEM
_TYPE
))
102 def _extract_media(self
, item_id
):
103 info
= self
._download
_info
(self
._ITEM
_SHORTCUT
, item_id
)
104 response
= self
._download
_json
(
105 '%s/api/videos/%s%s' % (self
._API
_BASE
, self
._ITEM
_SHORTCUT
, item_id
), item_id
,
106 'Downloading %s playlist JSON' % self
._ITEM
_TYPE
)
108 chunks
= response
['chunks']
109 qualities
= list(chunks
.keys())
110 for num
, fragment
in enumerate(zip(*chunks
.values()), start
=1):
112 for fmt_num
, fragment_fmt
in enumerate(fragment
):
113 format_id
= qualities
[fmt_num
]
115 'url': fragment_fmt
['url'],
116 'format_id': format_id
,
117 'quality': 1 if format_id
== 'live' else 0,
119 m
= re
.search(r
'^(?P<height>\d+)[Pp]', format_id
)
121 fmt
['height'] = int(m
.group('height'))
123 self
._sort
_formats
(formats
)
125 entry
['id'] = '%s_%d' % (entry
['id'], num
)
126 entry
['title'] = '%s part %d' % (entry
['title'], num
)
127 entry
['formats'] = formats
128 entries
.append(entry
)
129 return self
.playlist_result(entries
, info
['id'], info
['title'])
131 def _extract_info(self
, info
):
134 'title': info
['title'],
135 'description': info
['description'],
136 'duration': info
['length'],
137 'thumbnail': info
['preview'],
138 'uploader': info
['channel']['display_name'],
139 'uploader_id': info
['channel']['name'],
140 'timestamp': parse_iso8601(info
['recorded_at']),
141 'view_count': info
['views'],
144 def _real_extract(self
, url
):
145 return self
._extract
_media
(self
._match
_id
(url
))
148 class TwitchVideoIE(TwitchItemBaseIE
):
149 IE_NAME
= 'twitch:video'
150 _VALID_URL
= r
'%s/[^/]+/b/(?P<id>\d+)' % TwitchBaseIE
._VALID
_URL
_BASE
155 'url': 'http://www.twitch.tv/riotgames/b/577357806',
158 'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
160 'playlist_mincount': 12,
164 class TwitchChapterIE(TwitchItemBaseIE
):
165 IE_NAME
= 'twitch:chapter'
166 _VALID_URL
= r
'%s/[^/]+/c/(?P<id>\d+)' % TwitchBaseIE
._VALID
_URL
_BASE
167 _ITEM_TYPE
= 'chapter'
171 'url': 'http://www.twitch.tv/acracingleague/c/5285812',
174 'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
176 'playlist_mincount': 3,
178 'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361',
179 'only_matching': True,
183 class TwitchVodIE(TwitchItemBaseIE
):
184 IE_NAME
= 'twitch:vod'
185 _VALID_URL
= r
'%s/[^/]+/v/(?P<id>\d+)' % TwitchBaseIE
._VALID
_URL
_BASE
190 'url': 'http://www.twitch.tv/riotgames/v/6528877',
194 'title': 'LCK Summer Split - Week 6 Day 1',
195 'thumbnail': 're:^https?://.*\.jpg$',
197 'timestamp': 1435131709,
198 'upload_date': '20150624',
199 'uploader': 'Riot Games',
200 'uploader_id': 'riotgames',
205 'skip_download': True,
209 def _real_extract(self
, url
):
210 item_id
= self
._match
_id
(url
)
211 info
= self
._download
_info
(self
._ITEM
_SHORTCUT
, item_id
)
212 access_token
= self
._download
_json
(
213 '%s/api/vods/%s/access_token' % (self
._API
_BASE
, item_id
), item_id
,
214 'Downloading %s access token' % self
._ITEM
_TYPE
)
215 formats
= self
._extract
_m
3u8_formats
(
216 '%s/vod/%s?nauth=%s&nauthsig=%s&allow_source=true'
217 % (self
._USHER
_BASE
, item_id
, access_token
['token'], access_token
['sig']),
219 self
._prefer
_source
(formats
)
220 info
['formats'] = formats
224 class TwitchPlaylistBaseIE(TwitchBaseIE
):
225 _PLAYLIST_URL
= '%s/kraken/channels/%%s/videos/?offset=%%d&limit=%%d' % TwitchBaseIE
._API
_BASE
228 def _extract_playlist(self
, channel_id
):
229 info
= self
._download
_json
(
230 '%s/kraken/channels/%s' % (self
._API
_BASE
, channel_id
),
231 channel_id
, 'Downloading channel info JSON')
232 channel_name
= info
.get('display_name') or info
.get('name')
235 limit
= self
._PAGE
_LIMIT
236 for counter
in itertools
.count(1):
237 response
= self
._download
_json
(
238 self
._PLAYLIST
_URL
% (channel_id
, offset
, limit
),
239 channel_id
, 'Downloading %s videos JSON page %d' % (self
._PLAYLIST
_TYPE
, counter
))
240 page_entries
= self
._extract
_playlist
_page
(response
)
243 entries
.extend(page_entries
)
245 return self
.playlist_result(
246 [self
.url_result(entry
) for entry
in set(entries
)],
247 channel_id
, channel_name
)
249 def _extract_playlist_page(self
, response
):
250 videos
= response
.get('videos')
251 return [video
['url'] for video
in videos
] if videos
else []
253 def _real_extract(self
, url
):
254 return self
._extract
_playlist
(self
._match
_id
(url
))
257 class TwitchProfileIE(TwitchPlaylistBaseIE
):
258 IE_NAME
= 'twitch:profile'
259 _VALID_URL
= r
'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE
._VALID
_URL
_BASE
260 _PLAYLIST_TYPE
= 'profile'
263 'url': 'http://www.twitch.tv/vanillatv/profile',
266 'title': 'VanillaTV',
268 'playlist_mincount': 412,
272 class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE
):
273 IE_NAME
= 'twitch:past_broadcasts'
274 _VALID_URL
= r
'%s/(?P<id>[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE
._VALID
_URL
_BASE
275 _PLAYLIST_URL
= TwitchPlaylistBaseIE
._PLAYLIST
_URL
+ '&broadcasts=true'
276 _PLAYLIST_TYPE
= 'past broadcasts'
279 'url': 'http://www.twitch.tv/spamfish/profile/past_broadcasts',
284 'playlist_mincount': 54,
288 class TwitchBookmarksIE(TwitchPlaylistBaseIE
):
289 IE_NAME
= 'twitch:bookmarks'
290 _VALID_URL
= r
'%s/(?P<id>[^/]+)/profile/bookmarks/?(?:\#.*)?$' % TwitchBaseIE
._VALID
_URL
_BASE
291 _PLAYLIST_URL
= '%s/api/bookmark/?user=%%s&offset=%%d&limit=%%d' % TwitchBaseIE
._API
_BASE
292 _PLAYLIST_TYPE
= 'bookmarks'
295 'url': 'http://www.twitch.tv/ognos/profile/bookmarks',
300 'playlist_mincount': 3,
303 def _extract_playlist_page(self
, response
):
305 for bookmark
in response
.get('bookmarks', []):
306 video
= bookmark
.get('video')
309 entries
.append(video
['url'])
313 class TwitchStreamIE(TwitchBaseIE
):
314 IE_NAME
= 'twitch:stream'
315 _VALID_URL
= r
'%s/(?P<id>[^/]+)/?(?:\#.*)?$' % TwitchBaseIE
._VALID
_URL
_BASE
318 'url': 'http://www.twitch.tv/shroomztv',
321 'display_id': 'shroomztv',
323 'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
324 'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
326 'timestamp': 1421928037,
327 'upload_date': '20150122',
328 'uploader': 'ShroomzTV',
329 'uploader_id': 'shroomztv',
334 'skip_download': True,
338 def _real_extract(self
, url
):
339 channel_id
= self
._match
_id
(url
)
341 stream
= self
._download
_json
(
342 '%s/kraken/streams/%s' % (self
._API
_BASE
, channel_id
), channel_id
,
343 'Downloading stream JSON').get('stream')
345 # Fallback on profile extraction if stream is offline
347 return self
.url_result(
348 'http://www.twitch.tv/%s/profile' % channel_id
,
349 'TwitchProfile', channel_id
)
351 access_token
= self
._download
_json
(
352 '%s/api/channels/%s/access_token' % (self
._API
_BASE
, channel_id
), channel_id
,
353 'Downloading channel access token')
356 'allow_source': 'true',
357 'p': random
.randint(1000000, 10000000),
358 'player': 'twitchweb',
359 'segment_preference': '4',
360 'sig': access_token
['sig'].encode('utf-8'),
361 'token': access_token
['token'].encode('utf-8'),
363 formats
= self
._extract
_m
3u8_formats
(
364 '%s/api/channel/hls/%s.m3u8?%s'
365 % (self
._USHER
_BASE
, channel_id
, compat_urllib_parse
.urlencode(query
)),
367 self
._prefer
_source
(formats
)
369 view_count
= stream
.get('viewers')
370 timestamp
= parse_iso8601(stream
.get('created_at'))
372 channel
= stream
['channel']
373 title
= self
._live
_title
(channel
.get('display_name') or channel
.get('name'))
374 description
= channel
.get('status')
377 for thumbnail_key
, thumbnail_url
in stream
['preview'].items():
378 m
= re
.search(r
'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key
)
382 'url': thumbnail_url
,
383 'width': int(m
.group('width')),
384 'height': int(m
.group('height')),
388 'id': compat_str(stream
['_id']),
389 'display_id': channel_id
,
391 'description': description
,
392 'thumbnails': thumbnails
,
393 'uploader': channel
.get('display_name'),
394 'uploader_id': channel
.get('name'),
395 'timestamp': timestamp
,
396 'view_count': view_count
,