]>
Commit | Line | Data |
---|---|---|
23e7cba8 S |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..compat import compat_urllib_request | |
7 | from ..utils import ( | |
8 | float_or_none, | |
9 | unescapeHTML, | |
10 | ) | |
11 | ||
12 | ||
class TwitterCardIE(InfoExtractor):
    """Extractor for videos served from Twitter card pages.

    Matches URLs of the form ``twitter.com/i/cards/tfw/v1/<numeric id>``.
    """

    _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'
    _TEST = {
        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
        'md5': 'a74f50b310c83170319ba16de6955192',
        'info_dict': {
            'id': '560070183650213889',
            'ext': 'mp4',
            'title': 'TwitterCard',
            # Raw string: '\.' is not a valid escape in a regular literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 30.033,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the card page at *url*.

        The page is requested once per entry in ``USER_AGENTS``; every
        response must carry a ``data-player-config`` attribute holding
        HTML-escaped JSON, from which the first playlist entry's ``source``
        is taken as a format URL.  Width and height are added when the URL
        contains a ``/<width>x<height>/`` path segment.

        Returns a dict with ``id``, ``title``, ``thumbnail``, ``duration``
        and ``formats``.  Thumbnail and duration are read from the config
        of the last response fetched.

        Raises KeyError/IndexError if a config has no
        ``playlist[0]['source']`` entry; errors from the download, regex
        search and JSON parsing helpers are not caught here.
        """
        video_id = self._match_id(url)

        # Different formats served for different User-Agents
        USER_AGENTS = [
            'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',  # mp4
            'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',  # webm
        ]

        config = None
        formats = []
        seen_urls = set()
        for user_agent in USER_AGENTS:
            request = compat_urllib_request.Request(url)
            request.add_header('User-Agent', user_agent)
            webpage = self._download_webpage(request, video_id)

            config = self._parse_json(
                unescapeHTML(self._search_regex(
                    r'data-player-config="([^"]+)"', webpage, 'data player config')),
                video_id)

            video_url = config['playlist'][0]['source']

            # The same source may come back for more than one User-Agent;
            # keep each URL only once so formats has no duplicate entries.
            if video_url in seen_urls:
                continue
            seen_urls.add(video_url)

            f = {
                'url': video_url,
            }

            m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
            if m:
                f.update({
                    'width': int(m.group('width')),
                    'height': int(m.group('height')),
                })
            formats.append(f)
        self._sort_formats(formats)

        thumbnail = config.get('posterImageUrl')
        duration = float_or_none(config.get('duration'))

        return {
            'id': video_id,
            'title': 'TwitterCard',
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }