]>
Commit | Line | Data |
---|---|---|
1 | # encoding: utf-8 | |
2 | import re | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | clean_html, | |
7 | ExtractorError, | |
8 | ) | |
9 | ||
10 | ||
class RTLnowIE(InfoExtractor):
    """Extractor for the "NOW" catch-up sites matched by ``_VALID_URL``.

    Covers RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW.
    """

    # The "url" group is everything after the optional scheme, "domain" is the
    # host part only and "video_id" is the numeric film/container id.
    _VALID_URL = r'(?:http://)?(?P<url>(?P<domain>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'

    _TESTS = [
        {
            u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
            u'file': u'90419.flv',
            u'info_dict': {
                u'upload_date': u'20070416',
                u'title': u'Ahornallee - Folge 1 - Der Einzug',
                u'description': u'Folge 1 - Der Einzug',
            },
            u'params': {
                u'skip_download': True,
            },
            u'skip': u'Only works from Germany',
        },
        {
            u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
            u'file': u'69756.flv',
            u'info_dict': {
                u'upload_date': u'20120519',
                u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...',
                u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
                u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
            },
            u'params': {
                u'skip_download': True,
            },
            u'skip': u'Only works from Germany',
        },
        {
            u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
            u'file': u'13883.flv',
            u'info_dict': {
                u'upload_date': u'20090627',
                u'title': u'Voxtours - Südafrika-Reporter II',
                u'description': u'Südafrika-Reporter II',
            },
            u'params': {
                u'skip_download': True,
            },
        },
        {
            u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
            u'file': u'99205.flv',
            u'info_dict': {
                u'upload_date': u'20080928',
                u'title': u'Medicopter 117 - Angst!',
                u'description': u'Angst!',
                u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg',
            },
            u'params': {
                u'skip_download': True,
            },
        },
        {
            u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10',
            u'file': u'124903.flv',
            u'info_dict': {
                u'upload_date': u'20130101',
                u'title': u'Top Gear vom 01.01.2013',
                u'description': u'Episode 1',
            },
            u'params': {
                u'skip_download': True,
            },
            u'skip': u'Only works from Germany',
        },
    ]

    def _real_extract(self, url):
        """Build the info list for the video addressed by *url*.

        Downloads the video page, then the "playerdata" document referenced
        from it, and combines both into a one-element list holding the info
        dictionary (RTMPE stream URL, play path, player URL, title and the
        optional description, upload date and thumbnail).

        Raises ExtractorError when the page carries a notice block in place
        of the player (the notice text becomes the error message) or when the
        playerdata has no RTMPE ``<filename>`` entry.
        """
        url_match = re.match(self._VALID_URL, url)

        video_id = url_match.group(u'video_id')
        # Always rebuild absolute http:// URLs, because the scheme is optional
        # in _VALID_URL and is not part of the captured groups.
        site_root = u'http://' + url_match.group('domain') + u'/'
        page_url = u'http://' + url_match.group('url')

        webpage = self._download_webpage(page_url, video_id)

        # A notice <div> placed right before the player teaser is surfaced to
        # the user as the error message instead of attempting extraction.
        notice = re.search(r'''(?sx)
            <div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?)
            <div[ ]id="playerteaser">''', webpage)
        if notice is not None:
            raise ExtractorError(clean_html(notice.group(1)))

        title = self._html_search_regex(
            r'<title>(?P<title>[^<]+?)( \| [^<]*)?</title>',
            webpage, u'title')
        playerdata_url = self._html_search_regex(
            r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
            webpage, u'playerdata_url')

        playerdata = self._download_webpage(playerdata_url, video_id)

        # The playerdata <title> holds the description, optionally followed by
        # "- [Sendung ]vom DD.MM.YYYY HH:MM Uhr" (the year may have 2 digits).
        meta = re.search(r'<title><!\[CDATA\[(?P<description>.+?)(?:\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr)?\]\]></title>', playerdata)
        if meta is None:
            description = None
            upload_date = None
            self._downloader.report_warning(u'Unable to extract description and upload date')
        else:
            description = meta.group(u'description')
            year = meta.group('upload_date_Y')
            if not year:
                short_year = meta.group('upload_date_y')
                # Two-digit years are expanded with a fixed "20" prefix.
                year = u'20' + short_year if short_year else None
            if year:
                upload_date = year + meta.group('upload_date_m') + meta.group('upload_date_d')
            else:
                upload_date = None

        # The thumbnail is optional: not every video page has an og:image tag.
        og_image = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage)
        thumbnail = og_image.group(u'thumbnail') if og_image else None

        # Split the RTMPE address into the connection URL (scheme, host and
        # first path segment) and the remaining play path.
        stream = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata)
        if stream is None:
            raise ExtractorError(u'Unable to extract media URL')

        info = {
            'id': video_id,
            'url': stream.group(u'url'),
            'play_path': u'mp4:' + stream.group(u'play_path'),
            'page_url': site_root,
            'player_url': site_root + u'includes/vodplayer.swf',
            'ext': 'flv',
            'title': title,
            'description': description,
            'upload_date': upload_date,
            'thumbnail': thumbnail,
        }
        return [info]