]> jfr.im git - yt-dlp.git/blame_incremental - youtube_dl/extractor/videomega.py
[udemy] Extract asset captions
[yt-dlp.git] / youtube_dl / extractor / videomega.py
... / ...
CommitLineData
1# coding: utf-8
2from __future__ import unicode_literals
3
4import re
5
6from .common import InfoExtractor
7from ..utils import (
8 decode_packed_codes,
9 sanitized_Request,
10)
11
12
class VideoMegaIE(InfoExtractor):
    """Information extractor for videomega.tv.

    Accepts either the internal ``videomega:<ref>`` shorthand or a
    videomega.tv URL (optionally through ``view.php``, ``iframe.php`` or
    ``cdn.php``) carrying the video reference in its ``ref`` query parameter.
    """

    _VALID_URL = r'(?:videomega:|https?://(?:www\.)?videomega\.tv/(?:(?:view|iframe|cdn)\.php)?\?ref=)(?P<id>[A-Za-z0-9]+)'
    _TESTS = [
        {
            'url': 'http://videomega.tv/cdn.php?ref=AOSQBJYKIDDIKYJBQSOA',
            'md5': 'cc1920a58add3f05c6a93285b84fb3aa',
            'info_dict': {
                'id': 'AOSQBJYKIDDIKYJBQSOA',
                'ext': 'mp4',
                'title': '1254207',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://videomega.tv/cdn.php?ref=AOSQBJYKIDDIKYJBQSOA&width=1070&height=600',
            'only_matching': True,
        },
        {
            'url': 'http://videomega.tv/view.php?ref=090051111052065112106089103052052103089106112065052111051090',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        The ``cdn.php`` page for the reference is downloaded with the
        original URL as Referer; the title and poster image are read from
        that page, and the media URL from the output of
        ``decode_packed_codes`` applied to it.

        Returns a dict with ``id``, ``title``, ``url``, ``thumbnail`` and
        ``http_headers`` (a Referer pointing at the ``cdn.php`` page).
        """
        video_id = self._match_id(url)

        # Whatever URL form matched, the page is always fetched via cdn.php.
        iframe_url = 'http://videomega.tv/cdn.php?ref=%s' % video_id
        request = sanitized_Request(iframe_url)
        # NOTE(review): the cookie presumably suppresses an ad interstitial --
        # confirm against the site before changing it.
        for header_name, header_value in (
                ('Referer', url),
                ('Cookie', 'noadvtday=0')):
            request.add_header(header_name, header_value)
        webpage = self._download_webpage(request, video_id)

        # Take the page <title>, then drop a leading "VideoMega.tv - " or a
        # trailing " - videomega.tv" site tag from it.
        raw_title = self._html_search_regex(
            r'<title>(.+?)</title>', webpage, 'title')
        site_tag = re.compile(
            r'(?:^[Vv]ideo[Mm]ega\.tv\s-\s*|\s*-\svideomega\.tv$)')
        title = site_tag.sub('', raw_title)

        # The poster is optional: a missing one must not abort extraction.
        thumbnail = self._search_regex(
            r'<video[^>]+?poster="([^"]+)"', webpage, 'thumbnail',
            fatal=False)

        # The media URL is looked up in the decoded form of the page, as the
        # second element of a `"src", "<url>"` pair.
        decoded_script = decode_packed_codes(webpage)
        video_url = self._search_regex(
            r'"src"\s*,\s*"([^"]+)"', decoded_script, 'video URL')

        info = {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumbnail,
            # Sent with the media request: Referer is the cdn.php page.
            'http_headers': {'Referer': iframe_url},
        }
        return info