jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/moevideo.py
[extractor] Common function `_match_valid_url`
[yt-dlp.git] / yt_dlp / extractor / moevideo.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4
5 from .common import InfoExtractor
6 from ..utils import (
7 clean_html,
8 int_or_none,
9 )
10
11
class MoeVideoIE(InfoExtractor):
    # Extractor for the hosts matched by _VALID_URL. Metadata is read from the
    # OpenGraph tags of the /video/ page; the media URL and poster come from
    # the JSON argument of the mvplayer(...) call on the /embed/ page.
    IE_DESC = 'LetitBit video services: moevideo.net, playreplay.net and videochart.net'
    # Verbose regex: "host" captures the (optionally www-prefixed) domain,
    # "id" captures the "<lowercase-alnum>.<alnum>" identifier that follows
    # /video/, /framevideo/ or /embed/.
    _VALID_URL = r'''(?x)
        https?://(?P<host>(?:www\.)?
        (?:(?:moevideo|playreplay|videochart)\.net|thesame\.tv))/
        (?:video|framevideo|embed)/(?P<id>[0-9a-z]+\.[0-9A-Za-z]+)'''
    # NOTE(review): _API_URL and _API_KEY are not referenced anywhere in this
    # class; they look like leftovers and are kept only in case external code
    # reads them - confirm before removing.
    _API_URL = 'http://api.letitbit.net/'
    _API_KEY = 'tVL0gjqo5'
    _TESTS = [
        {
            'url': 'http://moevideo.net/video/00297.0036103fe3d513ef27915216fd29',
            'md5': '129f5ae1f6585d0e9bb4f38e774ffb3a',
            'info_dict': {
                'id': '00297.0036103fe3d513ef27915216fd29',
                'ext': 'flv',
                'title': 'Sink cut out machine',
                'description': 'md5:f29ff97b663aefa760bf7ca63c8ca8a8',
                'thumbnail': r're:^https?://.*\.jpg$',
                'width': 540,
                'height': 360,
                'duration': 179,
                'filesize': 17822500,
            },
            'skip': 'Video has been removed',
        },
        {
            'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a',
            'md5': '74f0a014d5b661f0f0e2361300d1620e',
            'info_dict': {
                'id': '77107.7f325710a627383d40540d8e991a',
                'ext': 'flv',
                'title': 'Operacion Condor.',
                'description': 'md5:7e68cb2fcda66833d5081c542491a9a3',
                'thumbnail': r're:^https?://.*\.jpg$',
                'width': 480,
                'height': 296,
                'duration': 6027,
                'filesize': 588257923,
            },
            'skip': 'Video has been removed',
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for a single video URL.

        Downloads the /video/ page (title, description, duration, fallback
        thumbnail) and the /embed/ page (player JSON with the media URL and
        poster) from the host captured out of ``url``.

        The title, the player JSON and its ``video.ourUrl`` entry are
        mandatory; thumbnail, description and duration are optional and
        resolve to ``None`` when the page does not provide them.
        """
        # _VALID_URL has exactly two groups, in this order: host, id.
        host, video_id = self._match_valid_url(url).groups()

        # Always fetch the canonical /video/ page, even when the input URL
        # was a /framevideo/ or /embed/ one: the OpenGraph tags are read
        # from it.
        webpage = self._download_webpage(
            'http://%s/video/%s' % (host, video_id),
            video_id, 'Downloading webpage')

        title = self._og_search_title(webpage)

        embed_webpage = self._download_webpage(
            'http://%s/embed/%s' % (host, video_id),
            video_id, 'Downloading embed webpage')
        # The player is initialised as mvplayer("#player", {...}); the object
        # literal is parsed as JSON and its "video" entry holds the stream
        # data.
        video = self._parse_json(self._search_regex(
            r'mvplayer\("#player"\s*,\s*({.+})',
            embed_webpage, 'mvplayer'), video_id)['video']

        # Duration is optional metadata: use default=None so that a page
        # without the og:video:duration tag does not abort the extraction.
        duration = int_or_none(self._og_search_property(
            'video:duration', webpage, default=None))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': video.get('poster') or self._og_search_thumbnail(webpage),
            'description': clean_html(self._og_search_description(webpage)),
            'duration': duration,
            'url': video['ourUrl'],
        }