]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/musicplayon.py
[musicplayon] Fix extraction (closes #9222)
[yt-dlp.git] / youtube_dl / extractor / musicplayon.py
CommitLineData
91a76c40
S
1# encoding: utf-8
2from __future__ import unicode_literals
3
91a76c40 4from .common import InfoExtractor
b1cf58f4
YCH
5from ..compat import compat_urlparse
6from ..utils import (
7 int_or_none,
8 js_to_json,
9 mimetype2ext,
10)
91a76c40
S
11
12
class MusicPlayOnIE(InfoExtractor):
    """Extractor for musicplayon.com ``play`` / ``play-touch`` video pages."""

    # Accepts any (optional) subdomain; the numeric video id is taken from
    # either the ``v=`` or the ``pl=100&play=`` query form.
    _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=100&play)=(?P<id>\d+)'

    _TEST = {
        'url': 'http://en.musicplayon.com/play?v=433377',
        'md5': '00cdcdea1726abdf500d1e7fd6dd59bb',
        'info_dict': {
            'id': '433377',
            'ext': 'mp4',
            'title': 'Rick Ross - Interview On Chelsea Lately (2014)',
            'description': 'Rick Ross Interview On Chelsea Lately',
            'duration': 342,
            'uploader': 'ultrafish',
        },
    }

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video.

        Metadata is read from the page's Open Graph / meta tags through the
        ``InfoExtractor`` helpers; the media URLs come from the JavaScript
        ``setup['_sources']`` assignment embedded in the page.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``uploader``, ``duration``, ``view_count`` and
        ``formats``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Page-level metadata.  Duration, view count and uploader are looked
        # up with fatal=False, so they are treated as optional.
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        duration = self._html_search_meta(
            'video:duration', webpage, 'duration', fatal=False)
        view_count = self._og_search_property('count', webpage, fatal=False)
        uploader = self._html_search_regex(
            r'<div>by&nbsp;<a href="[^"]+" class="purple">([^<]+)</a></div>',
            webpage, 'uploader', fatal=False)

        # The source list is a JavaScript literal, so it is passed through
        # js_to_json before being parsed.
        sources_js = self._search_regex(
            r'setup\[\'_sources\'\]\s*=\s*([^;]+);', webpage, 'video sources')
        sources = self._parse_json(
            sources_js, video_id, transform_source=js_to_json)

        formats = []
        for source in sources:
            formats.append({
                # 'src' is resolved against the page URL, so relative
                # source paths are supported.
                'url': compat_urlparse.urljoin(url, source['src']),
                'ext': mimetype2ext(source.get('type')),
                'format_note': source.get('data-res'),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': int_or_none(duration),
            'view_count': int_or_none(view_count),
            'formats': formats,
        }