]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/aparat.py
2 from __future__
import unicode_literals
4 from .common
import InfoExtractor
14 class AparatIE(InfoExtractor
):
15 _VALID_URL
= r
'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
18 'url': 'http://www.aparat.com/v/wP8On',
19 'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
23 'title': 'تیم گلکسی 11 - زومیت',
24 'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028',
26 'timestamp': 1387394859,
27 'upload_date': '20131218',
32 'url': 'https://www.aparat.com/v/8dflw/',
33 'only_matching': True,
36 def _parse_options(self
, webpage
, video_id
, fatal
=True):
37 return self
._parse
_json
(self
._search
_regex
(
38 r
'options\s*=\s*({.+?})\s*;', webpage
, 'options', default
='{}'), video_id
)
40 def _real_extract(self
, url
):
41 video_id
= self
._match
_id
(url
)
43 # If available, provides more metadata
44 webpage
= self
._download
_webpage
(url
, video_id
, fatal
=False)
45 options
= self
._parse
_options
(webpage
, video_id
, fatal
=False)
48 webpage
= self
._download
_webpage
(
49 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id
,
50 video_id
, 'Downloading embed webpage')
51 options
= self
._parse
_options
(webpage
, video_id
)
54 for sources
in (options
.get('multiSRC') or []):
56 if not isinstance(item
, dict):
58 file_url
= url_or_none(item
.get('src'))
61 item_type
= item
.get('type')
62 if item_type
== 'application/vnd.apple.mpegurl':
63 formats
.extend(self
._extract
_m
3u8_formats
(
64 file_url
, video_id
, 'mp4',
65 entry_protocol
='m3u8_native', m3u8_id
='hls',
68 ext
= mimetype2ext(item
.get('type'))
69 label
= item
.get('label')
73 'format_id': 'http-%s' % (label
or ext
),
74 'height': int_or_none(self
._search
_regex
(
75 r
'(\d+)[pP]', label
or '', 'height',
78 self
._sort
_formats
(formats
)
80 info
= self
._search
_json
_ld
(webpage
, video_id
, default
={})
82 if not info
.get('title'):
83 info
['title'] = get_element_by_id('videoTitle', webpage
) or \
84 self
._html
_search
_meta
(['og:title', 'twitter:title', 'DC.Title', 'title'], webpage
, fatal
=True)
86 return merge_dicts(info
, {
88 'thumbnail': url_or_none(options
.get('poster')),
89 'duration': int_or_none(options
.get('duration')),