]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/joj.py
[tiktok] Fix `vm.tiktok` URLs
[yt-dlp.git] / yt_dlp / extractor / joj.py
CommitLineData
cefecac1
U
1# coding: utf-8
2from __future__ import unicode_literals
3
4import re
5
6from .common import InfoExtractor
7from ..compat import compat_str
8from ..utils import (
e0ddbd02 9 format_field,
cefecac1
U
10 int_or_none,
11 js_to_json,
12 try_get,
13)
14
15
class JojIE(InfoExtractor):
    """Extractor for media.joj.sk embed pages and ``joj:<id>`` pseudo-URLs."""

    # Matches either the internal ``joj:<id>`` scheme or a direct embed URL;
    # the video id is everything up to the next ``/``, ``?``, ``#`` or ``^``.
    _VALID_URL = r'''(?x)
                    (?:
                        joj:|
                        https?://media\.joj\.sk/embed/
                    )
                    (?P<id>[^/?#^]+)
                '''
    _TESTS = [{
        'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
        'info_dict': {
            'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
            'ext': 'mp4',
            'title': 'NOVÉ BÝVANIE',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 3118,
        }
    }, {
        'url': 'https://media.joj.sk/embed/9i1cxv',
        'only_matching': True,
    }, {
        'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
        'only_matching': True,
    }, {
        'url': 'joj:9i1cxv',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` of every media.joj.sk embed iframe in *webpage*.

        The URLs are returned in document order, exactly as written in the
        markup (they may be protocol-relative, i.e. start with ``//``).
        """
        return [
            mobj.group('url')
            for mobj in re.finditer(
                r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1',
                webpage)]

    def _real_extract(self, url):
        """Build the info dict (id, title, thumbnail, duration, formats) for *url*."""
        video_id = self._match_id(url)

        # Always fetch the canonical embed page, so ``joj:<id>`` inputs work too.
        webpage = self._download_webpage(
            'https://media.joj.sk/embed/%s' % video_id, video_id)

        # Prefer the player config / <title> tag, fall back to OpenGraph.
        title = self._search_regex(
            (r'videoTitle\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
             r'<title>(?P<title>[^<]+)'), webpage, 'title',
            default=None, group='title') or self._og_search_title(webpage)

        # JS object literal assigned to ``src``/``bitrates`` in the page;
        # parsing is non-fatal because a second format source exists below.
        bitrates = self._parse_json(
            self._search_regex(
                r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates',
                default='{}'),
            video_id, transform_source=js_to_json, fatal=False)

        formats = []
        for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []:
            if isinstance(format_url, compat_str):
                height = self._search_regex(
                    r'(\d+)[pP]\.', format_url, 'height', default=None)
                formats.append({
                    'url': format_url,
                    'format_id': format_field(height, template='%sp'),
                    # ``height`` is None when the URL carries no ``NNNp.``
                    # marker; a plain int() would raise TypeError there.
                    'height': int_or_none(height),
                })
        if not formats:
            # Nothing usable in the embed page: query the XML playlist service.
            playlist = self._download_xml(
                'https://media.joj.sk/services/Video.php?clip=%s' % video_id,
                video_id)
            for file_el in playlist.findall('./files/file'):
                path = file_el.get('path')
                if not path:
                    continue
                format_id = file_el.get('id') or file_el.get('label')
                formats.append({
                    # Only the first ``dat/`` occurrence is dropped from the path.
                    'url': 'http://n16.joj.sk/storage/%s' % path.replace(
                        'dat/', '', 1),
                    'format_id': format_id,
                    'height': int_or_none(self._search_regex(
                        r'(\d+)[pP]', format_id or path, 'height',
                        default=None)),
                })
        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)

        duration = int_or_none(self._search_regex(
            r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
108 'formats': formats,
109 }