]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/startv.py
[youtube:comments] Add more options for limiting number of comments extracted (#1626)
[yt-dlp.git] / yt_dlp / extractor / startv.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..compat import (
6 compat_str,
7 )
8 from ..utils import (
9 clean_html,
10 ExtractorError,
11 traverse_obj,
12 int_or_none,
13 )
14
15
class StarTVIE(InfoExtractor):
    """Extractor for video pages on startv.com.tr matched by ``_VALID_URL``."""

    # Verbose-mode pattern (layout whitespace is ignored). Accepted paths:
    #   /(dizi|program)/<show>/(bolumler|fragmanlar|ekstralar)/<id>
    #   /video/arsiv/(dizi|program)/<show>/<id>
    # The trailing path segment is captured as the ``id`` group.
    _VALID_URL = r"""(?x)
        https?://(?:www\.)?startv\.com\.tr/
        (?:
            (?:dizi|program)/(?:[^/?#&]+)/(?:bolumler|fragmanlar|ekstralar)|
            video/arsiv/(?:dizi|program)/(?:[^/?#&]+)
        )/
        (?P<id>[^/?#&]+)
    """
    IE_NAME = 'startv'
    _TESTS = [
        {
            'url': 'https://www.startv.com.tr/dizi/cocuk/bolumler/3-bolum',
            'md5': '72381a32bcc2e2eb5841e8c8bf68f127',
            'info_dict': {
                'id': '904972',
                'display_id': '3-bolum',
                'ext': 'mp4',
                'title': '3. Bölüm',
                'description': 'md5:3a8049f05a75c2e8747116a673275de4',
                'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
                'timestamp': 1569281400,
                'upload_date': '20190923'
            },
        },
        {
            'url': 'https://www.startv.com.tr/video/arsiv/dizi/avlu/44-bolum',
            'only_matching': True
        },
        {
            'url': 'https://www.startv.com.tr/dizi/cocuk/fragmanlar/5-bolum-fragmani',
            'only_matching': True
        },
        {
            'url': 'https://www.startv.com.tr/dizi/cocuk/ekstralar/5-bolumun-nefes-kesen-final-sahnesi',
            'only_matching': True
        },
        {
            'url': 'https://www.startv.com.tr/program/burcu-ile-haftasonu/bolumler/1-bolum',
            'only_matching': True
        },
        {
            'url': 'https://www.startv.com.tr/program/burcu-ile-haftasonu/fragmanlar/2-fragman',
            'only_matching': True
        },
        {
            'url': 'https://www.startv.com.tr/video/arsiv/program/buyukrisk/14-bolumde-hangi-unlu-ne-sordu-',
            'only_matching': True
        },
        {
            'url': 'https://www.startv.com.tr/video/arsiv/program/buyukrisk/buyuk-risk-334-bolum',
            'only_matching': True
        },
        {
            'url': 'https://www.startv.com.tr/video/arsiv/program/dada/dada-58-bolum',
            'only_matching': True
        }
    ]

    def _real_extract(self, url):
        """Build the info dict for a single startv.com.tr video page.

        The page is downloaded, the ``videoUrl`` value embedded in it is
        located, and the JSON document behind that URL supplies the metadata
        and the HLS manifest URL. Title, description and thumbnail fall back
        to (or come from) the page's Open Graph tags.

        Raises:
            ExtractorError: if the JSON response has no ``data`` dict.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Matches `"videoUrl": "<url>"`; the backreference forces the key and
        # the value to use the same quote character (either ' or ").
        info_url = self._search_regex(
            r'(["\'])videoUrl\1\s*:\s*\1(?P<url>(?:(?!\1).)+)\1\s*',
            webpage, 'video info url', group='url')

        info = traverse_obj(
            self._download_json(info_url, display_id), 'data', expected_type=dict)
        if not info:
            raise ExtractorError('Failed to extract API data')

        # compat_str(None) would produce the literal string 'None' and be
        # reported as the video id; use the URL slug when the API omits it.
        raw_id = info.get('id')
        video_id = compat_str(raw_id) if raw_id is not None else display_id

        title = info.get('title') or self._og_search_title(webpage)
        description = (
            clean_html(info.get('description'))
            or self._og_search_description(webpage, default=None))
        # NOTE(review): presumably og:image can be protocol-relative, hence
        # the explicit scheme - confirm against a live page.
        thumbnail = self._proto_relative_url(
            self._og_search_thumbnail(webpage), scheme='http:')

        # Only request the manifest when the API actually returned an HLS
        # URL, so that None is never handed to the downloader as a URL.
        hls_url = traverse_obj(info, ('flavors', 'hls'))
        formats = []
        if hls_url:
            formats = self._extract_m3u8_formats(
                hls_url, video_id, entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            # The API's release_date is passed through as the timestamp
            # (None if it is missing or not convertible to int).
            'timestamp': int_or_none(info.get('release_date')),
            'formats': formats
        }