]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/nate.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / nate.py
CommitLineData
abc07b55
AG
1import itertools
2
3from .common import InfoExtractor
4from ..utils import (
5 int_or_none,
6 str_or_none,
7 traverse_obj,
8 unified_strdate,
9)
10
11
class NateIE(InfoExtractor):
    """Extractor for single clips at ``https://tv.nate.com/clip/<id>``."""

    _VALID_URL = r'https?://tv\.nate\.com/clip/(?P<id>[0-9]+)'

    _TESTS = [{
        'url': 'https://tv.nate.com/clip/1848976',
        'info_dict': {
            'id': '1848976',
            'ext': 'mp4',
            'title': '[결승 오프닝 타이틀] 2018 LCK 서머 스플릿 결승전 kt Rolster VS Griffin',
            'description': 'md5:e1b79a7dcf0d8d586443f11366f50e6f',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20180908',
            'age_limit': 15,
            'duration': 73,
            'uploader': '2018 LCK 서머 스플릿(롤챔스)',
            'channel': '2018 LCK 서머 스플릿(롤챔스)',
            'channel_id': '3606',
            'uploader_id': '3606',
            'tags': 'count:59',
        },
        'params': {'skip_download': True}
    }, {
        'url': 'https://tv.nate.com/clip/4300566',
        'info_dict': {
            'id': '4300566',
            'ext': 'mp4',
            'title': '[심쿵엔딩] 이준호x이세영, 서로를 기억하며 끌어안는 두 사람!💕, MBC 211204 방송',
            'description': 'md5:be1653502d9c13ce344ddf7828e089fa',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20211204',
            'age_limit': 15,
            'duration': 201,
            'uploader': '옷소매 붉은 끝동',
            'channel': '옷소매 붉은 끝동',
            'channel_id': '27987',
            'uploader_id': '27987',
            'tags': 'count:20',
        },
        'params': {'skip_download': True}
    }]

    # Keyed by the last two characters of a stream URL; the value is reported
    # as that format's height.
    _QUALITY = {
        '36': 2160,
        '35': 1080,
        '34': 720,
        '33': 480,
        '32': 360,
        '31': 270,
    }

    def _real_extract(self, url):
        """Fetch the clip's JSON metadata and build the info dict.

        The clip id is taken from ``url`` via ``_VALID_URL`` and used to query
        ``/api/v1/clip/<id>``. Every metadata field is optional: a missing key
        yields ``None`` (or no formats) instead of raising.
        """
        video_id = self._match_id(url)
        video_data = self._download_json(
            f'https://tv.nate.com/api/v1/clip/{video_id}', video_id)

        formats = []
        for f_url in video_data.get('smcUriList') or []:
            # The trailing two characters of the URL double as the format id
            # and as the lookup key into _QUALITY.
            quality_code = f_url[-2:]
            formats.append({
                'format_id': quality_code,
                'url': f_url,
                'height': self._QUALITY.get(quality_code),
                'quality': int_or_none(quality_code),
            })

        program_title = video_data.get('programTitle')
        program_id = str_or_none(video_data.get('programSeq'))
        hash_tag = video_data.get('hashTag')

        return {
            'id': video_id,
            'title': video_data.get('clipTitle'),
            'description': video_data.get('synopsis'),
            'thumbnail': video_data.get('contentImg'),
            # Prefer the broadcast date; fall back to the registration date.
            'upload_date': unified_strdate(traverse_obj(video_data, 'broadDate', 'regDate')),
            # Numeric fields are coerced so that an unexpected type from the
            # API degrades to None rather than leaking into the info dict.
            'age_limit': int_or_none(video_data.get('targetAge')),
            'duration': int_or_none(video_data.get('playTime')),
            'formats': formats,
            'uploader': program_title,
            'channel': program_title,
            'channel_id': program_id,
            'uploader_id': program_id,
            'tags': hash_tag.split(',') if hash_tag else None,
        }
86
87
class NateProgramIE(InfoExtractor):
    """Playlist extractor for ``https://tv.nate.com/program/clips/<id>``."""

    _VALID_URL = r'https?://tv\.nate\.com/program/clips/(?P<id>[0-9]+)'

    _TESTS = [{
        'url': 'https://tv.nate.com/program/clips/27987',
        'playlist_mincount': 191,
        'info_dict': {
            'id': '27987',
        },
    }, {
        'url': 'https://tv.nate.com/program/clips/3606',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '3606',
        },
    }]

    def _entries(self, playlist_id):
        """Yield one ``url_result`` per clip of the program, page by page.

        Pages are requested starting from 1, 20 clips per request. Iteration
        stops when the API marks a page as ``last`` or returns a page with no
        clips; the latter guards against looping forever if the flag is
        never set.
        """
        for page_num in itertools.count(1):
            program_data = self._download_json(
                f'https://tv.nate.com/api/v1/program/{playlist_id}/clip/ranking?size=20&page={page_num}',
                playlist_id, note=f'Downloading page {page_num}')

            clips = program_data.get('content') or []
            for clip in clips:
                clip_id = clip.get('clipSeq')
                if not clip_id:
                    continue
                yield self.url_result(
                    f'https://tv.nate.com/clip/{clip_id}',
                    ie=NateIE.ie_key(), video_id=str(clip_id))

            if program_data.get('last') or not clips:
                break

    def _real_extract(self, url):
        """Return a lazily evaluated playlist of all clips of the program."""
        playlist_id = self._match_id(url)
        return self.playlist_result(self._entries(playlist_id), playlist_id=playlist_id)