]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/line.py
Update to ytdl-commit-4fb25ff
[yt-dlp.git] / yt_dlp / extractor / line.py
CommitLineData
08250b69
CHY
1# coding: utf-8
2from __future__ import unicode_literals
3
4import re
5
6from .common import InfoExtractor
f7ad7160 7from ..compat import compat_str
8from ..utils import (
9 ExtractorError,
10 int_or_none,
11 js_to_json,
12 str_or_none,
13)
08250b69
CHY
14
15
class LineTVIE(InfoExtractor):
    """Extractor for episode segment pages under https://tv.line.me/v/."""

    _VALID_URL = r'https?://tv\.line\.me/v/(?P<id>\d+)_[^/]+-(?P<segment>ep\d+-\d+)'

    _TESTS = [{
        'url': 'https://tv.line.me/v/793123_goodbye-mrblack-ep1-1/list/69246',
        'info_dict': {
            'id': '793123_ep1-1',
            'ext': 'mp4',
            'title': 'Goodbye Mr.Black | EP.1-1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 998.509,
            'view_count': int,
        },
    }, {
        'url': 'https://tv.line.me/v/2587507_%E6%B4%BE%E9%81%A3%E5%A5%B3%E9%86%ABx-ep1-02/list/185245',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single tv.line.me segment page.

        The page's ``naver.WebPlayer(...)`` parameters supply the ``videoId``
        and ``key`` used to query the video-info API; formats come from the
        first stream's HLS manifest plus the entries of ``videos.list``.
        """
        series_id, segment = re.match(self._VALID_URL, url).groups()
        video_id = '%s_%s' % (series_id, segment)

        webpage = self._download_webpage(url, video_id)

        player_params = self._parse_json(self._search_regex(
            r'naver\.WebPlayer\(({[^}]+})\)', webpage, 'player parameters'),
            video_id, transform_source=js_to_json)

        video_info = self._download_json(
            'https://global-nvapis.line.me/linetv/rmcnmv/vod_play_videoInfo.json',
            video_id, query={
                'videoId': player_params['videoId'],
                'key': player_params['key'],
            })

        stream = video_info['streams'][0]
        # The stream key is appended as a query string both to the manifest
        # URL and to every format URL extracted from it.
        extra_query = '?__gda__=' + stream['key']['value']
        formats = self._extract_m3u8_formats(
            stream['source'] + extra_query, video_id, ext='mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')

        for a_format in formats:
            a_format['url'] += extra_query

        duration = None
        # `or {}` / `or []` rather than .get() defaults: a default is only
        # used for a missing key, not for an explicit JSON null.
        for video in (video_info.get('videos') or {}).get('list') or []:
            encoding_option = video.get('encodingOption') or {}
            abr = video['bitrate']['audio']
            vbr = video['bitrate']['video']
            tbr = abr + vbr
            formats.append({
                'url': video['source'],
                'format_id': 'http-%d' % int(tbr),
                'height': encoding_option.get('height'),
                'width': encoding_option.get('width'),
                'abr': abr,
                'vbr': vbr,
                'filesize': video.get('size'),
            })
            # Keep the first non-empty duration reported by any entry.
            if video.get('duration') and duration is None:
                duration = video['duration']

        self._sort_formats(formats)

        # After sorting, a first format without a width is flagged as
        # carrying no video. Guard against an empty list so this cannot
        # raise IndexError.
        if formats and not formats[0].get('width'):
            formats[0]['vcodec'] = 'none'

        title = self._og_search_title(webpage)

        # like_count requires an additional API request https://tv.line.me/api/likeit/getCount

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            # Same query string as above, without the leading '?'.
            'extra_param_to_segment_url': extra_query[1:],
            'duration': duration,
            'thumbnails': [
                {'url': thumbnail['source']}
                for thumbnail in (video_info.get('thumbnails') or {}).get('list') or []
                if thumbnail.get('source')],
            'view_count': (video_info.get('meta') or {}).get('count'),
        }
f7ad7160 97
98
class LineLiveBaseIE(InfoExtractor):
    """Shared base for the LINE LIVE extractors: API root and item parsing."""

    _API_BASE_URL = 'https://live-api.line-apps.com/web/v4.0/channel/'

    def _parse_broadcast_item(self, item):
        """Convert one broadcast item dict from the API into an info dict.

        ``item`` must contain ``id`` and ``title``; every other field is
        optional and ends up as ``None`` (or an empty list) when absent.
        """
        broadcast_id = compat_str(item['id'])
        raw_title = item['title']
        live_now = item.get('isBroadcastingNow')

        # Drop thumbnail entries whose URL is empty.
        thumbnail_map = item.get('thumbnailURLs') or {}
        thumbnails = [
            {'id': key, 'url': value}
            for key, value in thumbnail_map.items()
            if value
        ]

        channel_info = item.get('channel') or {}
        channel_id = str_or_none(channel_info.get('id'))
        channel_url = None
        if channel_id:
            channel_url = 'https://live.line.me/channels/' + channel_id

        # Ongoing broadcasts get their title passed through _live_title().
        if live_now:
            display_title = self._live_title(raw_title)
        else:
            display_title = raw_title

        return {
            'id': broadcast_id,
            'title': display_title,
            'thumbnails': thumbnails,
            'timestamp': int_or_none(item.get('createdAt')),
            'channel': channel_info.get('name'),
            'channel_id': channel_id,
            'channel_url': channel_url,
            'duration': int_or_none(item.get('archiveDuration')),
            'view_count': int_or_none(item.get('viewerCount')),
            'comment_count': int_or_none(item.get('chatCount')),
            'is_live': live_now,
        }
132
133
class LineLiveIE(LineLiveBaseIE):
    """Extractor for a single broadcast on live.line.me."""

    _VALID_URL = r'https?://live\.line\.me/channels/(?P<channel_id>\d+)/broadcast/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://live.line.me/channels/4867368/broadcast/16331360',
        'md5': 'bc931f26bf1d4f971e3b0982b3fab4a3',
        'info_dict': {
            'id': '16331360',
            'title': '振りコピ講座😙😙😙',
            'ext': 'mp4',
            'timestamp': 1617095132,
            'upload_date': '20210330',
            'channel': '白川ゆめか',
            'channel_id': '4867368',
            'view_count': int,
            'comment_count': int,
            'is_live': False,
        }
    }, {
        # archiveStatus == 'DELETED'
        'url': 'https://live.line.me/channels/4778159/broadcast/16378488',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the broadcast from the API and build its info dict.

        Raises ExtractorError (expected) when no formats are available and
        the item reports an ``archiveStatus`` other than ``'ARCHIVED'``.
        """
        channel_id, broadcast_id = re.match(self._VALID_URL, url).groups()
        broadcast = self._download_json(
            self._API_BASE_URL + '%s/broadcast/%s' % (channel_id, broadcast_id),
            broadcast_id)
        item = broadcast['item']
        info = self._parse_broadcast_item(item)
        is_live = info['is_live']
        protocol = 'm3u8' if is_live else 'm3u8_native'

        # Live broadcasts expose 'liveHLSURLs', finished ones 'archivedHLSURLs'.
        hls_urls = broadcast.get(('live' if is_live else 'archived') + 'HLSURLs') or {}

        formats = []
        for k, v in hls_urls.items():
            if not v:
                continue
            if k == 'abr':
                # The 'abr' entry is expanded via the m3u8 format extractor.
                formats.extend(self._extract_m3u8_formats(
                    v, broadcast_id, 'mp4', protocol,
                    m3u8_id='hls', fatal=False))
                continue
            f = {
                'ext': 'mp4',
                'format_id': 'hls-' + k,
                'protocol': protocol,
                'url': v,
            }
            # Non-numeric keys are flagged as carrying no video
            # (presumably audio-only renditions - confirm against the API).
            if not k.isdigit():
                f['vcodec'] = 'none'
            formats.append(f)

        if not formats:
            archive_status = item.get('archiveStatus')
            # Only build the status message when a status is present; a
            # missing/null status would make .lower() raise AttributeError.
            # In that case fall through to _sort_formats() with an empty list.
            if archive_status and archive_status != 'ARCHIVED':
                raise ExtractorError(
                    'this video has been ' + archive_status.lower(), expected=True)
        self._sort_formats(formats)
        info['formats'] = formats
        return info
190
191
class LineLiveChannelIE(LineLiveBaseIE):
    """Extractor for a live.line.me channel, returned as a playlist."""

    _VALID_URL = r'https?://live\.line\.me/channels/(?P<id>\d+)(?!/broadcast/\d+)(?:[/?&#]|$)'
    _TEST = {
        'url': 'https://live.line.me/channels/5893542',
        'info_dict': {
            'id': '5893542',
            'title': 'いくらちゃん',
            'description': 'md5:c3a4af801f43b2fac0b02294976580be',
        },
        'playlist_mincount': 29
    }

    def _archived_broadcasts_entries(self, archived_broadcasts, channel_id):
        """Yield one url-type entry per archived broadcast, across all pages.

        ``archived_broadcasts`` is the first page (a dict with ``rows`` and
        ``hasNextPage``). Following pages are requested from the
        ``archived_broadcasts`` endpoint with ``lastId`` set to the id of the
        last entry yielded from the current page. Rows without a
        ``shareURL`` are skipped.
        """
        while True:
            # Reset per page so a page with no usable rows can never reuse a
            # stale (or undefined) id as the pagination cursor.
            last_id = None
            for row in (archived_broadcasts.get('rows') or []):
                share_url = str_or_none(row.get('shareURL'))
                if not share_url:
                    continue
                info = self._parse_broadcast_item(row)
                info.update({
                    '_type': 'url',
                    'url': share_url,
                    'ie_key': LineLiveIE.ie_key(),
                })
                last_id = info['id']
                yield info
            if not archived_broadcasts.get('hasNextPage'):
                return
            if last_id is None:
                # No cursor to advance with: requesting the same page again
                # would loop forever, so stop here.
                return
            archived_broadcasts = self._download_json(
                self._API_BASE_URL + channel_id + '/archived_broadcasts',
                channel_id, query={
                    'lastId': last_id,
                })

    def _real_extract(self, url):
        """Download the channel and return its archived broadcasts as a playlist."""
        channel_id = self._match_id(url)
        channel = self._download_json(self._API_BASE_URL + channel_id, channel_id)
        return self.playlist_result(
            self._archived_broadcasts_entries(channel.get('archivedBroadcasts') or {}, channel_id),
            channel_id, channel.get('title'), channel.get('information'))