]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/n1.py
[fragments] Pad fragments before decrypting (#1298)
[yt-dlp.git] / yt_dlp / extractor / n1.py
CommitLineData
fecb20a5 1# coding: utf-8
2from __future__ import unicode_literals
3
4import re
5
6from .youtube import YoutubeIE
7from .reddit import RedditRIE
8from .common import InfoExtractor
9from ..utils import (
10 unified_timestamp,
11 extract_attributes,
12)
13
14
class N1InfoAssetIE(InfoExtractor):
    """Extractor for direct N1 Info stream URLs.

    Matches ``best-vod.umn.cdn.united.cloud/stream`` URLs and uses the value
    of the ``asset`` query parameter as both the video id and the title.
    """

    _VALID_URL = r'https?://best-vod\.umn\.cdn\.united\.cloud/stream\?asset=(?P<id>[^&]+)'
    _TESTS = [{
        'url': 'https://best-vod.umn.cdn.united.cloud/stream?asset=ljsottomazilirija3060921-n1info-si-worldwide&stream=hp1400&t=0&player=m3u8v&sp=n1info&u=n1info&p=n1Sh4redSecre7iNf0',
        'md5': '28b08b32aeaff2b8562736ccd5a66fe7',
        'info_dict': {
            'id': 'ljsottomazilirija3060921-n1info-si-worldwide',
            'ext': 'mp4',
            'title': 'ljsottomazilirija3060921-n1info-si-worldwide',
        }
    }]

    def _real_extract(self, url):
        """Return an info dict whose formats come from the HLS manifest at *url*."""
        asset_id = self._match_id(url)

        # The matched URL itself is handed to the m3u8 parser; fatal=False is
        # passed so a manifest problem is not treated as a hard error here.
        hls_formats = self._extract_m3u8_formats(
            url, asset_id, 'mp4',
            entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
        self._sort_formats(hls_formats)

        # No separate metadata is fetched, so the asset id doubles as the title.
        info = {
            'id': asset_id,
            'title': asset_id,
            'formats': hls_formats,
        }
        return info
39
40
class N1InfoIIE(InfoExtractor):
    """Extractor for N1 Info article pages.

    An article is returned as a playlist. Its entries are the native
    ``<video>`` players found in the page (delegated to ``N1InfoAssetIE``)
    followed by YouTube and Reddit ``<iframe>`` embeds.
    """

    IE_NAME = 'N1Info:article'
    _VALID_URL = r'https?://(?:(?:ba|rs|hr)\.)?n1info\.(?:com|si)/(?:[^/]+/){1,2}(?P<id>[^/]+)'
    _TESTS = [{
        # Youtube embedded
        'url': 'https://rs.n1info.com/sport-klub/tenis/kako-je-djokovic-propustio-istorijsku-priliku-video/',
        'md5': '01ddb6646d0fd9c4c7d990aa77fe1c5a',
        'info_dict': {
            'id': 'L5Hd4hQVUpk',
            'ext': 'mp4',
            'upload_date': '20210913',
            'title': 'Ozmo i USO21, ep. 13: Novak Đoković – Danil Medvedev | Ključevi Poraza, Budućnost | SPORT KLUB TENIS',
            'description': 'md5:467f330af1effedd2e290f10dc31bb8e',
            'uploader': 'Sport Klub',
            'uploader_id': 'sportklub',
        }
    }, {
        'url': 'https://rs.n1info.com/vesti/djilas-los-plan-za-metro-nece-resiti-nijedan-saobracajni-problem/',
        'info_dict': {
            'id': 'bgmetrosot2409zta20210924174316682-n1info-rs-worldwide',
            'ext': 'mp4',
            'title': 'Đilas: Predlog izgradnje metroa besmislen; SNS odbacuje navode',
            'upload_date': '20210924',
            'timestamp': 1632481347,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://n1info.si/novice/slovenija/zadnji-dnevi-na-kopaliscu-ilirija-ilirija-ni-umrla-ubili-so-jo/',
        'info_dict': {
            'id': 'ljsottomazilirija3060921-n1info-si-worldwide',
            'ext': 'mp4',
            'title': 'Zadnji dnevi na kopališču Ilirija: “Ilirija ni umrla, ubili so jo”',
            'timestamp': 1632567630,
            'upload_date': '20210925',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Reddit embedded
        'url': 'https://ba.n1info.com/lifestyle/vucic-bolji-od-tita-ako-izgubi-ja-cu-da-crknem-jugoslavija-je-gotova/',
        'info_dict': {
            'id': '2wmfee9eycp71',
            'ext': 'mp4',
            'title': '"Ako Vučić izgubi izbore, ja ću da crknem, Jugoslavija je gotova"',
            'upload_date': '20210924',
            'timestamp': 1632448649.0,
            'uploader': 'YouLotWhatDontStop',
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': True,
        },
    }, {
        'url': 'https://hr.n1info.com/vijesti/pravobraniteljica-o-ubojstvu-u-zagrebu-radi-se-o-doista-nezapamcenoj-situaciji/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist of every supported video found in the article at *url*.

        The playlist id is the last path segment captured by ``_VALID_URL``;
        its title is the first ``<h1>`` of the page and its timestamp comes
        from the ``article:published_time`` meta tag.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<h1[^>]+>(.+?)</h1>', webpage, 'title')
        timestamp = unified_timestamp(self._html_search_meta('article:published_time', webpage))

        entries = []

        # Native players: the stream location is carried in the tag's
        # data-url attribute and is resolved by N1InfoAssetIE.
        for video_tag in re.findall(r'(?m)(<video[^>]+>)', webpage):
            video_data = extract_attributes(video_tag)
            asset_url = video_data.get('data-url')
            if not asset_url:
                # A <video> tag without data-url gives N1InfoAssetIE nothing
                # to resolve, so it is skipped rather than queued with a
                # None URL.
                continue
            entries.append({
                '_type': 'url_transparent',
                'url': asset_url,
                'id': video_data.get('id'),
                'title': title,
                'thumbnail': video_data.get('data-thumbnail'),
                'timestamp': timestamp,
                'ie_key': N1InfoAssetIE.ie_key()})

        # Third-party embeds. An <iframe> may have no src attribute at all;
        # falling back to '' keeps startswith() from raising on None.
        for iframe_tag in re.findall(r'(<iframe[^>]+>)', webpage):
            embed_url = extract_attributes(iframe_tag).get('src') or ''
            if embed_url.startswith('https://www.youtube.com'):
                entries.append(self.url_result(embed_url, ie=YoutubeIE.ie_key()))
            elif embed_url.startswith('https://www.redditmedia.com'):
                entries.append(self.url_result(embed_url, ie=RedditRIE.ie_key()))

        return {
            '_type': 'playlist',
            'id': video_id,
            'title': title,
            'timestamp': timestamp,
            'entries': entries,
        }