]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/cgtn.py
1 from . common
import InfoExtractor
8 class CGTNIE ( InfoExtractor
):
9 _VALID_URL
= r
'https?://news\.cgtn\.com/news/[0-9] {4} -[0-9] {2} -[0-9] {2} /[a-zA-Z0-9-]+-(?P<id>[a-zA-Z0-9-]+)/index\.html'
12 'url' : 'https://news.cgtn.com/news/2021-03-09/Up-and-Out-of-Poverty-Ep-1-A-solemn-promise-YuOUaOzGQU/index.html' ,
16 'title' : 'Up and Out of Poverty Ep. 1: A solemn promise' ,
17 'thumbnail' : r
're:^https?://.*\.jpg$' ,
18 'timestamp' : 1615295940 ,
19 'upload_date' : '20210309' ,
20 'categories' : [ 'Video' ],
26 'url' : 'https://news.cgtn.com/news/2021-06-06/China-Indonesia-vow-to-further-deepen-maritime-cooperation-10REvJCewCY/index.html' ,
30 'title' : 'China, Indonesia vow to further deepen maritime cooperation' ,
31 'thumbnail' : r
're:^https?://.*\.png$' ,
32 'description' : 'China and Indonesia vowed to upgrade their cooperation into the maritime sector and also for political security, economy, and cultural and people-to-people exchanges.' ,
34 'categories' : [ 'China' ],
35 'timestamp' : 1622950200 ,
36 'upload_date' : '20210606' ,
39 'skip_download' : False
44 def _real_extract ( self
, url
):
45 video_id
= self
._ match
_ id
( url
)
46 webpage
= self
._ download
_ webpage
( url
, video_id
)
48 download_url
= self
._ html
_ search
_ regex
( r
'data-video ="(?P<url>.+m3u8)"' , webpage
, 'download_url' )
49 datetime_str
= self
._ html
_ search
_ regex
(
50 r
'<span class="date">\s*(.+?)\s*</span>' , webpage
, 'datetime_str' , fatal
= False )
51 category
= self
._ html
_ search
_ regex
(
52 r
'<span class="section">\s*(.+?)\s*</span>' , webpage
, 'category' , fatal
= False )
53 author
= self
._ search
_ regex
(
54 r
'<div class="news-author-name">\s*(.+?)\s*</div>' , webpage
, 'author' , default
= None )
58 'title' : self
._ og
_ search
_ title
( webpage
),
59 'description' : self
._ og
_ search
_ description
( webpage
, default
= None ),
60 'thumbnail' : self
._ og
_ search
_ thumbnail
( webpage
),
61 'formats' : self
._ extract
_ m
3u8_ formats
( download_url
, video_id
, 'mp4' , 'm3u8_native' , m3u8_id
= 'hls' ),
62 'categories' : [ category
] if category
else None ,
63 'creators' : [ author
] if author
else None ,
64 'timestamp' : try_get ( unified_timestamp ( datetime_str
), lambda x
: x
- 8 * 3600 ),