]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/abcnews.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
14 class AbcNewsVideoIE(AMPIE
):
15 IE_NAME
= 'abcnews:video'
21 (?:[^/]+/)*video/(?P<display_id>[0-9a-z-]+)-|
22 video/(?:embed|itemfeed)\?.*?\bid=
24 fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
30 'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
34 'display_id': 'week-exclusive-irans-foreign-minister-zarif',
35 'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif',
36 'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
38 'thumbnail': r
're:^https?://.*\.jpg$',
39 'timestamp': 1380454200,
40 'upload_date': '20130929',
44 'skip_download': True,
47 'url': 'http://abcnews.go.com/video/embed?id=46979033',
48 'only_matching': True,
50 'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
51 'only_matching': True,
53 'url': 'http://abcnews.go.com/video/itemfeed?id=46979033',
54 'only_matching': True,
56 'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761',
57 'only_matching': True,
60 def _real_extract(self
, url
):
61 mobj
= self
._match
_valid
_url
(url
)
62 display_id
= mobj
.group('display_id')
63 video_id
= mobj
.group('id')
64 info_dict
= self
._extract
_feed
_info
(
65 'http://abcnews.go.com/video/itemfeed?id=%s' % video_id
)
68 'display_id': display_id
,
73 class AbcNewsIE(InfoExtractor
):
75 _VALID_URL
= r
'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
79 'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501',
82 'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player",
83 'description': 'Billingsley went from a child actor to Hollywood power player.',
87 'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
91 'title': 'Justin Timberlake Drops Hints For Secret Single',
92 'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
93 'upload_date': '20160505',
94 'timestamp': 1462442280,
98 'skip_download': True,
99 # The embedded YouTube video is blocked due to copyright issues
100 'playlist_items': '1',
102 'add_ie': ['AbcNewsVideo'],
104 'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
105 'only_matching': True,
107 # inline.type == 'video'
108 'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
109 'only_matching': True,
112 def _real_extract(self
, url
):
113 story_id
= self
._match
_id
(url
)
114 webpage
= self
._download
_webpage
(url
, story_id
)
115 story
= self
._parse
_json
(self
._search
_regex
(
116 r
"window\['__abcnews__'\]\s*=\s*({.+?});",
117 webpage
, 'data'), story_id
)['page']['content']['story']['everscroll'][0]
118 article_contents
= story
.get('articleContents') or {}
121 featured_video
= story
.get('featuredVideo') or {}
122 feed
= try_get(featured_video
, lambda x
: x
['video']['feed'])
126 'id': featured_video
.get('id'),
127 'title': featured_video
.get('name'),
129 'thumbnail': featured_video
.get('images'),
130 'description': featured_video
.get('description'),
131 'timestamp': parse_iso8601(featured_video
.get('uploadDate')),
132 'duration': parse_duration(featured_video
.get('duration')),
133 'ie_key': AbcNewsVideoIE
.ie_key(),
136 for inline
in (article_contents
.get('inlines') or []):
137 inline_type
= inline
.get('type')
138 if inline_type
== 'iframe':
139 iframe_url
= try_get(inline
, lambda x
: x
['attrs']['src'])
141 yield self
.url_result(iframe_url
)
142 elif inline_type
== 'video':
143 video_id
= inline
.get('id')
148 'url': 'http://abcnews.go.com/video/embed?id=' + video_id
,
149 'thumbnail': inline
.get('imgSrc') or inline
.get('imgDefault'),
150 'description': inline
.get('description'),
151 'duration': parse_duration(inline
.get('duration')),
152 'ie_key': AbcNewsVideoIE
.ie_key(),
155 return self
.playlist_result(
156 entries(), story_id
, article_contents
.get('headline'),
157 article_contents
.get('subHead'))