--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ remove_start,
+)
+
+
+class ArnesIE(InfoExtractor):
+    # Extractor for single videos on video.arnes.si.  Watch, embed and API
+    # URLs (optionally prefixed with a two-letter path segment such as "en")
+    # all resolve to the same 12-character id, whose metadata is read from
+    # the site's public JSON API.
+    IE_NAME = 'video.arnes.si'
+    IE_DESC = 'Arnes Video'
+    _VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch|embed|api/(?:asset|public/video))/(?P<id>[0-9a-zA-Z]{12})'
+    _TESTS = [{
+        'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
+        'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
+        'info_dict': {
+            'id': 'a1qrWTOQfVoU',
+            'ext': 'mp4',
+            'title': 'Linearna neodvisnost, definicija',
+            'description': 'Linearna neodvisnost, definicija',
+            'license': 'PRIVATE',
+            'creator': 'Polona Oblak',
+            'timestamp': 1585063725,
+            'upload_date': '20200324',
+            'channel': 'Polona Oblak',
+            'channel_id': 'q6pc04hw24cj',
+            'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
+            'duration': 596.75,
+            'view_count': int,
+            'tags': ['linearna_algebra'],
+            'start_time': 10,
+        }
+    }, {
+        'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
+        'only_matching': True,
+    }]
+    # Origin prepended to the media and thumbnail URLs returned by the API
+    # and used to build the API and channel URLs.
+    _BASE_URL = 'https://video.arnes.si'
+
+    def _real_extract(self, url):
+        """Build the info dict for the video referenced by ``url``.
+
+        The video id is taken from ``url`` and its metadata is downloaded
+        from ``/api/public/video/<id>``; the ``data`` member of the response
+        is used.  Every ``media`` entry that carries a ``url`` becomes a
+        format.  ``title`` is mandatory (a missing key raises ``KeyError``);
+        every other field is optional and yields ``None`` when absent.
+        """
+        video_id = self._match_id(url)
+
+        video = self._download_json(
+            self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
+        title = video['title']
+
+        formats = []
+        for media in (video.get('media') or []):
+            media_url = media.get('url')
+            if not media_url:
+                # Entries without a URL cannot be downloaded; skip them.
+                continue
+            formats.append({
+                'url': self._BASE_URL + media_url,
+                'format_id': remove_start(media.get('format'), 'FORMAT_'),
+                'format_note': media.get('formatTranslation'),
+                'width': int_or_none(media.get('width')),
+                'height': int_or_none(media.get('height')),
+            })
+        self._sort_formats(formats)
+
+        channel = video.get('channel') or {}
+        channel_id = channel.get('url')
+        thumbnail = video.get('thumbnailUrl')
+
+        # The start offset comes from the "t" query parameter of the input
+        # URL, when present.
+        query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            # Guard against a missing thumbnailUrl: concatenating None to
+            # the base URL would raise TypeError and abort the extraction.
+            'thumbnail': self._BASE_URL + thumbnail if thumbnail else None,
+            'description': video.get('description'),
+            'license': video.get('license'),
+            'creator': video.get('author'),
+            'timestamp': parse_iso8601(video.get('creationTime')),
+            'channel': channel.get('name'),
+            'channel_id': channel_id,
+            'channel_url': self._BASE_URL + '/?channel=' + channel_id if channel_id else None,
+            # NOTE(review): the API value is presumably in milliseconds,
+            # hence the 1000 passed to float_or_none - confirm.
+            'duration': float_or_none(video.get('duration'), 1000),
+            'view_count': int_or_none(video.get('views')),
+            'tags': video.get('hashtags'),
+            'start_time': int_or_none(query.get('t', [None])[0]),
+        }
""" Return a compat_cookies.SimpleCookie with the cookies for the url """
req = sanitized_Request(url)
self._downloader.cookiejar.add_cookie_header(req)
- return compat_cookies.SimpleCookie(req.get_header('Cookie'))
+ cookie = req.get_header('Cookie')
+ if cookie and sys.version_info[0] == 2:
+ cookie = str(cookie)
+ return compat_cookies.SimpleCookie(cookie)
def _apply_first_set_cookie_header(self, url_handle, cookie):
"""
ArteTVEmbedIE,
ArteTVPlaylistIE,
)
+from .arnes import ArnesIE
from .asiancrush import (
AsianCrushIE,
AsianCrushPlaylistIE,
PacktPubIE,
PacktPubCourseIE,
)
+from .palcomp3 import (
+ PalcoMP3IE,
+ PalcoMP3ArtistIE,
+ PalcoMP3VideoIE,
+)
from .pandoratv import PandoraTVIE
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ str_or_none,
+ try_get,
+)
+
+
+class PalcoMP3BaseIE(InfoExtractor):
+    # Shared GraphQL plumbing for the PalcoMP3 extractors.
+    #
+    # _GQL_QUERY_TMPL is filled with (artist slug, artist fields).
+    # _ARTIST_FIELDS_TMPL is filled in two steps: _real_initialize()
+    # substitutes _MUSIC_FIELDS (which also turns the escaped "%%s" into
+    # "%s"), and _real_extract() then substitutes the music slug.
+    _GQL_QUERY_TMPL = '''{
+ artist(slug: "%s") {
+ %s
+ }
+}'''
+    _ARTIST_FIELDS_TMPL = '''music(slug: "%%s") {
+ %s
+ }'''
+    _MUSIC_FIELDS = '''duration
+ hls
+ mp3File
+ musicID
+ plays
+ title'''
+
+    def _call_api(self, artist_slug, artist_fields):
+        """Run the artist query and return the ``data`` member of the reply."""
+        gql_query = self._GQL_QUERY_TMPL % (artist_slug, artist_fields)
+        response = self._download_json(
+            'https://www.palcomp3.com.br/graphql/', artist_slug,
+            query={'query': gql_query})
+        return response['data']
+
+    def _parse_music(self, music):
+        """Turn one ``music`` object from the API into an info dict.
+
+        ``musicID`` and ``title`` are required; the HLS and MP3 formats are
+        only added when their URL is present.
+        """
+        track_id = compat_str(music['musicID'])
+        track_title = music['title']
+
+        # Candidate formats in their original order: HLS first, MP3 last.
+        hls_format = {
+            'url': music.get('hls'),
+            'protocol': 'm3u8_native',
+            'ext': 'mp4',
+        }
+        mp3_format = {
+            'url': music.get('mp3File'),
+        }
+        formats = [fmt for fmt in (hls_format, mp3_format) if fmt['url']]
+
+        return {
+            'id': track_id,
+            'title': track_title,
+            'formats': formats,
+            'duration': int_or_none(music.get('duration')),
+            'view_count': int_or_none(music.get('plays')),
+        }
+
+    def _real_initialize(self):
+        # First substitution step: insert the music fields into the artist
+        # fields template, storing the result on the instance.
+        self._ARTIST_FIELDS_TMPL %= self._MUSIC_FIELDS
+
+    def _real_extract(self, url):
+        """Fetch the single music item addressed by ``url`` and parse it."""
+        match = re.match(self._VALID_URL, url)
+        artist_slug, music_slug = match.groups()
+        data = self._call_api(
+            artist_slug, self._ARTIST_FIELDS_TMPL % music_slug)
+        return self._parse_music(data['artist']['music'])
+
+
+class PalcoMP3IE(PalcoMP3BaseIE):
+    # Single-song pages: /<artist>/<song>.  Extraction itself is inherited
+    # from PalcoMP3BaseIE.
+    IE_NAME = 'PalcoMP3:song'
+    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)'
+    _TESTS = [{
+        'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/nossas-composicoes-cuida-bem-dela/',
+        'md5': '99fd6405b2d8fd589670f6db1ba3b358',
+        'info_dict': {
+            'id': '3162927',
+            'ext': 'mp3',
+            'title': 'Nossas Composições - CUIDA BEM DELA',
+            'duration': 210,
+            'view_count': int,
+        }
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        """Decline URLs that PalcoMP3VideoIE accepts; otherwise defer to the base check."""
+        if PalcoMP3VideoIE.suitable(url):
+            return False
+        return super(PalcoMP3IE, cls).suitable(url)
+
+
+class PalcoMP3ArtistIE(PalcoMP3BaseIE):
+    # Artist pages: /<artist>.  Produces a playlist with one entry per
+    # music node returned for the artist.
+    IE_NAME = 'PalcoMP3:artist'
+    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<id>[^/?&#]+)'
+    _TESTS = [{
+        'url': 'https://www.palcomp3.com.br/condedoforro/',
+        'info_dict': {
+            'id': '358396',
+            'title': 'Conde do Forró',
+        },
+        'playlist_mincount': 188,
+    }]
+    # Unlike the base template this one has a single placeholder, which
+    # receives the music fields.
+    _ARTIST_FIELDS_TMPL = '''artistID
+ musics {
+ nodes {
+ %s
+ }
+ }
+ name'''
+
+    @classmethod
+    def suitable(cls, url):
+        """Decline URLs matching the song pattern; otherwise defer to the base check."""
+        if re.match(PalcoMP3IE._VALID_URL, url):
+            return False
+        return super(PalcoMP3ArtistIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        """Return a playlist of every music item listed for the artist in ``url``."""
+        artist_slug = self._match_id(url)
+        artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
+
+        def iter_tracks():
+            # A missing or malformed "musics.nodes" yields no entries.
+            nodes = try_get(artist, lambda x: x['musics']['nodes'], list) or []
+            for node in nodes:
+                yield self._parse_music(node)
+
+        playlist_id = str_or_none(artist.get('artistID'))
+        playlist_title = artist.get('name')
+        return self.playlist_result(iter_tracks(), playlist_id, playlist_title)
+
+
+class PalcoMP3VideoIE(PalcoMP3BaseIE):
+    # Song pages carrying the "#clipe" fragment: the API is asked only for
+    # the song's YouTube id and extraction is delegated to the Youtube
+    # extractor.
+    IE_NAME = 'PalcoMP3:video'
+    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)/?#clipe'
+    _TESTS = [{
+        'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/maiara-e-maraisa-voce-faz-falta-aqui-ao-vivo-em-vicosa-mg/#clipe',
+        'add_ie': ['Youtube'],
+        'info_dict': {
+            'id': '_pD1nR2qqPg',
+            'ext': 'mp4',
+            'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
+            'description': 'md5:7043342c09a224598e93546e98e49282',
+            'upload_date': '20161107',
+            'uploader_id': 'maiaramaraisaoficial',
+            'uploader': 'Maiara e Maraisa',
+        }
+    }]
+    _MUSIC_FIELDS = 'youtubeID'
+
+    def _parse_music(self, music):
+        """Return a URL result handing the item's ``youtubeID`` to the Youtube extractor."""
+        clip_id = music['youtubeID']
+        return self.url_result(clip_id, 'Youtube', clip_id)
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import js_to_json
+from ..utils import (
+ get_element_by_class,
+ int_or_none,
+ remove_start,
+ strip_or_none,
+ unified_strdate,
+)
class ScreencastOMaticIE(InfoExtractor):
- _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
- _TEST = {
+ _VALID_URL = r'https?://screencast-o-matic\.com/(?:(?:watch|player)/|embed\?.*?\bsc=)(?P<id>[0-9a-zA-Z]+)'
+ _TESTS = [{
'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
'md5': '483583cb80d92588f15ccbedd90f0c18',
'info_dict': {
'title': 'Welcome to 3-4 Philosophy @ DECV!',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
- 'duration': 369.163,
+ 'duration': 369,
+ 'upload_date': '20141216',
}
- }
+ }, {
+ 'url': 'http://screencast-o-matic.com/player/c2lD3BeOPl',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://screencast-o-matic.com/embed?ff=true&sc=cbV2r4Q5TL&fromPH=true&a=1',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- jwplayer_data = self._parse_json(
- self._search_regex(
- r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);", webpage, 'setup code'),
- video_id, transform_source=js_to_json)
-
- info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
- info_dict.update({
- 'title': self._og_search_title(webpage),
- 'description': self._og_search_description(webpage),
+ webpage = self._download_webpage(
+ 'https://screencast-o-matic.com/player/' + video_id, video_id)
+ info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+ info.update({
+ 'id': video_id,
+ 'title': get_element_by_class('overlayTitle', webpage),
+ 'description': strip_or_none(get_element_by_class('overlayDescription', webpage)) or None,
+ 'duration': int_or_none(self._search_regex(
+ r'player\.duration\s*=\s*function\(\)\s*{\s*return\s+(\d+);\s*};',
+ webpage, 'duration', default=None)),
+ 'upload_date': unified_strdate(remove_start(
+ get_element_by_class('overlayPublished', webpage), 'Published: ')),
})
- return info_dict
+ return info
(lambda x: x['ownerText']['runs'][0]['text'],
lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
return {
- '_type': 'url_transparent',
+ '_type': 'url',
'ie_key': YoutubeIE.ie_key(),
'id': video_id,
'url': video_id,
}, {
'url': 'https://www.youtube.com/TheYoungTurks/live',
'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/hashtag/cctv9',
+ 'info_dict': {
+ 'id': 'cctv9',
+ 'title': '#cctv9',
+ },
+ 'playlist_mincount': 350,
}]
@classmethod
for entry in self._post_thread_entries(renderer):
yield entry
+ r''' # unused
+ def _rich_grid_entries(self, contents):
+ for content in contents:
+ video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
+ if video_renderer:
+ entry = self._video_entry(video_renderer)
+ if entry:
+ yield entry
+ '''
+
@staticmethod
def _build_continuation_query(continuation, ctp=None):
query = {
channel_name = renderer.get('title')
channel_url = renderer.get('channelUrl')
channel_id = renderer.get('externalId')
-
- if not renderer:
+ else:
renderer = try_get(
data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
+
if renderer:
title = renderer.get('title')
description = renderer.get('description', '')
'width': int_or_none(t.get('width')),
'height': int_or_none(t.get('height')),
})
-
if playlist_id is None:
playlist_id = item_id
if title is None:
- title = playlist_id
+ title = (
+ try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
+ or playlist_id)
title += format_field(selected_tab, 'title', ' - %s')
metadata = {