Closes #1609, Closes #3232, Closes #4763, Closes #6026, Closes #6322, Closes #7912
Authored by: seproDev
WyborczaPodcastIE,
WyborczaVideoIE,
)
-from .airmozilla import AirMozillaIE
from .airtv import AirTVIE
from .aitube import AitubeKZVideoIE
from .aljazeera import AlJazeeraIE
from .arnes import ArnesIE
from .atresplayer import AtresPlayerIE
from .atscaleconf import AtScaleConfEventIE
-from .atttechchannel import ATTTechChannelIE
from .atvat import ATVAtIE
from .audimedia import AudiMediaIE
from .audioboom import AudioBoomIE
BitChuteIE,
BitChuteChannelIE,
)
-from .bitwave import (
- BitwaveReplayIE,
- BitwaveStreamIE,
-)
-from .biqle import BIQLEIE
from .blackboardcollaborate import BlackboardCollaborateIE
from .bleacherreport import (
BleacherReportIE,
from .box import BoxIE
from .boxcast import BoxCastVideoIE
from .bpb import BpbIE
-from .br import (
- BRIE,
- BRMediathekIE,
-)
+from .br import BRIE
from .bravotv import BravoTVIE
from .brainpop import (
BrainPOPIE,
BrainPOPFrIE,
BrainPOPIlIE,
)
-from .breakcom import BreakIE
from .breitbart import BreitBartIE
from .brightcove import (
BrightcoveLegacyIE,
from .cammodels import CamModelsIE
from .camsoda import CamsodaIE
from .camtasia import CamtasiaEmbedIE
-from .camwithher import CamWithHerIE
from .canal1 import Canal1IE
from .canalalpha import CanalAlphaIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .caracoltv import CaracolTvPlayIE
-from .carambatv import (
- CarambaTVIE,
- CarambaTVPageIE,
-)
from .cartoonnetwork import CartoonNetworkIE
from .cbc import (
CBCIE,
from .cellebrite import CellebriteIE
from .ceskatelevize import CeskaTelevizeIE
from .cgtn import CGTNIE
-from .channel9 import Channel9IE
from .charlierose import CharlieRoseIE
from .chaturbate import ChaturbateIE
from .chilloutzone import ChilloutzoneIE
ChingariIE,
ChingariUserIE,
)
-from .chirbit import (
- ChirbitIE,
- ChirbitProfileIE,
-)
-from .cinchcast import CinchcastIE
from .cinemax import CinemaxIE
from .cinetecamilano import CinetecaMilanoIE
from .cineverse import (
from .cliphunter import CliphunterIE
from .clippit import ClippitIE
from .cliprs import ClipRsIE
-from .clipsyndicate import ClipsyndicateIE
from .closertotruth import CloserToTruthIE
from .cloudflarestream import CloudflareStreamIE
-from .cloudy import CloudyIE
from .clubic import ClubicIE
from .clyp import ClypIE
from .cmt import CMTIE
DacastVODIE,
DacastPlaylistIE,
)
-from .daftsex import DaftsexIE
from .dailymail import DailyMailIE
from .dailymotion import (
DailymotionIE,
from .dfb import DFBIE
from .dhm import DHMIE
from .digg import DiggIE
-from .dotsub import DotsubIE
from .douyutv import (
DouyuShowIE,
DouyuTVIE,
DubokuPlaylistIE
)
from .dumpert import DumpertIE
-from .defense import DefenseGouvFrIE
from .deuxm import (
DeuxMIE,
DeuxMNewsIE
from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE
from .ebaumsworld import EbaumsWorldIE
from .ebay import EbayIE
-from .echomsk import EchoMskIE
from .egghead import (
EggheadCourseIE,
EggheadLessonIE,
)
-from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .einthusan import EinthusanIE
from .eitb import EitbIE
-from .elevensports import ElevenSportsIE
-from .ellentube import (
- EllenTubeIE,
- EllenTubeVideoIE,
- EllenTubePlaylistIE,
-)
from .elonet import ElonetIE
from .elpais import ElPaisIE
from .eltrecetv import ElTreceTVIE
from .embedly import EmbedlyIE
-from .engadget import EngadgetIE
from .epicon import (
EpiconIE,
EpiconSeriesIE,
ERTFlixIE,
ERTWebtvEmbedIE,
)
-from .escapist import EscapistIE
from .espn import (
ESPNIE,
WatchESPNIE,
FiveThirtyEightIE,
ESPNCricInfoIE,
)
-from .esri import EsriVideoIE
from .ettutv import EttuTvIE
from .europa import EuropaIE, EuroParlWebstreamIE
from .europeantour import EuropeanTourIE
from .eurosport import EurosportIE
from .euscreen import EUScreenIE
-from .expotv import ExpoTVIE
from .expressen import ExpressenIE
-from .extremetube import ExtremeTubeIE
from .eyedotv import EyedoTVIE
from .facebook import (
FacebookIE,
PornerBrosIE,
FuxIE,
)
-from .fourzerostudio import (
- FourZeroStudioArchiveIE,
- FourZeroStudioClipIE,
-)
from .fox import FOXIE
from .fox9 import (
FOX9IE,
FOX9NewsIE,
)
-from .foxgay import FoxgayIE
from .foxnews import (
FoxNewsIE,
FoxNewsArticleIE,
)
from .funk import FunkIE
from .funker530 import Funker530IE
-from .fusion import FusionIE
from .fuyintv import FuyinTVIE
from .gab import (
GabTVIE,
GettrIE,
GettrStreamingIE,
)
-from .gfycat import GfycatIE
from .giantbomb import GiantBombIE
from .giga import GigaIE
from .glide import GlideIE
from .hearthisat import HearThisAtIE
from .heise import HeiseIE
from .hellporno import HellPornoIE
-from .helsinki import HelsinkiIE
from .hgtv import HGTVComShowIE
from .hketv import HKETVIE
from .hidive import HiDiveIE
from .historicfilms import HistoricFilmsIE
-from .hitbox import HitboxIE, HitboxLiveIE
from .hitrecord import HitRecordIE
from .hollywoodreporter import (
HollywoodReporterIE,
HotStarSeasonIE,
HotStarSeriesIE,
)
-from .howcast import HowcastIE
-from .howstuffworks import HowStuffWorksIE
from .hrefli import HrefLiRedirectIE
from .hrfensehen import HRFernsehenIE
from .hrti import (
from .kankanews import KankaNewsIE
from .karaoketv import KaraoketvIE
from .karrierevideos import KarriereVideosIE
-from .keezmovies import KeezMoviesIE
from .kelbyone import KelbyOneIE
from .khanacademy import (
KhanAcademyIE,
LA7PodcastEpisodeIE,
LA7PodcastIE,
)
-from .laola1tv import (
- Laola1TvEmbedIE,
- Laola1TvIE,
- EHFTVIE,
- ITTFIE,
-)
from .lastfm import (
LastFMIE,
LastFMPlaylistIE,
LinkedInLearningIE,
LinkedInLearningCourseIE,
)
-from .linuxacademy import LinuxAcademyIE
from .liputan6 import Liputan6IE
from .listennotes import ListenNotesIE
from .litv import LiTVIE
LyndaIE,
LyndaCourseIE
)
-from .m6 import M6IE
from .magellantv import MagellanTVIE
from .magentamusik360 import MagentaMusik360IE
from .mailru import (
from .megaphone import MegaphoneIE
from .meipai import MeipaiIE
from .melonvod import MelonVODIE
-from .meta import METAIE
-from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
-from .mgoon import MgoonIE
from .mgtv import MGTVIE
from .miaopai import MiaoPaiIE
from .microsoftstream import MicrosoftStreamIE
)
from .ministrygrid import MinistryGridIE
from .minoto import MinotoIE
-from .miomio import MioMioIE
from .mirrativ import (
MirrativIE,
MirrativUserIE,
MLBArticleIE,
)
from .mlssoccer import MLSSoccerIE
-from .mnet import MnetIE
from .mocha import MochaVideoIE
-from .moevideo import MoeVideoIE
-from .mofosex import (
- MofosexIE,
- MofosexEmbedIE,
-)
from .mojvideo import MojvideoIE
from .monstercat import MonstercatIE
from .morningstar import MorningstarIE
MotherlessGalleryIE,
)
from .motorsport import MotorsportIE
-from .movieclips import MovieClipsIE
from .moviepilot import MoviepilotIE
from .moview import MoviewPlayIE
from .moviezine import MoviezineIE
MusicdexArtistIE,
MusicdexPlaylistIE,
)
-from .mwave import MwaveIE, MwaveMeetGreetIE
from .mxplayer import (
MxplayerIE,
MxplayerShowIE,
)
-from .mychannels import MyChannelsIE
from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
-from .myvi import (
- MyviIE,
- MyviEmbedIE,
-)
from .myvideoge import MyVideoGeIE
from .myvidster import MyVidsterIE
from .mzaalo import MzaaloIE
NewgroundsUserIE,
)
from .newspicks import NewsPicksIE
-from .newstube import NewstubeIE
from .newsy import NewsyIE
from .nextmedia import (
NextMediaIE,
NickIE,
NickBrIE,
NickDeIE,
- NickNightIE,
NickRuIE,
)
from .niconico import (
from .nonktube import NonkTubeIE
from .noodlemagazine import NoodleMagazineIE
from .noovo import NoovoIE
-from .normalboots import NormalbootsIE
-from .nosvideo import NosVideoIE
from .nosnl import NOSNLArticleIE
from .nova import (
NovaEmbedIE,
OnetPlIE,
)
from .onionstudios import OnionStudiosIE
-from .ooyala import (
- OoyalaIE,
- OoyalaExternalIE,
-)
from .opencast import (
OpencastIE,
OpencastPlaylistIE,
PalcoMP3ArtistIE,
PalcoMP3VideoIE,
)
-from .pandoratv import PandoraTVIE
from .panopto import (
PanoptoIE,
PanoptoListIE,
PelotonIE,
PelotonLiveIE
)
-from .people import PeopleIE
from .performgroup import PerformGroupIE
from .periscope import (
PeriscopeIE,
PlatziIE,
PlatziCourseIE,
)
-from .playfm import PlayFMIE
from .playplustv import PlayPlusTVIE
-from .plays import PlaysTVIE
from .playstuff import PlayStuffIE
from .playsuisse import PlaySuisseIE
from .playtvak import PlaytvakIE
-from .playvid import PlayvidIE
from .playwire import PlaywireIE
from .plutotv import PlutoTVIE
from .pluralsight import (
from .popcorntv import PopcornTVIE
from .porn91 import Porn91IE
from .pornbox import PornboxIE
-from .porncom import PornComIE
from .pornflip import PornFlipIE
-from .pornhd import PornHdIE
from .pornhub import (
PornHubIE,
PornHubUserIE,
from .pornotube import PornotubeIE
from .pornovoisines import PornoVoisinesIE
from .pornoxo import PornoXOIE
-from .pornez import PornezIE
from .puhutv import (
PuhuTVIE,
PuhuTVSerieIE,
)
from .radiode import RadioDeIE
from .radiojavan import RadioJavanIE
-from .radiobremen import RadioBremenIE
from .radiofrance import (
FranceCultureIE,
RadioFranceIE,
RCTIPlusTVIE,
)
from .rds import RDSIE
-from .recurbate import RecurbateIE
from .redbee import ParliamentLiveUKIE, RTBFIE
from .redbulltv import (
RedBullTVIE,
from .reuters import ReutersIE
from .reverbnation import ReverbNationIE
from .rheinmaintv import RheinMainTVIE
-from .rice import RICEIE
from .rmcdecouverte import RMCDecouverteIE
from .rockstargames import RockstarGamesIE
from .rokfin import (
RTLLuLiveIE,
RTLLuRadioIE,
)
-from .rtl2 import (
- RTL2IE,
- RTL2YouIE,
- RTL2YouSeriesIE,
-)
+from .rtl2 import RTL2IE
from .rtnews import (
RTNewsIE,
RTDocumentryIE,
RTVEInfantilIE,
RTVETelevisionIE,
)
-from .rtvnh import RTVNHIE
from .rtvs import RTVSIE
from .rtvslo import RTVSLOIE
-from .ruhd import RUHDIE
from .rule34video import Rule34VideoIE
from .rumble import (
RumbleEmbedIE,
ShahidIE,
ShahidShowIE,
)
-from .shared import (
- SharedIE,
- VivoIE,
-)
from .sharevideos import ShareVideosEmbedIE
from .sibnet import SibnetEmbedIE
from .shemaroome import ShemarooMeIE
SpankBangIE,
SpankBangPlaylistIE,
)
-from .spankwire import SpankwireIE
from .spiegel import SpiegelIE
from .spike import (
BellatorIE,
StoryFireSeriesIE,
)
from .streamable import StreamableIE
-from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE
from .streamff import StreamFFIE
from .streetvoice import StreetVoiceIE
SVTSeriesIE,
)
from .swearnet import SwearnetEpisodeIE
-from .swrmediathek import SWRMediathekIE
from .syvdk import SYVDKIE
from .syfy import SyfyIE
from .sztvhu import SztvHuIE
ConanClassicIE,
)
from .teamtreehouse import TeamTreeHouseIE
-from .techtalks import TechTalksIE
from .ted import (
TedEmbedIE,
TedPlaylistIE,
TikTokLiveIE,
DouyinIE,
)
-from .tinypic import TinyPicIE
from .tmz import TMZIE
from .tnaflix import (
TNAFlixNetworkEmbedIE,
from .toggo import (
ToggoIE,
)
-from .tokentube import (
- TokentubeIE,
- TokentubeChannelIE
-)
from .tonline import TOnlineIE
from .toongoggles import ToonGogglesIE
from .toutv import TouTvIE
TrillerUserIE,
TrillerShortIE,
)
-from .trilulilu import TriluliluIE
from .trovo import (
TrovoIE,
TrovoVodIE,
TuneInPodcastEpisodeIE,
TuneInShortenerIE,
)
-from .tunepk import TunePkIE
from .turbo import TurboIE
from .tv2 import (
TV2IE,
from .tviplayer import TVIPlayerIE
from .tvland import TVLandIE
from .tvn24 import TVN24IE
-from .tvnet import TVNetIE
from .tvnoe import TVNoeIE
-from .tvnow import (
- TVNowIE,
- TVNowFilmIE,
- TVNowNewIE,
- TVNowSeasonIE,
- TVNowAnnualIE,
- TVNowShowIE,
-)
from .tvopengr import (
TVOpenGrWatchIE,
TVOpenGrEmbedIE,
)
from .tvplayer import TVPlayerIE
from .tweakers import TweakersIE
-from .twentyfourvideo import TwentyFourVideoIE
from .twentymin import TwentyMinutenIE
from .twentythreevideo import TwentyThreeVideoIE
from .twitcasting import (
from .umg import UMGDeIE
from .unistra import UnistraIE
from .unity import UnityIE
-from .unscripted import UnscriptedNewsVideoIE
from .unsupported import KnownDRMIE, KnownPiracyIE
from .uol import UOLIE
from .uplynk import (
from .utreon import UtreonIE
from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE
-from .veehd import VeeHDIE
from .veo import VeoIE
from .veoh import (
VeohIE,
ViceArticleIE,
ViceShowIE,
)
-from .vidbit import VidbitIE
from .viddler import ViddlerIE
from .videa import VideaIE
from .videocampus_sachsen import (
VimmIE,
VimmRecordingIE,
)
-from .vimple import VimpleIE
from .vine import (
VineIE,
VineUserIE,
VKPlayLiveIE,
)
from .vocaroo import VocarooIE
-from .vodlocker import VodlockerIE
from .vodpl import VODPlIE
from .vodplatform import VODPlatformIE
-from .voicerepublic import VoiceRepublicIE
from .voicy import (
VoicyIE,
VoicyChannelIE,
KetnetIE,
DagelijkseKostIE,
)
-from .vrak import VrakIE
-from .vrv import (
- VRVIE,
- VRVSeriesIE,
-)
-from .vshare import VShareIE
from .vtm import VTMIE
from .medialaan import MedialaanIE
from .vuclip import VuClipIE
-from .vupload import VuploadIE
from .vvvvid import (
VVVVIDIE,
VVVVIDShowIE,
)
-from .vyborymos import VyboryMosIE
-from .vzaar import VzaarIE
-from .wakanim import WakanimIE
from .walla import WallaIE
from .washingtonpost import (
WashingtonPostIE,
WASDTVClipIE,
)
from .wat import WatIE
-from .watchbox import WatchBoxIE
-from .watchindianporn import WatchIndianPornIE
from .wdr import (
WDRIE,
WDRPageIE,
from .weyyak import WeyyakIE
from .whyp import WhypIE
from .wikimedia import WikimediaIE
-from .willow import WillowIE
from .wimbledon import WimbledonIE
from .wimtv import WimTVIE
from .whowatch import WhoWatchIE
WykopPostCommentIE,
)
from .xanimu import XanimuIE
-from .xbef import XBefIE
from .xboxclips import XboxClipsIE
from .xfileshare import XFileShareIE
from .xhamster import (
from .xminus import XMinusIE
from .xnxx import XNXXIE
from .xstream import XstreamIE
-from .xtube import XTubeUserIE, XTubeIE
-from .xuite import XuiteIE
from .xvideos import (
XVideosIE,
XVideosQuickiesIE
YappyIE,
YappyProfileIE,
)
-from .yesjapan import YesJapanIE
-from .yinyuetai import YinYueTaiIE
from .yle_areena import YleAreenaIE
-from .ynet import YnetIE
from .youjizz import YouJizzIE
from .youku import (
YoukuIE,
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- parse_duration,
- parse_iso8601,
-)
-
-
-class AirMozillaIE(InfoExtractor):
- _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
- _TEST = {
- 'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
- 'md5': '8d02f53ee39cf006009180e21df1f3ba',
- 'info_dict': {
- 'id': '6x4q2w',
- 'ext': 'mp4',
- 'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
- 'thumbnail': r're:https?://.*/poster\.jpg',
- 'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
- 'timestamp': 1422487800,
- 'upload_date': '20150128',
- 'location': 'SFO Commons',
- 'duration': 3780,
- 'view_count': int,
- 'categories': ['Main', 'Privacy'],
- }
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id')
-
- embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
- jwconfig = self._parse_json(self._search_regex(
- r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config']
-
- info_dict = self._parse_jwplayer_data(jwconfig, video_id)
- view_count = int_or_none(self._html_search_regex(
- r'Views since archived: ([0-9]+)',
- webpage, 'view count', fatal=False))
- timestamp = parse_iso8601(self._html_search_regex(
- r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
- duration = parse_duration(self._search_regex(
- r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
- webpage, 'duration', fatal=False))
-
- info_dict.update({
- 'id': video_id,
- 'title': self._og_search_title(webpage),
- 'url': self._og_search_url(webpage),
- 'display_id': display_id,
- 'description': self._og_search_description(webpage),
- 'timestamp': timestamp,
- 'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
- 'duration': duration,
- 'view_count': view_count,
- 'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
- })
-
- return info_dict
class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
+ _WORKING = False
IE_NAME = 'aol.com'
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import unified_strdate
-
-
-class ATTTechChannelIE(InfoExtractor):
- _VALID_URL = r'https?://techchannel\.att\.com/play-video\.cfm/([^/]+/)*(?P<id>.+)'
- _TEST = {
- 'url': 'http://techchannel.att.com/play-video.cfm/2014/1/27/ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
- 'info_dict': {
- 'id': '11316',
- 'display_id': 'ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
- 'ext': 'flv',
- 'title': 'AT&T Archives : The UNIX System: Making Computers Easier to Use',
- 'description': 'A 1982 film about UNIX is the foundation for software in use around Bell Labs and AT&T.',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'upload_date': '20140127',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- video_url = self._search_regex(
- r"url\s*:\s*'(rtmp://[^']+)'",
- webpage, 'video URL')
-
- video_id = self._search_regex(
- r'mediaid\s*=\s*(\d+)',
- webpage, 'video id', fatal=False)
-
- title = self._og_search_title(webpage)
- description = self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
- upload_date = unified_strdate(self._search_regex(
- r'[Rr]elease\s+date:\s*(\d{1,2}/\d{1,2}/\d{4})',
- webpage, 'upload date', fatal=False), False)
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'url': video_url,
- 'ext': 'flv',
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'upload_date': upload_date,
- }
class BehindKinkIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
_TEST = {
'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
from .mtv import MTVServicesInfoExtractor
from ..utils import unified_strdate
-# TODO Remove - Reason: Outdated Site
-
class BetIE(MTVServicesInfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
_TESTS = [
{
class BFIPlayerIE(InfoExtractor):
+ _WORKING = False
IE_NAME = 'bfi:player'
_VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P<id>[\w-]+)-online'
_TEST = {
+++ /dev/null
-from .common import InfoExtractor
-from .vk import VKIE
-from ..compat import compat_b64decode
-from ..utils import (
- int_or_none,
- js_to_json,
- traverse_obj,
- unified_timestamp,
-)
-
-
-class BIQLEIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
- _TESTS = [{
- 'url': 'https://biqle.ru/watch/-2000421746_85421746',
- 'md5': 'ae6ef4f04d19ac84e4658046d02c151c',
- 'info_dict': {
- 'id': '-2000421746_85421746',
- 'ext': 'mp4',
- 'title': 'Forsaken By Hope Studio Clip',
- 'description': 'Forsaken By Hope Studio Clip — Смотреть онлайн',
- 'upload_date': '19700101',
- 'thumbnail': r're:https://[^/]+/impf/7vN3ACwSTgChP96OdOfzFjUCzFR6ZglDQgWsIw/KPaACiVJJxM\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=b48ea459c4d33dbcba5e26d63574b1cb&type=video_thumb',
- 'timestamp': 0,
- },
- }, {
- 'url': 'http://biqle.org/watch/-44781847_168547604',
- 'md5': '7f24e72af1db0edf7c1aaba513174f97',
- 'info_dict': {
- 'id': '-44781847_168547604',
- 'ext': 'mp4',
- 'title': 'Ребенок в шоке от автоматической мойки',
- 'description': 'Ребенок в шоке от автоматической мойки — Смотреть онлайн',
- 'timestamp': 1396633454,
- 'upload_date': '20140404',
- 'thumbnail': r're:https://[^/]+/c535507/u190034692/video/l_b84df002\.jpg',
- },
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- title = self._html_search_meta('name', webpage, 'Title', fatal=False)
- timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None))
- description = self._html_search_meta('description', webpage, 'Description', default=None)
-
- global_embed_url = self._search_regex(
- r'<script[^<]+?window.globEmbedUrl\s*=\s*\'((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^\']+)\'',
- webpage, 'global Embed url')
- hash = self._search_regex(
- r'<script id="data-embed-video[^<]+?hash: "([^"]+)"[^<]*</script>', webpage, 'Hash')
-
- embed_url = global_embed_url + hash
-
- if VKIE.suitable(embed_url):
- return self.url_result(embed_url, VKIE.ie_key(), video_id)
-
- embed_page = self._download_webpage(
- embed_url, video_id, 'Downloading embed webpage', headers={'Referer': url})
-
- glob_params = self._parse_json(self._search_regex(
- r'<script id="globParams">[^<]*window.globParams = ([^;]+);[^<]+</script>',
- embed_page, 'Global Parameters'), video_id, transform_source=js_to_json)
- host_name = compat_b64decode(glob_params['server'][::-1]).decode()
-
- item = self._download_json(
- f'https://{host_name}/method/video.get/{video_id}', video_id,
- headers={'Referer': url}, query={
- 'token': glob_params['video']['access_token'],
- 'videos': video_id,
- 'ckey': glob_params['c_key'],
- 'credentials': glob_params['video']['credentials'],
- })['response']['items'][0]
-
- formats = []
- for f_id, f_url in item.get('files', {}).items():
- if f_id == 'external':
- return self.url_result(f_url)
- ext, height = f_id.split('_')
- height_extra_key = traverse_obj(glob_params, ('video', 'partial', 'quality', height))
- if height_extra_key:
- formats.append({
- 'format_id': f'{height}p',
- 'url': f'https://{host_name}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}',
- 'height': int_or_none(height),
- 'ext': ext,
- })
-
- thumbnails = []
- for k, v in item.items():
- if k.startswith('photo_') and v:
- width = k.replace('photo_', '')
- thumbnails.append({
- 'id': width,
- 'url': v,
- 'width': int_or_none(width),
- })
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'comment_count': int_or_none(item.get('comments')),
- 'description': description,
- 'duration': int_or_none(item.get('duration')),
- 'thumbnails': thumbnails,
- 'timestamp': timestamp,
- 'view_count': int_or_none(item.get('views')),
- }
+++ /dev/null
-from .common import InfoExtractor
-
-
-class BitwaveReplayIE(InfoExtractor):
- IE_NAME = 'bitwave:replay'
- _VALID_URL = r'https?://(?:www\.)?bitwave\.tv/(?P<user>\w+)/replay/(?P<id>\w+)/?$'
- _TEST = {
- 'url': 'https://bitwave.tv/RhythmicCarnage/replay/z4P6eq5L7WDrM85UCrVr',
- 'only_matching': True
- }
-
- def _real_extract(self, url):
- replay_id = self._match_id(url)
- replay = self._download_json(
- 'https://api.bitwave.tv/v1/replays/' + replay_id,
- replay_id
- )
-
- return {
- 'id': replay_id,
- 'title': replay['data']['title'],
- 'uploader': replay['data']['name'],
- 'uploader_id': replay['data']['name'],
- 'url': replay['data']['url'],
- 'thumbnails': [
- {'url': x} for x in replay['data']['thumbnails']
- ],
- }
-
-
-class BitwaveStreamIE(InfoExtractor):
- IE_NAME = 'bitwave:stream'
- _VALID_URL = r'https?://(?:www\.)?bitwave\.tv/(?P<id>\w+)/?$'
- _TEST = {
- 'url': 'https://bitwave.tv/doomtube',
- 'only_matching': True
- }
-
- def _real_extract(self, url):
- username = self._match_id(url)
- channel = self._download_json(
- 'https://api.bitwave.tv/v1/channels/' + username,
- username)
-
- formats = self._extract_m3u8_formats(
- channel['data']['url'], username,
- 'mp4')
-
- return {
- 'id': username,
- 'title': channel['data']['title'],
- 'uploader': username,
- 'uploader_id': username,
- 'formats': formats,
- 'thumbnail': channel['data']['thumbnail'],
- 'is_live': True,
- 'view_count': channel['data']['viewCount']
- }
'upload_date': '20150615',
'uploader': 'Team Stream Now ',
},
- 'add_ie': ['Ooyala'],
+ 'skip': 'Video removed',
}, {
'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
'md5': '6a5cd403418c7b01719248ca97fb0692',
video_type = video['type']
if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'):
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
- elif video_type == 'ooyala.com':
- info['url'] = 'ooyala:%s' % video['id']
elif video_type == 'youtube.com':
info['url'] = video['id']
elif video_type == 'vine.co':
-import json
-
from .common import InfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
int_or_none,
parse_duration,
- parse_iso8601,
xpath_element,
xpath_text,
)
class BRIE(InfoExtractor):
+ _WORKING = False
IE_DESC = 'Bayerischer Rundfunk'
_VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
} for variant in variants.findall('variant') if xpath_text(variant, 'url')]
thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
return thumbnails
-
-
-class BRMediathekIE(InfoExtractor):
- IE_DESC = 'Bayerischer Rundfunk Mediathek'
- _VALID_URL = r'https?://(?:www\.)?br\.de/mediathek//?video/(?:[^/?&#]+?-)?(?P<id>av:[0-9a-f]{24})'
-
- _TESTS = [{
- 'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e',
- 'md5': 'fdc3d485835966d1622587d08ba632ec',
- 'info_dict': {
- 'id': 'av:5a1e6a6e8fce6d001871cc8e',
- 'ext': 'mp4',
- 'title': 'Die Sendung vom 28.11.2017',
- 'description': 'md5:6000cdca5912ab2277e5b7339f201ccc',
- 'timestamp': 1511942766,
- 'upload_date': '20171129',
- }
- }, {
- 'url': 'https://www.br.de/mediathek//video/av:61b0db581aed360007558c12',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- clip_id = self._match_id(url)
-
- clip = self._download_json(
- 'https://proxy-base.master.mango.express/graphql',
- clip_id, data=json.dumps({
- "query": """{
- viewer {
- clip(id: "%s") {
- title
- description
- duration
- createdAt
- ageRestriction
- videoFiles {
- edges {
- node {
- publicLocation
- fileSize
- videoProfile {
- width
- height
- bitrate
- encoding
- }
- }
- }
- }
- captionFiles {
- edges {
- node {
- publicLocation
- }
- }
- }
- teaserImages {
- edges {
- node {
- imageFiles {
- edges {
- node {
- publicLocation
- width
- height
- }
- }
- }
- }
- }
- }
- }
- }
-}""" % clip_id}).encode(), headers={
- 'Content-Type': 'application/json',
- })['data']['viewer']['clip']
- title = clip['title']
-
- formats = []
- for edge in clip.get('videoFiles', {}).get('edges', []):
- node = edge.get('node', {})
- n_url = node.get('publicLocation')
- if not n_url:
- continue
- ext = determine_ext(n_url)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- n_url, clip_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- else:
- video_profile = node.get('videoProfile', {})
- tbr = int_or_none(video_profile.get('bitrate'))
- format_id = 'http'
- if tbr:
- format_id += '-%d' % tbr
- formats.append({
- 'format_id': format_id,
- 'url': n_url,
- 'width': int_or_none(video_profile.get('width')),
- 'height': int_or_none(video_profile.get('height')),
- 'tbr': tbr,
- 'filesize': int_or_none(node.get('fileSize')),
- })
-
- subtitles = {}
- for edge in clip.get('captionFiles', {}).get('edges', []):
- node = edge.get('node', {})
- n_url = node.get('publicLocation')
- if not n_url:
- continue
- subtitles.setdefault('de', []).append({
- 'url': n_url,
- })
-
- thumbnails = []
- for edge in clip.get('teaserImages', {}).get('edges', []):
- for image_edge in edge.get('node', {}).get('imageFiles', {}).get('edges', []):
- node = image_edge.get('node', {})
- n_url = node.get('publicLocation')
- if not n_url:
- continue
- thumbnails.append({
- 'url': n_url,
- 'width': int_or_none(node.get('width')),
- 'height': int_or_none(node.get('height')),
- })
-
- return {
- 'id': clip_id,
- 'title': title,
- 'description': clip.get('description'),
- 'duration': int_or_none(clip.get('duration')),
- 'timestamp': parse_iso8601(clip.get('createdAt')),
- 'age_limit': int_or_none(clip.get('ageRestriction')),
- 'formats': formats,
- 'subtitles': subtitles,
- 'thumbnails': thumbnails,
- }
+++ /dev/null
-from .common import InfoExtractor
-from .youtube import YoutubeIE
-from ..utils import (
- int_or_none,
- url_or_none,
-)
-
-
-class BreakIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
- _TESTS = [{
- 'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
- 'info_dict': {
- 'id': '2468056',
- 'ext': 'mp4',
- 'title': 'When Girls Act Like D-Bags',
- 'age_limit': 13,
- },
- }, {
- # youtube embed
- 'url': 'http://www.break.com/video/someone-forgot-boat-brakes-work',
- 'info_dict': {
- 'id': 'RrrDLdeL2HQ',
- 'ext': 'mp4',
- 'title': 'Whale Watching Boat Crashing Into San Diego Dock',
- 'description': 'md5:afc1b2772f0a8468be51dd80eb021069',
- 'upload_date': '20160331',
- 'uploader': 'Steve Holden',
- 'uploader_id': 'sdholden07',
- },
- 'params': {
- 'skip_download': True,
- }
- }, {
- 'url': 'http://www.break.com/video/ugc/baby-flex-2773063',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id, video_id = self._match_valid_url(url).groups()
-
- webpage = self._download_webpage(url, display_id)
-
- youtube_url = YoutubeIE._extract_url(webpage)
- if youtube_url:
- return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
-
- content = self._parse_json(
- self._search_regex(
- r'(?s)content["\']\s*:\s*(\[.+?\])\s*[,\n]', webpage,
- 'content'),
- display_id)
-
- formats = []
- for video in content:
- video_url = url_or_none(video.get('url'))
- if not video_url:
- continue
- bitrate = int_or_none(self._search_regex(
- r'(\d+)_kbps', video_url, 'tbr', default=None))
- formats.append({
- 'url': video_url,
- 'format_id': 'http-%d' % bitrate if bitrate else 'http',
- 'tbr': bitrate,
- })
-
- title = self._search_regex(
- (r'title["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
- r'<h1[^>]*>(?P<value>[^<]+)'), webpage, 'title', group='value')
-
- def get(key, name):
- return int_or_none(self._search_regex(
- r'%s["\']\s*:\s*["\'](\d+)' % key, webpage, name,
- default=None))
-
- age_limit = get('ratings', 'age limit')
- video_id = video_id or get('pid', 'video id') or display_id
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'age_limit': age_limit,
- 'formats': formats,
- }
class BYUtvIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
_TESTS = [{
- # ooyalaVOD
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
'info_dict': {
'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH',
'params': {
'skip_download': True,
},
- 'add_ie': ['Ooyala'],
}, {
# dvr
'url': 'https://www.byutv.org/player/8f1dab9b-b243-47c8-b525-3e2d021a3451/byu-softball-pacific-vs-byu-41219---game-2',
'x-byutv-platformkey': 'xsaaw9c7y5',
})
- ep = video.get('ooyalaVOD')
- if ep:
- return {
- '_type': 'url_transparent',
- 'ie_key': 'Ooyala',
- 'url': 'ooyala:%s' % ep['providerId'],
- 'id': video_id,
- 'display_id': display_id,
- 'title': ep.get('title'),
- 'description': ep.get('description'),
- 'thumbnail': ep.get('imageThumbnail'),
- }
-
info = {}
formats = []
subtitles = {}
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- parse_duration,
- unified_strdate,
-)
-
-
-class CamWithHerIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?camwithher\.tv/view_video\.php\?.*\bviewkey=(?P<id>\w+)'
-
- _TESTS = [{
- 'url': 'http://camwithher.tv/view_video.php?viewkey=6e9a24e2c0e842e1f177&page=&viewtype=&category=',
- 'info_dict': {
- 'id': '5644',
- 'ext': 'flv',
- 'title': 'Periscope Tease',
- 'description': 'In the clouds teasing on periscope to my favorite song',
- 'duration': 240,
- 'view_count': int,
- 'comment_count': int,
- 'uploader': 'MileenaK',
- 'upload_date': '20160322',
- 'age_limit': 18,
- },
- 'params': {
- 'skip_download': True,
- }
- }, {
- 'url': 'http://camwithher.tv/view_video.php?viewkey=6dfd8b7c97531a459937',
- 'only_matching': True,
- }, {
- 'url': 'http://camwithher.tv/view_video.php?page=&viewkey=6e9a24e2c0e842e1f177&viewtype=&category=',
- 'only_matching': True,
- }, {
- 'url': 'http://camwithher.tv/view_video.php?viewkey=b6c3b5bea9515d1a1fc4&page=&viewtype=&category=mv',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- flv_id = self._html_search_regex(
- r'<a[^>]+href=["\']/download/\?v=(\d+)', webpage, 'video id')
-
- # Video URL construction algorithm is reverse-engineered from cwhplayer.swf
- rtmp_url = 'rtmp://camwithher.tv/clipshare/%s' % (
- ('mp4:%s.mp4' % flv_id) if int(flv_id) > 2010 else flv_id)
-
- title = self._html_search_regex(
- r'<div[^>]+style="float:left"[^>]*>\s*<h2>(.+?)</h2>', webpage, 'title')
- description = self._html_search_regex(
- r'>Description:</span>(.+?)</div>', webpage, 'description', default=None)
-
- runtime = self._search_regex(
- r'Runtime\s*:\s*(.+?) \|', webpage, 'duration', default=None)
- if runtime:
- runtime = re.sub(r'[\s-]', '', runtime)
- duration = parse_duration(runtime)
- view_count = int_or_none(self._search_regex(
- r'Views\s*:\s*(\d+)', webpage, 'view count', default=None))
- comment_count = int_or_none(self._search_regex(
- r'Comments\s*:\s*(\d+)', webpage, 'comment count', default=None))
-
- uploader = self._search_regex(
- r'Added by\s*:\s*<a[^>]+>([^<]+)</a>', webpage, 'uploader', default=None)
- upload_date = unified_strdate(self._search_regex(
- r'Added on\s*:\s*([\d-]+)', webpage, 'upload date', default=None))
-
- return {
- 'id': flv_id,
- 'url': rtmp_url,
- 'ext': 'flv',
- 'no_resume': True,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'view_count': view_count,
- 'comment_count': comment_count,
- 'uploader': uploader,
- 'upload_date': upload_date,
- 'age_limit': 18
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- format_field,
- float_or_none,
- int_or_none,
- try_get,
-)
-
-from .videomore import VideomoreIE
-
-
-class CarambaTVIE(InfoExtractor):
- _VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)'
- _TESTS = [{
- 'url': 'http://video1.carambatv.ru/v/191910501',
- 'md5': '2f4a81b7cfd5ab866ee2d7270cb34a2a',
- 'info_dict': {
- 'id': '191910501',
- 'ext': 'mp4',
- 'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 2678.31,
- },
- }, {
- 'url': 'carambatv:191910501',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- video = self._download_json(
- 'http://video1.carambatv.ru/v/%s/videoinfo.js' % video_id,
- video_id)
-
- title = video['title']
-
- base_url = video.get('video') or 'http://video1.carambatv.ru/v/%s/' % video_id
-
- formats = [{
- 'url': base_url + f['fn'],
- 'height': int_or_none(f.get('height')),
- 'format_id': format_field(f, 'height', '%sp'),
- } for f in video['qualities'] if f.get('fn')]
-
- thumbnail = video.get('splash')
- duration = float_or_none(try_get(
- video, lambda x: x['annotations'][0]['end_time'], compat_str))
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- }
-
-
-class CarambaTVPageIE(InfoExtractor):
- _VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)'
- _TEST = {
- 'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/',
- 'md5': 'a49fb0ec2ad66503eeb46aac237d3c86',
- 'info_dict': {
- 'id': '475222',
- 'ext': 'flv',
- 'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
- 'thumbnail': r're:^https?://.*\.jpg',
- # duration reported by videomore is incorrect
- 'duration': int,
- },
- 'add_ie': [VideomoreIE.ie_key()],
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- videomore_url = VideomoreIE._extract_url(webpage)
- if not videomore_url:
- videomore_id = self._search_regex(
- r'getVMCode\s*\(\s*["\']?(\d+)', webpage, 'videomore id',
- default=None)
- if videomore_id:
- videomore_url = 'videomore:%s' % videomore_id
- if videomore_url:
- title = self._og_search_title(webpage)
- return {
- '_type': 'url_transparent',
- 'url': videomore_url,
- 'ie_key': VideomoreIE.ie_key(),
- 'title': title,
- }
-
- video_url = self._og_search_property('video:iframe', webpage, default=None)
-
- if not video_url:
- video_id = self._search_regex(
- r'(?:video_id|crmb_vuid)\s*[:=]\s*["\']?(\d+)',
- webpage, 'video id')
- video_url = 'carambatv:%s' % video_id
-
- return self.url_result(video_url, CarambaTVIE.ie_key())
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- clean_html,
- int_or_none,
- parse_iso8601,
- qualities,
- unescapeHTML,
-)
-
-
-class Channel9IE(InfoExtractor):
- IE_DESC = 'Channel 9'
- IE_NAME = 'channel9'
- _VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
- _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b']
-
- _TESTS = [{
- 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
- 'md5': '32083d4eaf1946db6d454313f44510ca',
- 'info_dict': {
- 'id': '6c413323-383a-49dc-88f9-a22800cab024',
- 'ext': 'wmv',
- 'title': 'Developer Kick-Off Session: Stuff We Love',
- 'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731',
- 'duration': 4576,
- 'thumbnail': r're:https?://.*\.jpg',
- 'timestamp': 1377717420,
- 'upload_date': '20130828',
- 'session_code': 'KOS002',
- 'session_room': 'Arena 1A',
- 'session_speakers': 'count:5',
- },
- }, {
- 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
- 'md5': 'dcf983ee6acd2088e7188c3cf79b46bc',
- 'info_dict': {
- 'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024',
- 'ext': 'wmv',
- 'title': 'Self-service BI with Power BI - nuclear testing',
- 'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54',
- 'duration': 1540,
- 'thumbnail': r're:https?://.*\.jpg',
- 'timestamp': 1386381991,
- 'upload_date': '20131207',
- 'authors': ['Mike Wilmot'],
- },
- }, {
- # low quality mp4 is best
- 'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
- 'info_dict': {
- 'id': '33ad69d2-6a4e-4172-83a1-a523013dec76',
- 'ext': 'mp4',
- 'title': 'Ranges for the Standard Library',
- 'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372',
- 'duration': 5646,
- 'thumbnail': r're:https?://.*\.jpg',
- 'upload_date': '20150930',
- 'timestamp': 1443640735,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
- 'info_dict': {
- 'id': 'Events/DEVintersection/DEVintersection-2016',
- 'title': 'DEVintersection 2016 Orlando Sessions',
- },
- 'playlist_mincount': 14,
- }, {
- 'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
- 'only_matching': True,
- }, {
- 'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
- 'only_matching': True,
- }]
-
- _RSS_URL = 'http://channel9.msdn.com/%s/RSS'
-
- def _extract_list(self, video_id, rss_url=None):
- if not rss_url:
- rss_url = self._RSS_URL % video_id
- rss = self._download_xml(rss_url, video_id, 'Downloading RSS')
- entries = [self.url_result(session_url.text, 'Channel9')
- for session_url in rss.findall('./channel/item/link')]
- title_text = rss.find('./channel/title').text
- return self.playlist_result(entries, video_id, title_text)
-
- def _real_extract(self, url):
- content_path, rss = self._match_valid_url(url).groups()
-
- if rss:
- return self._extract_list(content_path, url)
-
- webpage = self._download_webpage(
- url, content_path, 'Downloading web page')
-
- episode_data = self._search_regex(
- r"data-episode='([^']+)'", webpage, 'episode data', default=None)
- if episode_data:
- episode_data = self._parse_json(unescapeHTML(
- episode_data), content_path)
- content_id = episode_data['contentId']
- is_session = '/Sessions(' in episode_data['api']
- content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + '?$select=Captions,CommentCount,MediaLengthInSeconds,PublishedDate,Rating,RatingCount,Title,VideoMP4High,VideoMP4Low,VideoMP4Medium,VideoPlayerPreviewImage,VideoWMV,VideoWMVHQ,Views,'
- if is_session:
- content_url += 'Code,Description,Room,Slides,Speakers,ZipFile&$expand=Speakers'
- else:
- content_url += 'Authors,Body&$expand=Authors'
- content_data = self._download_json(content_url, content_id)
- title = content_data['Title']
-
- QUALITIES = (
- 'mp3',
- 'wmv', 'mp4',
- 'wmv-low', 'mp4-low',
- 'wmv-mid', 'mp4-mid',
- 'wmv-high', 'mp4-high',
- )
-
- quality_key = qualities(QUALITIES)
-
- def quality(quality_id, format_url):
- return (len(QUALITIES) if '_Source.' in format_url
- else quality_key(quality_id))
-
- formats = []
- urls = set()
-
- SITE_QUALITIES = {
- 'MP3': 'mp3',
- 'MP4': 'mp4',
- 'Low Quality WMV': 'wmv-low',
- 'Low Quality MP4': 'mp4-low',
- 'Mid Quality WMV': 'wmv-mid',
- 'Mid Quality MP4': 'mp4-mid',
- 'High Quality WMV': 'wmv-high',
- 'High Quality MP4': 'mp4-high',
- }
-
- formats_select = self._search_regex(
- r'(?s)<select[^>]+name=["\']format[^>]+>(.+?)</select', webpage,
- 'formats select', default=None)
- if formats_select:
- for mobj in re.finditer(
- r'<option\b[^>]+\bvalue=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<',
- formats_select):
- format_url = mobj.group('url')
- if format_url in urls:
- continue
- urls.add(format_url)
- format_id = mobj.group('format')
- quality_id = SITE_QUALITIES.get(format_id, format_id)
- formats.append({
- 'url': format_url,
- 'format_id': quality_id,
- 'quality': quality(quality_id, format_url),
- 'vcodec': 'none' if quality_id == 'mp3' else None,
- })
-
- API_QUALITIES = {
- 'VideoMP4Low': 'mp4-low',
- 'VideoWMV': 'wmv-mid',
- 'VideoMP4Medium': 'mp4-mid',
- 'VideoMP4High': 'mp4-high',
- 'VideoWMVHQ': 'wmv-hq',
- }
-
- for format_id, q in API_QUALITIES.items():
- q_url = content_data.get(format_id)
- if not q_url or q_url in urls:
- continue
- urls.add(q_url)
- formats.append({
- 'url': q_url,
- 'format_id': q,
- 'quality': quality(q, q_url),
- })
-
- slides = content_data.get('Slides')
- zip_file = content_data.get('ZipFile')
-
- if not formats and not slides and not zip_file:
- self.raise_no_formats(
- 'None of recording, slides or zip are available for %s' % content_path)
-
- subtitles = {}
- for caption in content_data.get('Captions', []):
- caption_url = caption.get('Url')
- if not caption_url:
- continue
- subtitles.setdefault(caption.get('Language', 'en'), []).append({
- 'url': caption_url,
- 'ext': 'vtt',
- })
-
- common = {
- 'id': content_id,
- 'title': title,
- 'description': clean_html(content_data.get('Description') or content_data.get('Body')),
- 'thumbnail': content_data.get('VideoPlayerPreviewImage'),
- 'duration': int_or_none(content_data.get('MediaLengthInSeconds')),
- 'timestamp': parse_iso8601(content_data.get('PublishedDate')),
- 'avg_rating': int_or_none(content_data.get('Rating')),
- 'rating_count': int_or_none(content_data.get('RatingCount')),
- 'view_count': int_or_none(content_data.get('Views')),
- 'comment_count': int_or_none(content_data.get('CommentCount')),
- 'subtitles': subtitles,
- }
- if is_session:
- speakers = []
- for s in content_data.get('Speakers', []):
- speaker_name = s.get('FullName')
- if not speaker_name:
- continue
- speakers.append(speaker_name)
-
- common.update({
- 'session_code': content_data.get('Code'),
- 'session_room': content_data.get('Room'),
- 'session_speakers': speakers,
- })
- else:
- authors = []
- for a in content_data.get('Authors', []):
- author_name = a.get('DisplayName')
- if not author_name:
- continue
- authors.append(author_name)
- common['authors'] = authors
-
- contents = []
-
- if slides:
- d = common.copy()
- d.update({'title': title + '-Slides', 'url': slides})
- contents.append(d)
-
- if zip_file:
- d = common.copy()
- d.update({'title': title + '-Zip', 'url': zip_file})
- contents.append(d)
-
- if formats:
- d = common.copy()
- d.update({'title': title, 'formats': formats})
- contents.append(d)
- return self.playlist_result(contents)
- else:
- return self._extract_list(content_path)
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_b64decode
-from ..utils import parse_duration
-
-
-class ChirbitIE(InfoExtractor):
- IE_NAME = 'chirbit'
- _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
- _TESTS = [{
- 'url': 'http://chirb.it/be2abG',
- 'info_dict': {
- 'id': 'be2abG',
- 'ext': 'mp3',
- 'title': 'md5:f542ea253f5255240be4da375c6a5d7e',
- 'description': 'md5:f24a4e22a71763e32da5fed59e47c770',
- 'duration': 306,
- 'uploader': 'Gerryaudio',
- },
- 'params': {
- 'skip_download': True,
- }
- }, {
- 'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
- 'only_matching': True,
- }, {
- 'url': 'https://chirb.it/wp/MN58c2',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- audio_id = self._match_id(url)
-
- webpage = self._download_webpage(
- 'http://chirb.it/%s' % audio_id, audio_id)
-
- data_fd = self._search_regex(
- r'data-fd=(["\'])(?P<url>(?:(?!\1).)+)\1',
- webpage, 'data fd', group='url')
-
- # Reverse engineered from https://chirb.it/js/chirbit.player.js (look
- # for soundURL)
- audio_url = compat_b64decode(data_fd[::-1]).decode('utf-8')
-
- title = self._search_regex(
- r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title')
- description = self._search_regex(
- r'<h3>Description</h3>\s*<pre[^>]*>([^<]+)</pre>',
- webpage, 'description', default=None)
- duration = parse_duration(self._search_regex(
- r'class=["\']c-length["\'][^>]*>([^<]+)',
- webpage, 'duration', fatal=False))
- uploader = self._search_regex(
- r'id=["\']chirbit-username["\'][^>]*>([^<]+)',
- webpage, 'uploader', fatal=False)
-
- return {
- 'id': audio_id,
- 'url': audio_url,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'uploader': uploader,
- }
-
-
-class ChirbitProfileIE(InfoExtractor):
- IE_NAME = 'chirbit:profile'
- _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P<id>[^/]+)'
- _TEST = {
- 'url': 'http://chirbit.com/ScarletBeauty',
- 'info_dict': {
- 'id': 'ScarletBeauty',
- },
- 'playlist_mincount': 3,
- }
-
- def _real_extract(self, url):
- profile_id = self._match_id(url)
-
- webpage = self._download_webpage(url, profile_id)
-
- entries = [
- self.url_result(self._proto_relative_url('//chirb.it/' + video_id))
- for _, video_id in re.findall(r'<input[^>]+id=([\'"])copy-btn-(?P<id>[0-9a-zA-Z]+)\1', webpage)]
-
- return self.playlist_result(entries, profile_id)
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- unified_strdate,
- xpath_text,
-)
-
-
-class CinchcastIE(InfoExtractor):
- _VALID_URL = r'https?://player\.cinchcast\.com/.*?(?:assetId|show_id)=(?P<id>[0-9]+)'
- _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1']
-
- _TESTS = [{
- 'url': 'http://player.cinchcast.com/?show_id=5258197&platformId=1&assetType=single',
- 'info_dict': {
- 'id': '5258197',
- 'ext': 'mp3',
- 'title': 'Train Your Brain to Up Your Game with Coach Mandy',
- 'upload_date': '20130816',
- },
- }, {
- # Actual test is run in generic, look for undergroundwellness
- 'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- doc = self._download_xml(
- 'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % video_id,
- video_id)
-
- item = doc.find('.//item')
- title = xpath_text(item, './title', fatal=True)
- date_str = xpath_text(
- item, './{http://developer.longtailvideo.com/trac/}date')
- upload_date = unified_strdate(date_str, day_first=False)
- # duration is present but wrong
- formats = [{
- 'format_id': 'main',
- 'url': item.find('./{http://search.yahoo.com/mrss/}content').attrib['url'],
- }]
- backup_url = xpath_text(
- item, './{http://developer.longtailvideo.com/trac/}backupContent')
- if backup_url:
- formats.append({
- 'preference': 2, # seems to be more reliable
- 'format_id': 'backup',
- 'url': backup_url,
- })
-
- return {
- 'id': video_id,
- 'title': title,
- 'upload_date': upload_date,
- 'formats': formats,
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- find_xpath_attr,
- fix_xml_ampersands
-)
-
-
-class ClipsyndicateIE(InfoExtractor):
- _VALID_URL = r'https?://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
-
- _TESTS = [{
- 'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
- 'md5': '4d7d549451bad625e0ff3d7bd56d776c',
- 'info_dict': {
- 'id': '4629301',
- 'ext': 'mp4',
- 'title': 'Brick Briscoe',
- 'duration': 612,
- 'thumbnail': r're:^https?://.+\.jpg',
- },
- }, {
- 'url': 'http://chic.clipsyndicate.com/video/play/5844117/shark_attack',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- js_player = self._download_webpage(
- 'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
- video_id, 'Downlaoding player')
- # it includes a required token
- flvars = self._search_regex(r'flvars: "(.*?)"', js_player, 'flvars')
-
- pdoc = self._download_xml(
- 'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
- video_id, 'Downloading video info',
- transform_source=fix_xml_ampersands)
-
- track_doc = pdoc.find('trackList/track')
-
- def find_param(name):
- node = find_xpath_attr(track_doc, './/param', 'name', name)
- if node is not None:
- return node.attrib['value']
-
- return {
- 'id': video_id,
- 'title': find_param('title'),
- 'url': track_doc.find('location').text,
- 'thumbnail': find_param('thumbnail'),
- 'duration': int(find_param('duration')),
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- str_to_int,
- unified_strdate,
-)
-
-
-class CloudyIE(InfoExtractor):
- _IE_DESC = 'cloudy.ec'
- _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
- _TESTS = [{
- 'url': 'https://www.cloudy.ec/v/af511e2527aac',
- 'md5': '29832b05028ead1b58be86bf319397ca',
- 'info_dict': {
- 'id': 'af511e2527aac',
- 'ext': 'mp4',
- 'title': 'Funny Cats and Animals Compilation june 2013',
- 'upload_date': '20130913',
- 'view_count': int,
- }
- }, {
- 'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(
- 'https://www.cloudy.ec/embed.php', video_id, query={
- 'id': video_id,
- 'playerPage': 1,
- 'autoplay': 1,
- })
-
- info = self._parse_html5_media_entries(url, webpage, video_id)[0]
-
- webpage = self._download_webpage(
- 'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False)
-
- if webpage:
- info.update({
- 'title': self._search_regex(
- r'<h\d[^>]*>([^<]+)<', webpage, 'title'),
- 'upload_date': unified_strdate(self._search_regex(
- r'>Published at (\d{4}-\d{1,2}-\d{1,2})', webpage,
- 'upload date', fatal=False)),
- 'view_count': str_to_int(self._search_regex(
- r'([\d,.]+) views<', webpage, 'view count', fatal=False)),
- })
-
- if not info.get('title'):
- info['title'] = video_id
-
- info['id'] = video_id
-
- return info
class ClubicIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html'
_TESTS = [{
class CMTIE(MTVIE): # XXX: Do not subclass from concrete IE
+ _WORKING = False
IE_NAME = 'cmt.com'
_VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|(?:full-)?episodes|video-clips)/(?P<id>[^/]+)'
+++ /dev/null
-from .common import InfoExtractor
-from ..compat import compat_b64decode
-from ..utils import (
- ExtractorError,
- int_or_none,
- js_to_json,
- parse_count,
- parse_duration,
- traverse_obj,
- try_get,
- unified_timestamp,
-)
-
-
-class DaftsexIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?daft\.sex/watch/(?P<id>-?\d+_\d+)'
- _TESTS = [{
- 'url': 'https://daft.sex/watch/-35370899_456246186',
- 'md5': '64c04ef7b4c7b04b308f3b0c78efe7cd',
- 'info_dict': {
- 'id': '-35370899_456246186',
- 'ext': 'mp4',
- 'title': 'just relaxing',
- 'description': 'just relaxing – Watch video Watch video in high quality',
- 'upload_date': '20201113',
- 'timestamp': 1605261911,
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'age_limit': 18,
- 'duration': 15.0,
- 'view_count': int
- },
- }, {
- 'url': 'https://daft.sex/watch/-156601359_456242791',
- 'info_dict': {
- 'id': '-156601359_456242791',
- 'ext': 'mp4',
- 'title': 'Skye Blue - Dinner And A Show',
- 'description': 'Skye Blue - Dinner And A Show - Watch video Watch video in high quality',
- 'upload_date': '20200916',
- 'timestamp': 1600250735,
- 'thumbnail': 'https://psv153-1.crazycloud.ru/videos/-156601359/456242791/thumb.jpg?extra=i3D32KaBbBFf9TqDRMAVmQ',
- },
- 'skip': 'deleted / private'
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- title = self._html_search_meta('name', webpage, 'title')
- timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None))
- description = self._html_search_meta('description', webpage, 'Description', default=None)
-
- duration = parse_duration(self._search_regex(
- r'Duration: ((?:[0-9]{2}:){0,2}[0-9]{2})',
- webpage, 'duration', fatal=False))
- views = parse_count(self._search_regex(
- r'Views: ([0-9 ]+)',
- webpage, 'views', fatal=False))
-
- player_hash = self._search_regex(
- r'DaxabPlayer\.Init\({[\s\S]*hash:\s*"([0-9a-zA-Z_\-]+)"[\s\S]*}',
- webpage, 'player hash')
- player_color = self._search_regex(
- r'DaxabPlayer\.Init\({[\s\S]*color:\s*"([0-9a-z]+)"[\s\S]*}',
- webpage, 'player color', fatal=False) or ''
-
- embed_page = self._download_webpage(
- 'https://dxb.to/player/%s?color=%s' % (player_hash, player_color),
- video_id, headers={'Referer': url})
- video_params = self._parse_json(
- self._search_regex(
- r'window\.globParams\s*=\s*({[\S\s]+})\s*;\s*<\/script>',
- embed_page, 'video parameters'),
- video_id, transform_source=js_to_json)
-
- server_domain = 'https://%s' % compat_b64decode(video_params['server'][::-1]).decode('utf-8')
-
- cdn_files = traverse_obj(video_params, ('video', 'cdn_files')) or {}
- if cdn_files:
- formats = []
- for format_id, format_data in cdn_files.items():
- ext, height = format_id.split('_')
- formats.append({
- 'format_id': format_id,
- 'url': f'{server_domain}/videos/{video_id.replace("_", "/")}/{height}.mp4?extra={format_data.split(".")[-1]}',
- 'height': int_or_none(height),
- 'ext': ext,
- })
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'description': description,
- 'duration': duration,
- 'thumbnail': try_get(video_params, lambda vi: 'https:' + compat_b64decode(vi['video']['thumb']).decode('utf-8')),
- 'timestamp': timestamp,
- 'view_count': views,
- 'age_limit': 18,
- }
-
- items = self._download_json(
- f'{server_domain}/method/video.get/{video_id}', video_id,
- headers={'Referer': url}, query={
- 'token': video_params['video']['access_token'],
- 'videos': video_id,
- 'ckey': video_params['c_key'],
- 'credentials': video_params['video']['credentials'],
- })['response']['items']
-
- if not items:
- raise ExtractorError('Video is not available', video_id=video_id, expected=True)
-
- item = items[0]
- formats = []
- for f_id, f_url in item.get('files', {}).items():
- if f_id == 'external':
- return self.url_result(f_url)
- ext, height = f_id.split('_')
- height_extra_key = traverse_obj(video_params, ('video', 'partial', 'quality', height))
- if height_extra_key:
- formats.append({
- 'format_id': f'{height}p',
- 'url': f'{server_domain}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}',
- 'height': int_or_none(height),
- 'ext': ext,
- })
-
- thumbnails = []
- for k, v in item.items():
- if k.startswith('photo_') and v:
- width = k.replace('photo_', '')
- thumbnails.append({
- 'id': width,
- 'url': v,
- 'width': int_or_none(width),
- })
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'comment_count': int_or_none(item.get('comments')),
- 'description': description,
- 'duration': duration,
- 'thumbnails': thumbnails,
- 'timestamp': timestamp,
- 'view_count': views,
- 'age_limit': 18,
- }
+++ /dev/null
-from .common import InfoExtractor
-
-
-class DefenseGouvFrIE(InfoExtractor):
- IE_NAME = 'defense.gouv.fr'
- _VALID_URL = r'https?://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'
-
- _TEST = {
- 'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
- 'md5': '75bba6124da7e63d2d60b5244ec9430c',
- 'info_dict': {
- 'id': '11213',
- 'ext': 'mp4',
- 'title': 'attaque-chimique-syrienne-du-21-aout-2013-1'
- }
- }
-
- def _real_extract(self, url):
- title = self._match_id(url)
- webpage = self._download_webpage(url, title)
-
- video_id = self._search_regex(
- r"flashvars.pvg_id=\"(\d+)\";",
- webpage, 'ID')
-
- json_url = (
- 'http://static.videos.gouv.fr/brightcovehub/export/json/%s' %
- video_id)
- info = self._download_json(json_url, title, 'Downloading JSON config')
- video_url = info['renditions'][0]['url']
-
- return {
- 'id': video_id,
- 'ext': 'mp4',
- 'url': video_url,
- 'title': title,
- }
class DHMIE(InfoExtractor):
+ _WORKING = False
IE_DESC = 'Filmarchiv - Deutsches Historisches Museum'
_VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)'
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- float_or_none,
- int_or_none,
-)
-
-
-class DotsubIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
- _TESTS = [{
- 'url': 'https://dotsub.com/view/9c63db2a-fa95-4838-8e6e-13deafe47f09',
- 'md5': '21c7ff600f545358134fea762a6d42b6',
- 'info_dict': {
- 'id': '9c63db2a-fa95-4838-8e6e-13deafe47f09',
- 'ext': 'flv',
- 'title': 'MOTIVATION - "It\'s Possible" Best Inspirational Video Ever',
- 'description': 'md5:41af1e273edbbdfe4e216a78b9d34ac6',
- 'thumbnail': 're:^https?://dotsub.com/media/9c63db2a-fa95-4838-8e6e-13deafe47f09/p',
- 'duration': 198,
- 'uploader': 'liuxt',
- 'timestamp': 1385778501.104,
- 'upload_date': '20131130',
- 'view_count': int,
- }
- }, {
- 'url': 'https://dotsub.com/view/747bcf58-bd59-45b7-8c8c-ac312d084ee6',
- 'md5': '2bb4a83896434d5c26be868c609429a3',
- 'info_dict': {
- 'id': '168006778',
- 'ext': 'mp4',
- 'title': 'Apartments and flats in Raipur the white symphony',
- 'description': 'md5:784d0639e6b7d1bc29530878508e38fe',
- 'thumbnail': 're:^https?://dotsub.com/media/747bcf58-bd59-45b7-8c8c-ac312d084ee6/p',
- 'duration': 290,
- 'timestamp': 1476767794.2809999,
- 'upload_date': '20161018',
- 'uploader': 'parthivi001',
- 'uploader_id': 'user52596202',
- 'view_count': int,
- },
- 'add_ie': ['Vimeo'],
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- info = self._download_json(
- 'https://dotsub.com/api/media/%s/metadata' % video_id, video_id)
- video_url = info.get('mediaURI')
-
- if not video_url:
- webpage = self._download_webpage(url, video_id)
- video_url = self._search_regex(
- [r'<source[^>]+src="([^"]+)"', r'"file"\s*:\s*\'([^\']+)'],
- webpage, 'video url', default=None)
- info_dict = {
- 'id': video_id,
- 'url': video_url,
- 'ext': 'flv',
- }
-
- if not video_url:
- setup_data = self._parse_json(self._html_search_regex(
- r'(?s)data-setup=([\'"])(?P<content>(?!\1).+?)\1',
- webpage, 'setup data', group='content'), video_id)
- info_dict = {
- '_type': 'url_transparent',
- 'url': setup_data['src'],
- }
-
- info_dict.update({
- 'title': info['title'],
- 'description': info.get('description'),
- 'thumbnail': info.get('screenshotURI'),
- 'duration': int_or_none(info.get('duration'), 1000),
- 'uploader': info.get('user'),
- 'timestamp': float_or_none(info.get('dateCreated'), 1000),
- 'view_count': int_or_none(info.get('numberOfViews')),
- })
-
- return info_dict
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-
-
-class EchoMskIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'
- _TEST = {
- 'url': 'http://www.echo.msk.ru/sounds/1464134.html',
- 'md5': '2e44b3b78daff5b458e4dbc37f191f7c',
- 'info_dict': {
- 'id': '1464134',
- 'ext': 'mp3',
- 'title': 'Особое мнение - 29 декабря 2014, 19:08',
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- audio_url = self._search_regex(
- r'<a rel="mp3" href="([^"]+)">', webpage, 'audio URL')
-
- title = self._html_search_regex(
- r'<a href="/programs/[^"]+" target="_blank">([^<]+)</a>',
- webpage, 'title')
-
- air_date = self._html_search_regex(
- r'(?s)<div class="date">(.+?)</div>',
- webpage, 'date', fatal=False, default=None)
-
- if air_date:
- air_date = re.sub(r'(\s)\1+', r'\1', air_date)
- if air_date:
- title = '%s - %s' % (title, air_date)
-
- return {
- 'id': video_id,
- 'url': audio_url,
- 'title': title,
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
-
-
-class EHowIE(InfoExtractor):
- IE_NAME = 'eHow'
- _VALID_URL = r'https?://(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
- 'md5': '9809b4e3f115ae2088440bcb4efbf371',
- 'info_dict': {
- 'id': '12245069',
- 'ext': 'flv',
- 'title': 'Hardwood Flooring Basics',
- 'description': 'Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...',
- 'uploader': 'Erick Nathan',
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- video_url = self._search_regex(
- r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL')
- final_url = compat_urllib_parse_unquote(video_url)
- uploader = self._html_search_meta('uploader', webpage)
- title = self._og_search_title(webpage).replace(' | eHow', '')
-
- return {
- 'id': video_id,
- 'url': final_url,
- 'title': title,
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'description': self._og_search_description(webpage),
- 'uploader': uploader,
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- parse_iso8601,
- traverse_obj,
- url_or_none,
-)
-
-
-class ElevenSportsIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?elevensports\.com/view/event/(?P<id>\w+)'
- _TESTS = [{
- 'url': 'https://elevensports.com/view/event/clf46yr3kenn80jgrqsjmwefk',
- 'md5': 'c0958d9ff90e4503a75544358758921d',
- 'info_dict': {
- 'id': 'clf46yr3kenn80jgrqsjmwefk',
- 'title': 'Cleveland SC vs Lionsbridge FC',
- 'ext': 'mp4',
- 'description': 'md5:03b5238d6549f4ea1fddadf69b5e0b58',
- 'upload_date': '20230323',
- 'timestamp': 1679612400,
- 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
- },
- 'params': {'skip_download': 'm3u8'}
- }, {
- 'url': 'https://elevensports.com/view/event/clhpyd53b06160jez74qhgkmf',
- 'md5': 'c0958d9ff90e4503a75544358758921d',
- 'info_dict': {
- 'id': 'clhpyd53b06160jez74qhgkmf',
- 'title': 'AJNLF vs ARRAF',
- 'ext': 'mp4',
- 'description': 'md5:c8c5e75c78f37c6d15cd6c475e43a8c1',
- 'upload_date': '20230521',
- 'timestamp': 1684684800,
- 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
- },
- 'params': {'skip_download': 'm3u8'}
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- event_id = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['event']['mclsEventId']
- event_data = self._download_json(
- f'https://mcls-api.mycujoo.tv/bff/events/v1beta1/{event_id}', video_id,
- headers={'Authorization': 'Bearer FBVKACGN37JQC5SFA0OVK8KKSIOP153G'})
- formats, subtitles = self._extract_m3u8_formats_and_subtitles(
- event_data['streams'][0]['full_url'], video_id, 'mp4', m3u8_id='hls')
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'subtitles': subtitles,
- **traverse_obj(event_data, {
- 'title': ('title', {str}),
- 'description': ('description', {str}),
- 'timestamp': ('start_time', {parse_iso8601}),
- 'thumbnail': ('thumbnail_url', {url_or_none}),
- }),
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- clean_html,
- extract_attributes,
- float_or_none,
- int_or_none,
- try_get,
-)
-
-
-class EllenTubeBaseIE(InfoExtractor):
- def _extract_data_config(self, webpage, video_id):
- details = self._search_regex(
- r'(<[^>]+\bdata-component=(["\'])[Dd]etails.+?></div>)', webpage,
- 'details')
- return self._parse_json(
- extract_attributes(details)['data-config'], video_id)
-
- def _extract_video(self, data, video_id):
- title = data['title']
-
- formats = []
- duration = None
- for entry in data.get('media'):
- if entry.get('id') == 'm3u8':
- formats, subtitles = self._extract_m3u8_formats_and_subtitles(
- entry['url'], video_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls')
- duration = int_or_none(entry.get('duration'))
- break
-
- def get_insight(kind):
- return int_or_none(try_get(
- data, lambda x: x['insight']['%ss' % kind]))
-
- return {
- 'extractor_key': EllenTubeIE.ie_key(),
- 'id': video_id,
- 'title': title,
- 'description': data.get('description'),
- 'duration': duration,
- 'thumbnail': data.get('thumbnail'),
- 'timestamp': float_or_none(data.get('publishTime'), scale=1000),
- 'view_count': get_insight('view'),
- 'like_count': get_insight('like'),
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
-
-class EllenTubeIE(EllenTubeBaseIE):
- _VALID_URL = r'''(?x)
- (?:
- ellentube:|
- https://api-prod\.ellentube\.com/ellenapi/api/item/
- )
- (?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
- '''
- _TESTS = [{
- 'url': 'https://api-prod.ellentube.com/ellenapi/api/item/0822171c-3829-43bf-b99f-d77358ae75e3',
- 'md5': '2fabc277131bddafdd120e0fc0f974c9',
- 'info_dict': {
- 'id': '0822171c-3829-43bf-b99f-d77358ae75e3',
- 'ext': 'mp4',
- 'title': 'Ellen Meets Las Vegas Survivors Jesus Campos and Stephen Schuck',
- 'description': 'md5:76e3355e2242a78ad9e3858e5616923f',
- 'thumbnail': r're:^https?://.+?',
- 'duration': 514,
- 'timestamp': 1508505120,
- 'upload_date': '20171020',
- 'view_count': int,
- 'like_count': int,
- }
- }, {
- 'url': 'ellentube:734a3353-f697-4e79-9ca9-bfc3002dc1e0',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- data = self._download_json(
- 'https://api-prod.ellentube.com/ellenapi/api/item/%s' % video_id,
- video_id)
- return self._extract_video(data, video_id)
-
-
-class EllenTubeVideoIE(EllenTubeBaseIE):
- _VALID_URL = r'https?://(?:www\.)?ellentube\.com/video/(?P<id>.+?)\.html'
- _TEST = {
- 'url': 'https://www.ellentube.com/video/ellen-meets-las-vegas-survivors-jesus-campos-and-stephen-schuck.html',
- 'only_matching': True,
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- video_id = self._extract_data_config(webpage, display_id)['id']
- return self.url_result(
- 'ellentube:%s' % video_id, ie=EllenTubeIE.ie_key(),
- video_id=video_id)
-
-
-class EllenTubePlaylistIE(EllenTubeBaseIE):
- _VALID_URL = r'https?://(?:www\.)?ellentube\.com/(?:episode|studios)/(?P<id>.+?)\.html'
- _TESTS = [{
- 'url': 'https://www.ellentube.com/episode/dax-shepard-jordan-fisher-haim.html',
- 'info_dict': {
- 'id': 'dax-shepard-jordan-fisher-haim',
- 'title': "Dax Shepard, 'DWTS' Team Jordan Fisher & Lindsay Arnold, HAIM",
- 'description': 'md5:bfc982194dabb3f4e325e43aa6b2e21c',
- },
- 'playlist_count': 6,
- }, {
- 'url': 'https://www.ellentube.com/studios/macey-goes-rving0.html',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- data = self._extract_data_config(webpage, display_id)['data']
- feed = self._download_json(
- 'https://api-prod.ellentube.com/ellenapi/api/feed/?%s'
- % data['filter'], display_id)
- entries = [
- self._extract_video(elem, elem['id'])
- for elem in feed if elem.get('type') == 'VIDEO' and elem.get('id')]
- return self.playlist_result(
- entries, display_id, data.get('title'),
- clean_html(data.get('description')))
+++ /dev/null
-from .common import InfoExtractor
-
-
-class EngadgetIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?engadget\.com/video/(?P<id>[^/?#]+)'
-
- _TESTS = [{
- # video with vidible ID
- 'url': 'https://www.engadget.com/video/57a28462134aa15a39f0421a/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- return self.url_result('aol-video:%s' % video_id)
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- clean_html,
- int_or_none,
- float_or_none,
-)
-
-
-def _decrypt_config(key, string):
- a = ''
- i = ''
- r = ''
-
- while len(a) < (len(string) / 2):
- a += key
-
- a = a[0:int(len(string) / 2)]
-
- t = 0
- while t < len(string):
- i += chr(int(string[t] + string[t + 1], 16))
- t += 2
-
- icko = [s for s in i]
-
- for t, c in enumerate(a):
- r += chr(ord(c) ^ ord(icko[t]))
-
- return r
-
-
-class EscapistIE(InfoExtractor):
- _VALID_URL = r'https?://?(?:(?:www|v1)\.)?escapistmagazine\.com/videos/view/[^/]+/(?P<id>[0-9]+)'
- _TESTS = [{
- 'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
- 'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
- 'info_dict': {
- 'id': '6618',
- 'ext': 'mp4',
- 'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
- 'title': "Breaking Down Baldur's Gate",
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 264,
- 'uploader': 'The Escapist',
- }
- }, {
- 'url': 'http://www.escapistmagazine.com/videos/view/zero-punctuation/10044-Evolve-One-vs-Multiplayer',
- 'md5': '9e8c437b0dbb0387d3bd3255ca77f6bf',
- 'info_dict': {
- 'id': '10044',
- 'ext': 'mp4',
- 'description': 'This week, Zero Punctuation reviews Evolve.',
- 'title': 'Evolve - One vs Multiplayer',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 304,
- 'uploader': 'The Escapist',
- }
- }, {
- 'url': 'http://escapistmagazine.com/videos/view/the-escapist-presents/6618',
- 'only_matching': True,
- }, {
- 'url': 'https://v1.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- ims_video = self._parse_json(
- self._search_regex(
- r'imsVideo\.play\(({.+?})\);', webpage, 'imsVideo'),
- video_id)
- video_id = ims_video['videoID']
- key = ims_video['hash']
-
- config = self._download_webpage(
- 'http://www.escapistmagazine.com/videos/vidconfig.php',
- video_id, 'Downloading video config', headers={
- 'Referer': url,
- }, query={
- 'videoID': video_id,
- 'hash': key,
- })
-
- data = self._parse_json(_decrypt_config(key, config), video_id)
-
- video_data = data['videoData']
-
- title = clean_html(video_data['title'])
-
- formats = [{
- 'url': video['src'],
- 'format_id': '%s-%sp' % (determine_ext(video['src']), video['res']),
- 'height': int_or_none(video.get('res')),
- } for video in data['files']['videos']]
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': title,
- 'thumbnail': self._og_search_thumbnail(webpage) or data.get('poster'),
- 'description': self._og_search_description(webpage),
- 'duration': float_or_none(video_data.get('duration'), 1000),
- 'uploader': video_data.get('publisher'),
- 'series': video_data.get('show'),
- }
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
- int_or_none,
- parse_filesize,
- unified_strdate,
-)
-
-
-class EsriVideoIE(InfoExtractor):
- _VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'https://video.esri.com/watch/1124/arcgis-online-_dash_-developing-applications',
- 'md5': 'd4aaf1408b221f1b38227a9bbaeb95bc',
- 'info_dict': {
- 'id': '1124',
- 'ext': 'mp4',
- 'title': 'ArcGIS Online - Developing Applications',
- 'description': 'Jeremy Bartley demonstrates how to develop applications with ArcGIS Online.',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 185,
- 'upload_date': '20120419',
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- formats = []
- for width, height, content in re.findall(
- r'(?s)<li><strong>(\d+)x(\d+):</strong>(.+?)</li>', webpage):
- for video_url, ext, filesize in re.findall(
- r'<a[^>]+href="([^"]+)">([^<]+) \(([^<]+)\)</a>', content):
- formats.append({
- 'url': compat_urlparse.urljoin(url, video_url),
- 'ext': ext.lower(),
- 'format_id': '%s-%s' % (ext.lower(), height),
- 'width': int(width),
- 'height': int(height),
- 'filesize_approx': parse_filesize(filesize),
- })
-
- title = self._html_search_meta('title', webpage, 'title')
- description = self._html_search_meta(
- 'description', webpage, 'description', fatal=False)
-
- thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail', fatal=False)
- if thumbnail:
- thumbnail = re.sub(r'_[st]\.jpg$', '_x.jpg', thumbnail)
-
- duration = int_or_none(self._search_regex(
- [r'var\s+videoSeconds\s*=\s*(\d+)', r"'duration'\s*:\s*(\d+)"],
- webpage, 'duration', fatal=False))
-
- upload_date = unified_strdate(self._html_search_meta(
- 'last-modified', webpage, 'upload date', fatal=False))
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'upload_date': upload_date,
- 'formats': formats
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- unified_strdate,
-)
-
-
-class ExpoTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
- _TEST = {
- 'url': 'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916',
- 'md5': 'fe1d728c3a813ff78f595bc8b7a707a8',
- 'info_dict': {
- 'id': '667916',
- 'ext': 'mp4',
- 'title': 'NYX Butter Lipstick Little Susie',
- 'description': 'Goes on like butter, but looks better!',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Stephanie S.',
- 'upload_date': '20150520',
- 'view_count': int,
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
- player_key = self._search_regex(
- r'<param name="playerKey" value="([^"]+)"', webpage, 'player key')
- config = self._download_json(
- 'http://client.expotv.com/video/config/%s/%s' % (video_id, player_key),
- video_id, 'Downloading video configuration')
-
- formats = []
- for fcfg in config['sources']:
- media_url = fcfg.get('file')
- if not media_url:
- continue
- if fcfg.get('type') == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- media_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls'))
- else:
- formats.append({
- 'url': media_url,
- 'height': int_or_none(fcfg.get('height')),
- 'format_id': fcfg.get('label'),
- 'ext': self._search_regex(
- r'filename=.*\.([a-z0-9_A-Z]+)&', media_url,
- 'file extension', default=None) or fcfg.get('type'),
- })
-
- title = self._og_search_title(webpage)
- description = self._og_search_description(webpage)
- thumbnail = config.get('image')
- view_count = int_or_none(self._search_regex(
- r'<h5>Plays: ([0-9]+)</h5>', webpage, 'view counts'))
- uploader = self._search_regex(
- r'<div class="reviewer">\s*<img alt="([^"]+)"', webpage, 'uploader',
- fatal=False)
- upload_date = unified_strdate(self._search_regex(
- r'<h5>Reviewed on ([0-9/.]+)</h5>', webpage, 'upload date',
- fatal=False), day_first=False)
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': title,
- 'description': description,
- 'view_count': view_count,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'upload_date': upload_date,
- }
+++ /dev/null
-from ..utils import str_to_int
-from .keezmovies import KeezMoviesIE
-
-
-class ExtremeTubeIE(KeezMoviesIE): # XXX: Do not subclass from concrete IE
- _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
- _TESTS = [{
- 'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
- 'md5': '92feaafa4b58e82f261e5419f39c60cb',
- 'info_dict': {
- 'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431',
- 'ext': 'mp4',
- 'title': 'Music Video 14 british euro brit european cumshots swallow',
- 'uploader': 'anonim',
- 'view_count': int,
- 'age_limit': 18,
- }
- }, {
- 'url': 'http://www.extremetube.com/gay/video/abcde-1234',
- 'only_matching': True,
- }, {
- 'url': 'http://www.extremetube.com/video/latina-slut-fucked-by-fat-black-dick',
- 'only_matching': True,
- }, {
- 'url': 'http://www.extremetube.com/video/652431',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- webpage, info = self._extract_info(url)
-
- if not info['title']:
- info['title'] = self._search_regex(
- r'<h1[^>]+title="([^"]+)"[^>]*>', webpage, 'title')
-
- uploader = self._html_search_regex(
- r'Uploaded by:\s*</[^>]+>\s*<a[^>]+>(.+?)</a>',
- webpage, 'uploader', fatal=False)
- view_count = str_to_int(self._search_regex(
- r'Views:\s*</[^>]+>\s*<[^>]+>([\d,\.]+)</',
- webpage, 'view count', fatal=False))
-
- info.update({
- 'uploader': uploader,
- 'view_count': view_count,
- })
-
- return info
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import traverse_obj, unified_timestamp
-
-
-class FourZeroStudioArchiveIE(InfoExtractor):
- _VALID_URL = r'https?://0000\.studio/(?P<uploader_id>[^/]+)/broadcasts/(?P<id>[^/]+)/archive'
- IE_NAME = '0000studio:archive'
- _TESTS = [{
- 'url': 'https://0000.studio/mumeijiten/broadcasts/1290f433-fce0-4909-a24a-5f7df09665dc/archive',
- 'info_dict': {
- 'id': '1290f433-fce0-4909-a24a-5f7df09665dc',
- 'title': 'noteで『canape』様へのファンレターを執筆します。(数秘術その2)',
- 'timestamp': 1653802534,
- 'release_timestamp': 1653796604,
- 'thumbnails': 'count:1',
- 'comments': 'count:7',
- 'uploader': '『中崎雄心』の執務室。',
- 'uploader_id': 'mumeijiten',
- }
- }]
-
- def _real_extract(self, url):
- video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
- webpage = self._download_webpage(url, video_id)
- nuxt_data = self._search_nuxt_data(webpage, video_id, traverse=None)
-
- pcb = traverse_obj(nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorBroadcast'), get_all=False)
- uploader_internal_id = traverse_obj(nuxt_data, (
- 'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'id'), get_all=False)
-
- formats, subs = self._extract_m3u8_formats_and_subtitles(pcb['archiveUrl'], video_id, ext='mp4')
-
- return {
- 'id': video_id,
- 'title': pcb.get('title'),
- 'age_limit': 18 if pcb.get('isAdult') else None,
- 'timestamp': unified_timestamp(pcb.get('finishTime')),
- 'release_timestamp': unified_timestamp(pcb.get('createdAt')),
- 'thumbnails': [{
- 'url': pcb['thumbnailUrl'],
- 'ext': 'png',
- }] if pcb.get('thumbnailUrl') else None,
- 'formats': formats,
- 'subtitles': subs,
- 'comments': [{
- 'author': c.get('username'),
- 'author_id': c.get('postedUserId'),
- 'author_thumbnail': c.get('userThumbnailUrl'),
- 'id': c.get('id'),
- 'text': c.get('body'),
- 'timestamp': unified_timestamp(c.get('createdAt')),
- 'like_count': c.get('likeCount'),
- 'is_favorited': c.get('isLikedByOwner'),
- 'author_is_uploader': c.get('postedUserId') == uploader_internal_id,
- } for c in traverse_obj(nuxt_data, (
- 'ssrRefs', ..., lambda _, v: v['__typename'] == 'PublicCreatorBroadcastComment')) or []],
- 'uploader_id': uploader_id,
- 'uploader': traverse_obj(nuxt_data, (
- 'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'username'), get_all=False),
- }
-
-
-class FourZeroStudioClipIE(InfoExtractor):
- _VALID_URL = r'https?://0000\.studio/(?P<uploader_id>[^/]+)/archive-clip/(?P<id>[^/]+)'
- IE_NAME = '0000studio:clip'
- _TESTS = [{
- 'url': 'https://0000.studio/soeji/archive-clip/e46b0278-24cd-40a8-92e1-b8fc2b21f34f',
- 'info_dict': {
- 'id': 'e46b0278-24cd-40a8-92e1-b8fc2b21f34f',
- 'title': 'わたベーさんからイラスト差し入れいただきました。ありがとうございました!',
- 'timestamp': 1652109105,
- 'like_count': 1,
- 'uploader': 'ソエジマケイタ',
- 'uploader_id': 'soeji',
- }
- }]
-
- def _real_extract(self, url):
- video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
- webpage = self._download_webpage(url, video_id)
- nuxt_data = self._search_nuxt_data(webpage, video_id, traverse=None)
-
- clip_info = traverse_obj(nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorArchivedClip'), get_all=False)
-
- info = next((
- m for m in self._parse_html5_media_entries(url, webpage, video_id)
- if 'mp4' in traverse_obj(m, ('formats', ..., 'ext'))
- ), None)
- if not info:
- self.report_warning('Failed to find a desired media element. Falling back to using NUXT data.')
- info = {
- 'formats': [{
- 'ext': 'mp4',
- 'url': url,
- } for url in clip_info.get('mediaFiles') or [] if url],
- }
- return {
- **info,
- 'id': video_id,
- 'title': clip_info.get('clipComment'),
- 'timestamp': unified_timestamp(clip_info.get('createdAt')),
- 'like_count': clip_info.get('likeCount'),
- 'uploader_id': uploader_id,
- 'uploader': traverse_obj(nuxt_data, (
- 'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'username'), get_all=False),
- }
+++ /dev/null
-import itertools
-
-from .common import InfoExtractor
-from ..utils import (
- get_element_by_id,
- int_or_none,
- remove_end,
-)
-
-
-class FoxgayIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
- _TEST = {
- 'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',
- 'md5': '344558ccfea74d33b7adbce22e577f54',
- 'info_dict': {
- 'id': '2582',
- 'ext': 'mp4',
- 'title': 'Fuck Turkish-style',
- 'description': 'md5:6ae2d9486921891efe89231ace13ffdf',
- 'age_limit': 18,
- 'thumbnail': r're:https?://.*\.jpg$',
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- title = remove_end(self._html_extract_title(webpage), ' - Foxgay.com')
- description = get_element_by_id('inf_tit', webpage)
-
- # The default user-agent with foxgay cookies leads to pages without videos
- self.cookiejar.clear('.foxgay.com')
- # Find the URL for the iFrame which contains the actual video.
- iframe_url = self._html_search_regex(
- r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1', webpage,
- 'video frame', group='url')
- iframe = self._download_webpage(
- iframe_url, video_id, headers={'User-Agent': 'curl/7.50.1'},
- note='Downloading video frame')
- video_data = self._parse_json(self._search_regex(
- r'video_data\s*=\s*([^;]+);', iframe, 'video data'), video_id)
-
- formats = [{
- 'url': source,
- 'height': int_or_none(resolution),
- } for source, resolution in zip(
- video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))]
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'description': description,
- 'thumbnail': video_data.get('act_vid', {}).get('thumb'),
- 'age_limit': 18,
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- int_or_none,
- mimetype2ext,
- parse_iso8601,
-)
-
-
-class FusionIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/(?:video/|show/.+?\bvideo=)(?P<id>\d+)'
- _TESTS = [{
- 'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
- 'info_dict': {
- 'id': '3145868',
- 'ext': 'mp4',
- 'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs',
- 'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7',
- 'duration': 140.0,
- 'timestamp': 1442589635,
- 'uploader': 'UNIVISON',
- 'upload_date': '20150918',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': ['Anvato'],
- }, {
- 'url': 'http://fusion.tv/video/201781',
- 'only_matching': True,
- }, {
- 'url': 'https://fusion.tv/show/food-exposed-with-nelufar-hedayat/?ancla=full-episodes&video=588644',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- video = self._download_json(
- 'https://platform.fusion.net/wp-json/fusiondotnet/v1/video/' + video_id, video_id)
-
- info = {
- 'id': video_id,
- 'title': video['title'],
- 'description': video.get('excerpt'),
- 'timestamp': parse_iso8601(video.get('published')),
- 'series': video.get('show'),
- }
-
- formats = []
- src = video.get('src') or {}
- for f_id, f in src.items():
- for q_id, q in f.items():
- q_url = q.get('url')
- if not q_url:
- continue
- ext = determine_ext(q_url, mimetype2ext(q.get('type')))
- if ext == 'smil':
- formats.extend(self._extract_smil_formats(q_url, video_id, fatal=False))
- elif f_id == 'm3u8-variant' or (ext == 'm3u8' and q_id == 'Variant'):
- formats.extend(self._extract_m3u8_formats(
- q_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
- else:
- formats.append({
- 'format_id': '-'.join([f_id, q_id]),
- 'url': q_url,
- 'width': int_or_none(q.get('width')),
- 'height': int_or_none(q.get('height')),
- 'tbr': int_or_none(self._search_regex(r'_(\d+)\.m(?:p4|3u8)', q_url, 'bitrate')),
- 'ext': 'mp4' if ext == 'm3u8' else ext,
- 'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
- })
- if formats:
- info['formats'] = formats
- else:
- info.update({
- '_type': 'url',
- 'url': 'anvato:uni:' + video['video_ids']['anvato'],
- 'ie_key': 'Anvato',
- })
-
- return info
},
'skip': 'There is a limit of 200 free downloads / month for the test song',
},
- # ooyala video
- {
- 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
- 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
- 'info_dict': {
- 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
- 'ext': 'mp4',
- 'title': '2cc213299525360.mov', # that's what we get
- 'duration': 238.231,
- },
- 'add_ie': ['Ooyala'],
- },
- {
- # ooyala video embedded with http://player.ooyala.com/iframe.js
- 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
- 'info_dict': {
- 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
- 'ext': 'mp4',
- 'title': '"Steve Jobs: Man in the Machine" trailer',
- 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
- 'duration': 135.427,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'movie expired',
- },
- # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
- {
- 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
- 'info_dict': {
- 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
- 'ext': 'mp4',
- 'title': 'Steampunk Fest Comes to Honesdale',
- 'duration': 43.276,
- },
- 'params': {
- 'skip_download': True,
- }
- },
# embed.ly video
{
'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
'title': 'Ужастики, русский трейлер (2015)',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 153,
- }
+ },
+ 'skip': 'Site dead',
},
# XHamster embed
{
'playlist_mincount': 1,
'add_ie': ['Youtube'],
},
- # Cinchcast embed
+ # Libsyn embed
{
'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
'info_dict': {
- 'id': '7141703',
+ 'id': '3793998',
'ext': 'mp3',
'upload_date': '20141126',
- 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
+ 'title': 'Underground Wellness Radio - Jack Tips: 5 Steps to Permanent Gut Healing',
+ 'thumbnail': 'https://assets.libsyn.com/secure/item/3793998/?height=90&width=90',
+ 'duration': 3989.0,
}
},
# Cinerama player
'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
},
},
- {
- # vzaar embed
- 'url': 'http://help.vzaar.com/article/165-embedding-video',
- 'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
- 'info_dict': {
- 'id': '8707641',
- 'ext': 'mp4',
- 'title': 'Building A Business Online: Principal Chairs Q & A',
- },
- },
{
# multiple HTML5 videos on one page
'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- float_or_none,
- qualities,
- ExtractorError,
-)
-
-
-class GfycatIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?i:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\."\']+)'
- _EMBED_REGEX = [rf'<(?:iframe|source)[^>]+\bsrc=["\'](?P<url>{_VALID_URL})']
- _TESTS = [{
- 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
- 'info_dict': {
- 'id': 'DeadlyDecisiveGermanpinscher',
- 'ext': 'mp4',
- 'title': 'Ghost in the Shell',
- 'timestamp': 1410656006,
- 'upload_date': '20140914',
- 'uploader': 'anonymous',
- 'duration': 10.4,
- 'view_count': int,
- 'like_count': int,
- 'categories': list,
- 'age_limit': 0,
- 'uploader_id': 'anonymous',
- 'description': '',
- }
- }, {
- 'url': 'http://gfycat.com/ifr/JauntyTimelyAmazontreeboa',
- 'info_dict': {
- 'id': 'JauntyTimelyAmazontreeboa',
- 'ext': 'mp4',
- 'title': 'JauntyTimelyAmazontreeboa',
- 'timestamp': 1411720126,
- 'upload_date': '20140926',
- 'uploader': 'anonymous',
- 'duration': 3.52,
- 'view_count': int,
- 'like_count': int,
- 'categories': list,
- 'age_limit': 0,
- 'uploader_id': 'anonymous',
- 'description': '',
- }
- }, {
- 'url': 'https://gfycat.com/alienatedsolidgreathornedowl',
- 'info_dict': {
- 'id': 'alienatedsolidgreathornedowl',
- 'ext': 'mp4',
- 'upload_date': '20211226',
- 'uploader_id': 'reactions',
- 'timestamp': 1640536930,
- 'like_count': int,
- 'description': '',
- 'title': 'Ingrid Michaelson, Zooey Deschanel - Merry Christmas Happy New Year',
- 'categories': list,
- 'age_limit': 0,
- 'duration': 2.9583333333333335,
- 'uploader': 'Reaction GIFs',
- 'view_count': int,
- }
- }, {
- 'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish',
- 'only_matching': True
- }, {
- 'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
- 'only_matching': True
- }, {
- 'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball',
- 'only_matching': True
- }, {
- 'url': 'https://thumbs.gfycat.com/acceptablehappygoluckyharborporpoise-size_restricted.gif',
- 'only_matching': True
- }, {
- 'url': 'https://giant.gfycat.com/acceptablehappygoluckyharborporpoise.mp4',
- 'only_matching': True
- }, {
- 'url': 'http://gfycat.com/IFR/JauntyTimelyAmazontreeboa',
- 'only_matching': True
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- gfy = self._download_json(
- 'https://api.gfycat.com/v1/gfycats/%s' % video_id,
- video_id, 'Downloading video info')
- if 'error' in gfy:
- raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)
- gfy = gfy['gfyItem']
-
- title = gfy.get('title') or gfy['gfyName']
- description = gfy.get('description')
- timestamp = int_or_none(gfy.get('createDate'))
- uploader = gfy.get('userName') or gfy.get('username')
- view_count = int_or_none(gfy.get('views'))
- like_count = int_or_none(gfy.get('likes'))
- dislike_count = int_or_none(gfy.get('dislikes'))
- age_limit = 18 if gfy.get('nsfw') == '1' else 0
-
- width = int_or_none(gfy.get('width'))
- height = int_or_none(gfy.get('height'))
- fps = int_or_none(gfy.get('frameRate'))
- num_frames = int_or_none(gfy.get('numFrames'))
-
- duration = float_or_none(num_frames, fps) if num_frames and fps else None
-
- categories = gfy.get('tags') or gfy.get('extraLemmas') or []
-
- FORMATS = ('gif', 'webm', 'mp4')
- quality = qualities(FORMATS)
-
- formats = []
- for format_id in FORMATS:
- video_url = gfy.get('%sUrl' % format_id)
- if not video_url:
- continue
- filesize = int_or_none(gfy.get('%sSize' % format_id))
- formats.append({
- 'url': video_url,
- 'format_id': format_id,
- 'width': width,
- 'height': height,
- 'fps': fps,
- 'filesize': filesize,
- 'quality': quality(format_id),
- })
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'timestamp': timestamp,
- 'uploader': gfy.get('userDisplayName') or uploader,
- 'uploader_id': uploader,
- 'duration': duration,
- 'view_count': view_count,
- 'like_count': like_count,
- 'dislike_count': dislike_count,
- 'categories': categories,
- 'age_limit': age_limit,
- 'formats': formats,
- }
}
_PROVIDERS = {
- 'ooyala': ('ooyala:%s', 'Ooyala'),
'youtube': ('%s', 'Youtube'),
}
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import js_to_json
-
-
-class HelsinkiIE(InfoExtractor):
- IE_DESC = 'helsinki.fi'
- _VALID_URL = r'https?://video\.helsinki\.fi/Arkisto/flash\.php\?id=(?P<id>\d+)'
- _TEST = {
- 'url': 'http://video.helsinki.fi/Arkisto/flash.php?id=20258',
- 'info_dict': {
- 'id': '20258',
- 'ext': 'mp4',
- 'title': 'Tietotekniikkafoorumi-iltapäivä',
- 'description': 'md5:f5c904224d43c133225130fe156a5ee0',
- },
- 'params': {
- 'skip_download': True, # RTMP
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- params = self._parse_json(self._html_search_regex(
- r'(?s)jwplayer\("player"\).setup\((\{.*?\})\);',
- webpage, 'player code'), video_id, transform_source=js_to_json)
- formats = [{
- 'url': s['file'],
- 'ext': 'mp4',
- } for s in params['sources']]
-
- return {
- 'id': video_id,
- 'title': self._og_search_title(webpage).replace('Video: ', ''),
- 'description': self._og_search_description(webpage),
- 'formats': formats,
- }
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- clean_html,
- determine_ext,
- float_or_none,
- int_or_none,
- parse_iso8601,
-)
-
-
-class HitboxIE(InfoExtractor):
- IE_NAME = 'hitbox'
- _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?:[^/]+/)*videos?/(?P<id>[0-9]+)'
- _TESTS = [{
- 'url': 'http://www.hitbox.tv/video/203213',
- 'info_dict': {
- 'id': '203213',
- 'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
- 'alt_title': 'hitboxlive - Aug 9th #6',
- 'description': '',
- 'ext': 'mp4',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 215.1666,
- 'resolution': 'HD 720p',
- 'uploader': 'hitboxlive',
- 'view_count': int,
- 'timestamp': 1407576133,
- 'upload_date': '20140809',
- 'categories': ['Live Show'],
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.smashcast.tv/hitboxlive/videos/203213',
- 'only_matching': True,
- }]
-
- def _extract_metadata(self, url, video_id):
- thumb_base = 'https://edge.sf.hitbox.tv'
- metadata = self._download_json(
- '%s/%s' % (url, video_id), video_id, 'Downloading metadata JSON')
-
- date = 'media_live_since'
- media_type = 'livestream'
- if metadata.get('media_type') == 'video':
- media_type = 'video'
- date = 'media_date_added'
-
- video_meta = metadata.get(media_type, [])[0]
- title = video_meta.get('media_status')
- alt_title = video_meta.get('media_title')
- description = clean_html(
- video_meta.get('media_description')
- or video_meta.get('media_description_md'))
- duration = float_or_none(video_meta.get('media_duration'))
- uploader = video_meta.get('media_user_name')
- views = int_or_none(video_meta.get('media_views'))
- timestamp = parse_iso8601(video_meta.get(date), ' ')
- categories = [video_meta.get('category_name')]
- thumbs = [{
- 'url': thumb_base + video_meta.get('media_thumbnail'),
- 'width': 320,
- 'height': 180
- }, {
- 'url': thumb_base + video_meta.get('media_thumbnail_large'),
- 'width': 768,
- 'height': 432
- }]
-
- return {
- 'id': video_id,
- 'title': title,
- 'alt_title': alt_title,
- 'description': description,
- 'ext': 'mp4',
- 'thumbnails': thumbs,
- 'duration': duration,
- 'uploader': uploader,
- 'view_count': views,
- 'timestamp': timestamp,
- 'categories': categories,
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- player_config = self._download_json(
- 'https://www.smashcast.tv/api/player/config/video/%s' % video_id,
- video_id, 'Downloading video JSON')
-
- formats = []
- for video in player_config['clip']['bitrates']:
- label = video.get('label')
- if label == 'Auto':
- continue
- video_url = video.get('url')
- if not video_url:
- continue
- bitrate = int_or_none(video.get('bitrate'))
- if determine_ext(video_url) == 'm3u8':
- if not video_url.startswith('http'):
- continue
- formats.append({
- 'url': video_url,
- 'ext': 'mp4',
- 'tbr': bitrate,
- 'format_note': label,
- 'protocol': 'm3u8_native',
- })
- else:
- formats.append({
- 'url': video_url,
- 'tbr': bitrate,
- 'format_note': label,
- })
-
- metadata = self._extract_metadata(
- 'https://www.smashcast.tv/api/media/video', video_id)
- metadata['formats'] = formats
-
- return metadata
-
-
-class HitboxLiveIE(HitboxIE): # XXX: Do not subclass from concrete IE
- IE_NAME = 'hitbox:live'
- _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'http://www.hitbox.tv/dimak',
- 'info_dict': {
- 'id': 'dimak',
- 'ext': 'mp4',
- 'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
- 'timestamp': int,
- 'upload_date': compat_str,
- 'title': compat_str,
- 'uploader': 'Dimak',
- },
- 'params': {
- # live
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.smashcast.tv/dimak',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if HitboxIE.suitable(url) else super(HitboxLiveIE, cls).suitable(url)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- player_config = self._download_json(
- 'https://www.smashcast.tv/api/player/config/live/%s' % video_id,
- video_id)
-
- formats = []
- cdns = player_config.get('cdns')
- servers = []
- for cdn in cdns:
- # Subscribe URLs are not playable
- if cdn.get('rtmpSubscribe') is True:
- continue
- base_url = cdn.get('netConnectionUrl')
- host = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url).group(1)
- if base_url not in servers:
- servers.append(base_url)
- for stream in cdn.get('bitrates'):
- label = stream.get('label')
- if label == 'Auto':
- continue
- stream_url = stream.get('url')
- if not stream_url:
- continue
- bitrate = int_or_none(stream.get('bitrate'))
- if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
- if not stream_url.startswith('http'):
- continue
- formats.append({
- 'url': stream_url,
- 'ext': 'mp4',
- 'tbr': bitrate,
- 'format_note': label,
- 'rtmp_live': True,
- })
- else:
- formats.append({
- 'url': '%s/%s' % (base_url, stream_url),
- 'ext': 'mp4',
- 'tbr': bitrate,
- 'rtmp_live': True,
- 'format_note': host,
- 'page_url': url,
- 'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
- })
-
- metadata = self._extract_metadata(
- 'https://www.smashcast.tv/api/media/live', video_id)
- metadata['formats'] = formats
- metadata['is_live'] = True
- metadata['title'] = metadata.get('title')
-
- return metadata
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import parse_iso8601
-
-
-class HowcastIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
- _TEST = {
- 'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
- 'md5': '7d45932269a288149483144f01b99789',
- 'info_dict': {
- 'id': '390161',
- 'ext': 'mp4',
- 'title': 'How to Tie a Square Knot Properly',
- 'description': 'md5:dbe792e5f6f1489027027bf2eba188a3',
- 'timestamp': 1276081287,
- 'upload_date': '20100609',
- 'duration': 56.823,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': ['Ooyala'],
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- embed_code = self._search_regex(
- r'<iframe[^>]+src="[^"]+\bembed_code=([^\b]+)\b',
- webpage, 'ooyala embed code')
-
- return {
- '_type': 'url_transparent',
- 'ie_key': 'Ooyala',
- 'url': 'ooyala:%s' % embed_code,
- 'id': video_id,
- 'timestamp': parse_iso8601(self._html_search_meta(
- 'article:published_time', webpage, 'timestamp')),
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- find_xpath_attr,
- int_or_none,
- js_to_json,
- unescapeHTML,
- determine_ext,
-)
-
-
-class HowStuffWorksIE(InfoExtractor):
- _VALID_URL = r'https?://[\da-z-]+\.(?:howstuffworks|stuff(?:(?:youshould|theydontwantyouto)know|toblowyourmind|momnevertoldyou)|(?:brain|car)stuffshow|fwthinking|geniusstuff)\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
- _TESTS = [
- {
- 'url': 'http://www.stufftoblowyourmind.com/videos/optical-illusions-video.htm',
- 'md5': '76646a5acc0c92bf7cd66751ca5db94d',
- 'info_dict': {
- 'id': '855410',
- 'ext': 'mp4',
- 'title': 'Your Trickster Brain: Optical Illusions -- Science on the Web',
- 'description': 'md5:e374ff9561f6833ad076a8cc0a5ab2fb',
- },
- },
- {
- 'url': 'http://shows.howstuffworks.com/more-shows/why-does-balloon-stick-to-hair-video.htm',
- 'only_matching': True,
- }
- ]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- clip_js = self._search_regex(
- r'(?s)var clip = ({.*?});', webpage, 'clip info')
- clip_info = self._parse_json(
- clip_js, display_id, transform_source=js_to_json)
-
- video_id = clip_info['content_id']
- formats = []
- m3u8_url = clip_info.get('m3u8')
- if m3u8_url and determine_ext(m3u8_url) == 'm3u8':
- formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', format_id='hls', fatal=True))
- flv_url = clip_info.get('flv_url')
- if flv_url:
- formats.append({
- 'url': flv_url,
- 'format_id': 'flv',
- })
- for video in clip_info.get('mp4', []):
- formats.append({
- 'url': video['src'],
- 'format_id': 'mp4-%s' % video['bitrate'],
- 'vbr': int_or_none(video['bitrate'].rstrip('k')),
- })
-
- if not formats:
- smil = self._download_xml(
- 'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % video_id,
- video_id, 'Downloading video SMIL')
-
- http_base = find_xpath_attr(
- smil,
- './{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'),
- 'name',
- 'httpBase').get('content')
-
- URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=A&g=A'
-
- for video in smil.findall(
- './{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')):
- vbr = int_or_none(video.attrib['system-bitrate'], scale=1000)
- formats.append({
- 'url': '%s/%s%s' % (http_base, video.attrib['src'], URL_SUFFIX),
- 'format_id': '%dk' % vbr,
- 'vbr': vbr,
- })
-
- return {
- 'id': '%s' % video_id,
- 'display_id': display_id,
- 'title': unescapeHTML(clip_info['clip_title']),
- 'description': unescapeHTML(clip_info.get('caption')),
- 'thumbnail': clip_info.get('video_still_url'),
- 'duration': int_or_none(clip_info.get('duration')),
- 'formats': formats,
- }
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..aes import aes_decrypt_text
-from ..compat import compat_urllib_parse_unquote
-from ..utils import (
- determine_ext,
- format_field,
- int_or_none,
- str_to_int,
- strip_or_none,
- url_or_none,
-)
-
-
-class KeezMoviesIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
- _TESTS = [{
- 'url': 'https://www.keezmovies.com/video/arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money-18070681',
- 'md5': '2ac69cdb882055f71d82db4311732a1a',
- 'info_dict': {
- 'id': '18070681',
- 'display_id': 'arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money',
- 'ext': 'mp4',
- 'title': 'Arab wife want it so bad I see she thirsty and has tiny money.',
- 'thumbnail': None,
- 'view_count': int,
- 'age_limit': 18,
- }
- }, {
- 'url': 'http://www.keezmovies.com/video/18070681',
- 'only_matching': True,
- }]
-
- def _extract_info(self, url, fatal=True):
- mobj = self._match_valid_url(url)
- video_id = mobj.group('id')
- display_id = (mobj.group('display_id')
- if 'display_id' in mobj.groupdict()
- else None) or mobj.group('id')
-
- webpage = self._download_webpage(
- url, display_id, headers={'Cookie': 'age_verified=1'})
-
- formats = []
- format_urls = set()
-
- title = None
- thumbnail = None
- duration = None
- encrypted = False
-
- def extract_format(format_url, height=None):
- format_url = url_or_none(format_url)
- if not format_url or not format_url.startswith(('http', '//')):
- return
- if format_url in format_urls:
- return
- format_urls.add(format_url)
- tbr = int_or_none(self._search_regex(
- r'[/_](\d+)[kK][/_]', format_url, 'tbr', default=None))
- if not height:
- height = int_or_none(self._search_regex(
- r'[/_](\d+)[pP][/_]', format_url, 'height', default=None))
- if encrypted:
- format_url = aes_decrypt_text(
- video_url, title, 32).decode('utf-8')
- formats.append({
- 'url': format_url,
- 'format_id': format_field(height, None, '%dp'),
- 'height': height,
- 'tbr': tbr,
- })
-
- flashvars = self._parse_json(
- self._search_regex(
- r'flashvars\s*=\s*({.+?});', webpage,
- 'flashvars', default='{}'),
- display_id, fatal=False)
-
- if flashvars:
- title = flashvars.get('video_title')
- thumbnail = flashvars.get('image_url')
- duration = int_or_none(flashvars.get('video_duration'))
- encrypted = flashvars.get('encrypted') is True
- for key, value in flashvars.items():
- mobj = re.search(r'quality_(\d+)[pP]', key)
- if mobj:
- extract_format(value, int(mobj.group(1)))
- video_url = flashvars.get('video_url')
- if video_url and determine_ext(video_url, None):
- extract_format(video_url)
-
- video_url = self._html_search_regex(
- r'flashvars\.video_url\s*=\s*(["\'])(?P<url>http.+?)\1',
- webpage, 'video url', default=None, group='url')
- if video_url:
- extract_format(compat_urllib_parse_unquote(video_url))
-
- if not formats:
- if 'title="This video is no longer available"' in webpage:
- self.raise_no_formats(
- 'Video %s is no longer available' % video_id, expected=True)
-
- if not title:
- title = self._html_search_regex(
- r'<h1[^>]*>([^<]+)', webpage, 'title')
-
- return webpage, {
- 'id': video_id,
- 'display_id': display_id,
- 'title': strip_or_none(title),
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'age_limit': 18,
- 'formats': formats,
- }
-
- def _real_extract(self, url):
- webpage, info = self._extract_info(url, fatal=False)
- if not info['formats']:
- return self.url_result(url, 'Generic')
- info['view_count'] = str_to_int(self._search_regex(
- r'<b>([\d,.]+)</b> Views?', webpage, 'view count', fatal=False))
- return info
kinjavideo|
mcp|
megaphone|
- ooyala|
soundcloud(?:-playlist)?|
tumblr-post|
twitch-stream|
}, {
'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075',
'only_matching': True,
- }, {
- 'url': 'https://kinja.com/ajax/inset/iframe?id=ooyala-xzMXhleDpopuT0u1ijt_qZj3Va-34pEX%2FZTIxYmJjZDM2NWYzZDViZGRiOWJjYzc5',
- 'only_matching': True,
}, {
'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047',
'only_matching': True,
'jwplayer-video': _JWPLATFORM_PROVIDER,
'jwp-video': _JWPLATFORM_PROVIDER,
'megaphone': ('player.megaphone.fm/', 'Generic'),
- 'ooyala': ('player.ooyala.com/player.js?embedCode=', 'Ooyala'),
'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'),
'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'),
'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'),
video_id, playlist_id = video_id.split('/')
result_url = provider[0] % (video_id, playlist_id)
else:
- if video_type == 'ooyala':
- video_id = video_id.split('/')[0]
result_url = provider[0] + video_id
return self.url_result('http://' + result_url, provider[1])
+++ /dev/null
-import json
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- unified_strdate,
- urlencode_postdata,
- xpath_element,
- xpath_text,
- update_url_query,
- js_to_json,
-)
-
-
-class Laola1TvEmbedIE(InfoExtractor):
- IE_NAME = 'laola1tv:embed'
- _VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P<id>\d+)'
- _TESTS = [{
- # flashvars.premium = "false";
- 'url': 'https://www.laola1.tv/titanplayer.php?videoid=708065&type=V&lang=en&portal=int&customer=1024',
- 'info_dict': {
- 'id': '708065',
- 'ext': 'mp4',
- 'title': 'MA Long CHN - FAN Zhendong CHN',
- 'uploader': 'ITTF - International Table Tennis Federation',
- 'upload_date': '20161211',
- },
- }]
-
- def _extract_token_url(self, stream_access_url, video_id, data):
- return self._download_json(
- self._proto_relative_url(stream_access_url, 'https:'), video_id,
- headers={
- 'Content-Type': 'application/json',
- }, data=json.dumps(data).encode())['data']['stream-access'][0]
-
- def _extract_formats(self, token_url, video_id):
- token_doc = self._download_xml(
- token_url, video_id, 'Downloading token',
- headers=self.geo_verification_headers())
-
- token_attrib = xpath_element(token_doc, './/token').attrib
-
- if token_attrib['status'] != '0':
- raise ExtractorError(
- 'Token error: %s' % token_attrib['comment'], expected=True)
-
- formats = self._extract_akamai_formats(
- '%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
- video_id)
- return formats
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- flash_vars = self._search_regex(
- r'(?s)flashvars\s*=\s*({.+?});', webpage, 'flash vars')
-
- def get_flashvar(x, *args, **kwargs):
- flash_var = self._search_regex(
- r'%s\s*:\s*"([^"]+)"' % x,
- flash_vars, x, default=None)
- if not flash_var:
- flash_var = self._search_regex([
- r'flashvars\.%s\s*=\s*"([^"]+)"' % x,
- r'%s\s*=\s*"([^"]+)"' % x],
- webpage, x, *args, **kwargs)
- return flash_var
-
- hd_doc = self._download_xml(
- 'http://www.laola1.tv/server/hd_video.php', video_id, query={
- 'play': get_flashvar('streamid'),
- 'partner': get_flashvar('partnerid'),
- 'portal': get_flashvar('portalid'),
- 'lang': get_flashvar('sprache'),
- 'v5ident': '',
- })
-
- _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)
- title = _v('title', fatal=True)
-
- token_url = None
- premium = get_flashvar('premium', default=None)
- if premium:
- token_url = update_url_query(
- _v('url', fatal=True), {
- 'timestamp': get_flashvar('timestamp'),
- 'auth': get_flashvar('auth'),
- })
- else:
- data_abo = urlencode_postdata(
- dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(','))))
- stream_access_url = update_url_query(
- 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access', {
- 'videoId': _v('id'),
- 'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'),
- 'label': _v('label'),
- 'area': _v('area'),
- })
- token_url = self._extract_token_url(stream_access_url, video_id, data_abo)
-
- formats = self._extract_formats(token_url, video_id)
-
- categories_str = _v('meta_sports')
- categories = categories_str.split(',') if categories_str else []
- is_live = _v('islive') == 'true'
-
- return {
- 'id': video_id,
- 'title': title,
- 'upload_date': unified_strdate(_v('time_date')),
- 'uploader': _v('meta_organisation'),
- 'categories': categories,
- 'is_live': is_live,
- 'formats': formats,
- }
-
-
-class Laola1TvBaseIE(Laola1TvEmbedIE): # XXX: Do not subclass from concrete IE
- def _extract_video(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
-
- if 'Dieser Livestream ist bereits beendet.' in webpage:
- raise ExtractorError('This live stream has already finished.', expected=True)
-
- conf = self._parse_json(self._search_regex(
- r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
- display_id,
- transform_source=lambda s: js_to_json(re.sub(r'shareurl:.+,', '', s)))
- video_id = conf['videoid']
-
- config = self._download_json(conf['configUrl'], video_id, query={
- 'videoid': video_id,
- 'partnerid': conf['partnerid'],
- 'language': conf.get('language', ''),
- 'portal': conf.get('portalid', ''),
- })
- error = config.get('error')
- if error:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
-
- video_data = config['video']
- title = video_data['title']
- is_live = video_data.get('isLivestream') and video_data.get('isLive')
- meta = video_data.get('metaInformation')
- sports = meta.get('sports')
- categories = sports.split(',') if sports else []
-
- token_url = self._extract_token_url(
- video_data['streamAccess'], video_id,
- video_data['abo']['required'])
-
- formats = self._extract_formats(token_url, video_id)
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': video_data.get('description'),
- 'thumbnail': video_data.get('image'),
- 'categories': categories,
- 'formats': formats,
- 'is_live': is_live,
- }
-
-
-class Laola1TvIE(Laola1TvBaseIE):
- IE_NAME = 'laola1tv'
- _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
-
- _TESTS = [{
- 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
- 'info_dict': {
- 'id': '227883',
- 'display_id': 'straubing-tigers-koelner-haie',
- 'ext': 'flv',
- 'title': 'Straubing Tigers - Kölner Haie',
- 'upload_date': '20140912',
- 'is_live': False,
- 'categories': ['Eishockey'],
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie',
- 'info_dict': {
- 'id': '464602',
- 'display_id': 'straubing-tigers-koelner-haie',
- 'ext': 'flv',
- 'title': 'Straubing Tigers - Kölner Haie',
- 'upload_date': '20160129',
- 'is_live': False,
- 'categories': ['Eishockey'],
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.laola1.tv/de-de/livestream/2016-03-22-belogorie-belgorod-trentino-diatec-lde',
- 'info_dict': {
- 'id': '487850',
- 'display_id': '2016-03-22-belogorie-belgorod-trentino-diatec-lde',
- 'ext': 'flv',
- 'title': 'Belogorie BELGOROD - TRENTINO Diatec',
- 'upload_date': '20160322',
- 'uploader': 'CEV - Europäischer Volleyball Verband',
- 'is_live': True,
- 'categories': ['Volleyball'],
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'This live stream has already finished.',
- }]
-
- def _real_extract(self, url):
- return self._extract_video(url)
-
-
-class EHFTVIE(Laola1TvBaseIE):
- IE_NAME = 'ehftv'
- _VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(?:-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)'
-
- _TESTS = [{
- 'url': 'https://www.ehftv.com/int/video/paris-saint-germain-handball-pge-vive-kielce/1166761',
- 'info_dict': {
- 'id': '1166761',
- 'display_id': 'paris-saint-germain-handball-pge-vive-kielce',
- 'ext': 'mp4',
- 'title': 'Paris Saint-Germain Handball - PGE Vive Kielce',
- 'is_live': False,
- 'categories': ['Handball'],
- },
- 'params': {
- 'skip_download': True,
- },
- }]
-
- def _real_extract(self, url):
- return self._extract_video(url)
-
-
-class ITTFIE(InfoExtractor):
- _VALID_URL = r'https?://tv\.ittf\.com/video/[^/]+/(?P<id>\d+)'
- _TEST = {
- 'url': 'https://tv.ittf.com/video/peng-wang-wei-matsudaira-kenta/951802',
- 'only_matching': True,
- }
-
- def _real_extract(self, url):
- return self.url_result(
- update_url_query('https://www.laola1.tv/titanplayer.php', {
- 'videoid': self._match_id(url),
- 'type': 'V',
- 'lang': 'en',
- 'portal': 'int',
- 'customer': 1024,
- }), Laola1TvEmbedIE.ie_key())
+++ /dev/null
-import json
-import random
-
-from .common import InfoExtractor
-from ..compat import compat_b64decode, compat_str
-from ..networking.exceptions import HTTPError
-from ..utils import (
- clean_html,
- ExtractorError,
- js_to_json,
- parse_duration,
- try_get,
- unified_timestamp,
- urlencode_postdata,
- urljoin,
-)
-
-
-class LinuxAcademyIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- https?://
- (?:www\.)?linuxacademy\.com/cp/
- (?:
- courses/lesson/course/(?P<chapter_id>\d+)/lesson/(?P<lesson_id>\d+)|
- modules/view/id/(?P<course_id>\d+)
- )
- '''
- _TESTS = [{
- 'url': 'https://linuxacademy.com/cp/courses/lesson/course/7971/lesson/2/module/675',
- 'info_dict': {
- 'id': '7971-2',
- 'ext': 'mp4',
- 'title': 'What Is Data Science',
- 'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
- 'timestamp': int, # The timestamp and upload date changes
- 'upload_date': r're:\d+',
- 'duration': 304,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Requires Linux Academy account credentials',
- }, {
- 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2',
- 'only_matching': True,
- }, {
- 'url': 'https://linuxacademy.com/cp/modules/view/id/154',
- 'info_dict': {
- 'id': '154',
- 'title': 'AWS Certified Cloud Practitioner',
- 'description': 'md5:a68a299ca9bb98d41cca5abc4d4ce22c',
- 'duration': 28835,
- },
- 'playlist_count': 41,
- 'skip': 'Requires Linux Academy account credentials',
- }, {
- 'url': 'https://linuxacademy.com/cp/modules/view/id/39',
- 'info_dict': {
- 'id': '39',
- 'title': 'Red Hat Certified Systems Administrator - RHCSA (EX200) Exam Prep (legacy)',
- 'description': 'md5:0f1d3369e90c3fb14a79813b863c902f',
- 'duration': 89280,
- },
- 'playlist_count': 73,
- 'skip': 'Requires Linux Academy account credentials',
- }]
-
- _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
- _ORIGIN_URL = 'https://linuxacademy.com'
- _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx'
- _NETRC_MACHINE = 'linuxacademy'
-
- def _perform_login(self, username, password):
- def random_string():
- return ''.join(random.choices(
- '0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~', k=32))
-
- webpage, urlh = self._download_webpage_handle(
- self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
- 'client_id': self._CLIENT_ID,
- 'response_type': 'token id_token',
- 'response_mode': 'web_message',
- 'redirect_uri': self._ORIGIN_URL,
- 'scope': 'openid email user_impersonation profile',
- 'audience': self._ORIGIN_URL,
- 'state': random_string(),
- 'nonce': random_string(),
- })
-
- login_data = self._parse_json(
- self._search_regex(
- r'atob\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
- 'login info', group='value'), None,
- transform_source=lambda x: compat_b64decode(x).decode('utf-8')
- )['extraParams']
-
- login_data.update({
- 'client_id': self._CLIENT_ID,
- 'redirect_uri': self._ORIGIN_URL,
- 'tenant': 'lacausers',
- 'connection': 'Username-Password-ACG-Proxy',
- 'username': username,
- 'password': password,
- 'sso': 'true',
- })
-
- login_state_url = urlh.url
-
- try:
- login_page = self._download_webpage(
- 'https://login.linuxacademy.com/usernamepassword/login', None,
- 'Downloading login page', data=json.dumps(login_data).encode(),
- headers={
- 'Content-Type': 'application/json',
- 'Origin': 'https://login.linuxacademy.com',
- 'Referer': login_state_url,
- })
- except ExtractorError as e:
- if isinstance(e.cause, HTTPError) and e.cause.status == 401:
- error = self._parse_json(e.cause.response.read(), None)
- message = error.get('description') or error['code']
- raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, message), expected=True)
- raise
-
- callback_page, urlh = self._download_webpage_handle(
- 'https://login.linuxacademy.com/login/callback', None,
- 'Downloading callback page',
- data=urlencode_postdata(self._hidden_inputs(login_page)),
- headers={
- 'Content-Type': 'application/x-www-form-urlencoded',
- 'Origin': 'https://login.linuxacademy.com',
- 'Referer': login_state_url,
- })
-
- access_token = self._search_regex(
- r'access_token=([^=&]+)', urlh.url,
- 'access token', default=None)
- if not access_token:
- access_token = self._parse_json(
- self._search_regex(
- r'authorizationResponse\s*=\s*({.+?})\s*;', callback_page,
- 'authorization response'), None,
- transform_source=js_to_json)['response']['access_token']
-
- self._download_webpage(
- 'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
- % access_token, None, 'Downloading token validation page')
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id')
- item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id)
-
- webpage = self._download_webpage(url, item_id)
-
- # course path
- if course_id:
- module = self._parse_json(
- self._search_regex(
- r'window\.module\s*=\s*({(?:(?!};)[^"]|"([^"]|\\")*")+})\s*;', webpage, 'module'),
- item_id)
- entries = []
- chapter_number = None
- chapter = None
- chapter_id = None
- for item in module['items']:
- if not isinstance(item, dict):
- continue
-
- def type_field(key):
- return (try_get(item, lambda x: x['type'][key], compat_str) or '').lower()
- type_fields = (type_field('name'), type_field('slug'))
- # Move to next module section
- if 'section' in type_fields:
- chapter = item.get('course_name')
- chapter_id = item.get('course_module')
- chapter_number = 1 if not chapter_number else chapter_number + 1
- continue
- # Skip non-lessons
- if 'lesson' not in type_fields:
- continue
- lesson_url = urljoin(url, item.get('url'))
- if not lesson_url:
- continue
- title = item.get('title') or item.get('lesson_name')
- description = item.get('md_desc') or clean_html(item.get('description')) or clean_html(item.get('text'))
- entries.append({
- '_type': 'url_transparent',
- 'url': lesson_url,
- 'ie_key': LinuxAcademyIE.ie_key(),
- 'title': title,
- 'description': description,
- 'timestamp': unified_timestamp(item.get('date')) or unified_timestamp(item.get('created_on')),
- 'duration': parse_duration(item.get('duration')),
- 'chapter': chapter,
- 'chapter_id': chapter_id,
- 'chapter_number': chapter_number,
- })
- return {
- '_type': 'playlist',
- 'entries': entries,
- 'id': course_id,
- 'title': module.get('title'),
- 'description': module.get('md_desc') or clean_html(module.get('desc')),
- 'duration': parse_duration(module.get('duration')),
- }
-
- # single video path
- m3u8_url = self._parse_json(
- self._search_regex(
- r'player\.playlist\s*=\s*(\[.+?\])\s*;', webpage, 'playlist'),
- item_id)[0]['file']
- formats = self._extract_m3u8_formats(
- m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls')
- info = {
- 'id': item_id,
- 'formats': formats,
- }
- lesson = self._parse_json(
- self._search_regex(
- (r'window\.lesson\s*=\s*({.+?})\s*;',
- r'player\.lesson\s*=\s*({.+?})\s*;'),
- webpage, 'lesson', default='{}'), item_id, fatal=False)
- if lesson:
- info.update({
- 'title': lesson.get('lesson_name'),
- 'description': lesson.get('md_desc') or clean_html(lesson.get('desc')),
- 'timestamp': unified_timestamp(lesson.get('date')) or unified_timestamp(lesson.get('created_on')),
- 'duration': parse_duration(lesson.get('duration')),
- })
- if not info.get('title'):
- info['title'] = self._search_regex(
- (r'>Lecture\s*:\s*(?P<value>[^<]+)',
- r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
- 'title', group='value')
- return info
+++ /dev/null
-from .common import InfoExtractor
-
-
-class M6IE(InfoExtractor):
- IE_NAME = 'm6'
- _VALID_URL = r'https?://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'
-
- _TEST = {
- 'url': 'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html',
- 'md5': '242994a87de2c316891428e0176bcb77',
- 'info_dict': {
- 'id': '11323908',
- 'ext': 'mp4',
- 'title': 'Emeline est la Reine du Shopping sur le thème « Ma fête d’anniversaire ! »',
- 'description': 'md5:1212ae8fb4b7baa4dc3886c5676007c2',
- 'duration': 100,
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- return self.url_result('6play:%s' % video_id, 'SixPlay', video_id)
+++ /dev/null
-from .common import InfoExtractor
-from .pladform import PladformIE
-from ..utils import (
- unescapeHTML,
- int_or_none,
- ExtractorError,
-)
-
-
-class METAIE(InfoExtractor):
- _VALID_URL = r'https?://video\.meta\.ua/(?:iframe/)?(?P<id>[0-9]+)'
- _TESTS = [{
- 'url': 'http://video.meta.ua/5502115.video',
- 'md5': '71b6f3ee274bef16f1ab410f7f56b476',
- 'info_dict': {
- 'id': '5502115',
- 'ext': 'mp4',
- 'title': 'Sony Xperia Z camera test [HQ]',
- 'description': 'Xperia Z shoots video in FullHD HDR.',
- 'uploader_id': 'nomobile',
- 'uploader': 'CHЁZA.TV',
- 'upload_date': '20130211',
- },
- 'add_ie': ['Youtube'],
- }, {
- 'url': 'http://video.meta.ua/iframe/5502115',
- 'only_matching': True,
- }, {
- # pladform embed
- 'url': 'http://video.meta.ua/7121015.video',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- st_html5 = self._search_regex(
- r"st_html5\s*=\s*'#([^']+)'", webpage, 'uppod html5 st', default=None)
-
- if st_html5:
- # uppod st decryption algorithm is reverse engineered from function un(s) at uppod.js
- json_str = ''
- for i in range(0, len(st_html5), 3):
- json_str += '&#x0%s;' % st_html5[i:i + 3]
- uppod_data = self._parse_json(unescapeHTML(json_str), video_id)
- error = uppod_data.get('customnotfound')
- if error:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
-
- video_url = uppod_data['file']
- info = {
- 'id': video_id,
- 'url': video_url,
- 'title': uppod_data.get('comment') or self._og_search_title(webpage),
- 'description': self._og_search_description(webpage, default=None),
- 'thumbnail': uppod_data.get('poster') or self._og_search_thumbnail(webpage),
- 'duration': int_or_none(self._og_search_property(
- 'video:duration', webpage, default=None)),
- }
- if 'youtube.com/' in video_url:
- info.update({
- '_type': 'url_transparent',
- 'ie_key': 'Youtube',
- })
- return info
-
- pladform_url = PladformIE._extract_url(webpage)
- if pladform_url:
- return self.url_result(pladform_url)
+++ /dev/null
-import json
-import re
-import urllib.parse
-
-from .common import InfoExtractor
-from ..compat import compat_parse_qs, compat_urllib_parse_unquote
-from ..utils import (
- ExtractorError,
- determine_ext,
- get_element_by_attribute,
- int_or_none,
- mimetype2ext,
-)
-
-
-class MetacafeIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/(?P<id>[^/]+)/(?P<display_id>[^/?#]+)'
- _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
- _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
- IE_NAME = 'metacafe'
- _TESTS = [
- # Youtube video
- {
- 'add_ie': ['Youtube'],
- 'url': 'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/',
- 'info_dict': {
- 'id': '_aUehQsCQtM',
- 'ext': 'mp4',
- 'upload_date': '20090102',
- 'title': 'The Electric Company | "Short I" | PBS KIDS GO!',
- 'description': 'md5:2439a8ef6d5a70e380c22f5ad323e5a8',
- 'uploader': 'PBS',
- 'uploader_id': 'PBS'
- }
- },
- # Normal metacafe video
- {
- 'url': 'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
- 'md5': '6e0bca200eaad2552e6915ed6fd4d9ad',
- 'info_dict': {
- 'id': '11121940',
- 'ext': 'mp4',
- 'title': 'News: Stuff You Won\'t Do with Your PlayStation 4',
- 'uploader': 'ign',
- 'description': 'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
- },
- 'skip': 'Page is temporarily unavailable.',
- },
- # metacafe video with family filter
- {
- 'url': 'http://www.metacafe.com/watch/2155630/adult_art_by_david_hart_156/',
- 'md5': 'b06082c5079bbdcde677a6291fbdf376',
- 'info_dict': {
- 'id': '2155630',
- 'ext': 'mp4',
- 'title': 'Adult Art By David Hart 156',
- 'uploader': '63346',
- 'description': 'md5:9afac8fc885252201ad14563694040fc',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # AnyClip video
- {
- 'url': 'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/',
- 'info_dict': {
- 'id': 'an-dVVXnuY7Jh77J',
- 'ext': 'mp4',
- 'title': 'The Andromeda Strain (1971): Stop the Bomb Part 3',
- 'uploader': 'AnyClip',
- 'description': 'md5:cbef0460d31e3807f6feb4e7a5952e5b',
- },
- },
- # age-restricted video
- {
- 'url': 'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
- 'md5': '98dde7c1a35d02178e8ab7560fe8bd09',
- 'info_dict': {
- 'id': '5186653',
- 'ext': 'mp4',
- 'title': 'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
- 'uploader': 'Dwayne Pipe',
- 'description': 'md5:950bf4c581e2c059911fa3ffbe377e4b',
- 'age_limit': 18,
- },
- },
- # cbs video
- {
- 'url': 'http://www.metacafe.com/watch/cb-8VD4r_Zws8VP/open_this_is_face_the_nation_february_9/',
- 'info_dict': {
- 'id': '8VD4r_Zws8VP',
- 'ext': 'flv',
- 'title': 'Open: This is Face the Nation, February 9',
- 'description': 'md5:8a9ceec26d1f7ed6eab610834cc1a476',
- 'duration': 96,
- 'uploader': 'CBSI-NEW',
- 'upload_date': '20140209',
- 'timestamp': 1391959800,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- },
- # Movieclips.com video
- {
- 'url': 'http://www.metacafe.com/watch/mv-Wy7ZU/my_week_with_marilyn_do_you_love_me/',
- 'info_dict': {
- 'id': 'mv-Wy7ZU',
- 'ext': 'mp4',
- 'title': 'My Week with Marilyn - Do You Love Me?',
- 'description': 'From the movie My Week with Marilyn - Colin (Eddie Redmayne) professes his love to Marilyn (Michelle Williams) and gets her to promise to return to set and finish the movie.',
- 'uploader': 'movie_trailers',
- 'duration': 176,
- },
- 'params': {
- 'skip_download': 'requires rtmpdump',
- }
- }
- ]
-
- def report_disclaimer(self):
- self.to_screen('Retrieving disclaimer')
-
- def _real_extract(self, url):
- # Extract id and simplified title from URL
- video_id, display_id = self._match_valid_url(url).groups()
-
- # the video may come from an external site
- m_external = re.match(r'^(\w{2})-(.*)$', video_id)
- if m_external is not None:
- prefix, ext_id = m_external.groups()
- # Check if video comes from YouTube
- if prefix == 'yt':
- return self.url_result('http://www.youtube.com/watch?v=%s' % ext_id, 'Youtube')
- # CBS videos use theplatform.com
- if prefix == 'cb':
- return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
-
- headers = {
- # Disable family filter
- 'Cookie': 'user=%s; ' % urllib.parse.quote(json.dumps({'ffilter': False}))
- }
-
- # AnyClip videos require the flashversion cookie so that we get the link
- # to the mp4 file
- if video_id.startswith('an-'):
- headers['Cookie'] += 'flashVersion=0; '
-
- # Retrieve video webpage to extract further information
- webpage = self._download_webpage(url, video_id, headers=headers)
-
- error = get_element_by_attribute(
- 'class', 'notfound-page-title', webpage)
- if error:
- raise ExtractorError(error, expected=True)
-
- video_title = self._html_search_meta(
- ['og:title', 'twitter:title'], webpage, 'title', default=None) or self._search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
-
- # Extract URL, uploader and title from webpage
- self.report_extraction(video_id)
- video_url = None
- mobj = re.search(r'(?m)&(?:media|video)URL=([^&]+)', webpage)
- if mobj is not None:
- mediaURL = compat_urllib_parse_unquote(mobj.group(1))
- video_ext = determine_ext(mediaURL)
-
- # Extract gdaKey if available
- mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
- if mobj is None:
- video_url = mediaURL
- else:
- gdaKey = mobj.group(1)
- video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
- if video_url is None:
- mobj = re.search(r'<video src="([^"]+)"', webpage)
- if mobj:
- video_url = mobj.group(1)
- video_ext = 'mp4'
- if video_url is None:
- flashvars = self._search_regex(
- r' name="flashvars" value="(.*?)"', webpage, 'flashvars',
- default=None)
- if flashvars:
- vardict = compat_parse_qs(flashvars)
- if 'mediaData' not in vardict:
- raise ExtractorError('Unable to extract media URL')
- mobj = re.search(
- r'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict['mediaData'][0])
- if mobj is None:
- raise ExtractorError('Unable to extract media URL')
- mediaURL = mobj.group('mediaURL').replace('\\/', '/')
- video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
- video_ext = determine_ext(video_url)
- if video_url is None:
- player_url = self._search_regex(
- r"swfobject\.embedSWF\('([^']+)'",
- webpage, 'config URL', default=None)
- if player_url:
- config_url = self._search_regex(
- r'config=(.+)$', player_url, 'config URL')
- config_doc = self._download_xml(
- config_url, video_id,
- note='Downloading video config')
- smil_url = config_doc.find('.//properties').attrib['smil_file']
- smil_doc = self._download_xml(
- smil_url, video_id,
- note='Downloading SMIL document')
- base_url = smil_doc.find('./head/meta').attrib['base']
- video_url = []
- for vn in smil_doc.findall('.//video'):
- br = int(vn.attrib['system-bitrate'])
- play_path = vn.attrib['src']
- video_url.append({
- 'format_id': 'smil-%d' % br,
- 'url': base_url,
- 'play_path': play_path,
- 'page_url': url,
- 'player_url': player_url,
- 'ext': play_path.partition(':')[0],
- })
- if video_url is None:
- flashvars = self._parse_json(self._search_regex(
- r'flashvars\s*=\s*({.*});', webpage, 'flashvars',
- default=None), video_id, fatal=False)
- if flashvars:
- video_url = []
- for source in flashvars.get('sources'):
- source_url = source.get('src')
- if not source_url:
- continue
- ext = mimetype2ext(source.get('type')) or determine_ext(source_url)
- if ext == 'm3u8':
- video_url.extend(self._extract_m3u8_formats(
- source_url, video_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False))
- else:
- video_url.append({
- 'url': source_url,
- 'ext': ext,
- })
-
- if video_url is None:
- raise ExtractorError('Unsupported video type')
-
- description = self._html_search_meta(
- ['og:description', 'twitter:description', 'description'],
- webpage, 'title', fatal=False)
- thumbnail = self._html_search_meta(
- ['og:image', 'twitter:image'], webpage, 'title', fatal=False)
- video_uploader = self._html_search_regex(
- r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
- webpage, 'uploader nickname', fatal=False)
- duration = int_or_none(
- self._html_search_meta('video:duration', webpage, default=None))
- age_limit = (
- 18
- if re.search(r'(?:"contentRating":|"rating",)"restricted"', webpage)
- else 0)
-
- if isinstance(video_url, list):
- formats = video_url
- else:
- formats = [{
- 'url': video_url,
- 'ext': video_ext,
- }]
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'description': description,
- 'uploader': video_uploader,
- 'title': video_title,
- 'thumbnail': thumbnail,
- 'age_limit': age_limit,
- 'formats': formats,
- 'duration': duration,
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- qualities,
- unified_strdate,
-)
-
-
-class MgoonIE(InfoExtractor):
- _VALID_URL = r'''(?x)https?://(?:www\.)?
- (?:(:?m\.)?mgoon\.com/(?:ch/(?:.+)/v|play/view)|
- video\.mgoon\.com)/(?P<id>[0-9]+)'''
- _API_URL = 'http://mpos.mgoon.com/player/video?id={0:}'
- _TESTS = [
- {
- 'url': 'http://m.mgoon.com/ch/hi6618/v/5582148',
- 'md5': 'dd46bb66ab35cf6d51cc812fd82da79d',
- 'info_dict': {
- 'id': '5582148',
- 'uploader_id': 'hi6618',
- 'duration': 240.419,
- 'upload_date': '20131220',
- 'ext': 'mp4',
- 'title': 'md5:543aa4c27a4931d371c3f433e8cebebc',
- 'thumbnail': r're:^https?://.*\.jpg$',
- }
- },
- {
- 'url': 'http://www.mgoon.com/play/view/5582148',
- 'only_matching': True,
- },
- {
- 'url': 'http://video.mgoon.com/5582148',
- 'only_matching': True,
- },
- ]
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- video_id = mobj.group('id')
-
- data = self._download_json(self._API_URL.format(video_id), video_id)
-
- if data.get('errorInfo', {}).get('code') != 'NONE':
- raise ExtractorError('%s encountered an error: %s' % (
- self.IE_NAME, data['errorInfo']['message']), expected=True)
-
- v_info = data['videoInfo']
- title = v_info.get('v_title')
- thumbnail = v_info.get('v_thumbnail')
- duration = v_info.get('v_duration')
- upload_date = unified_strdate(v_info.get('v_reg_date'))
- uploader_id = data.get('userInfo', {}).get('u_alias')
- if duration:
- duration /= 1000.0
-
- age_limit = None
- if data.get('accessInfo', {}).get('code') == 'VIDEO_STATUS_ADULT':
- age_limit = 18
-
- formats = []
- get_quality = qualities(['360p', '480p', '720p', '1080p'])
- for fmt in data['videoFiles']:
- formats.append({
- 'format_id': fmt['label'],
- 'quality': get_quality(fmt['label']),
- 'url': fmt['url'],
- 'ext': fmt['format'],
-
- })
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'upload_date': upload_date,
- 'uploader_id': uploader_id,
- 'age_limit': age_limit,
- }
+++ /dev/null
-import random
-
-from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..networking import Request
-from ..utils import ExtractorError, int_or_none, xpath_text
-
-
-class MioMioIE(InfoExtractor):
- IE_NAME = 'miomio.tv'
- _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)'
- _TESTS = [{
- # "type=video" in flashvars
- 'url': 'http://www.miomio.tv/watch/cc88912/',
- 'info_dict': {
- 'id': '88912',
- 'ext': 'flv',
- 'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
- 'duration': 5923,
- },
- 'skip': 'Unable to load videos',
- }, {
- 'url': 'http://www.miomio.tv/watch/cc184024/',
- 'info_dict': {
- 'id': '43729',
- 'title': '《动漫同人插画绘制》',
- },
- 'playlist_mincount': 86,
- 'skip': 'Unable to load videos',
- }, {
- 'url': 'http://www.miomio.tv/watch/cc173113/',
- 'info_dict': {
- 'id': '173113',
- 'title': 'The New Macbook 2015 上手试玩与简评'
- },
- 'playlist_mincount': 2,
- 'skip': 'Unable to load videos',
- }, {
- # new 'h5' player
- 'url': 'http://www.miomio.tv/watch/cc273997/',
- 'md5': '0b27a4b4495055d826813f8c3a6b2070',
- 'info_dict': {
- 'id': '273997',
- 'ext': 'mp4',
- 'title': 'マツコの知らない世界【劇的進化SP!ビニール傘&冷凍食品2016】 1_2 - 16 05 31',
- },
- 'skip': 'Unable to load videos',
- }]
-
- def _extract_mioplayer(self, webpage, video_id, title, http_headers):
- xml_config = self._search_regex(
- r'flashvars="type=(?:sina|video)&(.+?)&',
- webpage, 'xml config')
-
- # skipping the following page causes lags and eventually connection drop-outs
- self._request_webpage(
- 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)),
- video_id)
-
- vid_config_request = Request(
- 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
- headers=http_headers)
-
- # the following xml contains the actual configuration information on the video file(s)
- vid_config = self._download_xml(vid_config_request, video_id)
-
- if not int_or_none(xpath_text(vid_config, 'timelength')):
- raise ExtractorError('Unable to load videos!', expected=True)
-
- entries = []
- for f in vid_config.findall('./durl'):
- segment_url = xpath_text(f, 'url', 'video url')
- if not segment_url:
- continue
- order = xpath_text(f, 'order', 'order')
- segment_id = video_id
- segment_title = title
- if order:
- segment_id += '-%s' % order
- segment_title += ' part %s' % order
- entries.append({
- 'id': segment_id,
- 'url': segment_url,
- 'title': segment_title,
- 'duration': int_or_none(xpath_text(f, 'length', 'duration'), 1000),
- 'http_headers': http_headers,
- })
-
- return entries
-
- def _download_chinese_webpage(self, *args, **kwargs):
- # Requests with English locales return garbage
- headers = {
- 'Accept-Language': 'zh-TW,en-US;q=0.7,en;q=0.3',
- }
- kwargs.setdefault('headers', {}).update(headers)
- return self._download_webpage(*args, **kwargs)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_chinese_webpage(
- url, video_id)
-
- title = self._html_search_meta(
- 'description', webpage, 'title', fatal=True)
-
- mioplayer_path = self._search_regex(
- r'src="(/mioplayer(?:_h5)?/[^"]+)"', webpage, 'ref_path')
-
- if '_h5' in mioplayer_path:
- player_url = compat_urlparse.urljoin(url, mioplayer_path)
- player_webpage = self._download_chinese_webpage(
- player_url, video_id,
- note='Downloading player webpage', headers={'Referer': url})
- entries = self._parse_html5_media_entries(player_url, player_webpage, video_id)
- http_headers = {'Referer': player_url}
- else:
- http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path}
- entries = self._extract_mioplayer(webpage, video_id, title, http_headers)
-
- if len(entries) == 1:
- segment = entries[0]
- segment['id'] = video_id
- segment['title'] = title
- segment['http_headers'] = http_headers
- return segment
-
- return {
- '_type': 'multi_video',
- 'id': video_id,
- 'entries': entries,
- 'title': title,
- 'http_headers': http_headers,
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- parse_duration,
- parse_iso8601,
-)
-
-
-class MnetIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?mnet\.(?:com|interest\.me)/tv/vod/(?:.*?\bclip_id=)?(?P<id>[0-9]+)'
- _TESTS = [{
- 'url': 'http://www.mnet.com/tv/vod/171008',
- 'info_dict': {
- 'id': '171008',
- 'title': 'SS_이해인@히든박스',
- 'description': 'md5:b9efa592c3918b615ba69fe9f8a05c55',
- 'duration': 88,
- 'upload_date': '20151231',
- 'timestamp': 1451564040,
- 'age_limit': 0,
- 'thumbnails': 'mincount:5',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'ext': 'flv',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://mnet.interest.me/tv/vod/172790',
- 'only_matching': True,
- }, {
- 'url': 'http://www.mnet.com/tv/vod/vod_view.asp?clip_id=172790&tabMenu=',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- # TODO: extract rtmp formats
- # no stype -> rtmp url
- # stype=H -> m3u8 url
- # stype=M -> mpd url
- info = self._download_json(
- 'http://content.api.mnet.com/player/vodConfig',
- video_id, 'Downloading vod config JSON', query={
- 'id': video_id,
- 'ctype': 'CLIP',
- 'stype': 'H',
- })['data']['info']
-
- title = info['title']
-
- cdn_data = self._download_json(
- info['cdn'], video_id, 'Downloading vod cdn JSON')['data'][0]
- m3u8_url = cdn_data['url']
- token = cdn_data.get('token')
- if token and token != '-':
- m3u8_url += '?' + token
- formats = self._extract_wowza_formats(
- m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp', 'f4m'])
-
- description = info.get('ment')
- duration = parse_duration(info.get('time'))
- timestamp = parse_iso8601(info.get('date'), delimiter=' ')
- age_limit = info.get('adult')
- if age_limit is not None:
- age_limit = 0 if age_limit == 'N' else 18
- thumbnails = [{
- 'id': thumb_format,
- 'url': thumb['url'],
- 'width': int_or_none(thumb.get('width')),
- 'height': int_or_none(thumb.get('height')),
- } for thumb_format, thumb in info.get('cover', {}).items() if thumb.get('url')]
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'timestamp': timestamp,
- 'age_limit': age_limit,
- 'thumbnails': thumbnails,
- 'formats': formats,
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- clean_html,
- int_or_none,
-)
-
-
-class MoeVideoIE(InfoExtractor):
- IE_DESC = 'LetitBit video services: moevideo.net, playreplay.net and videochart.net'
- _VALID_URL = r'''(?x)
- https?://(?P<host>(?:www\.)?
- (?:(?:moevideo|playreplay|videochart)\.net|thesame\.tv))/
- (?:video|framevideo|embed)/(?P<id>[0-9a-z]+\.[0-9A-Za-z]+)'''
- _API_URL = 'http://api.letitbit.net/'
- _API_KEY = 'tVL0gjqo5'
- _TESTS = [
- {
- 'url': 'http://moevideo.net/video/00297.0036103fe3d513ef27915216fd29',
- 'md5': '129f5ae1f6585d0e9bb4f38e774ffb3a',
- 'info_dict': {
- 'id': '00297.0036103fe3d513ef27915216fd29',
- 'ext': 'flv',
- 'title': 'Sink cut out machine',
- 'description': 'md5:f29ff97b663aefa760bf7ca63c8ca8a8',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'width': 540,
- 'height': 360,
- 'duration': 179,
- 'filesize': 17822500,
- },
- 'skip': 'Video has been removed',
- },
- {
- 'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a',
- 'md5': '74f0a014d5b661f0f0e2361300d1620e',
- 'info_dict': {
- 'id': '77107.7f325710a627383d40540d8e991a',
- 'ext': 'flv',
- 'title': 'Operacion Condor.',
- 'description': 'md5:7e68cb2fcda66833d5081c542491a9a3',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'width': 480,
- 'height': 296,
- 'duration': 6027,
- 'filesize': 588257923,
- },
- 'skip': 'Video has been removed',
- },
- ]
-
- def _real_extract(self, url):
- host, video_id = self._match_valid_url(url).groups()
-
- webpage = self._download_webpage(
- 'http://%s/video/%s' % (host, video_id),
- video_id, 'Downloading webpage')
-
- title = self._og_search_title(webpage)
-
- embed_webpage = self._download_webpage(
- 'http://%s/embed/%s' % (host, video_id),
- video_id, 'Downloading embed webpage')
- video = self._parse_json(self._search_regex(
- r'mvplayer\("#player"\s*,\s*({.+})',
- embed_webpage, 'mvplayer'), video_id)['video']
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': video.get('poster') or self._og_search_thumbnail(webpage),
- 'description': clean_html(self._og_search_description(webpage)),
- 'duration': int_or_none(self._og_search_property('video:duration', webpage)),
- 'url': video['ourUrl'],
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- str_to_int,
- unified_strdate,
-)
-from .keezmovies import KeezMoviesIE
-
-
-class MofosexIE(KeezMoviesIE): # XXX: Do not subclass from concrete IE
- _VALID_URL = r'https?://(?:www\.)?mofosex\.com/videos/(?P<id>\d+)/(?P<display_id>[^/?#&.]+)\.html'
- _TESTS = [{
- 'url': 'http://www.mofosex.com/videos/318131/amateur-teen-playing-and-masturbating-318131.html',
- 'md5': '558fcdafbb63a87c019218d6e49daf8a',
- 'info_dict': {
- 'id': '318131',
- 'display_id': 'amateur-teen-playing-and-masturbating-318131',
- 'ext': 'mp4',
- 'title': 'amateur teen playing and masturbating',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'upload_date': '20121114',
- 'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- 'age_limit': 18,
- }
- }, {
- # This video is no longer available
- 'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- webpage, info = self._extract_info(url)
-
- view_count = str_to_int(self._search_regex(
- r'VIEWS:</span>\s*([\d,.]+)', webpage, 'view count', fatal=False))
- like_count = int_or_none(self._search_regex(
- r'id=["\']amountLikes["\'][^>]*>(\d+)', webpage,
- 'like count', fatal=False))
- dislike_count = int_or_none(self._search_regex(
- r'id=["\']amountDislikes["\'][^>]*>(\d+)', webpage,
- 'like count', fatal=False))
- upload_date = unified_strdate(self._html_search_regex(
- r'Added:</span>([^<]+)', webpage, 'upload date', fatal=False))
-
- info.update({
- 'view_count': view_count,
- 'like_count': like_count,
- 'dislike_count': dislike_count,
- 'upload_date': upload_date,
- 'thumbnail': self._og_search_thumbnail(webpage),
- })
-
- return info
-
-
-class MofosexEmbedIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=(?P<id>\d+)'
- _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=\d+)']
- _TESTS = [{
- 'url': 'https://www.mofosex.com/embed/?videoid=318131&referrer=KM',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- return self.url_result(
- 'http://www.mofosex.com/videos/{0}/{0}.html'.format(video_id),
- ie=MofosexIE.ie_key(), video_id=video_id)
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- smuggle_url,
- float_or_none,
- parse_iso8601,
- update_url_query,
-)
-
-
-class MovieClipsIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?movieclips\.com/videos/.+-(?P<id>\d+)(?:\?|$)'
- _TEST = {
- 'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597',
- 'md5': '42b5a0352d4933a7bd54f2104f481244',
- 'info_dict': {
- 'id': 'pKIGmG83AqD9',
- 'ext': 'mp4',
- 'title': 'Warcraft Trailer 1',
- 'description': 'Watch Trailer 1 from Warcraft (2016). Legendary’s WARCRAFT is a 3D epic adventure of world-colliding conflict based.',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'timestamp': 1446843055,
- 'upload_date': '20151106',
- 'uploader': 'Movieclips',
- },
- 'add_ie': ['ThePlatform'],
- 'skip': 'redirects to YouTube',
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- video = next(v for v in self._parse_json(self._search_regex(
- r'var\s+__REACT_ENGINE__\s*=\s*({.+});',
- webpage, 'react engine'), video_id)['playlist']['videos'] if v['id'] == video_id)
-
- return {
- '_type': 'url_transparent',
- 'ie_key': 'ThePlatform',
- 'url': smuggle_url(update_url_query(
- video['contentUrl'], {'mbr': 'true'}), {'force_smil_url': True}),
- 'title': self._og_search_title(webpage),
- 'description': self._html_search_meta('description', webpage),
- 'duration': float_or_none(video.get('duration')),
- 'timestamp': parse_iso8601(video.get('dateCreated')),
- 'thumbnail': video.get('defaultImage'),
- 'uploader': video.get('provider'),
- }
class MSNIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:(?:www|preview)\.)?msn\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)'
_TESTS = [{
'url': 'https://www.msn.com/en-in/money/video/7-ways-to-get-rid-of-chest-congestion/vi-BBPxU6d',
+++ /dev/null
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- int_or_none,
- parse_duration,
-)
-
-
-class MwaveIE(InfoExtractor):
- _VALID_URL = r'https?://mwave\.interest\.me/(?:[^/]+/)?mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)'
- _URL_TEMPLATE = 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=%s'
- _TESTS = [{
- 'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859',
- # md5 is unstable
- 'info_dict': {
- 'id': '168859',
- 'ext': 'flv',
- 'title': '[M COUNTDOWN] SISTAR - SHAKE IT',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'M COUNTDOWN',
- 'duration': 206,
- 'view_count': int,
- }
- }, {
- 'url': 'http://mwave.interest.me/en/mnettv/videodetail.m?searchVideoDetailVO.clip_id=176199',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- vod_info = self._download_json(
- 'http://mwave.interest.me/onair/vod_info.m?vodtype=CL&sectorid=&endinfo=Y&id=%s' % video_id,
- video_id, 'Download vod JSON')
-
- formats = []
- for num, cdn_info in enumerate(vod_info['cdn']):
- stream_url = cdn_info.get('url')
- if not stream_url:
- continue
- stream_name = cdn_info.get('name') or compat_str(num)
- f4m_stream = self._download_json(
- stream_url, video_id,
- 'Download %s stream JSON' % stream_name)
- f4m_url = f4m_stream.get('fileurl')
- if not f4m_url:
- continue
- formats.extend(
- self._extract_f4m_formats(f4m_url + '&hdcore=3.0.3', video_id, f4m_id=stream_name))
-
- return {
- 'id': video_id,
- 'title': vod_info['title'],
- 'thumbnail': vod_info.get('cover'),
- 'uploader': vod_info.get('program_title'),
- 'duration': parse_duration(vod_info.get('time')),
- 'view_count': int_or_none(vod_info.get('hit')),
- 'formats': formats,
- }
-
-
-class MwaveMeetGreetIE(InfoExtractor):
- _VALID_URL = r'https?://mwave\.interest\.me/(?:[^/]+/)?meetgreet/view/(?P<id>\d+)'
- _TESTS = [{
- 'url': 'http://mwave.interest.me/meetgreet/view/256',
- 'info_dict': {
- 'id': '173294',
- 'ext': 'flv',
- 'title': '[MEET&GREET] Park BoRam',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Mwave',
- 'duration': 3634,
- 'view_count': int,
- }
- }, {
- 'url': 'http://mwave.interest.me/en/meetgreet/view/256',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- clip_id = self._html_search_regex(
- r'<iframe[^>]+src="/mnettv/ifr_clip\.m\?searchVideoDetailVO\.clip_id=(\d+)',
- webpage, 'clip ID')
- clip_url = MwaveIE._URL_TEMPLATE % clip_id
- return self.url_result(clip_url, 'Mwave', clip_id)
+++ /dev/null
-from .common import InfoExtractor
-
-
-class MyChannelsIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?mychannels\.com/.*(?P<id_type>video|production)_id=(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'https://mychannels.com/missholland/miss-holland?production_id=3416',
- 'md5': 'b8993daad4262dd68d89d651c0c52c45',
- 'info_dict': {
- 'id': 'wUUDZZep6vQD',
- 'ext': 'mp4',
- 'title': 'Miss Holland joins VOTE LEAVE',
- 'description': 'Miss Holland | #13 Not a potato',
- 'uploader': 'Miss Holland',
- }
- }
-
- def _real_extract(self, url):
- id_type, url_id = self._match_valid_url(url).groups()
- webpage = self._download_webpage(url, url_id)
- video_data = self._html_search_regex(r'<div([^>]+data-%s-id="%s"[^>]+)>' % (id_type, url_id), webpage, 'video data')
-
- def extract_data_val(attr, fatal=False):
- return self._html_search_regex(r'data-%s\s*=\s*"([^"]+)"' % attr, video_data, attr, fatal=fatal)
- minoto_id = extract_data_val('minoto-id') or self._search_regex(r'/id/([a-zA-Z0-9]+)', extract_data_val('video-src', True), 'minoto id')
-
- return {
- '_type': 'url_transparent',
- 'url': 'minoto:%s' % minoto_id,
- 'id': url_id,
- 'title': extract_data_val('title', True),
- 'description': extract_data_val('description'),
- 'thumbnail': extract_data_val('image'),
- 'uploader': extract_data_val('channel'),
- }
+++ /dev/null
-from .common import InfoExtractor
-from .vimple import SprutoBaseIE
-
-
-class MyviIE(SprutoBaseIE):
- _VALID_URL = r'''(?x)
- (?:
- https?://
- (?:www\.)?
- myvi\.
- (?:
- (?:ru/player|tv)/
- (?:
- (?:
- embed/html|
- flash|
- api/Video/Get
- )/|
- content/preloader\.swf\?.*\bid=
- )|
- ru/watch/
- )|
- myvi:
- )
- (?P<id>[\da-zA-Z_-]+)
- '''
- _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//myvi\.(?:ru/player|tv)/(?:embed/html|flash)/[^"]+)\1']
- _TESTS = [{
- 'url': 'http://myvi.ru/player/embed/html/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
- 'md5': '571bbdfba9f9ed229dc6d34cc0f335bf',
- 'info_dict': {
- 'id': 'f16b2bbd-cde8-481c-a981-7cd48605df43',
- 'ext': 'mp4',
- 'title': 'хозяин жизни',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 25,
- },
- }, {
- 'url': 'http://myvi.ru/player/content/preloader.swf?id=oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wOYf1WFpPfc_bWTKGVf_Zafr0',
- 'only_matching': True,
- }, {
- 'url': 'http://myvi.ru/player/api/Video/Get/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
- 'only_matching': True,
- }, {
- 'url': 'http://myvi.tv/embed/html/oTGTNWdyz4Zwy_u1nraolwZ1odenTd9WkTnRfIL9y8VOgHYqOHApE575x4_xxS9Vn0?ap=0',
- 'only_matching': True,
- }, {
- 'url': 'http://myvi.ru/player/flash/ocp2qZrHI-eZnHKQBK4cZV60hslH8LALnk0uBfKsB-Q4WnY26SeGoYPi8HWHxu0O30',
- 'only_matching': True,
- }, {
- 'url': 'https://www.myvi.ru/watch/YwbqszQynUaHPn_s82sx0Q2',
- 'only_matching': True,
- }, {
- 'url': 'myvi:YwbqszQynUaHPn_s82sx0Q2',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- spruto = self._download_json(
- 'http://myvi.ru/player/api/Video/Get/%s?sig' % video_id, video_id)['sprutoData']
-
- return self._extract_spruto(spruto, video_id)
-
-
-class MyviEmbedIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?myvi\.tv/(?:[^?]+\?.*?\bv=|embed/)(?P<id>[\da-z]+)'
- _TESTS = [{
- 'url': 'https://www.myvi.tv/embed/ccdqic3wgkqwpb36x9sxg43t4r',
- 'info_dict': {
- 'id': 'b3ea0663-3234-469d-873e-7fecf36b31d1',
- 'ext': 'mp4',
- 'title': 'Твоя (original song).mp4',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 277,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.myvi.tv/idmi6o?v=ccdqic3wgkqwpb36x9sxg43t4r#watch',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if MyviIE.suitable(url) else super(MyviEmbedIE, cls).suitable(url)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(
- 'https://www.myvi.tv/embed/%s' % video_id, video_id)
-
- myvi_id = self._search_regex(
- r'CreatePlayer\s*\(\s*["\'].*?\bv=([\da-zA-Z_]+)',
- webpage, 'video id')
-
- return self.url_result('myvi:%s' % myvi_id, ie=MyviIE.ie_key())
+++ /dev/null
-import base64
-import hashlib
-
-from .common import InfoExtractor
-from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
-from ..utils import (
- int_or_none,
- parse_codecs,
- parse_duration,
-)
-
-
-class NewstubeIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)'
- _TEST = {
- 'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym',
- 'md5': '9d10320ad473444352f72f746ccb8b8c',
- 'info_dict': {
- 'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6',
- 'ext': 'mp4',
- 'title': 'Телеканал CNN переместил город Славянск в Крым',
- 'description': 'md5:419a8c9f03442bc0b0a794d689360335',
- 'duration': 31.05,
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- page = self._download_webpage(url, video_id)
- title = self._html_search_meta(['og:title', 'twitter:title'], page, fatal=True)
-
- video_guid = self._html_search_regex(
- r'<meta\s+property="og:video(?::(?:(?:secure_)?url|iframe))?"\s+content="https?://(?:www\.)?newstube\.ru/embed/(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
- page, 'video GUID')
-
- enc_data = base64.b64decode(self._download_webpage(
- 'https://www.newstube.ru/embed/api/player/getsources2',
- video_guid, query={
- 'guid': video_guid,
- 'ff': 3,
- }))
- key = hashlib.pbkdf2_hmac(
- 'sha1', video_guid.replace('-', '').encode(), enc_data[:16], 1)[:16]
- dec_data = unpad_pkcs7(aes_cbc_decrypt_bytes(enc_data[32:], key, enc_data[16:32]))
- sources = self._parse_json(dec_data, video_guid)
-
- formats = []
- for source in sources:
- source_url = source.get('Src')
- if not source_url:
- continue
- height = int_or_none(source.get('Height'))
- f = {
- 'format_id': 'http' + ('-%dp' % height if height else ''),
- 'url': source_url,
- 'width': int_or_none(source.get('Width')),
- 'height': height,
- }
- source_type = source.get('Type')
- if source_type:
- f.update(parse_codecs(self._search_regex(
- r'codecs="([^"]+)"', source_type, 'codecs', fatal=False)))
- formats.append(f)
-
- self._check_formats(formats, video_guid)
-
- return {
- 'id': video_guid,
- 'title': title,
- 'description': self._html_search_meta(['description', 'og:description'], page),
- 'thumbnail': self._html_search_meta(['og:image:secure_url', 'og:image', 'twitter:image'], page),
- 'duration': parse_duration(self._html_search_meta('duration', page)),
- 'formats': formats,
- }
return self._remove_template_parameter(config['feedWithQueryParams'])
-class NickNightIE(NickDeIE): # XXX: Do not subclass from concrete IE
- IE_NAME = 'nicknight'
- _VALID_URL = r'https?://(?:www\.)(?P<host>nicknight\.(?:de|at|tv))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'http://www.nicknight.at/shows/977-awkward/videos/85987-nimmer-beste-freunde',
- 'only_matching': True,
- }, {
- 'url': 'http://www.nicknight.at/shows/977-awkward',
- 'only_matching': True,
- }, {
- 'url': 'http://www.nicknight.at/shows/1900-faking-it',
- 'only_matching': True,
- }]
-
- def _extract_mrss_url(self, webpage, *args):
- return self._search_regex(
- r'mrss\s*:\s*(["\'])(?P<url>http.+?)\1', webpage,
- 'mrss url', group='url')
-
-
class NickRuIE(MTVServicesInfoExtractor):
IE_NAME = 'nickelodeonru'
_VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu|com\.tr)/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+++ /dev/null
-from .common import InfoExtractor
-from .jwplatform import JWPlatformIE
-
-from ..utils import (
- unified_strdate,
-)
-
-
-class NormalbootsIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$'
- _TEST = {
- 'url': 'http://normalboots.com/video/home-alone-games-jontron/',
- 'info_dict': {
- 'id': 'home-alone-games-jontron',
- 'ext': 'mp4',
- 'title': 'Home Alone Games - JonTron - NormalBoots',
- 'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/',
- 'uploader': 'JonTron',
- 'upload_date': '20140125',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'add_ie': ['JWPlatform'],
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- video_uploader = self._html_search_regex(
- r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
- webpage, 'uploader', fatal=False)
- video_upload_date = unified_strdate(self._html_search_regex(
- r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
- webpage, 'date', fatal=False))
-
- jwplatform_url = JWPlatformIE._extract_url(webpage)
-
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- 'url': jwplatform_url,
- 'ie_key': JWPlatformIE.ie_key(),
- 'title': self._og_search_title(webpage),
- 'description': self._og_search_description(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'uploader': video_uploader,
- 'upload_date': video_upload_date,
- }
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..networking import Request
-from ..utils import (
- ExtractorError,
- urlencode_postdata,
- xpath_text,
- xpath_with_ns,
-)
-
-_x = lambda p: xpath_with_ns(p, {'xspf': 'http://xspf.org/ns/0/'})
-
-
-class NosVideoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?nosvideo\.com/' + \
- r'(?:embed/|\?v=)(?P<id>[A-Za-z0-9]{12})/?'
- _PLAYLIST_URL = 'http://nosvideo.com/xml/{xml_id:s}.xml'
- _FILE_DELETED_REGEX = r'<b>File Not Found</b>'
- _TEST = {
- 'url': 'http://nosvideo.com/?v=mu8fle7g7rpq',
- 'md5': '6124ed47130d8be3eacae635b071e6b6',
- 'info_dict': {
- 'id': 'mu8fle7g7rpq',
- 'ext': 'mp4',
- 'title': 'big_buck_bunny_480p_surround-fix.avi.mp4',
- 'thumbnail': r're:^https?://.*\.jpg$',
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- fields = {
- 'id': video_id,
- 'op': 'download1',
- 'method_free': 'Continue to Video',
- }
- req = Request(url, urlencode_postdata(fields))
- req.headers['Content-type'] = 'application/x-www-form-urlencoded'
- webpage = self._download_webpage(req, video_id,
- 'Downloading download page')
- if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
- raise ExtractorError('Video %s does not exist' % video_id,
- expected=True)
-
- xml_id = self._search_regex(r'php\|([^\|]+)\|', webpage, 'XML ID')
- playlist_url = self._PLAYLIST_URL.format(xml_id=xml_id)
- playlist = self._download_xml(playlist_url, video_id)
-
- track = playlist.find(_x('.//xspf:track'))
- if track is None:
- raise ExtractorError(
- 'XML playlist is missing the \'track\' element',
- expected=True)
- title = xpath_text(track, _x('./xspf:title'), 'title')
- url = xpath_text(track, _x('./xspf:file'), 'URL', fatal=True)
- thumbnail = xpath_text(track, _x('./xspf:image'), 'thumbnail')
- if title is not None:
- title = title.strip()
-
- formats = [{
- 'format_id': 'sd',
- 'url': url,
- }]
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'formats': formats,
- }
class NRLTVIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?nrl\.com/tv(/[^/]+)*/(?P<id>[^/?&#]+)'
_TEST = {
'url': 'https://www.nrl.com/tv/news/match-highlights-titans-v-knights-862805/',
+++ /dev/null
-import base64
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_str,
-)
-from ..utils import (
- determine_ext,
- float_or_none,
- int_or_none,
- smuggle_url,
- try_get,
- unsmuggle_url,
-)
-
-
-class OoyalaBaseIE(InfoExtractor):
- _PLAYER_BASE = 'http://player.ooyala.com/'
- _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
- _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s'
-
- def _extract(self, content_tree_url, video_id, domain=None, supportedformats=None, embed_token=None):
- content_tree = self._download_json(content_tree_url, video_id)['content_tree']
- metadata = content_tree[list(content_tree)[0]]
- embed_code = metadata['embed_code']
- pcode = metadata.get('asset_pcode') or embed_code
- title = metadata['title']
-
- auth_data = self._download_json(
- self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code),
- video_id, headers=self.geo_verification_headers(), query={
- 'domain': domain or 'player.ooyala.com',
- 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth',
- 'embedToken': embed_token,
- })['authorization_data'][embed_code]
-
- urls = []
- formats = []
- streams = auth_data.get('streams') or [{
- 'delivery_type': 'hls',
- 'url': {
- 'data': base64.b64encode(('http://player.ooyala.com/hls/player/all/%s.m3u8' % embed_code).encode()).decode(),
- }
- }]
- for stream in streams:
- url_data = try_get(stream, lambda x: x['url']['data'], compat_str)
- if not url_data:
- continue
- s_url = compat_b64decode(url_data).decode('utf-8')
- if not s_url or s_url in urls:
- continue
- urls.append(s_url)
- ext = determine_ext(s_url, None)
- delivery_type = stream.get('delivery_type')
- if delivery_type == 'hls' or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- elif delivery_type == 'hds' or ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
- elif delivery_type == 'dash' or ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- s_url, embed_code, mpd_id='dash', fatal=False))
- elif delivery_type == 'smooth':
- self._extract_ism_formats(
- s_url, embed_code, ism_id='mss', fatal=False)
- elif ext == 'smil':
- formats.extend(self._extract_smil_formats(
- s_url, embed_code, fatal=False))
- else:
- formats.append({
- 'url': s_url,
- 'ext': ext or delivery_type,
- 'vcodec': stream.get('video_codec'),
- 'format_id': delivery_type,
- 'width': int_or_none(stream.get('width')),
- 'height': int_or_none(stream.get('height')),
- 'abr': int_or_none(stream.get('audio_bitrate')),
- 'vbr': int_or_none(stream.get('video_bitrate')),
- 'fps': float_or_none(stream.get('framerate')),
- })
- if not formats and not auth_data.get('authorized'):
- self.raise_no_formats('%s said: %s' % (
- self.IE_NAME, auth_data['message']), expected=True)
-
- subtitles = {}
- for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items():
- sub_url = sub.get('url')
- if not sub_url:
- continue
- subtitles[lang] = [{
- 'url': sub_url,
- }]
-
- return {
- 'id': embed_code,
- 'title': title,
- 'description': metadata.get('description'),
- 'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
- 'duration': float_or_none(metadata.get('duration'), 1000),
- 'subtitles': subtitles,
- 'formats': formats,
- }
-
-
-class OoyalaIE(OoyalaBaseIE):
- _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'
-
- _TESTS = [
- {
- # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
- 'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
- 'info_dict': {
- 'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
- 'ext': 'mp4',
- 'title': 'Explaining Data Recovery from Hard Drives and SSDs',
- 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
- 'duration': 853.386,
- },
- # The video in the original webpage now uses PlayWire
- 'skip': 'Ooyala said: movie expired',
- }, {
- # Only available for ipad
- 'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
- 'info_dict': {
- 'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
- 'ext': 'mp4',
- 'title': 'Simulation Overview - Levels of Simulation',
- 'duration': 194.948,
- },
- },
- {
- # Information available only through SAS api
- # From http://community.plm.automation.siemens.com/t5/News-NX-Manufacturing/Tool-Path-Divide/ba-p/4187
- 'url': 'http://player.ooyala.com/player.js?embedCode=FiOG81ZTrvckcchQxmalf4aQj590qTEx',
- 'md5': 'a84001441b35ea492bc03736e59e7935',
- 'info_dict': {
- 'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',
- 'ext': 'mp4',
- 'title': 'Divide Tool Path.mp4',
- 'duration': 204.405,
- }
- },
- {
- # empty stream['url']['data']
- 'url': 'http://player.ooyala.com/player.js?embedCode=w2bnZtYjE6axZ_dw1Cd0hQtXd_ige2Is',
- 'only_matching': True,
- }
- ]
-
- def _extract_from_webpage(self, url, webpage):
- mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage)
- or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage)
- or re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage)
- or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage)
- or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
- if mobj is not None:
- embed_token = self._search_regex(
- r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
- webpage, 'ooyala embed token', default=None)
- yield self._build_url_result(smuggle_url(
- mobj.group('ec'), {
- 'domain': url,
- 'embed_token': embed_token,
- }))
- return
-
- # Look for multiple Ooyala embeds on SBN network websites
- mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
- if mobj is not None:
- for v in self._parse_json(mobj.group(1), self._generic_id(url), fatal=False) or []:
- yield self._build_url_result(smuggle_url(v['provider_video_id'], {'domain': url}))
-
- @staticmethod
- def _url_for_embed_code(embed_code):
- return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
-
- @classmethod
- def _build_url_result(cls, embed_code):
- return cls.url_result(cls._url_for_embed_code(embed_code),
- ie=cls.ie_key())
-
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
- embed_code = self._match_id(url)
- domain = smuggled_data.get('domain')
- supportedformats = smuggled_data.get('supportedformats')
- embed_token = smuggled_data.get('embed_token')
- content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code)
- return self._extract(content_tree_url, embed_code, domain, supportedformats, embed_token)
-
-
-class OoyalaExternalIE(OoyalaBaseIE):
- _VALID_URL = r'''(?x)
- (?:
- ooyalaexternal:|
- https?://.+?\.ooyala\.com/.*?\bexternalId=
- )
- (?P<partner_id>[^:]+)
- :
- (?P<id>.+)
- (?:
- :|
- .*?&pcode=
- )
- (?P<pcode>.+?)
- (?:&|$)
- '''
-
- _TEST = {
- 'url': 'https://player.ooyala.com/player.js?externalId=espn:10365079&pcode=1kNG061cgaoolOncv54OAO1ceO-I&adSetCode=91cDU6NuXTGKz3OdjOxFdAgJVtQcKJnI&callback=handleEvents&hasModuleParams=1&height=968&playerBrandingId=7af3bd04449c444c964f347f11873075&targetReplaceId=videoPlayer&width=1656&wmode=opaque&allowScriptAccess=always',
- 'info_dict': {
- 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
- 'ext': 'mp4',
- 'title': 'dm_140128_30for30Shorts___JudgingJewellv2',
- 'duration': 1302.0,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }
-
- def _real_extract(self, url):
- partner_id, video_id, pcode = self._match_valid_url(url).groups()
- content_tree_url = self._CONTENT_TREE_BASE + 'external_id/%s/%s:%s' % (pcode, partner_id, video_id)
- return self._extract(content_tree_url, video_id)
+++ /dev/null
-from .common import InfoExtractor
-from ..compat import (
- compat_str,
-)
-from ..utils import (
- ExtractorError,
- float_or_none,
- parse_duration,
- parse_qs,
- str_to_int,
- urlencode_postdata,
-)
-
-
-class PandoraTVIE(InfoExtractor):
- IE_NAME = 'pandora.tv'
- IE_DESC = '판도라TV'
- _VALID_URL = r'''(?x)
- https?://
- (?:
- (?:www\.)?pandora\.tv/view/(?P<user_id>[^/]+)/(?P<id>\d+)| # new format
- (?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?| # old format
- m\.pandora\.tv/?\? # mobile
- )
- '''
- _TESTS = [{
- 'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2',
- 'info_dict': {
- 'id': '53294230',
- 'ext': 'flv',
- 'title': '頭を撫でてくれる?',
- 'description': '頭を撫でてくれる?',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 39,
- 'upload_date': '20151218',
- 'uploader': 'カワイイ動物まとめ',
- 'uploader_id': 'mikakim',
- 'view_count': int,
- 'like_count': int,
- }
- }, {
- 'url': 'http://channel.pandora.tv/channel/video.ptv?ch_userid=gogoucc&prgid=54721744',
- 'info_dict': {
- 'id': '54721744',
- 'ext': 'flv',
- 'title': '[HD] JAPAN COUNTDOWN 170423',
- 'description': '[HD] JAPAN COUNTDOWN 170423',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 1704.9,
- 'upload_date': '20170423',
- 'uploader': 'GOGO_UCC',
- 'uploader_id': 'gogoucc',
- 'view_count': int,
- 'like_count': int,
- },
- 'params': {
- # Test metadata only
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.pandora.tv/view/mikakim/53294230#36797454_new',
- 'only_matching': True,
- }, {
- 'url': 'http://m.pandora.tv/?c=view&ch_userid=mikakim&prgid=54600346',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- user_id = mobj.group('user_id')
- video_id = mobj.group('id')
-
- if not user_id or not video_id:
- qs = parse_qs(url)
- video_id = qs.get('prgid', [None])[0]
- user_id = qs.get('ch_userid', [None])[0]
- if any(not f for f in (video_id, user_id,)):
- raise ExtractorError('Invalid URL', expected=True)
-
- data = self._download_json(
- 'http://m.pandora.tv/?c=view&m=viewJsonApi&ch_userid=%s&prgid=%s'
- % (user_id, video_id), video_id)
-
- info = data['data']['rows']['vod_play_info']['result']
-
- formats = []
- for format_id, format_url in info.items():
- if not format_url:
- continue
- height = self._search_regex(
- r'^v(\d+)[Uu]rl$', format_id, 'height', default=None)
- if not height:
- continue
-
- play_url = self._download_json(
- 'http://m.pandora.tv/?c=api&m=play_url', video_id,
- data=urlencode_postdata({
- 'prgid': video_id,
- 'runtime': info.get('runtime'),
- 'vod_url': format_url,
- }),
- headers={
- 'Origin': url,
- 'Content-Type': 'application/x-www-form-urlencoded',
- })
- format_url = play_url.get('url')
- if not format_url:
- continue
-
- formats.append({
- 'format_id': '%sp' % height,
- 'url': format_url,
- 'height': int(height),
- })
-
- return {
- 'id': video_id,
- 'title': info['subject'],
- 'description': info.get('body'),
- 'thumbnail': info.get('thumbnail') or info.get('poster'),
- 'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')),
- 'upload_date': info['fid'].split('/')[-1][:8] if isinstance(info.get('fid'), compat_str) else None,
- 'uploader': info.get('nickname'),
- 'uploader_id': info.get('upload_userid'),
- 'view_count': str_to_int(info.get('hit')),
- 'like_count': str_to_int(info.get('likecnt')),
- 'formats': formats,
- }
+++ /dev/null
-from .common import InfoExtractor
-
-
-class PeopleIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?people\.com/people/videos/0,,(?P<id>\d+),00\.html'
-
- _TEST = {
- 'url': 'http://www.people.com/people/videos/0,,20995451,00.html',
- 'info_dict': {
- 'id': 'ref:20995451',
- 'ext': 'mp4',
- 'title': 'Astronaut Love Triangle Victim Speaks Out: “The Crime in 2007 Hasn’t Defined Us”',
- 'description': 'Colleen Shipman speaks to PEOPLE for the first time about life after the attack',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 246.318,
- 'timestamp': 1458720585,
- 'upload_date': '20160323',
- 'uploader_id': '416418724',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': ['BrightcoveNew'],
- }
-
- def _real_extract(self, url):
- return self.url_result(
- 'http://players.brightcove.net/416418724/default_default/index.html?videoId=ref:%s'
- % self._match_id(url), 'BrightcoveNew')
+++ /dev/null
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- ExtractorError,
- int_or_none,
- parse_iso8601,
-)
-
-
-class PlayFMIE(InfoExtractor):
- IE_NAME = 'play.fm'
- _VALID_URL = r'https?://(?:www\.)?play\.fm/(?P<slug>(?:[^/]+/)+(?P<id>[^/]+))/?(?:$|[?#])'
-
- _TEST = {
- 'url': 'https://www.play.fm/dan-drastic/sven-tasnadi-leipzig-electronic-music-batofar-paris-fr-2014-07-12',
- 'md5': 'c505f8307825a245d0c7ad1850001f22',
- 'info_dict': {
- 'id': '71276',
- 'ext': 'mp3',
- 'title': 'Sven Tasnadi - LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
- 'description': '',
- 'duration': 5627,
- 'timestamp': 1406033781,
- 'upload_date': '20140722',
- 'uploader': 'Dan Drastic',
- 'uploader_id': '71170',
- 'view_count': int,
- 'comment_count': int,
- },
- }
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- video_id = mobj.group('id')
- slug = mobj.group('slug')
-
- recordings = self._download_json(
- 'http://v2api.play.fm/recordings/slug/%s' % slug, video_id)
-
- error = recordings.get('error')
- if isinstance(error, dict):
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, error.get('message')),
- expected=True)
-
- audio_url = recordings['audio']
- video_id = compat_str(recordings.get('id') or video_id)
- title = recordings['title']
- description = recordings.get('description')
- duration = int_or_none(recordings.get('recordingDuration'))
- timestamp = parse_iso8601(recordings.get('created_at'))
- uploader = recordings.get('page', {}).get('title')
- uploader_id = compat_str(recordings.get('page', {}).get('id'))
- view_count = int_or_none(recordings.get('playCount'))
- comment_count = int_or_none(recordings.get('commentCount'))
- categories = [tag['name'] for tag in recordings.get('tags', []) if tag.get('name')]
-
- return {
- 'id': video_id,
- 'url': audio_url,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'timestamp': timestamp,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'view_count': view_count,
- 'comment_count': comment_count,
- 'categories': categories,
- }
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class PlaysTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?plays\.tv/(?:video|embeds)/(?P<id>[0-9a-f]{18})'
- _TESTS = [{
- 'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
- 'md5': 'dfeac1198506652b5257a62762cec7bc',
- 'info_dict': {
- 'id': '56af17f56c95335490',
- 'ext': 'mp4',
- 'title': 'Bjergsen - When you outplay the Azir wall',
- 'description': 'Posted by Bjergsen',
- }
- }, {
- 'url': 'https://plays.tv/embeds/56af17f56c95335490',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(
- 'https://plays.tv/video/%s' % video_id, video_id)
-
- info = self._search_json_ld(webpage, video_id,)
-
- mpd_url, sources = re.search(
- r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>',
- webpage).groups()
- formats = self._extract_mpd_formats(
- self._proto_relative_url(mpd_url), video_id, mpd_id='DASH')
- for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources):
- formats.append({
- 'url': self._proto_relative_url(format_url),
- 'format_id': 'http-' + format_id,
- 'height': int_or_none(height),
- })
-
- info.update({
- 'id': video_id,
- 'description': self._og_search_description(webpage),
- 'thumbnail': info.get('thumbnail') or self._og_search_thumbnail(webpage),
- 'formats': formats,
- })
-
- return info
+++ /dev/null
-import re
-import urllib.parse
-
-from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
-from ..utils import ExtractorError, clean_html
-
-
-class PlayvidIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
- _TESTS = [{
- 'url': 'http://www.playvid.com/watch/RnmBNgtrrJu',
- 'md5': 'ffa2f6b2119af359f544388d8c01eb6c',
- 'info_dict': {
- 'id': 'RnmBNgtrrJu',
- 'ext': 'mp4',
- 'title': 'md5:9256d01c6317e3f703848b5906880dc8',
- 'duration': 82,
- 'age_limit': 18,
- },
- 'skip': 'Video removed due to ToS',
- }, {
- 'url': 'http://www.playvid.com/watch/hwb0GpNkzgH',
- 'md5': '39d49df503ad7b8f23a4432cbf046477',
- 'info_dict': {
- 'id': 'hwb0GpNkzgH',
- 'ext': 'mp4',
- 'title': 'Ellen Euro Cutie Blond Takes a Sexy Survey Get Facial in The Park',
- 'age_limit': 18,
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- m_error = re.search(
- r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>', webpage)
- if m_error:
- raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
-
- video_title = None
- duration = None
- video_thumbnail = None
- formats = []
-
- # most of the information is stored in the flashvars
- flashvars = self._html_search_regex(
- r'flashvars="(.+?)"', webpage, 'flashvars')
-
- infos = compat_urllib_parse_unquote(flashvars).split(r'&')
- for info in infos:
- videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info)
- if videovars_match:
- key = videovars_match.group(1)
- val = videovars_match.group(2)
-
- if key == 'title':
- video_title = urllib.parse.unquote_plus(val)
- if key == 'duration':
- try:
- duration = int(val)
- except ValueError:
- pass
- if key == 'big_thumb':
- video_thumbnail = val
-
- videourl_match = re.match(
- r'^video_urls\]\[(?P<resolution>[0-9]+)p', key)
- if videourl_match:
- height = int(videourl_match.group('resolution'))
- formats.append({
- 'height': height,
- 'url': val,
- })
-
- # Extract title - should be in the flashvars; if not, look elsewhere
- if video_title is None:
- video_title = self._html_extract_title(webpage)
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': video_title,
- 'thumbnail': video_thumbnail,
- 'duration': duration,
- 'description': None,
- 'age_limit': 18
- }
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
- int_or_none,
- js_to_json,
- parse_filesize,
- str_to_int,
-)
-
-
-class PornComIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[a-zA-Z]+\.)?porn\.com/videos/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
- _TESTS = [{
- 'url': 'http://www.porn.com/videos/teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec-2603339',
- 'md5': '3f30ce76267533cd12ba999263156de7',
- 'info_dict': {
- 'id': '2603339',
- 'display_id': 'teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec',
- 'ext': 'mp4',
- 'title': 'Teen grabs a dildo and fucks her pussy live on 1hottie, I rec',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 551,
- 'view_count': int,
- 'age_limit': 18,
- 'categories': list,
- 'tags': list,
- },
- }, {
- 'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id') or video_id
-
- webpage = self._download_webpage(url, display_id)
-
- config = self._parse_json(
- self._search_regex(
- (r'=\s*({.+?})\s*;\s*v1ar\b',
- r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*='),
- webpage, 'config', default='{}'),
- display_id, transform_source=js_to_json, fatal=False)
-
- if config:
- title = config['title']
- formats = [{
- 'url': stream['url'],
- 'format_id': stream.get('id'),
- 'height': int_or_none(self._search_regex(
- r'^(\d+)[pP]', stream.get('id') or '', 'height', default=None))
- } for stream in config['streams'] if stream.get('url')]
- thumbnail = (compat_urlparse.urljoin(
- config['thumbCDN'], config['poster'])
- if config.get('thumbCDN') and config.get('poster') else None)
- duration = int_or_none(config.get('length'))
- else:
- title = self._search_regex(
- (r'<title>([^<]+)</title>', r'<h1[^>]*>([^<]+)</h1>'),
- webpage, 'title')
- formats = [{
- 'url': compat_urlparse.urljoin(url, format_url),
- 'format_id': '%sp' % height,
- 'height': int(height),
- 'filesize_approx': parse_filesize(filesize),
- } for format_url, height, filesize in re.findall(
- r'<a[^>]+href="(/download/[^"]+)">[^<]*?(\d+)p<span[^>]*>(\d+\s*[a-zA-Z]+)<',
- webpage)]
- thumbnail = None
- duration = None
-
- view_count = str_to_int(self._search_regex(
- (r'Views:\s*</span>\s*<span>\s*([\d,.]+)',
- r'class=["\']views["\'][^>]*><p>([\d,.]+)'), webpage,
- 'view count', fatal=False))
-
- def extract_list(kind):
- s = self._search_regex(
- (r'(?s)%s:\s*</span>\s*<span>(.+?)</span>' % kind.capitalize(),
- r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize()),
- webpage, kind, fatal=False)
- return re.findall(r'<a[^>]+>([^<]+)</a>', s or '')
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'view_count': view_count,
- 'formats': formats,
- 'age_limit': 18,
- 'categories': extract_list('categories'),
- 'tags': extract_list('tags'),
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- clean_html,
- int_or_none,
- get_element_by_class,
- urljoin,
-)
-
-
-class PornezIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?pornez\.net/(?:video(?P<id>\w+)|watch)/'
- _TESTS = [{
- 'url': 'https://pornez.net/video344819/mistresst-funny_penis_names-wmv/',
- 'info_dict': {
- 'id': '344819',
- 'ext': 'mp4',
- 'title': 'mistresst funny_penis_names wmv',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'age_limit': 18,
- },
- 'params': {'skip_download': 'm3u8'},
- }, {
- 'url': 'https://pornez.net/watch/leana+lovings+stiff+for+stepdaughter/',
- 'info_dict': {
- 'id': '156161',
- 'ext': 'mp4',
- 'title': 'Watch leana lovings stiff for stepdaughter porn video.',
- 'age_limit': 18,
- },
- 'params': {'skip_download': 'm3u8'},
- }, {
- 'url': 'https://pornez.net/videovzs27fj/tutor4k-e14-blue-wave-1080p-nbq-tutor4k-e14-blue-wave/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- if not video_id:
- video_id = self._search_regex(
- r'<link[^>]+\bhref=["\']https?://pornez.net/\?p=(\w+)["\']', webpage, 'id')
-
- iframe_src = self._html_search_regex(r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe')
- iframe = self._download_webpage(urljoin('https://pornez.net', iframe_src), video_id)
-
- entries = self._parse_html5_media_entries(iframe_src, iframe, video_id)[0]
- for fmt in entries['formats']:
- height = self._search_regex(r'_(\d+)\.m3u8', fmt['url'], 'height')
- fmt['format_id'] = '%sp' % height
- fmt['height'] = int_or_none(height)
-
- entries.update({
- 'id': video_id,
- 'title': (clean_html(get_element_by_class('video-title', webpage))
- or self._html_search_meta(
- ['twitter:title', 'og:title', 'description'], webpage, 'title', default=None)),
- 'thumbnail': self._html_search_meta(['thumbnailUrl'], webpage, 'thumb', default=None),
- 'age_limit': 18,
- })
- return entries
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- ExtractorError,
- int_or_none,
- js_to_json,
- merge_dicts,
- urljoin,
-)
-
-
-class PornHdIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
- _TESTS = [{
- 'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
- 'md5': '87f1540746c1d32ec7a2305c12b96b25',
- 'info_dict': {
- 'id': '9864',
- 'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
- 'ext': 'mp4',
- 'title': 'Restroom selfie masturbation',
- 'description': 'md5:3748420395e03e31ac96857a8f125b2b',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'view_count': int,
- 'like_count': int,
- 'age_limit': 18,
- },
- 'skip': 'HTTP Error 404: Not Found',
- }, {
- 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
- 'md5': '1b7b3a40b9d65a8e5b25f7ab9ee6d6de',
- 'info_dict': {
- 'id': '1962',
- 'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
- 'ext': 'mp4',
- 'title': 'md5:98c6f8b2d9c229d0f0fde47f61a1a759',
- 'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'view_count': int,
- 'like_count': int,
- 'age_limit': 18,
- },
- }]
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id')
-
- webpage = self._download_webpage(url, display_id or video_id)
-
- title = self._html_search_regex(
- [r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)',
- r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
-
- sources = self._parse_json(js_to_json(self._search_regex(
- r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
- webpage, 'sources', default='{}')), video_id)
-
- info = {}
- if not sources:
- entries = self._parse_html5_media_entries(url, webpage, video_id)
- if entries:
- info = entries[0]
-
- if not sources and not info:
- message = self._html_search_regex(
- r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1',
- webpage, 'error message', group='value')
- raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
-
- formats = []
- for format_id, video_url in sources.items():
- video_url = urljoin(url, video_url)
- if not video_url:
- continue
- height = int_or_none(self._search_regex(
- r'^(\d+)[pP]', format_id, 'height', default=None))
- formats.append({
- 'url': video_url,
- 'ext': determine_ext(video_url, 'mp4'),
- 'format_id': format_id,
- 'height': height,
- })
- if formats:
- info['formats'] = formats
-
- description = self._html_search_regex(
- (r'(?s)<section[^>]+class=["\']video-description[^>]+>(?P<value>.+?)</section>',
- r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1'),
- webpage, 'description', fatal=False,
- group='value') or self._html_search_meta(
- 'description', webpage, default=None) or self._og_search_description(webpage)
- view_count = int_or_none(self._html_search_regex(
- r'(\d+) views\s*<', webpage, 'view count', fatal=False))
- thumbnail = self._search_regex(
- r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
- 'thumbnail', default=None, group='url')
-
- like_count = int_or_none(self._search_regex(
- (r'(\d+)</span>\s*likes',
- r'(\d+)\s*</11[^>]+>(?: |\s)*\blikes',
- r'class=["\']save-count["\'][^>]*>\s*(\d+)'),
- webpage, 'like count', fatal=False))
-
- return merge_dicts(info, {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'view_count': view_count,
- 'like_count': like_count,
- 'formats': formats,
- 'age_limit': 18,
- })
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..utils import parse_duration
-
-
-class RadioBremenIE(InfoExtractor):
- _VALID_URL = r'http?://(?:www\.)?radiobremen\.de/mediathek/(?:index\.html)?\?id=(?P<id>[0-9]+)'
- IE_NAME = 'radiobremen'
-
- _TEST = {
- 'url': 'http://www.radiobremen.de/mediathek/?id=141876',
- 'info_dict': {
- 'id': '141876',
- 'ext': 'mp4',
- 'duration': 178,
- 'width': 512,
- 'title': 'Druck auf Patrick Öztürk',
- 'thumbnail': r're:https?://.*\.jpg$',
- 'description': 'Gegen den SPD-Bürgerschaftsabgeordneten Patrick Öztürk wird wegen Beihilfe zum gewerbsmäßigen Betrug ermittelt. Am Donnerstagabend sollte er dem Vorstand des SPD-Unterbezirks Bremerhaven dazu Rede und Antwort stehen.',
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- meta_url = 'http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s' % video_id
- meta_doc = self._download_webpage(
- meta_url, video_id, 'Downloading metadata')
- title = self._html_search_regex(
- r'<h1.*>(?P<title>.+)</h1>', meta_doc, 'title')
- description = self._html_search_regex(
- r'<p>(?P<description>.*)</p>', meta_doc, 'description', fatal=False)
- duration = parse_duration(self._html_search_regex(
- r'Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>',
- meta_doc, 'duration', fatal=False))
-
- page_doc = self._download_webpage(
- url, video_id, 'Downloading video information')
- mobj = re.search(
- r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)",
- page_doc)
- video_url = (
- "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" %
- (video_id, video_id, mobj.group("secret"), mobj.group('width')))
-
- formats = [{
- 'url': video_url,
- 'ext': 'mp4',
- 'width': int(mobj.group('width')),
- }]
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'formats': formats,
- 'thumbnail': mobj.group('thumbnail'),
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..networking.exceptions import HTTPError
-from ..utils import ExtractorError, merge_dicts
-
-
-class RecurbateIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?recurbate\.com/play\.php\?video=(?P<id>\d+)'
- _TESTS = [{
- 'url': 'https://recurbate.com/play.php?video=39161415',
- 'md5': 'dd2b4ec57aa3e3572cb5cf0997fca99f',
- 'info_dict': {
- 'id': '39161415',
- 'ext': 'mp4',
- 'description': 'md5:db48d09e4d93fc715f47fd3d6b7edd51',
- 'title': 'Performer zsnicole33 show on 2022-10-25 20:23, Chaturbate Archive – Recurbate',
- 'age_limit': 18,
- },
- 'skip': 'Website require membership.',
- }]
-
- def _real_extract(self, url):
- SUBSCRIPTION_MISSING_MESSAGE = 'This video is only available for registered users; Set your authenticated browser user agent via the --user-agent parameter.'
- video_id = self._match_id(url)
- try:
- webpage = self._download_webpage(url, video_id)
- except ExtractorError as e:
- if isinstance(e.cause, HTTPError) and e.cause.status == 403:
- self.raise_login_required(msg=SUBSCRIPTION_MISSING_MESSAGE, method='cookies')
- raise
- token = self._html_search_regex(r'data-token="([^"]+)"', webpage, 'token')
- video_url = f'https://recurbate.com/api/get.php?video={video_id}&token={token}'
-
- video_webpage = self._download_webpage(video_url, video_id)
- if video_webpage == 'shall_subscribe':
- self.raise_login_required(msg=SUBSCRIPTION_MISSING_MESSAGE, method='cookies')
- entries = self._parse_html5_media_entries(video_url, video_webpage, video_id)
- return merge_dicts({
- 'id': video_id,
- 'title': self._html_extract_title(webpage, 'title'),
- 'description': self._og_search_description(webpage),
- 'age_limit': self._rta_search(webpage),
- }, entries[0])
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_parse_qs
-from ..utils import (
- xpath_text,
- xpath_element,
- int_or_none,
- parse_iso8601,
- ExtractorError,
-)
-
-
-class RICEIE(InfoExtractor):
- _VALID_URL = r'https?://mediahub\.rice\.edu/app/[Pp]ortal/video\.aspx\?(?P<query>.+)'
- _TEST = {
- 'url': 'https://mediahub.rice.edu/app/Portal/video.aspx?PortalID=25ffd62c-3d01-4b29-8c70-7c94270efb3e&DestinationID=66bc9434-03bd-4725-b47e-c659d8d809db&ContentID=YEWIvbhb40aqdjMD1ALSqw',
- 'md5': '9b83b4a2eead4912dc3b7fac7c449b6a',
- 'info_dict': {
- 'id': 'YEWIvbhb40aqdjMD1ALSqw',
- 'ext': 'mp4',
- 'title': 'Active Learning in Archeology',
- 'upload_date': '20140616',
- 'timestamp': 1402926346,
- }
- }
- _NS = 'http://schemas.datacontract.org/2004/07/ensembleVideo.Data.Service.Contracts.Models.Player.Config'
-
- def _real_extract(self, url):
- qs = compat_parse_qs(self._match_valid_url(url).group('query'))
- if not qs.get('PortalID') or not qs.get('DestinationID') or not qs.get('ContentID'):
- raise ExtractorError('Invalid URL', expected=True)
-
- portal_id = qs['PortalID'][0]
- playlist_id = qs['DestinationID'][0]
- content_id = qs['ContentID'][0]
-
- content_data = self._download_xml('https://mediahub.rice.edu/api/portal/GetContentTitle', content_id, query={
- 'portalId': portal_id,
- 'playlistId': playlist_id,
- 'contentId': content_id
- })
- metadata = xpath_element(content_data, './/metaData', fatal=True)
- title = xpath_text(metadata, 'primaryTitle', fatal=True)
- encodings = xpath_element(content_data, './/encodings', fatal=True)
- player_data = self._download_xml('https://mediahub.rice.edu/api/player/GetPlayerConfig', content_id, query={
- 'temporaryLinkId': xpath_text(encodings, 'temporaryLinkId', fatal=True),
- 'contentId': content_id,
- })
-
- common_fmt = {}
- dimensions = xpath_text(encodings, 'dimensions')
- if dimensions:
- wh = dimensions.split('x')
- if len(wh) == 2:
- common_fmt.update({
- 'width': int_or_none(wh[0]),
- 'height': int_or_none(wh[1]),
- })
-
- formats = []
- rtsp_path = xpath_text(player_data, self._xpath_ns('RtspPath', self._NS))
- if rtsp_path:
- fmt = {
- 'url': rtsp_path,
- 'format_id': 'rtsp',
- }
- fmt.update(common_fmt)
- formats.append(fmt)
- for source in player_data.findall(self._xpath_ns('.//Source', self._NS)):
- video_url = xpath_text(source, self._xpath_ns('File', self._NS))
- if not video_url:
- continue
- if '.m3u8' in video_url:
- formats.extend(self._extract_m3u8_formats(video_url, content_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
- else:
- fmt = {
- 'url': video_url,
- 'format_id': video_url.split(':')[0],
- }
- fmt.update(common_fmt)
- rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url)
- if rtmp:
- fmt.update({
- 'url': rtmp.group('url'),
- 'play_path': rtmp.group('playpath'),
- 'app': rtmp.group('app'),
- 'ext': 'flv',
- })
- formats.append(fmt)
-
- thumbnails = []
- for content_asset in content_data.findall('.//contentAssets'):
- asset_type = xpath_text(content_asset, 'type')
- if asset_type == 'image':
- image_url = xpath_text(content_asset, 'httpPath')
- if not image_url:
- continue
- thumbnails.append({
- 'id': xpath_text(content_asset, 'ID'),
- 'url': image_url,
- })
-
- return {
- 'id': content_id,
- 'title': title,
- 'description': xpath_text(metadata, 'abstract'),
- 'duration': int_or_none(xpath_text(metadata, 'duration')),
- 'timestamp': parse_iso8601(xpath_text(metadata, 'dateUpdated')),
- 'thumbnails': thumbnails,
- 'formats': formats,
- }
import re
from .common import InfoExtractor
-from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
-from ..compat import (
- compat_b64decode,
- compat_str,
-)
-from ..utils import (
- ExtractorError,
- int_or_none,
- strip_or_none,
-)
+from ..utils import int_or_none
class RTL2IE(InfoExtractor):
'duration': int_or_none(video_info.get('duration')),
'formats': formats,
}
-
-
-class RTL2YouBaseIE(InfoExtractor):
- _BACKWERK_BASE_URL = 'https://p-you-backwerk.rtl2apps.de/'
-
-
-class RTL2YouIE(RTL2YouBaseIE):
- IE_NAME = 'rtl2:you'
- _VALID_URL = r'http?://you\.rtl2\.de/(?:video/\d+/|youplayer/index\.html\?.*?\bvid=)(?P<id>\d+)'
- _TESTS = [{
- 'url': 'http://you.rtl2.de/video/3002/15740/MJUNIK%20%E2%80%93%20Home%20of%20YOU/307-hirn-wo-bist-du',
- 'info_dict': {
- 'id': '15740',
- 'ext': 'mp4',
- 'title': 'MJUNIK – Home of YOU - #307 Hirn, wo bist du?!',
- 'description': 'md5:ddaa95c61b372b12b66e115b2772fe01',
- 'age_limit': 12,
- },
- }, {
- 'url': 'http://you.rtl2.de/youplayer/index.html?vid=15712',
- 'only_matching': True,
- }]
- _AES_KEY = b'\xe9W\xe4.<*\xb8\x1a\xd2\xb6\x92\xf3C\xd3\xefL\x1b\x03*\xbbbH\xc0\x03\xffo\xc2\xf2(\xaa\xaa!'
- _GEO_COUNTRIES = ['DE']
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- stream_data = self._download_json(
- self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id)
-
- data, iv = compat_b64decode(stream_data['streamUrl']).decode().split(':')
- stream_url = unpad_pkcs7(aes_cbc_decrypt_bytes(
- compat_b64decode(data), self._AES_KEY, compat_b64decode(iv)))
- if b'rtl2_you_video_not_found' in stream_url:
- raise ExtractorError('video not found', expected=True)
-
- formats = self._extract_m3u8_formats(stream_url.decode(), video_id, 'mp4', 'm3u8_native')
-
- video_data = self._download_json(
- self._BACKWERK_BASE_URL + 'video/' + video_id, video_id)
-
- series = video_data.get('formatTitle')
- title = episode = video_data.get('title') or series
- if series and series != title:
- title = '%s - %s' % (series, title)
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'description': strip_or_none(video_data.get('description')),
- 'thumbnail': video_data.get('image'),
- 'duration': int_or_none(stream_data.get('duration') or video_data.get('duration'), 1000),
- 'series': series,
- 'episode': episode,
- 'age_limit': int_or_none(video_data.get('minimumAge')),
- }
-
-
-class RTL2YouSeriesIE(RTL2YouBaseIE):
- IE_NAME = 'rtl2:you:series'
- _VALID_URL = r'http?://you\.rtl2\.de/videos/(?P<id>\d+)'
- _TEST = {
- 'url': 'http://you.rtl2.de/videos/115/dragon-ball',
- 'info_dict': {
- 'id': '115',
- },
- 'playlist_mincount': 5,
- }
-
- def _real_extract(self, url):
- series_id = self._match_id(url)
- stream_data = self._download_json(
- self._BACKWERK_BASE_URL + 'videos',
- series_id, query={
- 'formatId': series_id,
- 'limit': 1000000000,
- })
-
- entries = []
- for video in stream_data.get('videos', []):
- video_id = compat_str(video['videoId'])
- if not video_id:
- continue
- entries.append(self.url_result(
- 'http://you.rtl2.de/video/%s/%s' % (series_id, video_id),
- 'RTL2You', video_id))
- return self.playlist_result(entries, series_id)
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import ExtractorError
-
-
-class RTVNHIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.rtvnh.nl/video/131946',
- 'md5': 'cdbec9f44550763c8afc96050fa747dc',
- 'info_dict': {
- 'id': '131946',
- 'ext': 'mp4',
- 'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw',
- 'thumbnail': r're:^https?:.*\.jpg$'
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- meta = self._parse_json(self._download_webpage(
- 'http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id)
-
- status = meta.get('status')
- if status != 200:
- raise ExtractorError(
- '%s returned error code %d' % (self.IE_NAME, status), expected=True)
-
- formats = []
- rtmp_formats = self._extract_smil_formats(
- 'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id)
- formats.extend(rtmp_formats)
-
- for rtmp_format in rtmp_formats:
- rtmp_url = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
- rtsp_format = rtmp_format.copy()
- del rtsp_format['play_path']
- del rtsp_format['ext']
- rtsp_format.update({
- 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
- 'url': rtmp_url.replace('rtmp://', 'rtsp://'),
- 'protocol': 'rtsp',
- })
- formats.append(rtsp_format)
- http_base_url = rtmp_url.replace('rtmp://', 'http://')
- formats.extend(self._extract_m3u8_formats(
- http_base_url + '/playlist.m3u8', video_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False))
- formats.extend(self._extract_f4m_formats(
- http_base_url + '/manifest.f4m',
- video_id, f4m_id='hds', fatal=False))
-
- return {
- 'id': video_id,
- 'title': meta['title'].strip(),
- 'thumbnail': meta.get('image'),
- 'formats': formats
- }
+++ /dev/null
-from .common import InfoExtractor
-
-
-class RUHDIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)'
- _TEST = {
- 'url': 'http://www.ruhd.ru/play.php?vid=207',
- 'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83',
- 'info_dict': {
- 'id': '207',
- 'ext': 'divx',
- 'title': 'КОТ бааааам',
- 'description': 'классный кот)',
- 'thumbnail': r're:^http://.*\.jpg$',
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- video_url = self._html_search_regex(
- r'<param name="src" value="([^"]+)"', webpage, 'video url')
- title = self._html_search_regex(
- r'<title>([^<]+) RUHD\.ru - Видео Высокого качества №1 в России!</title>',
- webpage, 'title')
- description = self._html_search_regex(
- r'(?s)<div id="longdesc">(.+?)<span id="showlink">',
- webpage, 'description', fatal=False)
- thumbnail = self._html_search_regex(
- r'<param name="previewImage" value="([^"]+)"',
- webpage, 'thumbnail', fatal=False)
- if thumbnail:
- thumbnail = 'http://www.ruhd.ru' + thumbnail
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- }
class SCTEIE(SCTEBaseIE):
+ _WORKING = False
_VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php?.*?\bid=(?P<id>\d+)'
_TESTS = [{
'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484',
class SCTECourseIE(SCTEBaseIE):
+ _WORKING = False
_VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P<id>\d+)'
_TESTS = [{
'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491',
+++ /dev/null
-import urllib.parse
-
-from .common import InfoExtractor
-from ..compat import compat_b64decode
-from ..utils import (
- KNOWN_EXTENSIONS,
- ExtractorError,
- determine_ext,
- int_or_none,
- js_to_json,
- parse_filesize,
- rot47,
- url_or_none,
- urlencode_postdata,
-)
-
-
-class SharedBaseIE(InfoExtractor):
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage, urlh = self._download_webpage_handle(url, video_id)
-
- if self._FILE_NOT_FOUND in webpage:
- raise ExtractorError(
- 'Video %s does not exist' % video_id, expected=True)
-
- video_url = self._extract_video_url(webpage, video_id, url)
-
- title = self._extract_title(webpage)
- filesize = int_or_none(self._extract_filesize(webpage))
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'ext': 'mp4',
- 'filesize': filesize,
- 'title': title,
- }
-
- def _extract_title(self, webpage):
- return compat_b64decode(self._html_search_meta(
- 'full:title', webpage, 'title')).decode('utf-8')
-
- def _extract_filesize(self, webpage):
- return self._html_search_meta(
- 'full:size', webpage, 'file size', fatal=False)
-
-
-class SharedIE(SharedBaseIE):
- IE_DESC = 'shared.sx'
- _VALID_URL = r'https?://shared\.sx/(?P<id>[\da-z]{10})'
- _FILE_NOT_FOUND = '>File does not exist<'
-
- _TEST = {
- 'url': 'http://shared.sx/0060718775',
- 'md5': '106fefed92a8a2adb8c98e6a0652f49b',
- 'info_dict': {
- 'id': '0060718775',
- 'ext': 'mp4',
- 'title': 'Bmp4',
- 'filesize': 1720110,
- },
- }
-
- def _extract_video_url(self, webpage, video_id, url):
- download_form = self._hidden_inputs(webpage)
-
- video_page = self._download_webpage(
- url, video_id, 'Downloading video page',
- data=urlencode_postdata(download_form),
- headers={
- 'Content-Type': 'application/x-www-form-urlencoded',
- 'Referer': url,
- })
-
- video_url = self._html_search_regex(
- r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
- video_page, 'video URL', group='url')
-
- return video_url
-
-
-class VivoIE(SharedBaseIE):
- IE_DESC = 'vivo.sx'
- _VALID_URL = r'https?://vivo\.s[xt]/(?P<id>[\da-z]{10})'
- _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed'
-
- _TESTS = [{
- 'url': 'http://vivo.sx/d7ddda0e78',
- 'md5': '15b3af41be0b4fe01f4df075c2678b2c',
- 'info_dict': {
- 'id': 'd7ddda0e78',
- 'ext': 'mp4',
- 'title': 'Chicken',
- 'filesize': 515659,
- },
- }, {
- 'url': 'http://vivo.st/d7ddda0e78',
- 'only_matching': True,
- }]
-
- def _extract_title(self, webpage):
- title = self._html_search_regex(
- r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage,
- 'title', default=None, group='title')
- if title:
- ext = determine_ext(title)
- if ext.lower() in KNOWN_EXTENSIONS:
- title = title.rpartition('.' + ext)[0]
- return title
- return self._og_search_title(webpage)
-
- def _extract_filesize(self, webpage):
- return parse_filesize(self._search_regex(
- r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)',
- webpage, 'filesize', fatal=False))
-
- def _extract_video_url(self, webpage, video_id, url):
- def decode_url_old(encoded_url):
- return compat_b64decode(encoded_url).decode('utf-8')
-
- stream_url = self._search_regex(
- r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
- 'stream url', default=None, group='url')
- if stream_url:
- stream_url = url_or_none(decode_url_old(stream_url))
- if stream_url:
- return stream_url
-
- def decode_url(encoded_url):
- return rot47(urllib.parse.unquote_plus(encoded_url))
-
- return decode_url(self._parse_json(
- self._search_regex(
- r'(?s)InitializeStream\s*\(\s*({.+?})\s*\)\s*;', webpage,
- 'stream'),
- video_id, transform_source=js_to_json)['source'])
from .common import InfoExtractor
from ..utils import (
extract_attributes,
- smuggle_url,
strip_or_none,
- urljoin,
)
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
_SDC_EL_REGEX = r'(?s)(<div[^>]+data-(?:component-name|fn)="sdc-(?:articl|sit)e-video"[^>]*>)'
- def _process_ooyala_element(self, webpage, sdc_el, url):
+ def _process_video_element(self, webpage, sdc_el, url):
sdc = extract_attributes(sdc_el)
provider = sdc.get('data-provider')
- if provider == 'ooyala':
- video_id = sdc['data-sdc-video-id']
- video_url = 'ooyala:%s' % video_id
- ie_key = 'Ooyala'
- ooyala_el = self._search_regex(
- r'(<div[^>]+class="[^"]*\bsdc-article-video__media-ooyala\b[^"]*"[^>]+data-video-id="%s"[^>]*>)' % video_id,
- webpage, 'video data', fatal=False)
- if ooyala_el:
- ooyala_attrs = extract_attributes(ooyala_el) or {}
- if ooyala_attrs.get('data-token-required') == 'true':
- token_fetch_url = (self._parse_json(ooyala_attrs.get(
- 'data-token-fetch-options', '{}'),
- video_id, fatal=False) or {}).get('url')
- if token_fetch_url:
- embed_token = self._download_json(urljoin(
- url, token_fetch_url), video_id, fatal=False)
- if embed_token:
- video_url = smuggle_url(
- video_url, {'embed_token': embed_token})
- elif provider == 'brightcove':
+ if provider == 'brightcove':
video_id = sdc['data-video-id']
account_id = sdc.get('data-account-id') or '6058004172001'
player_id = sdc.get('data-player-id') or 'RC9PQUaJ6'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- info = self._process_ooyala_element(webpage, self._search_regex(
+ info = self._process_video_element(webpage, self._search_regex(
self._SDC_EL_REGEX, webpage, 'sdc element'), url)
info.update({
'title': self._og_search_title(webpage),
'title': 'Bale: It\'s our time to shine',
'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d',
},
- 'add_ie': ['Ooyala'],
+ 'add_ie': ['BrightcoveNew'],
}, {
'url': 'https://www.skysports.com/watch/video/sports/f1/12160544/abu-dhabi-gp-the-notebook',
'only_matching': True,
article_id = self._match_id(url)
webpage = self._download_webpage(url, article_id)
- entries = [self._process_ooyala_element(webpage, sdc_el, url)
+ entries = [self._process_video_element(webpage, sdc_el, url)
for sdc_el in re.findall(self._SDC_EL_REGEX, webpage)]
return self.playlist_result(
entries = []
for sdc_el in re.findall(self._SDC_EL_REGEX, webpage):
- entries.append(self._process_ooyala_element(webpage, sdc_el, url))
+ entries.append(self._process_video_element(webpage, sdc_el, url))
return self.playlist_result(
entries, article_id, self._og_search_title(webpage),
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- float_or_none,
- int_or_none,
- merge_dicts,
- str_or_none,
- str_to_int,
- url_or_none,
-)
-
-
-class SpankwireIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- https?://
- (?:www\.)?spankwire\.com/
- (?:
- [^/]+/video|
- EmbedPlayer\.aspx/?\?.*?\bArticleId=
- )
- (?P<id>\d+)
- '''
- _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?spankwire\.com/EmbedPlayer\.aspx/?\?.*?\bArticleId=\d+)']
- _TESTS = [{
- # download URL pattern: */<height>P_<tbr>K_<video_id>.mp4
- 'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
- 'md5': '5aa0e4feef20aad82cbcae3aed7ab7cd',
- 'info_dict': {
- 'id': '103545',
- 'ext': 'mp4',
- 'title': 'Buckcherry`s X Rated Music Video Crazy Bitch',
- 'description': 'Crazy Bitch X rated music video.',
- 'duration': 222,
- 'uploader': 'oreusz',
- 'uploader_id': '124697',
- 'timestamp': 1178587885,
- 'upload_date': '20070508',
- 'average_rating': float,
- 'view_count': int,
- 'comment_count': int,
- 'age_limit': 18,
- 'categories': list,
- 'tags': list,
- },
- }, {
- # download URL pattern: */mp4_<format_id>_<video_id>.mp4
- 'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/',
- 'md5': '09b3c20833308b736ae8902db2f8d7e6',
- 'info_dict': {
- 'id': '1921551',
- 'ext': 'mp4',
- 'title': 'Titcums Compiloation I',
- 'description': 'cum on tits',
- 'uploader': 'dannyh78999',
- 'uploader_id': '3056053',
- 'upload_date': '20150822',
- 'age_limit': 18,
- },
- 'params': {
- 'proxy': '127.0.0.1:8118'
- },
- 'skip': 'removed',
- }, {
- 'url': 'https://www.spankwire.com/EmbedPlayer.aspx/?ArticleId=156156&autostart=true',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- video = self._download_json(
- 'https://www.spankwire.com/api/video/%s.json' % video_id, video_id)
-
- title = video['title']
-
- formats = []
- videos = video.get('videos')
- if isinstance(videos, dict):
- for format_id, format_url in videos.items():
- video_url = url_or_none(format_url)
- if not format_url:
- continue
- height = int_or_none(self._search_regex(
- r'(\d+)[pP]', format_id, 'height', default=None))
- m = re.search(
- r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', video_url)
- if m:
- tbr = int(m.group('tbr'))
- height = height or int(m.group('height'))
- else:
- tbr = None
- formats.append({
- 'url': video_url,
- 'format_id': '%dp' % height if height else format_id,
- 'height': height,
- 'tbr': tbr,
- })
- m3u8_url = url_or_none(video.get('HLS'))
- if m3u8_url:
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
-
- view_count = str_to_int(video.get('viewed'))
-
- thumbnails = []
- for preference, t in enumerate(('', '2x'), start=0):
- thumbnail_url = url_or_none(video.get('poster%s' % t))
- if not thumbnail_url:
- continue
- thumbnails.append({
- 'url': thumbnail_url,
- 'preference': preference,
- })
-
- def extract_names(key):
- entries_list = video.get(key)
- if not isinstance(entries_list, list):
- return
- entries = []
- for entry in entries_list:
- name = str_or_none(entry.get('name'))
- if name:
- entries.append(name)
- return entries
-
- categories = extract_names('categories')
- tags = extract_names('tags')
-
- uploader = None
- info = {}
-
- webpage = self._download_webpage(
- 'https://www.spankwire.com/_/video%s/' % video_id, video_id,
- fatal=False)
- if webpage:
- info = self._search_json_ld(webpage, video_id, default={})
- thumbnail_url = None
- if 'thumbnail' in info:
- thumbnail_url = url_or_none(info['thumbnail'])
- del info['thumbnail']
- if not thumbnail_url:
- thumbnail_url = self._og_search_thumbnail(webpage)
- if thumbnail_url:
- thumbnails.append({
- 'url': thumbnail_url,
- 'preference': 10,
- })
- uploader = self._html_search_regex(
- r'(?s)by\s*<a[^>]+\bclass=["\']uploaded__by[^>]*>(.+?)</a>',
- webpage, 'uploader', fatal=False)
- if not view_count:
- view_count = str_to_int(self._search_regex(
- r'data-views=["\']([\d,.]+)', webpage, 'view count',
- fatal=False))
-
- return merge_dicts({
- 'id': video_id,
- 'title': title,
- 'description': video.get('description'),
- 'duration': int_or_none(video.get('duration')),
- 'thumbnails': thumbnails,
- 'uploader': uploader,
- 'uploader_id': str_or_none(video.get('userId')),
- 'timestamp': int_or_none(video.get('time_approved_on')),
- 'average_rating': float_or_none(video.get('rating')),
- 'view_count': view_count,
- 'comment_count': int_or_none(video.get('comments')),
- 'age_limit': 18,
- 'categories': categories,
- 'tags': tags,
- 'formats': formats,
- }, info)
class SRMediathekIE(ARDMediathekBaseIE):
+ _WORKING = False
IE_NAME = 'sr:mediathek'
IE_DESC = 'Saarländischer Rundfunk'
_VALID_URL = r'https?://sr-mediathek(?:\.sr-online)?\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- urlencode_postdata,
-)
-
-
-class StreamcloudIE(InfoExtractor):
- IE_NAME = 'streamcloud.eu'
- _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?'
-
- _TESTS = [{
- 'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
- 'md5': '6bea4c7fa5daaacc2a946b7146286686',
- 'info_dict': {
- 'id': 'skp9j99s4bpz',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video \'/\\ ä ↭',
- },
- 'skip': 'Only available from the EU'
- }, {
- 'url': 'http://streamcloud.eu/ua8cmfh1nbe6/NSHIP-148--KUC-NG--H264-.mp4.html',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- url = 'http://streamcloud.eu/%s' % video_id
-
- orig_webpage = self._download_webpage(url, video_id)
-
- if '>File Not Found<' in orig_webpage:
- raise ExtractorError(
- 'Video %s does not exist' % video_id, expected=True)
-
- fields = re.findall(r'''(?x)<input\s+
- type="(?:hidden|submit)"\s+
- name="([^"]+)"\s+
- (?:id="[^"]+"\s+)?
- value="([^"]*)"
- ''', orig_webpage)
-
- self._sleep(6, video_id)
-
- webpage = self._download_webpage(
- url, video_id, data=urlencode_postdata(fields), headers={
- b'Content-Type': b'application/x-www-form-urlencoded',
- })
-
- try:
- title = self._html_search_regex(
- r'<h1[^>]*>([^<]+)<', webpage, 'title')
- video_url = self._search_regex(
- r'file:\s*"([^"]+)"', webpage, 'video URL')
- except ExtractorError:
- message = self._html_search_regex(
- r'(?s)<div[^>]+class=(["\']).*?msgboxinfo.*?\1[^>]*>(?P<message>.+?)</div>',
- webpage, 'message', default=None, group='message')
- if message:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
- raise
- thumbnail = self._search_regex(
- r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False)
-
- return {
- 'id': video_id,
- 'title': title,
- 'url': video_url,
- 'thumbnail': thumbnail,
- 'http_headers': {
- 'Referer': url,
- },
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- parse_duration,
- int_or_none,
- determine_protocol,
-)
-
-
-class SWRMediathekIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/(?:content/)?player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
-
- _TESTS = [{
- 'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6',
- 'md5': '8c5f6f0172753368547ca8413a7768ac',
- 'info_dict': {
- 'id': '849790d0-dab8-11e3-a953-0026b975f2e6',
- 'ext': 'mp4',
- 'title': 'SWR odysso',
- 'description': 'md5:2012e31baad36162e97ce9eb3f157b8a',
- 'thumbnail': r're:^http:.*\.jpg$',
- 'duration': 2602,
- 'upload_date': '20140515',
- 'uploader': 'SWR Fernsehen',
- 'uploader_id': '990030',
- },
- }, {
- 'url': 'http://swrmediathek.de/player.htm?show=0e1a8510-ddf2-11e3-9be3-0026b975f2e6',
- 'md5': 'b10ab854f912eecc5a6b55cd6fc1f545',
- 'info_dict': {
- 'id': '0e1a8510-ddf2-11e3-9be3-0026b975f2e6',
- 'ext': 'mp4',
- 'title': 'Nachtcafé - Alltagsdroge Alkohol - zwischen Sektempfang und Komasaufen',
- 'description': 'md5:e0a3adc17e47db2c23aab9ebc36dbee2',
- 'thumbnail': r're:http://.*\.jpg',
- 'duration': 5305,
- 'upload_date': '20140516',
- 'uploader': 'SWR Fernsehen',
- 'uploader_id': '990030',
- },
- 'skip': 'redirect to http://swrmediathek.de/index.htm?hinweis=swrlink',
- }, {
- 'url': 'http://swrmediathek.de/player.htm?show=bba23e10-cb93-11e3-bf7f-0026b975f2e6',
- 'md5': '4382e4ef2c9d7ce6852535fa867a0dd3',
- 'info_dict': {
- 'id': 'bba23e10-cb93-11e3-bf7f-0026b975f2e6',
- 'ext': 'mp3',
- 'title': 'Saša Stanišic: Vor dem Fest',
- 'description': 'md5:5b792387dc3fbb171eb709060654e8c9',
- 'thumbnail': r're:http://.*\.jpg',
- 'duration': 3366,
- 'upload_date': '20140520',
- 'uploader': 'SWR 2',
- 'uploader_id': '284670',
- },
- 'skip': 'redirect to http://swrmediathek.de/index.htm?hinweis=swrlink',
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- video = self._download_json(
- 'http://swrmediathek.de/AjaxEntry?ekey=%s' % video_id,
- video_id, 'Downloading video JSON')
-
- attr = video['attr']
- title = attr['entry_title']
- media_type = attr.get('entry_etype')
-
- formats = []
- for entry in video.get('sub', []):
- if entry.get('name') != 'entry_media':
- continue
-
- entry_attr = entry.get('attr', {})
- f_url = entry_attr.get('val2')
- if not f_url:
- continue
- codec = entry_attr.get('val0')
- if codec == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- f_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- elif codec == 'f4m':
- formats.extend(self._extract_f4m_formats(
- f_url + '?hdcore=3.7.0', video_id,
- f4m_id='hds', fatal=False))
- else:
- formats.append({
- 'format_id': determine_protocol({'url': f_url}),
- 'url': f_url,
- 'quality': int_or_none(entry_attr.get('val1')),
- 'vcodec': codec if media_type == 'Video' else 'none',
- 'acodec': codec if media_type == 'Audio' else None,
- })
-
- upload_date = None
- entry_pdatet = attr.get('entry_pdatet')
- if entry_pdatet:
- upload_date = entry_pdatet[:-4]
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': attr.get('entry_descl'),
- 'thumbnail': attr.get('entry_image_16_9'),
- 'duration': parse_duration(attr.get('entry_durat')),
- 'upload_date': upload_date,
- 'uploader': attr.get('channel_title'),
- 'uploader_id': attr.get('channel_idkey'),
- 'formats': formats,
- }
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- get_element_by_attribute,
- clean_html,
-)
-
-
-class TechTalksIE(InfoExtractor):
- _VALID_URL = r'https?://techtalks\.tv/talks/(?:[^/]+/)?(?P<id>\d+)'
-
- _TESTS = [{
- 'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
- 'info_dict': {
- 'id': '57758',
- 'title': 'Learning Topic Models --- Going beyond SVD',
- },
- 'playlist': [
- {
- 'info_dict': {
- 'id': '57758',
- 'ext': 'flv',
- 'title': 'Learning Topic Models --- Going beyond SVD',
- },
- },
- {
- 'info_dict': {
- 'id': '57758-slides',
- 'ext': 'flv',
- 'title': 'Learning Topic Models --- Going beyond SVD',
- },
- },
- ],
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://techtalks.tv/talks/57758',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- talk_id = mobj.group('id')
- webpage = self._download_webpage(url, talk_id)
- rtmp_url = self._search_regex(
- r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url')
- play_path = self._search_regex(
- r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
- webpage, 'presenter play path')
- title = clean_html(get_element_by_attribute('class', 'title', webpage))
- video_info = {
- 'id': talk_id,
- 'title': title,
- 'url': rtmp_url,
- 'play_path': play_path,
- 'ext': 'flv',
- }
- m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)
- if m_slides is None:
- return video_info
- else:
- return {
- '_type': 'playlist',
- 'id': talk_id,
- 'title': title,
- 'entries': [
- video_info,
- # The slides video
- {
- 'id': talk_id + '-slides',
- 'title': title,
- 'url': rtmp_url,
- 'play_path': m_slides.group(1),
- 'ext': 'flv',
- },
- ],
- }
'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',
'only_matching': True,
}, {
- # ooyala video
'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html',
'only_matching': True,
}]
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..utils import ExtractorError
-
-
-class TinyPicIE(InfoExtractor):
- IE_NAME = 'tinypic'
- IE_DESC = 'tinypic.com videos'
- _VALID_URL = r'https?://(?:.+?\.)?tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'
-
- _TESTS = [
- {
- 'url': 'http://tinypic.com/player.php?v=6xw7tc%3E&s=5#.UtqZmbRFCM8',
- 'md5': '609b74432465364e72727ebc6203f044',
- 'info_dict': {
- 'id': '6xw7tc',
- 'ext': 'flv',
- 'title': 'shadow phenomenon weird',
- },
- },
- {
- 'url': 'http://de.tinypic.com/player.php?v=dy90yh&s=8',
- 'only_matching': True,
- }
- ]
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- video_id = mobj.group('id')
-
- webpage = self._download_webpage(url, video_id, 'Downloading page')
-
- mobj = re.search(r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n'
- r'\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
- if mobj is None:
- raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
- file_id = mobj.group('fileid')
- server_id = mobj.group('serverid')
-
- KEYWORDS_SUFFIX = ', Video, images, photos, videos, myspace, ebay, video hosting, photo hosting'
- keywords = self._html_search_meta('keywords', webpage, 'title')
- title = keywords[:-len(KEYWORDS_SUFFIX)] if keywords.endswith(KEYWORDS_SUFFIX) else ''
-
- video_url = 'http://v%s.tinypic.com/%s.flv' % (server_id, file_id)
- thumbnail = 'http://v%s.tinypic.com/%s_th.jpg' % (server_id, file_id)
-
- return {
- 'id': file_id,
- 'url': video_url,
- 'thumbnail': thumbnail,
- 'title': title
- }
+++ /dev/null
-import functools
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- clean_html,
- get_element_by_class,
- parse_count,
- remove_end,
- unified_strdate,
- js_to_json,
- OnDemandPagedList,
-)
-
-
-class TokentubeIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tokentube\.net/(?:view\?[vl]=|[vl]/)(?P<id>\d+)'
- _TESTS = [{
- 'url': 'https://tokentube.net/l/3236632011/Praise-A-Thon-Pastori-Chrisin-ja-Pastori-Bennyn-kanssa-27-8-2021',
- 'info_dict': {
- 'id': '3236632011',
- 'ext': 'mp4',
- 'title': 'Praise-A-Thon Pastori Chrisin ja Pastori Bennyn kanssa 27.8.2021',
- 'description': '',
- 'uploader': 'Pastori Chris - Rapsodia.fi',
- 'upload_date': '20210827',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://tokentube.net/v/3950239124/Linux-Ubuntu-Studio-perus-k%C3%A4ytt%C3%B6',
- 'md5': '0e1f00421f501f5eada9890d38fcfb56',
- 'info_dict': {
- 'id': '3950239124',
- 'ext': 'mp4',
- 'title': 'Linux Ubuntu Studio perus käyttö',
- 'description': 'md5:46077d0daaba1974f2dc381257f9d64c',
- 'uploader': 'jyrilehtonen',
- 'upload_date': '20210825',
- },
- }, {
- 'url': 'https://tokentube.net/view?v=3582463289',
- 'info_dict': {
- 'id': '3582463289',
- 'ext': 'mp4',
- 'title': 'Police for Freedom - toiminta aloitetaan Suomessa ❤️??',
- 'description': 'md5:37ebf1cb44264e0bf23ed98b337ee63e',
- 'uploader': 'Voitontie',
- 'upload_date': '20210428',
- }
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- title = self._html_search_regex(r'<h1\s*class=["\']title-text["\']>(.+?)</h1>', webpage, 'title')
-
- data_json = self._html_search_regex(r'({["\']html5["\'].+?}}}+)', webpage, 'data json')
- data_json = self._parse_json(js_to_json(data_json), video_id, fatal=False)
-
- sources = data_json.get('sources') or self._parse_json(
- self._html_search_regex(r'updateSrc\(([^\)]+)\)', webpage, 'sources'),
- video_id, transform_source=js_to_json)
-
- formats = [{
- 'url': format.get('src'),
- 'format_id': format.get('label'),
- 'height': format.get('res'),
- } for format in sources]
-
- view_count = parse_count(self._html_search_regex(
- r'<p\s*class=["\']views_counter["\']>\s*([\d\.,]+)\s*<span>views?</span></p>',
- webpage, 'view_count', fatal=False))
-
- like_count = parse_count(self._html_search_regex(
- r'<div\s*class="sh_button\s*likes_count">\s*(\d+)\s*</div>',
- webpage, 'like count', fatal=False))
-
- dislike_count = parse_count(self._html_search_regex(
- r'<div\s*class="sh_button\s*dislikes_count">\s*(\d+)\s*</div>',
- webpage, 'dislike count', fatal=False))
-
- upload_date = unified_strdate(self._html_search_regex(
- r'<span\s*class="p-date">Published\s*on\s+([^<]+)',
- webpage, 'upload date', fatal=False))
-
- uploader = self._html_search_regex(
- r'<a\s*class="place-left"[^>]+>(.+?)</a>',
- webpage, 'uploader', fatal=False)
-
- description = (clean_html(get_element_by_class('p-d-txt', webpage))
- or self._html_search_meta(('og:description', 'description', 'twitter:description'), webpage))
-
- description = remove_end(description, 'Category')
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': title,
- 'view_count': view_count,
- 'like_count': like_count,
- 'dislike_count': dislike_count,
- 'upload_date': upload_date,
- 'description': description,
- 'uploader': uploader,
- }
-
-
-class TokentubeChannelIE(InfoExtractor):
- _PAGE_SIZE = 20
- IE_NAME = 'Tokentube:channel'
- _VALID_URL = r'https?://(?:www\.)?tokentube\.net/channel/(?P<id>\d+)/[^/]+(?:/videos)?'
- _TESTS = [{
- 'url': 'https://tokentube.net/channel/3697658904/TokenTube',
- 'info_dict': {
- 'id': '3697658904',
- },
- 'playlist_mincount': 7,
- }, {
- 'url': 'https://tokentube.net/channel/3353234420/Linux/videos',
- 'info_dict': {
- 'id': '3353234420',
- },
- 'playlist_mincount': 20,
- }, {
- 'url': 'https://tokentube.net/channel/3475834195/Voitontie',
- 'info_dict': {
- 'id': '3475834195',
- },
- 'playlist_mincount': 150,
- }]
-
- def _fetch_page(self, channel_id, page):
- page += 1
- videos_info = self._download_webpage(
- f'https://tokentube.net/videos?p=0&m=1&sort=recent&u={channel_id}&page={page}',
- channel_id, headers={'X-Requested-With': 'XMLHttpRequest'},
- note=f'Downloading page {page}', fatal=False)
- if '</i> Sorry, no results were found.' not in videos_info:
- for path, media_id in re.findall(
- r'<a[^>]+\bhref=["\']([^"\']+/[lv]/(\d+)/\S+)["\'][^>]+>',
- videos_info):
- yield self.url_result(path, ie=TokentubeIE.ie_key(), video_id=media_id)
-
- def _real_extract(self, url):
- channel_id = self._match_id(url)
-
- entries = OnDemandPagedList(functools.partial(
- self._fetch_page, channel_id), self._PAGE_SIZE)
-
- return self.playlist_result(entries, channel_id)
class ToypicsIE(InfoExtractor):
+ _WORKING = False
IE_DESC = 'Toypics video'
_VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)'
_TEST = {
class ToypicsUserIE(InfoExtractor):
+ _WORKING = False
IE_DESC = 'Toypics user profile'
_VALID_URL = r'https?://videos\.toypics\.net/(?!view)(?P<id>[^/?#&]+)'
_TEST = {
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- int_or_none,
- parse_iso8601,
-)
-
-
-class TriluliluIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www|m)\.)?trilulilu\.ro/(?:[^/]+/)?(?P<id>[^/#\?]+)'
- _TESTS = [{
- 'url': 'http://www.trilulilu.ro/big-buck-bunny-1',
- 'md5': '68da087b676a6196a413549212f60cc6',
- 'info_dict': {
- 'id': 'ae2899e124140b',
- 'ext': 'mp4',
- 'title': 'Big Buck Bunny',
- 'description': ':) pentru copilul din noi',
- 'uploader_id': 'chipy',
- 'upload_date': '20120304',
- 'timestamp': 1330830647,
- 'uploader': 'chipy',
- 'view_count': int,
- 'like_count': int,
- 'comment_count': int,
- },
- }, {
- 'url': 'http://www.trilulilu.ro/adena-ft-morreti-inocenta',
- 'md5': '929dfb8729dc71750463af88bbbbf4a4',
- 'info_dict': {
- 'id': 'f299710e3c91c5',
- 'ext': 'mp4',
- 'title': 'Adena ft. Morreti - Inocenta',
- 'description': 'pop music',
- 'uploader_id': 'VEVOmixt',
- 'upload_date': '20151204',
- 'uploader': 'VEVOmixt',
- 'timestamp': 1449187937,
- 'view_count': int,
- 'like_count': int,
- 'comment_count': int,
- },
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- media_info = self._download_json('http://m.trilulilu.ro/%s?format=json' % display_id, display_id)
-
- age_limit = 0
- errors = media_info.get('errors', {})
- if errors.get('friends'):
- raise ExtractorError('This video is private.', expected=True)
- elif errors.get('geoblock'):
- raise ExtractorError('This video is not available in your country.', expected=True)
- elif errors.get('xxx_unlogged'):
- age_limit = 18
-
- media_class = media_info.get('class')
- if media_class not in ('video', 'audio'):
- raise ExtractorError('not a video or an audio')
-
- user = media_info.get('user', {})
-
- thumbnail = media_info.get('cover_url')
- if thumbnail:
- thumbnail.format(width='1600', height='1200')
-
- # TODO: get correct ext for audio files
- stream_type = media_info.get('stream_type')
- formats = [{
- 'url': media_info['href'],
- 'ext': stream_type,
- }]
- if media_info.get('is_hd'):
- formats.append({
- 'format_id': 'hd',
- 'url': media_info['hrefhd'],
- 'ext': stream_type,
- })
- if media_class == 'audio':
- formats[0]['vcodec'] = 'none'
- else:
- formats[0]['format_id'] = 'sd'
-
- return {
- 'id': media_info['identifier'].split('|')[1],
- 'display_id': display_id,
- 'formats': formats,
- 'title': media_info['title'],
- 'description': media_info.get('description'),
- 'thumbnail': thumbnail,
- 'uploader_id': user.get('username'),
- 'uploader': user.get('fullname'),
- 'timestamp': parse_iso8601(media_info.get('published'), ' '),
- 'duration': int_or_none(media_info.get('duration')),
- 'view_count': int_or_none(media_info.get('count_views')),
- 'like_count': int_or_none(media_info.get('count_likes')),
- 'comment_count': int_or_none(media_info.get('count_comments')),
- 'age_limit': age_limit,
- }
import re
+from .common import InfoExtractor
+from ..aes import aes_decrypt_text
+from ..compat import compat_urllib_parse_unquote
from ..utils import (
+ determine_ext,
+ format_field,
int_or_none,
str_to_int,
+ strip_or_none,
+ url_or_none,
)
-from .keezmovies import KeezMoviesIE
-class Tube8IE(KeezMoviesIE): # XXX: Do not subclass from concrete IE
+class Tube8IE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)'
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)']
_TESTS = [{
'only_matching': True,
}]
+    def _extract_info(self, url, fatal=True):
+        """Download the video page and build the base info dict.
+
+        Formats are collected from the page's ``flashvars`` JSON (the
+        ``quality_<N>p`` keys and ``video_url``) and, as a fallback, from a
+        ``flashvars.video_url = "..."`` assignment found in the page source.
+
+        ``fatal`` is kept for interface compatibility; it is not consulted.
+
+        Returns a ``(webpage, info_dict)`` tuple.
+        """
+        mobj = self._match_valid_url(url)
+        video_id = mobj.group('id')
+        display_id = (mobj.group('display_id')
+                      if 'display_id' in mobj.groupdict()
+                      else None) or mobj.group('id')
+
+        # The age gate is bypassed by sending the verification cookie up front
+        webpage = self._download_webpage(
+            url, display_id, headers={'Cookie': 'age_verified=1'})
+
+        formats = []
+        format_urls = set()  # URLs already seen, used to skip duplicates
+
+        title = None
+        thumbnail = None
+        duration = None
+        encrypted = False
+
+        def extract_format(format_url, height=None):
+            """Validate, de-duplicate and append a single format entry."""
+            format_url = url_or_none(format_url)
+            if not format_url or not format_url.startswith(('http', '//')):
+                return
+            if format_url in format_urls:
+                return
+            format_urls.add(format_url)
+            tbr = int_or_none(self._search_regex(
+                r'[/_](\d+)[kK][/_]', format_url, 'tbr', default=None))
+            if not height:
+                height = int_or_none(self._search_regex(
+                    r'[/_](\d+)[pP][/_]', format_url, 'height', default=None))
+            if encrypted:
+                # Decrypt this format's own URL. The previous code read the
+                # enclosing `video_url`, which is still unbound while the
+                # `quality_*` loop below runs (NameError) and is otherwise
+                # not necessarily the URL passed to this call.
+                format_url = aes_decrypt_text(
+                    format_url, title, 32).decode('utf-8')
+            formats.append({
+                'url': format_url,
+                'format_id': format_field(height, None, '%dp'),
+                'height': height,
+                'tbr': tbr,
+            })
+
+        flashvars = self._parse_json(
+            self._search_regex(
+                r'flashvars\s*=\s*({.+?});', webpage,
+                'flashvars', default='{}'),
+            display_id, fatal=False)
+
+        if flashvars:
+            title = flashvars.get('video_title')
+            thumbnail = flashvars.get('image_url')
+            duration = int_or_none(flashvars.get('video_duration'))
+            encrypted = flashvars.get('encrypted') is True
+            for key, value in flashvars.items():
+                quality_mobj = re.search(r'quality_(\d+)[pP]', key)
+                if quality_mobj:
+                    extract_format(value, int(quality_mobj.group(1)))
+            video_url = flashvars.get('video_url')
+            # Only accept `video_url` when it carries a file extension
+            if video_url and determine_ext(video_url, None):
+                extract_format(video_url)
+
+        # Fallback: URL assigned directly in the page's inline script
+        video_url = self._html_search_regex(
+            r'flashvars\.video_url\s*=\s*(["\'])(?P<url>http.+?)\1',
+            webpage, 'video url', default=None, group='url')
+        if video_url:
+            extract_format(compat_urllib_parse_unquote(video_url))
+
+        if not formats:
+            if 'title="This video is no longer available"' in webpage:
+                self.raise_no_formats(
+                    'Video %s is no longer available' % video_id, expected=True)
+
+        if not title:
+            title = self._html_search_regex(
+                r'<h1[^>]*>([^<]+)', webpage, 'title')
+
+        return webpage, {
+            'id': video_id,
+            'display_id': display_id,
+            'title': strip_or_none(title),
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'age_limit': 18,
+            'formats': formats,
+        }
+
+
def _real_extract(self, url):
webpage, info = self._extract_info(url)
+++ /dev/null
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- int_or_none,
- try_get,
- unified_timestamp,
-)
-
-
-class TunePkIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- https?://
- (?:
- (?:www\.)?tune\.pk/(?:video/|player/embed_player.php?.*?\bvid=)|
- embed\.tune\.pk/play/
- )
- (?P<id>\d+)
- '''
- _TESTS = [{
- 'url': 'https://tune.pk/video/6919541/maudie-2017-international-trailer-1-ft-ethan-hawke-sally-hawkins',
- 'md5': '0c537163b7f6f97da3c5dd1e3ef6dd55',
- 'info_dict': {
- 'id': '6919541',
- 'ext': 'mp4',
- 'title': 'Maudie (2017) | International Trailer # 1 ft Ethan Hawke, Sally Hawkins',
- 'description': 'md5:eb5a04114fafef5cec90799a93a2d09c',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'timestamp': 1487327564,
- 'upload_date': '20170217',
- 'uploader': 'Movie Trailers',
- 'duration': 107,
- 'view_count': int,
- }
- }, {
- 'url': 'https://tune.pk/player/embed_player.php?vid=6919541&folder=2017/02/17/&width=600&height=350&autoplay=no',
- 'only_matching': True,
- }, {
- 'url': 'https://embed.tune.pk/play/6919541?autoplay=no&ssl=yes&inline=true',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(
- 'https://tune.pk/video/%s' % video_id, video_id)
-
- details = self._parse_json(
- self._search_regex(
- r'new\s+TunePlayer\(({.+?})\)\s*;\s*\n', webpage, 'tune player'),
- video_id)['details']
-
- video = details['video']
- title = video.get('title') or self._og_search_title(
- webpage, default=None) or self._html_search_meta(
- 'title', webpage, 'title', fatal=True)
-
- formats = self._parse_jwplayer_formats(
- details['player']['sources'], video_id)
-
- description = self._og_search_description(
- webpage, default=None) or self._html_search_meta(
- 'description', webpage, 'description')
-
- thumbnail = video.get('thumb') or self._og_search_thumbnail(
- webpage, default=None) or self._html_search_meta(
- 'thumbnail', webpage, 'thumbnail')
-
- timestamp = unified_timestamp(video.get('date_added'))
- uploader = try_get(
- video, lambda x: x['uploader']['name'],
- compat_str) or self._html_search_meta('author', webpage, 'author')
-
- duration = int_or_none(video.get('duration'))
- view_count = int_or_none(video.get('views'))
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'uploader': uploader,
- 'duration': duration,
- 'view_count': view_count,
- 'formats': formats,
- }
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- unescapeHTML,
- url_or_none,
-)
-
-
-class TVNetIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[^/]+)\.tvnet\.gov\.vn/[^/]+/(?:\d+/)?(?P<id>\d+)(?:/|$)'
- _TESTS = [{
- # video
- 'url': 'http://de.tvnet.gov.vn/video/109788/vtv1---bac-tuyet-tai-lao-cai-va-ha-giang/tin-nong-24h',
- 'md5': 'b4d7abe0252c9b47774760b7519c7558',
- 'info_dict': {
- 'id': '109788',
- 'ext': 'mp4',
- 'title': 'VTV1 - Bắc tuyết tại Lào Cai và Hà Giang',
- 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
- 'is_live': False,
- 'view_count': int,
- },
- }, {
- # audio
- 'url': 'http://vn.tvnet.gov.vn/radio/27017/vov1---ban-tin-chieu-10062018/doi-song-va-xa-hoi',
- 'md5': 'b5875ce9b0a2eecde029216d0e6db2ae',
- 'info_dict': {
- 'id': '27017',
- 'ext': 'm4a',
- 'title': 'VOV1 - Bản tin chiều (10/06/2018)',
- 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
- 'is_live': False,
- },
- }, {
- 'url': 'http://us.tvnet.gov.vn/video/118023/129999/ngay-0705',
- 'info_dict': {
- 'id': '129999',
- 'ext': 'mp4',
- 'title': 'VTV1 - Quốc hội với cử tri (11/06/2018)',
- 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
- 'is_live': False,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # live stream
- 'url': 'http://us.tvnet.gov.vn/kenh-truyen-hinh/1011/vtv1',
- 'info_dict': {
- 'id': '1011',
- 'ext': 'mp4',
- 'title': r're:^VTV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
- 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
- 'is_live': True,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # radio live stream
- 'url': 'http://vn.tvnet.gov.vn/kenh-truyen-hinh/1014',
- 'info_dict': {
- 'id': '1014',
- 'ext': 'm4a',
- 'title': r're:VOV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
- 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
- 'is_live': True,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://us.tvnet.gov.vn/phim/6136/25510/vtv3---ca-mot-doi-an-oan-tap-1-50/phim-truyen-hinh',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- title = self._og_search_title(
- webpage, default=None) or self._html_search_meta(
- 'title', webpage, default=None) or self._search_regex(
- r'<title>([^<]+)<', webpage, 'title')
- title = re.sub(r'\s*-\s*TV Net\s*$', '', title)
-
- if '/video/' in url or '/radio/' in url:
- is_live = False
- elif '/kenh-truyen-hinh/' in url:
- is_live = True
- else:
- is_live = None
-
- data_file = unescapeHTML(self._search_regex(
- r'data-file=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
- 'data file', group='url'))
-
- stream_urls = set()
- formats = []
- for stream in self._download_json(data_file, video_id):
- if not isinstance(stream, dict):
- continue
- stream_url = url_or_none(stream.get('url'))
- if stream_url in stream_urls or not stream_url:
- continue
- stream_urls.add(stream_url)
- formats.extend(self._extract_m3u8_formats(
- stream_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False))
-
- # better support for radio streams
- if title.startswith('VOV'):
- for f in formats:
- f.update({
- 'ext': 'm4a',
- 'vcodec': 'none',
- })
-
- thumbnail = self._og_search_thumbnail(
- webpage, default=None) or unescapeHTML(
- self._search_regex(
- r'data-image=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
- 'thumbnail', default=None, group='url'))
-
- view_count = int_or_none(self._search_regex(
- r'(?s)<div[^>]+\bclass=["\'].*?view-count[^>]+>.*?(\d+).*?</div>',
- webpage, 'view count', default=None))
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'is_live': is_live,
- 'view_count': view_count,
- 'formats': formats,
- }
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- ExtractorError,
- get_element_by_id,
- int_or_none,
- parse_iso8601,
- parse_duration,
- str_or_none,
- try_get,
- update_url_query,
- urljoin,
-)
-
-
-class TVNowBaseIE(InfoExtractor):
- _VIDEO_FIELDS = (
- 'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
- 'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
- 'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear',
- 'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo')
-
- def _call_api(self, path, video_id, query):
- return self._download_json(
- 'https://api.tvnow.de/v3/' + path, video_id, query=query)
-
- def _extract_video(self, info, display_id):
- video_id = compat_str(info['id'])
- title = info['title']
-
- paths = []
- for manifest_url in (info.get('manifest') or {}).values():
- if not manifest_url:
- continue
- manifest_url = update_url_query(manifest_url, {'filter': ''})
- path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
- if path in paths:
- continue
- paths.append(path)
-
- def url_repl(proto, suffix):
- return re.sub(
- r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
- r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
- '.ism/' + suffix, manifest_url))
-
- def make_urls(proto, suffix):
- urls = [url_repl(proto, suffix)]
- hd_url = urls[0].replace('/manifest/', '/ngvod/')
- if hd_url != urls[0]:
- urls.append(hd_url)
- return urls
-
- for man_url in make_urls('dash', '.mpd'):
- formats = self._extract_mpd_formats(
- man_url, video_id, mpd_id='dash', fatal=False)
- for man_url in make_urls('hss', 'Manifest'):
- formats.extend(self._extract_ism_formats(
- man_url, video_id, ism_id='mss', fatal=False))
- for man_url in make_urls('hls', '.m3u8'):
- formats.extend(self._extract_m3u8_formats(
- man_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
- fatal=False))
- if formats:
- break
- else:
- if not self.get_param('allow_unplayable_formats') and info.get('isDrm'):
- raise ExtractorError(
- 'Video %s is DRM protected' % video_id, expected=True)
- if info.get('geoblocked'):
- raise self.raise_geo_restricted()
- if not info.get('free', True):
- raise ExtractorError(
- 'Video %s is not available for free' % video_id, expected=True)
-
- description = info.get('articleLong') or info.get('articleShort')
- timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
- duration = parse_duration(info.get('duration'))
-
- f = info.get('format', {})
-
- thumbnails = [{
- 'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
- }]
- thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
- if thumbnail:
- thumbnails.append({
- 'url': thumbnail,
- })
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnails': thumbnails,
- 'timestamp': timestamp,
- 'duration': duration,
- 'series': f.get('title'),
- 'season_number': int_or_none(info.get('season')),
- 'episode_number': int_or_none(info.get('episode')),
- 'episode': title,
- 'formats': formats,
- }
-
-
-class TVNowIE(TVNowBaseIE):
- _VALID_URL = r'''(?x)
- https?://
- (?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/
- (?P<show_id>[^/]+)/
- (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
- '''
-
- @classmethod
- def suitable(cls, url):
- return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url) or TVNowShowIE.suitable(url)
- else super(TVNowIE, cls).suitable(url))
-
- _TESTS = [{
- 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
- 'info_dict': {
- 'id': '331082',
- 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
- 'ext': 'mp4',
- 'title': 'Der neue Porsche 911 GT 3',
- 'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
- 'timestamp': 1495994400,
- 'upload_date': '20170528',
- 'duration': 5283,
- 'series': 'GRIP - Das Motormagazin',
- 'season_number': 14,
- 'episode_number': 405,
- 'episode': 'Der neue Porsche 911 GT 3',
- },
- }, {
- # rtl2
- 'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
- 'only_matching': True,
- }, {
- # rtlnitro
- 'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
- 'only_matching': True,
- }, {
- # superrtl
- 'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
- 'only_matching': True,
- }, {
- # ntv
- 'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
- 'only_matching': True,
- }, {
- # vox
- 'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
- 'only_matching': True,
- }, {
- # rtlplus
- 'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- display_id = '%s/%s' % mobj.group(2, 3)
-
- info = self._call_api(
- 'movies/' + display_id, display_id, query={
- 'fields': ','.join(self._VIDEO_FIELDS),
- })
-
- return self._extract_video(info, display_id)
-
-
-class TVNowNewIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- (?P<base_url>https?://
- (?:www\.)?tvnow\.(?:de|at|ch)/
- (?:shows|serien))/
- (?P<show>[^/]+)-\d+/
- [^/]+/
- episode-\d+-(?P<episode>[^/?$&]+)-(?P<id>\d+)
- '''
-
- _TESTS = [{
- 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- base_url = re.sub(r'(?:shows|serien)', '_', mobj.group('base_url'))
- show, episode = mobj.group('show', 'episode')
- return self.url_result(
- # Rewrite new URLs to the old format and use extraction via old API
- # at api.tvnow.de as a loophole for bypassing premium content checks
- '%s/%s/%s' % (base_url, show, episode),
- ie=TVNowIE.ie_key(), video_id=mobj.group('id'))
-
-
-class TVNowFilmIE(TVNowBaseIE):
- _VALID_URL = r'''(?x)
- (?P<base_url>https?://
- (?:www\.)?tvnow\.(?:de|at|ch)/
- (?:filme))/
- (?P<title>[^/?$&]+)-(?P<id>\d+)
- '''
- _TESTS = [{
- 'url': 'https://www.tvnow.de/filme/lord-of-war-haendler-des-todes-7959',
- 'info_dict': {
- 'id': '1426690',
- 'display_id': 'lord-of-war-haendler-des-todes',
- 'ext': 'mp4',
- 'title': 'Lord of War',
- 'description': 'md5:5eda15c0d5b8cb70dac724c8a0ff89a9',
- 'timestamp': 1550010000,
- 'upload_date': '20190212',
- 'duration': 7016,
- },
- }, {
- 'url': 'https://www.tvnow.de/filme/the-machinist-12157',
- 'info_dict': {
- 'id': '328160',
- 'display_id': 'the-machinist',
- 'ext': 'mp4',
- 'title': 'The Machinist',
- 'description': 'md5:9a0e363fdd74b3a9e1cdd9e21d0ecc28',
- 'timestamp': 1496469720,
- 'upload_date': '20170603',
- 'duration': 5836,
- },
- }, {
- 'url': 'https://www.tvnow.de/filme/horst-schlaemmer-isch-kandidiere-17777',
- 'only_matching': True, # DRM protected
- }]
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- display_id = mobj.group('title')
-
- webpage = self._download_webpage(url, display_id, fatal=False)
- if not webpage:
- raise ExtractorError('Cannot download "%s"' % url, expected=True)
-
- json_text = get_element_by_id('now-web-state', webpage)
- if not json_text:
- raise ExtractorError('Cannot read video data', expected=True)
-
- json_data = self._parse_json(
- json_text,
- display_id,
- transform_source=lambda x: x.replace('&q;', '"'),
- fatal=False)
- if not json_data:
- raise ExtractorError('Cannot read video data', expected=True)
-
- player_key = next(
- (key for key in json_data.keys() if 'module/player' in key),
- None)
- page_key = next(
- (key for key in json_data.keys() if 'page/filme' in key),
- None)
- movie_id = try_get(
- json_data,
- [
- lambda x: x[player_key]['body']['id'],
- lambda x: x[page_key]['body']['modules'][0]['id'],
- lambda x: x[page_key]['body']['modules'][1]['id']],
- int)
- if not movie_id:
- raise ExtractorError('Cannot extract movie ID', expected=True)
-
- info = self._call_api(
- 'movies/%d' % movie_id,
- display_id,
- query={'fields': ','.join(self._VIDEO_FIELDS)})
-
- return self._extract_video(info, display_id)
-
-
-class TVNowNewBaseIE(InfoExtractor):
- def _call_api(self, path, video_id, query={}):
- result = self._download_json(
- 'https://apigw.tvnow.de/module/' + path, video_id, query=query)
- error = result.get('error')
- if error:
- raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, error), expected=True)
- return result
-
-
-r"""
-TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it
-when api.tvnow.de is shut down. This version can't bypass premium checks though.
-class TVNowIE(TVNowNewBaseIE):
- _VALID_URL = r'''(?x)
- https?://
- (?:www\.)?tvnow\.(?:de|at|ch)/
- (?:shows|serien)/[^/]+/
- (?:[^/]+/)+
- (?P<display_id>[^/?$&]+)-(?P<id>\d+)
- '''
-
- _TESTS = [{
- # episode with annual navigation
- 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
- 'info_dict': {
- 'id': '331082',
- 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
- 'ext': 'mp4',
- 'title': 'Der neue Porsche 911 GT 3',
- 'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'timestamp': 1495994400,
- 'upload_date': '20170528',
- 'duration': 5283,
- 'series': 'GRIP - Das Motormagazin',
- 'season_number': 14,
- 'episode_number': 405,
- 'episode': 'Der neue Porsche 911 GT 3',
- },
- }, {
- # rtl2, episode with season navigation
- 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124',
- 'only_matching': True,
- }, {
- # rtlnitro
- 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822',
- 'only_matching': True,
- }, {
- # superrtl
- 'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120',
- 'only_matching': True,
- }, {
- # ntv
- 'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630',
- 'only_matching': True,
- }, {
- # vox
- 'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
- 'only_matching': True,
- }]
-
- def _extract_video(self, info, url, display_id):
- config = info['config']
- source = config['source']
-
- video_id = compat_str(info.get('id') or source['videoId'])
- title = source['title'].strip()
-
- paths = []
- for manifest_url in (info.get('manifest') or {}).values():
- if not manifest_url:
- continue
- manifest_url = update_url_query(manifest_url, {'filter': ''})
- path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
- if path in paths:
- continue
- paths.append(path)
-
- def url_repl(proto, suffix):
- return re.sub(
- r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
- r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
- '.ism/' + suffix, manifest_url))
-
- formats = self._extract_mpd_formats(
- url_repl('dash', '.mpd'), video_id,
- mpd_id='dash', fatal=False)
- formats.extend(self._extract_ism_formats(
- url_repl('hss', 'Manifest'),
- video_id, ism_id='mss', fatal=False))
- formats.extend(self._extract_m3u8_formats(
- url_repl('hls', '.m3u8'), video_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False))
- if formats:
- break
- else:
- if try_get(info, lambda x: x['rights']['isDrm']):
- raise ExtractorError(
- 'Video %s is DRM protected' % video_id, expected=True)
- if try_get(config, lambda x: x['boards']['geoBlocking']['block']):
- raise self.raise_geo_restricted()
- if not info.get('free', True):
- raise ExtractorError(
- 'Video %s is not available for free' % video_id, expected=True)
-
- description = source.get('description')
- thumbnail = url_or_none(source.get('poster'))
- timestamp = unified_timestamp(source.get('previewStart'))
- duration = parse_duration(source.get('length'))
-
- series = source.get('format')
- season_number = int_or_none(self._search_regex(
- r'staffel-(\d+)', url, 'season number', default=None))
- episode_number = int_or_none(self._search_regex(
- r'episode-(\d+)', url, 'episode number', default=None))
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'duration': duration,
- 'series': series,
- 'season_number': season_number,
- 'episode_number': episode_number,
- 'episode': title,
- 'formats': formats,
- }
-
- def _real_extract(self, url):
- display_id, video_id = self._match_valid_url(url).groups()
- info = self._call_api('player/' + video_id, video_id)
- return self._extract_video(info, video_id, display_id)
-
-
-class TVNowFilmIE(TVNowIE): # XXX: Do not subclass from concrete IE
- _VALID_URL = r'''(?x)
- (?P<base_url>https?://
- (?:www\.)?tvnow\.(?:de|at|ch)/
- (?:filme))/
- (?P<title>[^/?$&]+)-(?P<id>\d+)
- '''
- _TESTS = [{
- 'url': 'https://www.tvnow.de/filme/lord-of-war-haendler-des-todes-7959',
- 'info_dict': {
- 'id': '1426690',
- 'display_id': 'lord-of-war-haendler-des-todes',
- 'ext': 'mp4',
- 'title': 'Lord of War',
- 'description': 'md5:5eda15c0d5b8cb70dac724c8a0ff89a9',
- 'timestamp': 1550010000,
- 'upload_date': '20190212',
- 'duration': 7016,
- },
- }, {
- 'url': 'https://www.tvnow.de/filme/the-machinist-12157',
- 'info_dict': {
- 'id': '328160',
- 'display_id': 'the-machinist',
- 'ext': 'mp4',
- 'title': 'The Machinist',
- 'description': 'md5:9a0e363fdd74b3a9e1cdd9e21d0ecc28',
- 'timestamp': 1496469720,
- 'upload_date': '20170603',
- 'duration': 5836,
- },
- }, {
- 'url': 'https://www.tvnow.de/filme/horst-schlaemmer-isch-kandidiere-17777',
- 'only_matching': True, # DRM protected
- }]
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- display_id = mobj.group('title')
-
- webpage = self._download_webpage(url, display_id, fatal=False)
- if not webpage:
- raise ExtractorError('Cannot download "%s"' % url, expected=True)
-
- json_text = get_element_by_id('now-web-state', webpage)
- if not json_text:
- raise ExtractorError('Cannot read video data', expected=True)
-
- json_data = self._parse_json(
- json_text,
- display_id,
- transform_source=lambda x: x.replace('&q;', '"'),
- fatal=False)
- if not json_data:
- raise ExtractorError('Cannot read video data', expected=True)
-
- player_key = next(
- (key for key in json_data.keys() if 'module/player' in key),
- None)
- page_key = next(
- (key for key in json_data.keys() if 'page/filme' in key),
- None)
- movie_id = try_get(
- json_data,
- [
- lambda x: x[player_key]['body']['id'],
- lambda x: x[page_key]['body']['modules'][0]['id'],
- lambda x: x[page_key]['body']['modules'][1]['id']],
- int)
- if not movie_id:
- raise ExtractorError('Cannot extract movie ID', expected=True)
-
- info = self._call_api('player/%d' % movie_id, display_id)
- return self._extract_video(info, url, display_id)
-"""
-
-
-class TVNowListBaseIE(TVNowNewBaseIE):
- _SHOW_VALID_URL = r'''(?x)
- (?P<base_url>
- https?://
- (?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/
- [^/?#&]+-(?P<show_id>\d+)
- )
- '''
-
- @classmethod
- def suitable(cls, url):
- return (False if TVNowNewIE.suitable(url)
- else super(TVNowListBaseIE, cls).suitable(url))
-
- def _extract_items(self, url, show_id, list_id, query):
- items = self._call_api(
- 'teaserrow/format/episode/' + show_id, list_id,
- query=query)['items']
-
- entries = []
- for item in items:
- if not isinstance(item, dict):
- continue
- item_url = urljoin(url, item.get('url'))
- if not item_url:
- continue
- video_id = str_or_none(item.get('id') or item.get('videoId'))
- item_title = item.get('subheadline') or item.get('text')
- entries.append(self.url_result(
- item_url, ie=TVNowNewIE.ie_key(), video_id=video_id,
- video_title=item_title))
-
- return self.playlist_result(entries, '%s/%s' % (show_id, list_id))
-
-
-class TVNowSeasonIE(TVNowListBaseIE):
- _VALID_URL = r'%s/staffel-(?P<id>\d+)' % TVNowListBaseIE._SHOW_VALID_URL
- _TESTS = [{
- 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13',
- 'info_dict': {
- 'id': '1815/13',
- },
- 'playlist_mincount': 22,
- }]
-
- def _real_extract(self, url):
- _, show_id, season_id = self._match_valid_url(url).groups()
- return self._extract_items(
- url, show_id, season_id, {'season': season_id})
-
-
-class TVNowAnnualIE(TVNowListBaseIE):
- _VALID_URL = r'%s/(?P<year>\d{4})-(?P<month>\d{2})' % TVNowListBaseIE._SHOW_VALID_URL
- _TESTS = [{
- 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05',
- 'info_dict': {
- 'id': '1669/2017-05',
- },
- 'playlist_mincount': 2,
- }]
-
- def _real_extract(self, url):
- _, show_id, year, month = self._match_valid_url(url).groups()
- return self._extract_items(
- url, show_id, '%s-%s' % (year, month), {
- 'year': int(year),
- 'month': int(month),
- })
-
-
-class TVNowShowIE(TVNowListBaseIE):
- _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
- _TESTS = [{
- # annual navigationType
- 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669',
- 'info_dict': {
- 'id': '1669',
- },
- 'playlist_mincount': 73,
- }, {
- # season navigationType
- 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471',
- 'info_dict': {
- 'id': '11471',
- },
- 'playlist_mincount': 3,
- }]
-
- @classmethod
- def suitable(cls, url):
- return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url)
- else super(TVNowShowIE, cls).suitable(url))
-
- def _real_extract(self, url):
- base_url, show_id = self._match_valid_url(url).groups()
-
- result = self._call_api(
- 'teaserrow/format/navigation/' + show_id, show_id)
-
- items = result['items']
-
- entries = []
- navigation = result.get('navigationType')
- if navigation == 'annual':
- for item in items:
- if not isinstance(item, dict):
- continue
- year = int_or_none(item.get('year'))
- if year is None:
- continue
- months = item.get('months')
- if not isinstance(months, list):
- continue
- for month_dict in months:
- if not isinstance(month_dict, dict) or not month_dict:
- continue
- month_number = int_or_none(list(month_dict.keys())[0])
- if month_number is None:
- continue
- entries.append(self.url_result(
- '%s/%04d-%02d' % (base_url, year, month_number),
- ie=TVNowAnnualIE.ie_key()))
- elif navigation == 'season':
- for item in items:
- if not isinstance(item, dict):
- continue
- season_number = int_or_none(item.get('season'))
- if season_number is None:
- continue
- entries.append(self.url_result(
- '%s/staffel-%d' % (base_url, season_number),
- ie=TVNowSeasonIE.ie_key()))
- else:
- raise ExtractorError('Unknown navigationType')
-
- return self.playlist_result(entries, show_id)
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- parse_iso8601,
- int_or_none,
- xpath_attr,
- xpath_element,
-)
-
-
-class TwentyFourVideoIE(InfoExtractor):
- IE_NAME = '24video'
- _VALID_URL = r'''(?x)
- https?://
- (?P<host>
- (?:(?:www|porno?)\.)?24video\.
- (?:net|me|xxx|sexy?|tube|adult|site|vip)
- )/
- (?:
- video/(?:(?:view|xml)/)?|
- player/new24_play\.swf\?id=
- )
- (?P<id>\d+)
- '''
-
- _TESTS = [{
- 'url': 'http://www.24video.net/video/view/1044982',
- 'md5': 'e09fc0901d9eaeedac872f154931deeb',
- 'info_dict': {
- 'id': '1044982',
- 'ext': 'mp4',
- 'title': 'Эротика каменного века',
- 'description': 'Как смотрели порно в каменном веке.',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'SUPERTELO',
- 'duration': 31,
- 'timestamp': 1275937857,
- 'upload_date': '20100607',
- 'age_limit': 18,
- 'like_count': int,
- 'dislike_count': int,
- },
- }, {
- 'url': 'http://www.24video.net/player/new24_play.swf?id=1044982',
- 'only_matching': True,
- }, {
- 'url': 'http://www.24video.me/video/view/1044982',
- 'only_matching': True,
- }, {
- 'url': 'http://www.24video.tube/video/view/2363750',
- 'only_matching': True,
- }, {
- 'url': 'https://www.24video.site/video/view/2640421',
- 'only_matching': True,
- }, {
- 'url': 'https://porno.24video.net/video/2640421-vsya-takaya-gibkaya-i-v-masle',
- 'only_matching': True,
- }, {
- 'url': 'https://www.24video.vip/video/view/1044982',
- 'only_matching': True,
- }, {
- 'url': 'https://porn.24video.net/video/2640421-vsya-takay',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- video_id = mobj.group('id')
- host = mobj.group('host')
-
- webpage = self._download_webpage(
- 'http://%s/video/view/%s' % (host, video_id), video_id)
-
- title = self._og_search_title(webpage)
- description = self._html_search_regex(
- r'<(p|span)[^>]+itemprop="description"[^>]*>(?P<description>[^<]+)</\1>',
- webpage, 'description', fatal=False, group='description')
- thumbnail = self._og_search_thumbnail(webpage)
- duration = int_or_none(self._og_search_property(
- 'duration', webpage, 'duration', fatal=False))
- timestamp = parse_iso8601(self._search_regex(
- r'<time[^>]+\bdatetime="([^"]+)"[^>]+itemprop="uploadDate"',
- webpage, 'upload date', fatal=False))
-
- uploader = self._html_search_regex(
- r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>',
- webpage, 'uploader', fatal=False)
-
- view_count = int_or_none(self._html_search_regex(
- r'<span class="video-views">(\d+) просмотр',
- webpage, 'view count', fatal=False))
- comment_count = int_or_none(self._html_search_regex(
- r'<a[^>]+href="#tab-comments"[^>]*>(\d+) комментари',
- webpage, 'comment count', default=None))
-
- # Sets some cookies
- self._download_xml(
- r'http://%s/video/xml/%s?mode=init' % (host, video_id),
- video_id, 'Downloading init XML')
-
- video_xml = self._download_xml(
- 'http://%s/video/xml/%s?mode=play' % (host, video_id),
- video_id, 'Downloading video XML')
-
- video = xpath_element(video_xml, './/video', 'video', fatal=True)
-
- formats = [{
- 'url': xpath_attr(video, '', 'url', 'video URL', fatal=True),
- }]
-
- like_count = int_or_none(video.get('ratingPlus'))
- dislike_count = int_or_none(video.get('ratingMinus'))
- age_limit = 18 if video.get('adult') == 'true' else 0
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'duration': duration,
- 'timestamp': timestamp,
- 'view_count': view_count,
- 'comment_count': comment_count,
- 'like_count': like_count,
- 'dislike_count': dislike_count,
- 'age_limit': age_limit,
- 'formats': formats,
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import parse_duration, traverse_obj
-
-
-class UnscriptedNewsVideoIE(InfoExtractor):
- _VALID_URL = r'https?://www\.unscripted\.news/videos/(?P<id>[\w-]+)'
- _TESTS = [{
- 'url': 'https://www.unscripted.news/videos/a-day-at-the-farmers-protest',
- 'info_dict': {
- 'id': '60c0a55cd1e99b1079918a57',
- 'display_id': 'a-day-at-the-farmers-protest',
- 'ext': 'mp4',
- 'title': 'A Day at the Farmers\' Protest',
- 'description': 'md5:4b3df22747a03e8f14f746dd72190384',
- 'thumbnail': 'https://s3.unscripted.news/anj2/60c0a55cd1e99b1079918a57/5f199a65-c803-4a5c-8fce-2077359c3b72.jpg',
- 'duration': 2251.0,
- 'series': 'Ground Reports',
- }
- }, {
- 'url': 'https://www.unscripted.news/videos/you-get-the-politicians-you-deserve-ft-shashi-tharoor',
- 'info_dict': {
- 'id': '5fb3afbf18ac817d341a74d8',
- 'display_id': 'you-get-the-politicians-you-deserve-ft-shashi-tharoor',
- 'ext': 'mp4',
- 'cast': ['Avalok Langer', 'Ashwin Mehta'],
- 'thumbnail': 'https://s3.unscripted.news/anj2/5fb3afbf18ac817d341a74d8/82bd7942-4f20-4cd8-98ae-83f9e814f998.jpg',
- 'description': 'md5:1e91b069238a705ca3a40f87e6f1182c',
- 'duration': 1046.0,
- 'series': 'Dumb Questions Only',
- 'title': 'You Get The Politicians You Deserve! ft. Shashi Tharoor',
- }
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- nextjs_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['dataLocal']
-
- # TODO: get subtitle from srt key
- formats, subtitles = self._extract_m3u8_formats_and_subtitles(nextjs_data['alt_content'], display_id)
-
- return {
- 'id': nextjs_data['_id'],
- 'display_id': display_id,
- 'title': nextjs_data.get('title') or self._og_search_title(webpage),
- 'description': nextjs_data.get('sh_heading') or self._og_search_description(webpage),
- 'formats': formats,
- 'subtitles': subtitles,
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'duration': parse_duration(nextjs_data.get('duration')),
- 'series': traverse_obj(nextjs_data, ('show', 'topic')),
- 'cast': traverse_obj(nextjs_data, ('cast_crew', ..., 'displayname')),
- }
+++ /dev/null
-import re
-import json
-
-from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote,
- compat_urlparse,
-)
-from ..utils import (
- ExtractorError,
- clean_html,
- get_element_by_id,
-)
-
-
-class VeeHDIE(InfoExtractor):
- _VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'
-
- # Seems VeeHD videos have multiple copies on several servers, all of
- # whom have different MD5 checksums, so omit md5 field in all tests
- _TESTS = [{
- 'url': 'http://veehd.com/video/4639434_Solar-Sinter',
- 'info_dict': {
- 'id': '4639434',
- 'ext': 'mp4',
- 'title': 'Solar Sinter',
- 'uploader_id': 'VideoEyes',
- 'description': 'md5:46a840e8692ddbaffb5f81d9885cb457',
- },
- 'skip': 'Video deleted',
- }, {
- 'url': 'http://veehd.com/video/4905758_Elysian-Fields-Channeling',
- 'info_dict': {
- 'id': '4905758',
- 'ext': 'mp4',
- 'title': 'Elysian Fields - Channeling',
- 'description': 'md5:360e4e95fdab58aefbea0f2a19e5604b',
- 'uploader_id': 'spotted',
- }
- }, {
- 'url': 'http://veehd.com/video/2046729_2012-2009-DivX-Trailer',
- 'info_dict': {
- 'id': '2046729',
- 'ext': 'avi',
- 'title': '2012 (2009) DivX Trailer',
- 'description': 'md5:75435ee95255e6a9838ac6f6f3a2396b',
- 'uploader_id': 'Movie_Trailers',
- }
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- # VeeHD seems to send garbage on the first request.
- # See https://github.com/ytdl-org/youtube-dl/issues/2102
- self._download_webpage(url, video_id, 'Requesting webpage')
- webpage = self._download_webpage(url, video_id)
-
- if 'This video has been removed<' in webpage:
- raise ExtractorError('Video %s has been removed' % video_id, expected=True)
-
- player_path = self._search_regex(
- r'\$\("#playeriframe"\).attr\({src : "(.+?)"',
- webpage, 'player path')
- player_url = compat_urlparse.urljoin(url, player_path)
-
- self._download_webpage(player_url, video_id, 'Requesting player page')
- player_page = self._download_webpage(
- player_url, video_id, 'Downloading player page')
-
- video_url = None
-
- config_json = self._search_regex(
- r'value=\'config=({.+?})\'', player_page, 'config json', default=None)
-
- if config_json:
- config = json.loads(config_json)
- video_url = compat_urllib_parse_unquote(config['clip']['url'])
-
- if not video_url:
- video_url = self._html_search_regex(
- r'<embed[^>]+type="video/divx"[^>]+src="([^"]+)"',
- player_page, 'video url', default=None)
-
- if not video_url:
- iframe_src = self._search_regex(
- r'<iframe[^>]+src="/?([^"]+)"', player_page, 'iframe url')
- iframe_url = 'http://veehd.com/%s' % iframe_src
-
- self._download_webpage(iframe_url, video_id, 'Requesting iframe page')
- iframe_page = self._download_webpage(
- iframe_url, video_id, 'Downloading iframe page')
-
- video_url = self._search_regex(
- r"file\s*:\s*'([^']+)'", iframe_page, 'video url')
-
- title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
- uploader_id = self._html_search_regex(
- r'<a href="/profile/\d+">(.+?)</a>',
- webpage, 'uploader')
- thumbnail = self._search_regex(
- r'<img id="veehdpreview" src="(.+?)"',
- webpage, 'thumbnail')
- description = self._html_search_regex(
- r'<td class="infodropdown".*?<div>(.*?)<ul',
- webpage, 'description', flags=re.DOTALL)
-
- return {
- '_type': 'video',
- 'id': video_id,
- 'title': title,
- 'url': video_url,
- 'uploader_id': uploader_id,
- 'thumbnail': thumbnail,
- 'description': description,
- }
if vice_url:
return _url_res(vice_url, ViceIE.ie_key())
- embed_code = self._search_regex(
- r'embedCode=([^&\'"]+)', body,
- 'ooyala embed code', default=None)
- if embed_code:
- return _url_res('ooyala:%s' % embed_code, 'Ooyala')
-
youtube_url = YoutubeIE._extract_url(body)
if youtube_url:
return _url_res(youtube_url, YoutubeIE.ie_key())
+++ /dev/null
-from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
- int_or_none,
- js_to_json,
- remove_end,
- unified_strdate,
-)
-
-
-class VidbitIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vidbit\.co/(?:watch|embed)\?.*?\bv=(?P<id>[\da-zA-Z]+)'
- _TESTS = [{
- 'url': 'http://www.vidbit.co/watch?v=jkL2yDOEq2',
- 'md5': '1a34b7f14defe3b8fafca9796892924d',
- 'info_dict': {
- 'id': 'jkL2yDOEq2',
- 'ext': 'mp4',
- 'title': 'Intro to VidBit',
- 'description': 'md5:5e0d6142eec00b766cbf114bfd3d16b7',
- 'thumbnail': r're:https?://.*\.jpg$',
- 'upload_date': '20160618',
- 'view_count': int,
- 'comment_count': int,
- }
- }, {
- 'url': 'http://www.vidbit.co/embed?v=jkL2yDOEq2&auto=0&water=0',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(
- compat_urlparse.urljoin(url, '/watch?v=%s' % video_id), video_id)
-
- video_url, title = [None] * 2
-
- config = self._parse_json(self._search_regex(
- r'(?s)\.setup\(({.+?})\);', webpage, 'setup', default='{}'),
- video_id, transform_source=js_to_json)
- if config:
- if config.get('file'):
- video_url = compat_urlparse.urljoin(url, config['file'])
- title = config.get('title')
-
- if not video_url:
- video_url = compat_urlparse.urljoin(url, self._search_regex(
- r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
- webpage, 'video URL', group='url'))
-
- if not title:
- title = remove_end(
- self._html_search_regex(
- (r'<h1>(.+?)</h1>', r'<title>(.+?)</title>'),
- webpage, 'title', default=None) or self._og_search_title(webpage),
- ' - VidBit')
-
- description = self._html_search_meta(
- ('description', 'og:description', 'twitter:description'),
- webpage, 'description')
-
- upload_date = unified_strdate(self._html_search_meta(
- 'datePublished', webpage, 'upload date'))
-
- view_count = int_or_none(self._search_regex(
- r'<strong>(\d+)</strong> views',
- webpage, 'view count', fatal=False))
- comment_count = int_or_none(self._search_regex(
- r'id=["\']cmt_num["\'][^>]*>\((\d+)\)',
- webpage, 'comment count', fatal=False))
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': title,
- 'description': description,
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'upload_date': upload_date,
- 'view_count': view_count,
- 'comment_count': comment_count,
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class SprutoBaseIE(InfoExtractor):
- def _extract_spruto(self, spruto, video_id):
- playlist = spruto['playlist'][0]
- title = playlist['title']
- video_id = playlist.get('videoId') or video_id
- thumbnail = playlist.get('posterUrl') or playlist.get('thumbnailUrl')
- duration = int_or_none(playlist.get('duration'))
-
- formats = [{
- 'url': f['url'],
- } for f in playlist['video']]
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- }
-
-
-class VimpleIE(SprutoBaseIE):
- IE_DESC = 'Vimple - one-click video hosting'
- _VALID_URL = r'https?://(?:player\.vimple\.(?:ru|co)/iframe|vimple\.(?:ru|co))/(?P<id>[\da-f-]{32,36})'
- _TESTS = [{
- 'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf',
- 'md5': '2e750a330ed211d3fd41821c6ad9a279',
- 'info_dict': {
- 'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf',
- 'ext': 'mp4',
- 'title': 'Sunset',
- 'duration': 20,
- 'thumbnail': r're:https?://.*?\.jpg',
- },
- }, {
- 'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9',
- 'only_matching': True,
- }, {
- 'url': 'http://vimple.co/04506a053f124483b8fb05ed73899f19',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(
- 'http://player.vimple.ru/iframe/%s' % video_id, video_id)
-
- spruto = self._parse_json(
- self._search_regex(
- r'sprutoData\s*:\s*({.+?}),\r\n', webpage, 'spruto data'),
- video_id)
-
- return self._extract_spruto(spruto, video_id)
+++ /dev/null
-from .common import InfoExtractor
-from ..networking import Request
-from ..utils import NO_DEFAULT, ExtractorError, urlencode_postdata
-
-
-class VodlockerIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vodlocker\.(?:com|city)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'
-
- _TESTS = [{
- 'url': 'http://vodlocker.com/e8wvyzz4sl42',
- 'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf',
- 'info_dict': {
- 'id': 'e8wvyzz4sl42',
- 'ext': 'mp4',
- 'title': 'Germany vs Brazil',
- 'thumbnail': r're:http://.*\.jpg',
- },
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- if any(p in webpage for p in (
- '>THIS FILE WAS DELETED<',
- '>File Not Found<',
- 'The file you were looking for could not be found, sorry for any inconvenience.<',
- '>The file was removed')):
- raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
- fields = self._hidden_inputs(webpage)
-
- if fields['op'] == 'download1':
- self._sleep(3, video_id) # they do detect when requests happen too fast!
- post = urlencode_postdata(fields)
- req = Request(url, post)
- req.headers['Content-type'] = 'application/x-www-form-urlencoded'
- webpage = self._download_webpage(
- req, video_id, 'Downloading video page')
-
- def extract_file_url(html, default=NO_DEFAULT):
- return self._search_regex(
- r'file:\s*"(http[^\"]+)",', html, 'file url', default=default)
-
- video_url = extract_file_url(webpage, default=None)
-
- if not video_url:
- embed_url = self._search_regex(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?vodlocker\.(?:com|city)/embed-.+?)\1',
- webpage, 'embed url', group='url')
- embed_webpage = self._download_webpage(
- embed_url, video_id, 'Downloading embed webpage')
- video_url = extract_file_url(embed_webpage)
- thumbnail_webpage = embed_webpage
- else:
- thumbnail_webpage = webpage
-
- title = self._search_regex(
- r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', webpage, 'title')
- thumbnail = self._search_regex(
- r'image:\s*"(http[^\"]+)",', thumbnail_webpage, 'thumbnail', fatal=False)
-
- formats = [{
- 'format_id': 'sd',
- 'url': video_url,
- }]
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'formats': formats,
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- ExtractorError,
- determine_ext,
- int_or_none,
- urljoin,
-)
-
-
-class VoiceRepublicIE(InfoExtractor):
- _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
- _TESTS = [{
- 'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
- 'md5': 'b9174d651323f17783000876347116e3',
- 'info_dict': {
- 'id': '2296',
- 'display_id': 'watching-the-watchers-building-a-sousveillance-state',
- 'ext': 'm4a',
- 'title': 'Watching the Watchers: Building a Sousveillance State',
- 'description': 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.',
- 'duration': 1556,
- 'view_count': int,
- }
- }, {
- 'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- if '>Queued for processing, please stand by...<' in webpage:
- raise ExtractorError(
- 'Audio is still queued for processing', expected=True)
-
- talk = self._parse_json(self._search_regex(
- r'initialSnapshot\s*=\s*({.+?});',
- webpage, 'talk'), display_id)['talk']
- title = talk['title']
- formats = [{
- 'url': urljoin(url, talk_url),
- 'format_id': format_id,
- 'ext': determine_ext(talk_url) or format_id,
- 'vcodec': 'none',
- } for format_id, talk_url in talk['media_links'].items()]
-
- return {
- 'id': compat_str(talk.get('id') or display_id),
- 'display_id': display_id,
- 'title': title,
- 'description': talk.get('teaser'),
- 'thumbnail': talk.get('image_url'),
- 'duration': int_or_none(talk.get('archived_duration')),
- 'view_count': int_or_none(talk.get('play_count')),
- 'formats': formats,
- }
class VootIE(VootBaseIE):
+ _WORKING = False
_VALID_URL = r'''(?x)
(?:
voot:|
class VootSeriesIE(VootBaseIE):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?voot\.com/shows/[^/]+/(?P<id>\d{3,})'
_TESTS = [{
'url': 'https://www.voot.com/shows/chakravartin-ashoka-samrat/100002',
info['duration'] = int_or_none(asset.get('duration'))
return info
- for provider_video_type in ('ooyala', 'youtube', 'brightcove'):
+ for provider_video_type in ('youtube', 'brightcove'):
provider_video_id = video_data.get('%s_id' % provider_video_type)
if not provider_video_id:
continue
def create_entry(provider_video_id, provider_video_type, title=None, description=None):
video_url = {
'youtube': '%s',
- 'ooyala': 'ooyala:%s',
'volume': 'http://volume.vox-cdn.com/embed/%s',
}[provider_video_type] % provider_video_id
return {
provider_video_id, provider_video_type,
video_data.get('title'), video_data.get('description')))
- provider_video_id = self._search_regex(
- r'data-ooyala-id="([^"]+)"', webpage, 'ooyala id', default=None)
- if provider_video_id:
- entries.append(create_entry(provider_video_id, 'ooyala'))
-
volume_uuid = self._search_regex(
r'data-volume-uuid="([^"]+)"', webpage, 'volume uuid', default=None)
if volume_uuid:
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from .brightcove import BrightcoveNewIE
-from ..utils import (
- int_or_none,
- parse_age_limit,
- smuggle_url,
- unescapeHTML,
-)
-
-
-class VrakIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?\btarget=(?P<id>[\d.]+)'
- _TEST = {
- 'url': 'http://www.vrak.tv/videos?target=1.2306782&filtre=emission&id=1.1806721',
- 'info_dict': {
- 'id': '5345661243001',
- 'ext': 'mp4',
- 'title': 'Obésité, film de hockey et Roseline Filion',
- 'timestamp': 1488492126,
- 'upload_date': '20170302',
- 'uploader_id': '2890187628001',
- 'creator': 'VRAK.TV',
- 'age_limit': 8,
- 'series': 'ALT (Actualité Légèrement Tordue)',
- 'episode': 'Obésité, film de hockey et Roseline Filion',
- 'tags': list,
- },
- 'params': {
- 'skip_download': True,
- },
- }
- BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/2890187628001/default_default/index.html?videoId=%s'
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- title = self._html_search_regex(
- r'<h\d\b[^>]+\bclass=["\']videoTitle["\'][^>]*>([^<]+)',
- webpage, 'title', default=None) or self._og_search_title(webpage)
-
- content = self._parse_json(
- self._search_regex(
- r'data-player-options-content=(["\'])(?P<content>{.+?})\1',
- webpage, 'content', default='{}', group='content'),
- video_id, transform_source=unescapeHTML)
-
- ref_id = content.get('refId') or self._search_regex(
- r'refId":"([^&]+)"', webpage, 'ref id')
-
- brightcove_id = self._search_regex(
- r'''(?x)
- java\.lang\.String\s+value\s*=\s*["']brightcove\.article\.\d+\.%s
- [^>]*
- java\.lang\.String\s+value\s*=\s*["'](\d+)
- ''' % re.escape(ref_id), webpage, 'brightcove id')
-
- return {
- '_type': 'url_transparent',
- 'ie_key': BrightcoveNewIE.ie_key(),
- 'url': smuggle_url(
- self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
- {'geo_countries': ['CA']}),
- 'id': brightcove_id,
- 'description': content.get('description'),
- 'creator': content.get('brand'),
- 'age_limit': parse_age_limit(content.get('rating')),
- 'series': content.get('showName') or content.get(
- 'episodeName'), # this is intentional
- 'season_number': int_or_none(content.get('seasonNumber')),
- 'episode': title,
- 'episode_number': int_or_none(content.get('episodeNumber')),
- 'tags': content.get('tags', []),
- }
+++ /dev/null
-import base64
-import hashlib
-import hmac
-import json
-import random
-import string
-import time
-import urllib.parse
-
-from .common import InfoExtractor
-from ..compat import compat_urllib_parse_urlencode
-from ..networking.exceptions import HTTPError
-from ..utils import (
- ExtractorError,
- float_or_none,
- int_or_none,
- join_nonempty,
- traverse_obj,
-)
-
-
-class VRVBaseIE(InfoExtractor):
- _API_DOMAIN = None
- _API_PARAMS = {}
- _CMS_SIGNING = {}
- _TOKEN = None
- _TOKEN_SECRET = ''
-
- def _call_api(self, path, video_id, note, data=None):
- # https://tools.ietf.org/html/rfc5849#section-3
- base_url = self._API_DOMAIN + '/core/' + path
- query = [
- ('oauth_consumer_key', self._API_PARAMS['oAuthKey']),
- ('oauth_nonce', ''.join(random.choices(string.ascii_letters, k=32))),
- ('oauth_signature_method', 'HMAC-SHA1'),
- ('oauth_timestamp', int(time.time())),
- ]
- if self._TOKEN:
- query.append(('oauth_token', self._TOKEN))
- encoded_query = compat_urllib_parse_urlencode(query)
- headers = self.geo_verification_headers()
- if data:
- data = json.dumps(data).encode()
- headers['Content-Type'] = 'application/json'
- base_string = '&'.join([
- 'POST' if data else 'GET',
- urllib.parse.quote(base_url, ''),
- urllib.parse.quote(encoded_query, '')])
- oauth_signature = base64.b64encode(hmac.new(
- (self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'),
- base_string.encode(), hashlib.sha1).digest()).decode()
- encoded_query += '&oauth_signature=' + urllib.parse.quote(oauth_signature, '')
- try:
- return self._download_json(
- '?'.join([base_url, encoded_query]), video_id,
- note='Downloading %s JSON metadata' % note, headers=headers, data=data)
- except ExtractorError as e:
- if isinstance(e.cause, HTTPError) and e.cause.status == 401:
- raise ExtractorError(json.loads(e.cause.response.read().decode())['message'], expected=True)
- raise
-
- def _call_cms(self, path, video_id, note):
- if not self._CMS_SIGNING:
- index = self._call_api('index', video_id, 'CMS Signing')
- self._CMS_SIGNING = index.get('cms_signing') or {}
- if not self._CMS_SIGNING:
- for signing_policy in index.get('signing_policies', []):
- signing_path = signing_policy.get('path')
- if signing_path and signing_path.startswith('/cms/'):
- name, value = signing_policy.get('name'), signing_policy.get('value')
- if name and value:
- self._CMS_SIGNING[name] = value
- return self._download_json(
- self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING,
- note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers())
-
- def _get_cms_resource(self, resource_key, video_id):
- return self._call_api(
- 'cms_resource', video_id, 'resource path', data={
- 'resource_key': resource_key,
- })['__links__']['cms_resource']['href']
-
- def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
- if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'):
- return []
- format_id = join_nonempty(
- stream_format,
- audio_lang and 'audio-%s' % audio_lang,
- hardsub_lang and 'hardsub-%s' % hardsub_lang)
- if 'hls' in stream_format:
- adaptive_formats = self._extract_m3u8_formats(
- url, video_id, 'mp4', m3u8_id=format_id,
- note='Downloading %s information' % format_id,
- fatal=False)
- elif stream_format == 'dash':
- adaptive_formats = self._extract_mpd_formats(
- url, video_id, mpd_id=format_id,
- note='Downloading %s information' % format_id,
- fatal=False)
- if audio_lang:
- for f in adaptive_formats:
- if f.get('acodec') != 'none':
- f['language'] = audio_lang
- return adaptive_formats
-
- def _set_api_params(self):
- webpage = self._download_webpage(
- 'https://vrv.co/', None, headers=self.geo_verification_headers())
- self._API_PARAMS = self._parse_json(self._search_regex(
- [
- r'window\.__APP_CONFIG__\s*=\s*({.+?})(?:</script>|;)',
- r'window\.__APP_CONFIG__\s*=\s*({.+})'
- ], webpage, 'app config'), None)['cxApiParams']
- self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')
-
-
-class VRVIE(VRVBaseIE):
- IE_NAME = 'vrv'
- _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
- _TESTS = [{
- 'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
- 'info_dict': {
- 'id': 'GR9PNZ396',
- 'ext': 'mp4',
- 'title': 'BOSTON: WHERE THE PAST IS THE PRESENT',
- 'description': 'md5:4ec8844ac262ca2df9e67c0983c6b83f',
- 'uploader_id': 'seeso',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- # movie listing
- 'url': 'https://vrv.co/watch/G6NQXZ1J6/Lily-CAT',
- 'info_dict': {
- 'id': 'G6NQXZ1J6',
- 'title': 'Lily C.A.T',
- 'description': 'md5:988b031e7809a6aeb60968be4af7db07',
- },
- 'playlist_count': 2,
- }]
- _NETRC_MACHINE = 'vrv'
-
- def _perform_login(self, username, password):
- token_credentials = self._call_api(
- 'authenticate/by:credentials', None, 'Token Credentials', data={
- 'email': username,
- 'password': password,
- })
- self._TOKEN = token_credentials['oauth_token']
- self._TOKEN_SECRET = token_credentials['oauth_token_secret']
-
- def _initialize_pre_login(self):
- return self._set_api_params()
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- object_data = self._call_cms(self._get_cms_resource(
- 'cms:/objects/' + video_id, video_id), video_id, 'object')['items'][0]
- resource_path = object_data['__links__']['resource']['href']
- video_data = self._call_cms(resource_path, video_id, 'video')
- title = video_data['title']
- description = video_data.get('description')
-
- if video_data.get('__class__') == 'movie_listing':
- items = self._call_cms(
- video_data['__links__']['movie_listing/movies']['href'],
- video_id, 'movie listing').get('items') or []
- if len(items) != 1:
- entries = []
- for item in items:
- item_id = item.get('id')
- if not item_id:
- continue
- entries.append(self.url_result(
- 'https://vrv.co/watch/' + item_id,
- self.ie_key(), item_id, item.get('title')))
- return self.playlist_result(entries, video_id, title, description)
- video_data = items[0]
-
- streams_path = video_data['__links__'].get('streams', {}).get('href')
- if not streams_path:
- self.raise_login_required()
- streams_json = self._call_cms(streams_path, video_id, 'streams')
-
- audio_locale = streams_json.get('audio_locale')
- formats = []
- for stream_type, streams in streams_json.get('streams', {}).items():
- if stream_type in ('adaptive_hls', 'adaptive_dash'):
- for stream in streams.values():
- formats.extend(self._extract_vrv_formats(
- stream.get('url'), video_id, stream_type.split('_')[1],
- audio_locale, stream.get('hardsub_locale')))
-
- subtitles = {}
- for k in ('captions', 'subtitles'):
- for subtitle in streams_json.get(k, {}).values():
- subtitle_url = subtitle.get('url')
- if not subtitle_url:
- continue
- subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({
- 'url': subtitle_url,
- 'ext': subtitle.get('format', 'ass'),
- })
-
- thumbnails = []
- for thumbnail in traverse_obj(video_data, ('images', 'thumbnail', ..., ...)) or []:
- thumbnail_url = thumbnail.get('source')
- if not thumbnail_url:
- continue
- thumbnails.append({
- 'url': thumbnail_url,
- 'width': int_or_none(thumbnail.get('width')),
- 'height': int_or_none(thumbnail.get('height')),
- })
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'subtitles': subtitles,
- 'thumbnails': thumbnails,
- 'description': description,
- 'duration': float_or_none(video_data.get('duration_ms'), 1000),
- 'uploader_id': video_data.get('channel_id'),
- 'series': video_data.get('series_title'),
- 'season': video_data.get('season_title'),
- 'season_number': int_or_none(video_data.get('season_number')),
- 'season_id': video_data.get('season_id'),
- 'episode': title,
- 'episode_number': int_or_none(video_data.get('episode_number')),
- 'episode_id': video_data.get('production_episode_id'),
- }
-
-
-class VRVSeriesIE(VRVBaseIE):
- IE_NAME = 'vrv:series'
- _VALID_URL = r'https?://(?:www\.)?vrv\.co/series/(?P<id>[A-Z0-9]+)'
- _TEST = {
- 'url': 'https://vrv.co/series/G68VXG3G6/The-Perfect-Insider',
- 'info_dict': {
- 'id': 'G68VXG3G6',
- },
- 'playlist_mincount': 11,
- }
-
- def _initialize_pre_login(self):
- return self._set_api_params()
-
- def _real_extract(self, url):
- series_id = self._match_id(url)
-
- seasons_path = self._get_cms_resource(
- 'cms:/seasons?series_id=' + series_id, series_id)
- seasons_data = self._call_cms(seasons_path, series_id, 'seasons')
-
- entries = []
- for season in seasons_data.get('items', []):
- episodes_path = season['__links__']['season/episodes']['href']
- episodes = self._call_cms(episodes_path, series_id, 'episodes')
- for episode in episodes.get('items', []):
- episode_id = episode['id']
- entries.append(self.url_result(
- 'https://vrv.co/watch/' + episode_id,
- 'VRV', episode_id, episode.get('title')))
-
- return self.playlist_result(entries, series_id)
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import ExtractorError, decode_packed_codes
-
-
-class VShareIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
- _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)']
- _TESTS = [{
- 'url': 'https://vshare.io/d/0f64ce6',
- 'md5': '17b39f55b5497ae8b59f5fbce8e35886',
- 'info_dict': {
- 'id': '0f64ce6',
- 'title': 'vl14062007715967',
- 'ext': 'mp4',
- }
- }, {
- 'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
- 'only_matching': True,
- }]
-
- def _extract_packed(self, webpage):
- packed = self._search_regex(
- r'(eval\(function.+)', webpage, 'packed code')
- unpacked = decode_packed_codes(packed)
- digits = self._search_regex(r'\[([\d,]+)\]', unpacked, 'digits')
- digits = [int(digit) for digit in digits.split(',')]
- key_digit = self._search_regex(
- r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
- chars = [chr(d - int(key_digit)) for d in digits]
- return ''.join(chars)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(
- 'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
- video_id, headers={'Referer': url})
-
- title = self._html_extract_title(webpage)
- title = title.split(' - ')[0]
-
- error = self._html_search_regex(
- r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
- 'error', default=None)
- if error:
- raise ExtractorError(error, expected=True)
-
- info = self._parse_html5_media_entries(
- url, '<video>%s</video>' % self._extract_packed(webpage),
- video_id)[0]
-
- info.update({
- 'id': video_id,
- 'title': title,
- })
-
- return info
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- parse_duration,
- parse_filesize,
- extract_attributes,
- int_or_none,
- js_to_json
-)
-
-
-class VuploadIE(InfoExtractor):
- _VALID_URL = r'https://vupload\.com/v/(?P<id>[a-z0-9]+)'
- _TESTS = [{
- 'url': 'https://vupload.com/v/u28d0pl2tphy',
- 'md5': '9b42a4a193cca64d80248e58527d83c8',
- 'info_dict': {
- 'id': 'u28d0pl2tphy',
- 'ext': 'mp4',
- 'description': 'md5:e9e6c0045c78cbf0d5bb19a55ce199fb',
- 'title': 'md5:e9e6c0045c78cbf0d5bb19a55ce199fb',
- }
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- title = self._html_extract_title(webpage)
- video_json = self._parse_json(self._html_search_regex(r'sources:\s*(.+?]),', webpage, 'video'), video_id, transform_source=js_to_json)
- formats = []
- for source in video_json:
- if source['src'].endswith('.m3u8'):
- formats.extend(self._extract_m3u8_formats(source['src'], video_id, m3u8_id='hls'))
- duration = parse_duration(self._html_search_regex(
- r'<i\s*class=["\']fad\s*fa-clock["\']></i>\s*([\d:]+)\s*</div>', webpage, 'duration', fatal=False))
- filesize_approx = parse_filesize(self._html_search_regex(
- r'<i\s*class=["\']fad\s*fa-save["\']></i>\s*([^<]+)\s*</div>', webpage, 'filesize', fatal=False))
- extra_video_info = extract_attributes(self._html_search_regex(
- r'(<video[^>]+>)', webpage, 'video_info', fatal=False))
- description = self._html_search_meta('description', webpage)
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'duration': duration,
- 'filesize_approx': filesize_approx,
- 'width': int_or_none(extra_video_info.get('width')),
- 'height': int_or_none(extra_video_info.get('height')),
- 'format_id': extra_video_info.get('height', '') + 'p',
- 'title': title,
- 'description': description,
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..compat import compat_str
-
-
-class VyboryMosIE(InfoExtractor):
- _VALID_URL = r'https?://vybory\.mos\.ru/(?:#precinct/|account/channels\?.*?\bstation_id=)(?P<id>\d+)'
- _TESTS = [{
- 'url': 'http://vybory.mos.ru/#precinct/13636',
- 'info_dict': {
- 'id': '13636',
- 'ext': 'mp4',
- 'title': 're:^Участковая избирательная комиссия №2231 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
- 'description': 'Россия, Москва, улица Введенского, 32А',
- 'is_live': True,
- },
- 'params': {
- 'skip_download': True,
- }
- }, {
- 'url': 'http://vybory.mos.ru/account/channels?station_id=13636',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- station_id = self._match_id(url)
-
- channels = self._download_json(
- 'http://vybory.mos.ru/account/channels?station_id=%s' % station_id,
- station_id, 'Downloading channels JSON')
-
- formats = []
- for cam_num, (sid, hosts, name, _) in enumerate(channels, 1):
- for num, host in enumerate(hosts, 1):
- formats.append({
- 'url': 'http://%s/master.m3u8?sid=%s' % (host, sid),
- 'ext': 'mp4',
- 'format_id': 'camera%d-host%d' % (cam_num, num),
- 'format_note': '%s, %s' % (name, host),
- })
-
- info = self._download_json(
- 'http://vybory.mos.ru/json/voting_stations/%s/%s.json'
- % (compat_str(station_id)[:3], station_id),
- station_id, 'Downloading station JSON', fatal=False) or {}
-
- return {
- 'id': station_id,
- 'title': info.get('name') or station_id,
- 'description': info.get('address'),
- 'is_live': True,
- 'formats': formats,
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- int_or_none,
- float_or_none,
- unified_timestamp,
- url_or_none,
-)
-
-
-class VzaarIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
- _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)']
- _TESTS = [{
- # HTTP and HLS
- 'url': 'https://vzaar.com/videos/1152805',
- 'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
- 'info_dict': {
- 'id': '1152805',
- 'ext': 'mp4',
- 'title': 'sample video (public)',
- },
- }, {
- 'url': 'https://view.vzaar.com/27272/player',
- 'md5': '3b50012ac9bbce7f445550d54e0508f2',
- 'info_dict': {
- 'id': '27272',
- 'ext': 'mp3',
- 'title': 'MP3',
- },
- }, {
- # hlsAes = true
- 'url': 'https://view.vzaar.com/11379930/player',
- 'info_dict': {
- 'id': '11379930',
- 'ext': 'mp4',
- 'title': 'Videoaula',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- # with null videoTitle
- 'url': 'https://view.vzaar.com/20313539/download',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- video_data = self._download_json(
- 'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
-
- title = video_data.get('videoTitle') or video_id
-
- formats = []
-
- source_url = url_or_none(video_data.get('sourceUrl'))
- if source_url:
- f = {
- 'url': source_url,
- 'format_id': 'http',
- 'quality': 1,
- }
- if 'audio' in source_url:
- f.update({
- 'vcodec': 'none',
- 'ext': 'mp3',
- })
- else:
- f.update({
- 'width': int_or_none(video_data.get('width')),
- 'height': int_or_none(video_data.get('height')),
- 'ext': 'mp4',
- 'fps': float_or_none(video_data.get('fps')),
- })
- formats.append(f)
-
- video_guid = video_data.get('guid')
- usp = video_data.get('usp')
- if video_data.get('uspEnabled') and isinstance(video_guid, compat_str) and isinstance(usp, dict):
- hls_aes = video_data.get('hlsAes')
- qs = '&'.join('%s=%s' % (k, v) for k, v in usp.items())
- url_templ = 'http://%%s.vzaar.com/v5/usp%s/%s/%s.ism%%s?' % ('aes' if hls_aes else '', video_guid, video_id)
- m3u8_formats = self._extract_m3u8_formats(
- url_templ % ('fable', '/.m3u8') + qs, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False)
- if hls_aes:
- for f in m3u8_formats:
- f['hls_aes'] = {'uri': url_templ % ('goose', '') + qs}
- formats.extend(m3u8_formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': self._proto_relative_url(video_data.get('poster')),
- 'duration': float_or_none(video_data.get('videoDuration')),
- 'timestamp': unified_timestamp(video_data.get('ts')),
- 'formats': formats,
- }
+++ /dev/null
-from urllib.parse import unquote
-
-from .common import InfoExtractor
-from ..utils import (
- merge_dicts,
- urljoin,
-)
-
-
-class WakanimIE(InfoExtractor):
- _VALID_URL = r'https://(?:www\.)?wakanim\.tv/[^/]+/v2/catalogue/episode/(?P<id>\d+)'
- _TESTS = [{
- 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/2997/the-asterisk-war-omu-staffel-1-episode-02-omu',
- 'info_dict': {
- 'id': '2997',
- 'ext': 'mp4',
- 'title': 'Episode 02',
- 'description': 'md5:2927701ea2f7e901de8bfa8d39b2852d',
- 'series': 'The Asterisk War (OmU.)',
- 'season_number': 1,
- 'episode': 'Episode 02',
- 'episode_number': 2,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # DRM Protected
- 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/7843/sword-art-online-alicization-omu-arc-2-folge-15-omu',
- 'only_matching': True,
- }]
- _GEO_BYPASS = False
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- if 'Geoblocking' in webpage:
- if '/de/' in url:
- self.raise_geo_restricted(countries=['DE', 'AT', 'CH'])
- else:
- self.raise_geo_restricted(countries=['RU'])
-
- manifest_url = urljoin(url, self._search_regex(
- r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'manifest url',
- group='url'))
- if not self.get_param('allow_unplayable_formats'):
- # https://docs.microsoft.com/en-us/azure/media-services/previous/media-services-content-protection-overview#streaming-urls
- encryption = self._search_regex(
- r'encryption%3D(c(?:enc|bc(?:s-aapl)?))',
- manifest_url, 'encryption', default=None)
- if encryption in ('cenc', 'cbcs-aapl'):
- self.report_drm(video_id)
-
- if 'format=mpd-time-cmaf' in unquote(manifest_url):
- formats = self._extract_mpd_formats(
- manifest_url, video_id, mpd_id='dash')
- else:
- formats = self._extract_m3u8_formats(
- manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls')
-
- info = self._search_json_ld(webpage, video_id, default={})
-
- title = self._search_regex(
- (r'<h1[^>]+\bclass=["\']episode_h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
- r'<span[^>]+\bclass=["\']episode_title["\'][^>]*>(?P<title>[^<]+)'),
- webpage, 'title', default=None, group='title')
-
- return merge_dicts(info, {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- })
+++ /dev/null
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- int_or_none,
- js_to_json,
- strip_or_none,
- try_get,
- unescapeHTML,
- unified_timestamp,
-)
-
-
-class WatchBoxIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?watchbox\.de/(?P<kind>serien|filme)/(?:[^/]+/)*[^/]+-(?P<id>\d+)'
- _TESTS = [{
- # film
- 'url': 'https://www.watchbox.de/filme/free-jimmy-12325.html',
- 'info_dict': {
- 'id': '341368',
- 'ext': 'mp4',
- 'title': 'Free Jimmy',
- 'description': 'md5:bcd8bafbbf9dc0ef98063d344d7cc5f6',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 4890,
- 'age_limit': 16,
- 'release_year': 2009,
- },
- 'params': {
- 'skip_download': True,
- },
- 'expected_warnings': ['Failed to download m3u8 information'],
- }, {
- # episode
- 'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-1/date-in-der-hoelle-328286.html',
- 'info_dict': {
- 'id': '328286',
- 'ext': 'mp4',
- 'title': 'S01 E01 - Date in der Hölle',
- 'description': 'md5:2f31c74a8186899f33cb5114491dae2b',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 1291,
- 'age_limit': 12,
- 'release_year': 2010,
- 'series': 'Ugly Americans',
- 'season_number': 1,
- 'episode': 'Date in der Hölle',
- 'episode_number': 1,
- },
- 'params': {
- 'skip_download': True,
- },
- 'expected_warnings': ['Failed to download m3u8 information'],
- }, {
- 'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-2/der-ring-des-powers-328270',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- kind, video_id = mobj.group('kind', 'id')
-
- webpage = self._download_webpage(url, video_id)
-
- player_config = self._parse_json(
- self._search_regex(
- r'data-player-conf=(["\'])(?P<data>{.+?})\1', webpage,
- 'player config', default='{}', group='data'),
- video_id, transform_source=unescapeHTML, fatal=False)
-
- if not player_config:
- player_config = self._parse_json(
- self._search_regex(
- r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
- default='{}'),
- video_id, transform_source=js_to_json, fatal=False) or {}
-
- source = player_config.get('source') or {}
-
- video_id = compat_str(source.get('videoId') or video_id)
-
- devapi = self._download_json(
- 'http://api.watchbox.de/devapi/id/%s' % video_id, video_id, query={
- 'format': 'json',
- 'apikey': 'hbbtv',
- }, fatal=False)
-
- item = try_get(devapi, lambda x: x['items'][0], dict) or {}
-
- title = item.get('title') or try_get(
- item, lambda x: x['movie']['headline_movie'],
- compat_str) or source['title']
-
- formats = []
- hls_url = item.get('media_videourl_hls') or source.get('hls')
- if hls_url:
- formats.extend(self._extract_m3u8_formats(
- hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- dash_url = item.get('media_videourl_wv') or source.get('dash')
- if dash_url:
- formats.extend(self._extract_mpd_formats(
- dash_url, video_id, mpd_id='dash', fatal=False))
- mp4_url = item.get('media_videourl')
- if mp4_url:
- formats.append({
- 'url': mp4_url,
- 'format_id': 'mp4',
- 'width': int_or_none(item.get('width')),
- 'height': int_or_none(item.get('height')),
- 'tbr': int_or_none(item.get('bitrate')),
- })
-
- description = strip_or_none(item.get('descr'))
- thumbnail = item.get('media_content_thumbnail_large') or source.get('poster') or item.get('media_thumbnail')
- duration = int_or_none(item.get('media_length') or source.get('length'))
- timestamp = unified_timestamp(item.get('pubDate'))
- view_count = int_or_none(item.get('media_views'))
- age_limit = int_or_none(try_get(item, lambda x: x['movie']['fsk']))
- release_year = int_or_none(try_get(item, lambda x: x['movie']['rel_year']))
-
- info = {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'timestamp': timestamp,
- 'view_count': view_count,
- 'age_limit': age_limit,
- 'release_year': release_year,
- 'formats': formats,
- }
-
- if kind.lower() == 'serien':
- series = try_get(
- item, lambda x: x['special']['title'],
- compat_str) or source.get('format')
- season_number = int_or_none(self._search_regex(
- r'^S(\d{1,2})\s*E\d{1,2}', title, 'season number',
- default=None) or self._search_regex(
- r'/staffel-(\d+)/', url, 'season number', default=None))
- episode = source.get('title')
- episode_number = int_or_none(self._search_regex(
- r'^S\d{1,2}\s*E(\d{1,2})', title, 'episode number',
- default=None))
- info.update({
- 'series': series,
- 'season_number': season_number,
- 'episode': episode,
- 'episode_number': episode_number,
- })
-
- return info
+++ /dev/null
-import re
-
-from .common import InfoExtractor
-from ..utils import parse_duration
-
-
-class WatchIndianPornIE(InfoExtractor):
- IE_DESC = 'Watch Indian Porn'
- _VALID_URL = r'https?://(?:www\.)?watchindianporn\.net/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
- _TEST = {
- 'url': 'http://www.watchindianporn.net/video/hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera-RZa2avywNPa.html',
- 'md5': '249589a164dde236ec65832bfce17440',
- 'info_dict': {
- 'id': 'RZa2avywNPa',
- 'display_id': 'hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera',
- 'ext': 'mp4',
- 'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 226,
- 'view_count': int,
- 'categories': list,
- 'age_limit': 18,
- }
- }
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id')
-
- webpage = self._download_webpage(url, display_id)
-
- info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
-
- title = self._html_search_regex((
- r'<title>(.+?)\s*-\s*Indian\s+Porn</title>',
- r'<h4>(.+?)</h4>'
- ), webpage, 'title')
-
- duration = parse_duration(self._search_regex(
- r'Time:\s*<strong>\s*(.+?)\s*</strong>',
- webpage, 'duration', fatal=False))
-
- view_count = int(self._search_regex(
- r'(?s)Time:\s*<strong>.*?</strong>.*?<strong>\s*(\d+)\s*</strong>',
- webpage, 'view count', fatal=False))
-
- categories = re.findall(
- r'<a[^>]+class=[\'"]categories[\'"][^>]*>\s*([^<]+)\s*</a>',
- webpage)
-
- info_dict.update({
- 'id': video_id,
- 'display_id': display_id,
- 'http_headers': {
- 'Referer': url,
- },
- 'title': title,
- 'duration': duration,
- 'view_count': view_count,
- 'categories': categories,
- 'age_limit': 18,
- })
-
- return info_dict
+++ /dev/null
-from ..utils import ExtractorError
-from .common import InfoExtractor
-
-
-class WillowIE(InfoExtractor):
- _VALID_URL = r'https?://(www\.)?willow\.tv/videos/(?P<id>[0-9a-z-_]+)'
- _GEO_COUNTRIES = ['US']
-
- _TESTS = [{
- 'url': 'http://willow.tv/videos/d5winning-moment-eng-vs-ind-streaming-online-4th-test-india-tour-of-england-2021',
- 'info_dict': {
- 'id': '169662',
- 'display_id': 'd5winning-moment-eng-vs-ind-streaming-online-4th-test-india-tour-of-england-2021',
- 'ext': 'mp4',
- 'title': 'Winning Moment: 4th Test, England vs India',
- 'thumbnail': 'https://aimages.willow.tv/ytThumbnails/6748_D5winning_moment.jpg',
- 'duration': 233,
- 'timestamp': 1630947954,
- 'upload_date': '20210906',
- 'location': 'Kennington Oval, London',
- 'series': 'India tour of England 2021',
- },
- 'params': {
- 'skip_download': True, # AES-encrypted m3u8
- },
- }, {
- 'url': 'http://willow.tv/videos/highlights-short-ind-vs-nz-streaming-online-2nd-t20i-new-zealand-tour-of-india-2021',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- video_data = self._parse_json(self._html_search_regex(
- r'var\s+data_js\s*=\s*JSON\.parse\(\'(.+)\'\)', webpage,
- 'data_js'), video_id)
-
- video = next((v for v in video_data.get('trending_videos') or []
- if v.get('secureurl')), None)
- if not video:
- raise ExtractorError('No videos found')
-
- formats = self._extract_m3u8_formats(video['secureurl'], video_id, 'mp4')
-
- return {
- 'id': str(video.get('content_id')),
- 'display_id': video.get('video_slug'),
- 'title': video.get('video_name') or self._html_search_meta('twitter:title', webpage),
- 'formats': formats,
- 'thumbnail': video.get('yt_thumb_url') or self._html_search_meta(
- 'twitter:image', webpage, default=None),
- 'duration': video.get('duration_seconds'),
- 'timestamp': video.get('created_date'),
- 'location': video.get('venue'),
- 'series': video.get('series_name'),
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
-
-
-class XBefIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?xbef\.com/video/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://xbef.com/video/5119-glamourous-lesbians-smoking-drinking-and-fucking',
- 'md5': 'a478b565baff61634a98f5e5338be995',
- 'info_dict': {
- 'id': '5119',
- 'ext': 'mp4',
- 'title': 'md5:7358a9faef8b7b57acda7c04816f170e',
- 'age_limit': 18,
- 'thumbnail': r're:^http://.*\.jpg',
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- title = self._html_search_regex(
- r'<h1[^>]*>(.*?)</h1>', webpage, 'title')
-
- config_url_enc = self._download_webpage(
- 'http://xbef.com/Main/GetVideoURLEncoded/%s' % video_id, video_id,
- note='Retrieving config URL')
- config_url = compat_urllib_parse_unquote(config_url_enc)
- config = self._download_xml(
- config_url, video_id, note='Retrieving config')
-
- video_url = config.find('./file').text
- thumbnail = config.find('./image').text
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': title,
- 'thumbnail': thumbnail,
- 'age_limit': 18,
- }
+++ /dev/null
-import itertools
-import re
-
-from .common import InfoExtractor
-from ..networking import Request
-from ..utils import (
- int_or_none,
- js_to_json,
- orderedSet,
- parse_duration,
- str_to_int,
- url_or_none,
-)
-
-
-class XTubeIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- (?:
- xtube:|
- https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?:embedded/)?(?P<display_id>[^/]+)-)
- )
- (?P<id>[^/?&#]+)
- '''
-
- _TESTS = [{
- # old URL schema
- 'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
- 'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
- 'info_dict': {
- 'id': 'kVTUy_G222_',
- 'ext': 'mp4',
- 'title': 'strange erotica',
- 'description': 'contains:an ET kind of thing',
- 'uploader': 'greenshowers',
- 'duration': 450,
- 'view_count': int,
- 'comment_count': int,
- 'age_limit': 18,
- }
- }, {
- # new URL schema
- 'url': 'http://www.xtube.com/video-watch/strange-erotica-625837',
- 'only_matching': True,
- }, {
- 'url': 'xtube:625837',
- 'only_matching': True,
- }, {
- 'url': 'xtube:kVTUy_G222_',
- 'only_matching': True,
- }, {
- 'url': 'https://www.xtube.com/video-watch/embedded/milf-tara-and-teen-shared-and-cum-covered-extreme-bukkake-32203482?embedsize=big',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id')
-
- if not display_id:
- display_id = video_id
-
- if video_id.isdigit() and len(video_id) < 11:
- url_pattern = 'http://www.xtube.com/video-watch/-%s'
- else:
- url_pattern = 'http://www.xtube.com/watch.php?v=%s'
-
- webpage = self._download_webpage(
- url_pattern % video_id, display_id, headers={
- 'Cookie': 'age_verified=1; cookiesAccepted=1',
- })
-
- title, thumbnail, duration, sources, media_definition = [None] * 5
-
- config = self._parse_json(self._search_regex(
- r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf|playerWrapper)', webpage, 'config',
- default='{}'), video_id, transform_source=js_to_json, fatal=False)
- if config:
- config = config.get('mainRoll')
- if isinstance(config, dict):
- title = config.get('title')
- thumbnail = config.get('poster')
- duration = int_or_none(config.get('duration'))
- sources = config.get('sources') or config.get('format')
- media_definition = config.get('mediaDefinition')
-
- if not isinstance(sources, dict) and not media_definition:
- sources = self._parse_json(self._search_regex(
- r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
- webpage, 'sources', group='sources'), video_id,
- transform_source=js_to_json)
-
- formats = []
- format_urls = set()
-
- if isinstance(sources, dict):
- for format_id, format_url in sources.items():
- format_url = url_or_none(format_url)
- if not format_url:
- continue
- if format_url in format_urls:
- continue
- format_urls.add(format_url)
- formats.append({
- 'url': format_url,
- 'format_id': format_id,
- 'height': int_or_none(format_id),
- })
-
- if isinstance(media_definition, list):
- for media in media_definition:
- video_url = url_or_none(media.get('videoUrl'))
- if not video_url:
- continue
- if video_url in format_urls:
- continue
- format_urls.add(video_url)
- format_id = media.get('format')
- if format_id == 'hls':
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- elif format_id == 'mp4':
- height = int_or_none(media.get('quality'))
- formats.append({
- 'url': video_url,
- 'format_id': '%s-%d' % (format_id, height) if height else format_id,
- 'height': height,
- })
-
- self._remove_duplicate_formats(formats)
-
- if not title:
- title = self._search_regex(
- (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
- webpage, 'title', group='title')
- description = self._og_search_description(
- webpage, default=None) or self._html_search_meta(
- 'twitter:description', webpage, default=None) or self._search_regex(
- r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
- uploader = self._search_regex(
- (r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
- r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
- webpage, 'uploader', fatal=False)
- if not duration:
- duration = parse_duration(self._search_regex(
- r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
- webpage, 'duration', fatal=False))
- view_count = str_to_int(self._search_regex(
- (r'["\']viewsCount["\'][^>]*>(\d+)\s+views',
- r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>'),
- webpage, 'view count', fatal=False))
- comment_count = str_to_int(self._html_search_regex(
- r'>Comments? \(([\d,\.]+)\)<',
- webpage, 'comment count', fatal=False))
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'duration': duration,
- 'view_count': view_count,
- 'comment_count': comment_count,
- 'age_limit': 18,
- 'formats': formats,
- }
-
-
-class XTubeUserIE(InfoExtractor):
- IE_DESC = 'XTube user profile'
- _VALID_URL = r'https?://(?:www\.)?xtube\.com/profile/(?P<id>[^/]+-\d+)'
- _TEST = {
- 'url': 'http://www.xtube.com/profile/greenshowers-4056496',
- 'info_dict': {
- 'id': 'greenshowers-4056496',
- 'age_limit': 18,
- },
- 'playlist_mincount': 154,
- }
-
- def _real_extract(self, url):
- user_id = self._match_id(url)
-
- entries = []
- for pagenum in itertools.count(1):
- request = Request(
- 'http://www.xtube.com/profile/%s/videos/%d' % (user_id, pagenum),
- headers={
- 'Cookie': 'popunder=4',
- 'X-Requested-With': 'XMLHttpRequest',
- 'Referer': url,
- })
-
- page = self._download_json(
- request, user_id, 'Downloading videos JSON page %d' % pagenum)
-
- html = page.get('html')
- if not html:
- break
-
- for video_id in orderedSet([video_id for _, video_id in re.findall(
- r'data-plid=(["\'])(.+?)\1', html)]):
- entries.append(self.url_result('xtube:%s' % video_id, XTubeIE.ie_key()))
-
- page_count = int_or_none(page.get('pageCount'))
- if not page_count or pagenum == page_count:
- break
-
- playlist = self.playlist_result(entries, user_id)
- playlist['age_limit'] = 18
- return playlist
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- float_or_none,
- get_element_by_attribute,
- parse_iso8601,
- remove_end,
-)
-
-
-class XuiteIE(InfoExtractor):
- IE_DESC = '隨意窩Xuite影音'
- _REGEX_BASE64 = r'(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?'
- _VALID_URL = r'https?://vlog\.xuite\.net/(?:play|embed)/(?P<id>%s)' % _REGEX_BASE64
- _TESTS = [{
- # Audio
- 'url': 'http://vlog.xuite.net/play/RGkzc1ZULTM4NjA5MTQuZmx2',
- 'md5': 'e79284c87b371424885448d11f6398c8',
- 'info_dict': {
- 'id': '3860914',
- 'ext': 'mp3',
- 'title': '孤單南半球-歐德陽',
- 'description': '孤單南半球-歐德陽',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 247.246,
- 'timestamp': 1314932940,
- 'upload_date': '20110902',
- 'uploader': '阿能',
- 'uploader_id': '15973816',
- 'categories': ['個人短片'],
- },
- }, {
- # Video with only one format
- 'url': 'http://vlog.xuite.net/play/WUxxR2xCLTI1OTI1MDk5LmZsdg==',
- 'md5': '21f7b39c009b5a4615b4463df6eb7a46',
- 'info_dict': {
- 'id': '25925099',
- 'ext': 'mp4',
- 'title': 'BigBuckBunny_320x180',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 596.458,
- 'timestamp': 1454242500,
- 'upload_date': '20160131',
- 'uploader': '屁姥',
- 'uploader_id': '12158353',
- 'categories': ['個人短片'],
- 'description': 'http://download.blender.org/peach/bigbuckbunny_movies/BigBuckBunny_320x180.mp4',
- },
- }, {
- # Video with two formats
- 'url': 'http://vlog.xuite.net/play/bWo1N1pLLTIxMzAxMTcwLmZsdg==',
- 'md5': '1166e0f461efe55b62e26a2d2a68e6de',
- 'info_dict': {
- 'id': '21301170',
- 'ext': 'mp4',
- 'title': '暗殺教室 02',
- 'description': '字幕:【極影字幕社】',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 1384.907,
- 'timestamp': 1421481240,
- 'upload_date': '20150117',
- 'uploader': '我只是想認真點',
- 'uploader_id': '242127761',
- 'categories': ['電玩動漫'],
- },
- 'skip': 'Video removed',
- }, {
- # Video with encoded media id
- # from http://forgetfulbc.blogspot.com/2016/06/date.html
- 'url': 'http://vlog.xuite.net/embed/cE1xbENoLTI3NDQ3MzM2LmZsdg==?ar=0&as=0',
- 'info_dict': {
- 'id': '27447336',
- 'ext': 'mp4',
- 'title': '男女平權只是口號?專家解釋約會時男生是否該幫女生付錢 (中字)',
- 'description': 'md5:1223810fa123b179083a3aed53574706',
- 'timestamp': 1466160960,
- 'upload_date': '20160617',
- 'uploader': 'B.C. & Lowy',
- 'uploader_id': '232279340',
- },
- }, {
- 'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- # /play/ URLs provide embedded video URL and more metadata
- url = url.replace('/embed/', '/play/')
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- error_msg = self._search_regex(
- r'<div id="error-message-content">([^<]+)',
- webpage, 'error message', default=None)
- if error_msg:
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, error_msg),
- expected=True)
-
- media_info = self._parse_json(self._search_regex(
- r'var\s+mediaInfo\s*=\s*({.*});', webpage, 'media info'), video_id)
-
- video_id = media_info['MEDIA_ID']
-
- formats = []
- for key in ('html5Url', 'html5HQUrl'):
- video_url = media_info.get(key)
- if not video_url:
- continue
- format_id = self._search_regex(
- r'\bq=(.+?)\b', video_url, 'format id', default=None)
- formats.append({
- 'url': video_url,
- 'ext': 'mp4' if format_id.isnumeric() else format_id,
- 'format_id': format_id,
- 'height': int(format_id) if format_id.isnumeric() else None,
- })
-
- timestamp = media_info.get('PUBLISH_DATETIME')
- if timestamp:
- timestamp = parse_iso8601(timestamp + ' +0800', ' ')
-
- category = media_info.get('catName')
- categories = [category] if category else []
-
- uploader = media_info.get('NICKNAME')
- uploader_url = None
-
- author_div = get_element_by_attribute('itemprop', 'author', webpage)
- if author_div:
- uploader = uploader or self._html_search_meta('name', author_div)
- uploader_url = self._html_search_regex(
- r'<link[^>]+itemprop="url"[^>]+href="([^"]+)"', author_div,
- 'uploader URL', fatal=False)
-
- return {
- 'id': video_id,
- 'title': media_info['TITLE'],
- 'description': remove_end(media_info.get('metaDesc'), ' (Xuite 影音)'),
- 'thumbnail': media_info.get('ogImageUrl'),
- 'timestamp': timestamp,
- 'uploader': uploader,
- 'uploader_id': media_info.get('MEMBER_ID'),
- 'uploader_url': uploader_url,
- 'duration': float_or_none(media_info.get('MEDIA_DURATION'), 1000000),
- 'categories': categories,
- 'formats': formats,
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..networking import HEADRequest
-from ..utils import get_element_by_attribute, parse_iso8601
-
-
-class YesJapanIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?yesjapan\.com/video/(?P<slug>[A-Za-z0-9\-]*)_(?P<id>[A-Za-z0-9]+)\.html'
- _TEST = {
- 'url': 'http://www.yesjapan.com/video/japanese-in-5-20-wa-and-ga-particle-usages_726497834.html',
- 'md5': 'f0be416314e5be21a12b499b330c21cf',
- 'info_dict': {
- 'id': '726497834',
- 'title': 'Japanese in 5! #20 - WA And GA Particle Usages',
- 'description': 'This should clear up some issues most students of Japanese encounter with WA and GA....',
- 'ext': 'mp4',
- 'timestamp': 1416391590,
- 'upload_date': '20141119',
- 'thumbnail': r're:^https?://.*\.jpg$',
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
- title = self._og_search_title(webpage)
- video_url = self._og_search_video_url(webpage)
- description = self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
-
- timestamp = None
- submit_info = get_element_by_attribute('class', 'pm-submit-data', webpage)
- if submit_info:
- timestamp = parse_iso8601(self._search_regex(
- r'datetime="([^"]+)"', submit_info, 'upload date', fatal=False, default=None))
-
- # attempt to resolve the final URL in order to get a proper extension
- redirect_req = HEADRequest(video_url)
- req = self._request_webpage(
- redirect_req, video_id, note='Resolving final URL', errnote='Could not resolve final URL', fatal=False)
- if req:
- video_url = req.url
-
- formats = [{
- 'format_id': 'sd',
- 'url': video_url,
- }]
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'description': description,
- 'timestamp': timestamp,
- 'thumbnail': thumbnail,
- }
+++ /dev/null
-from .common import InfoExtractor
-from ..utils import ExtractorError
-
-
-class YinYueTaiIE(InfoExtractor):
- IE_NAME = 'yinyuetai:video'
- IE_DESC = '音悦Tai'
- _VALID_URL = r'https?://v\.yinyuetai\.com/video(?:/h5)?/(?P<id>[0-9]+)'
- _TESTS = [{
- 'url': 'http://v.yinyuetai.com/video/2322376',
- 'md5': '6e3abe28d38e3a54b591f9f040595ce0',
- 'info_dict': {
- 'id': '2322376',
- 'ext': 'mp4',
- 'title': '少女时代_PARTY_Music Video Teaser',
- 'creator': '少女时代',
- 'duration': 25,
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- }, {
- 'url': 'http://v.yinyuetai.com/video/h5/2322376',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- info = self._download_json(
- 'http://ext.yinyuetai.com/main/get-h-mv-info?json=true&videoId=%s' % video_id, video_id,
- 'Downloading mv info')['videoInfo']['coreVideoInfo']
-
- if info['error']:
- raise ExtractorError(info['errorMsg'], expected=True)
-
- formats = [{
- 'url': format_info['videoUrl'],
- 'format_id': format_info['qualityLevel'],
- 'format': format_info.get('qualityLevelName'),
- 'filesize': format_info.get('fileSize'),
- # though URLs ends with .flv, the downloaded files are in fact mp4
- 'ext': 'mp4',
- 'tbr': format_info.get('bitrate'),
- } for format_info in info['videoUrlModels']]
-
- return {
- 'id': video_id,
- 'title': info['videoName'],
- 'thumbnail': info.get('bigHeadImage'),
- 'creator': info.get('artistNames'),
- 'duration': info.get('duration'),
- 'formats': formats,
- }
+++ /dev/null
-import json
-import re
-import urllib.parse
-
-from .common import InfoExtractor
-
-
-class YnetIE(InfoExtractor):
- _VALID_URL = r'https?://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html'
- _TESTS = [
- {
- 'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html',
- 'info_dict': {
- 'id': 'L-11659-99244',
- 'ext': 'flv',
- 'title': 'איש לא יודע מאיפה באנו',
- 'thumbnail': r're:^https?://.*\.jpg',
- }
- }, {
- 'url': 'http://hot.ynet.co.il/home/0,7340,L-8859-84418,00.html',
- 'info_dict': {
- 'id': 'L-8859-84418',
- 'ext': 'flv',
- 'title': "צפו: הנשיקה הלוהטת של תורגי' ויוליה פלוטקין",
- 'thumbnail': r're:^https?://.*\.jpg',
- }
- }
- ]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- content = urllib.parse.unquote_plus(self._og_search_video_url(webpage))
- config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config'))
- f4m_url = config['clip']['url']
- title = self._og_search_title(webpage)
- m = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title)
- if m:
- title = m.group('title')
- formats = self._extract_f4m_formats(f4m_url, video_id)
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'thumbnail': self._og_search_thumbnail(webpage),
- }