jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/shahid.py
[extractor/youtube] Fallback regex for nsig code extraction
[yt-dlp.git] / yt_dlp / extractor / shahid.py
index c1d6aba2c962cb2b869c0c75606f6942926ec80e..53ca86b73e0cf0304b436ef64a081e222878def4 100644 (file)
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import math
 import re
@@ -21,6 +18,7 @@
 class ShahidBaseIE(AWSIE):
     _AWS_PROXY_HOST = 'api2.shahid.net'
     _AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh'
+    _VALID_URL_BASE = r'https?://shahid\.mbc\.net/[a-z]{2}/'
 
     def _handle_error(self, e):
         fail_data = self._parse_json(
@@ -49,15 +47,18 @@ def _call_api(self, path, video_id, request=None):
 
 class ShahidIE(ShahidBaseIE):
     _NETRC_MACHINE = 'shahid'
-    _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
+    _VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
     _TESTS = [{
-        'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AC%D9%84%D8%B3-%D8%A7%D9%84%D8%B4%D8%A8%D8%A7%D8%A8-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-275286',
+        'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924',
         'info_dict': {
-            'id': '275286',
+            'id': '816924',
             'ext': 'mp4',
-            'title': 'مجلس الشباب الموسم 1 كليب 1',
-            'timestamp': 1506988800,
-            'upload_date': '20171003',
+            'title': 'متحف الدحيح الموسم 1 كليب 1',
+            'timestamp': 1602806400,
+            'upload_date': '20201016',
+            'description': 'برومو',
+            'duration': 22,
+            'categories': ['كوميديا'],
         },
         'params': {
             # m3u8 download
@@ -70,18 +71,17 @@ class ShahidIE(ShahidBaseIE):
         # shahid plus subscriber only
         'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511',
         'only_matching': True
+    }, {
+        'url': 'https://shahid.mbc.net/en/shows/Ramez-Fi-Al-Shallal-season-1-episode-1/episode-359319',
+        'only_matching': True
     }]
 
-    def _real_initialize(self):
-        email, password = self._get_login_info()
-        if email is None:
-            return
-
+    def _perform_login(self, username, password):
         try:
             user_data = self._download_json(
                 'https://shahid.mbc.net/wd/service/users/login',
                 None, 'Logging in', data=json.dumps({
-                    'email': email,
+                    'email': username,
                     'password': password,
                     'basic': 'false',
                 }).encode('utf-8'), headers={
@@ -104,17 +104,20 @@ def _real_initialize(self):
             }))
 
     def _real_extract(self, url):
-        page_type, video_id = re.match(self._VALID_URL, url).groups()
+        page_type, video_id = self._match_valid_url(url).groups()
         if page_type == 'clip':
             page_type = 'episode'
 
         playout = self._call_api(
-            'playout/url/' + video_id, video_id)['playout']
+            'playout/new/url/' + video_id, video_id)['playout']
 
-        if not self._downloader.params.get('allow_unplayable_formats') and playout.get('drm'):
-            raise ExtractorError('This video is DRM protected.', expected=True)
+        if not self.get_param('allow_unplayable_formats') and playout.get('drm'):
+            self.report_drm(video_id)
 
-        formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4')
+        formats = self._extract_m3u8_formats(re.sub(
+            # https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html
+            r'aws\.manifestfilter=[\w:;,-]+&?',
+            '', playout['url']), video_id, 'mp4')
         self._sort_formats(formats)
 
         # video = self._call_api(
@@ -162,7 +165,7 @@ def _real_extract(self, url):
 
 
 class ShahidShowIE(ShahidBaseIE):
-    _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)'
+    _VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187',
         'info_dict': {