jfr.im git - yt-dlp.git/commitdiff
[go] Fix `_VALID_URL`
author pukkandan <redacted>
Wed, 21 Apr 2021 10:10:55 +0000 (15:40 +0530)
committer pukkandan <redacted>
Wed, 21 Apr 2021 10:13:53 +0000 (15:43 +0530)
Closes #255

yt_dlp/extractor/go.py

index dda08ae3e05e8361adcb368b12be44e4229aa927..c31e07a0c865294b4c909e93751ee982c1ca0c4f 100644 (file)
@@ -8,6 +8,8 @@
     int_or_none,
     determine_ext,
     parse_age_limit,
+    remove_start,
+    remove_end,
     urlencode_postdata,
     ExtractorError,
 )
@@ -46,15 +48,15 @@ class GoIE(AdobePassIE):
     }
     _VALID_URL = r'''(?x)
                     https?://
-                        (?:
-                            (?:(?P<sub_domain>%s)\.)?go|
-                            (?P<sub_domain_2>abc|freeform|disneynow|fxnow\.fxnetworks)
+                        (?P<sub_domain>
+                            (?:%s\.)?go|fxnow\.fxnetworks|
+                            (?:www\.)?(?:abc|freeform|disneynow)
                         )\.com/
                         (?:
                             (?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
                             (?:[^/]+/)*(?P<display_id>[^/?\#]+)
                         )
-                    ''' % '|'.join(list(_SITE_INFO.keys()))
+                    ''' % r'\.|'.join(list(_SITE_INFO.keys()))
     _TESTS = [{
         'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
         'info_dict': {
@@ -133,6 +135,9 @@ class GoIE(AdobePassIE):
     }, {
         'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
         'only_matching': True,
+    }, {
+        'url': 'https://www.freeform.com/shows/cruel-summer/episode-guide/season-01/01-happy-birthday-jeanette-turner',
+        'only_matching': True,
     }]
 
     def _extract_videos(self, brand, video_id='-1', show_id='-1'):
@@ -143,7 +148,7 @@ def _extract_videos(self, brand, video_id='-1', show_id='-1'):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        sub_domain = mobj.group('sub_domain') or mobj.group('sub_domain_2')
+        sub_domain = remove_start(remove_end(mobj.group('sub_domain') or '', '.go'), 'www.')
         video_id, display_id = mobj.group('id', 'display_id')
         site_info = self._SITE_INFO.get(sub_domain, {})
         brand = site_info.get('brand')