jfr.im git - yt-dlp.git/commitdiff
[Cleanup] Remove some unnecessary groups in regexes (#1738)
author Ashish Gupta <redacted>
Tue, 30 Nov 2021 16:14:47 +0000 (21:44 +0530)
committer GitHub <redacted>
Tue, 30 Nov 2021 16:14:47 +0000 (21:44 +0530)
Authored by: Ashish0804

22 files changed:
yt_dlp/extractor/amazon.py
yt_dlp/extractor/canalalpha.py
yt_dlp/extractor/chingari.py
yt_dlp/extractor/cozytv.py
yt_dlp/extractor/epicon.py
yt_dlp/extractor/euscreen.py
yt_dlp/extractor/gab.py
yt_dlp/extractor/gronkh.py
yt_dlp/extractor/hotstar.py
yt_dlp/extractor/koo.py
yt_dlp/extractor/mlssoccer.py
yt_dlp/extractor/musescore.py
yt_dlp/extractor/mxplayer.py
yt_dlp/extractor/onefootball.py
yt_dlp/extractor/planetmarathi.py
yt_dlp/extractor/projectveritas.py
yt_dlp/extractor/shemaroome.py
yt_dlp/extractor/skynewsau.py
yt_dlp/extractor/threespeak.py
yt_dlp/extractor/utreon.py
yt_dlp/extractor/voot.py
yt_dlp/extractor/zee5.py

index 7c5d35f47374f72c719a26ea495eafbbcfc969f8..07b1b18611a43e20f2c269d25e61b96c6d8e2f93 100644 (file)
@@ -4,7 +4,7 @@
 
 
 class AmazonStoreIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/(?:[^/]+/)?(?:dp|gp/product)/(?P<id>[^/&#$?]+)'
+    _VALID_URL = r'https?://(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/(?:[^/]+/)?(?:dp|gp/product)/(?P<id>[^/&#$?]+)'
 
     _TESTS = [{
         'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/',
index 7287677c11a07996936c7c2b4547ee414b07c5fd..51d30a32139f3729504c553ca7c0616def0e04b4 100644 (file)
@@ -11,7 +11,7 @@
 
 
 class CanalAlphaIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?canalalpha\.ch/play/[^/]+/[^/]+/(?P<id>\d+)/?.*'
+    _VALID_URL = r'https?://(?:www\.)?canalalpha\.ch/play/[^/]+/[^/]+/(?P<id>\d+)/?.*'
 
     _TESTS = [{
         'url': 'https://www.canalalpha.ch/play/le-journal/episode/24520/jeudi-28-octobre-2021',
index 6bdc4f6bbbbdd8c1987bf64fcdd36ba343862793..e6841fb8b2b3e5499f40e67494e0fd60607111af 100644 (file)
@@ -67,7 +67,7 @@ def _get_post(self, id, post_data):
 
 
 class ChingariIE(ChingariBaseIE):
-    _VALID_URL = r'(?:https?://)(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)'
+    _VALID_URL = r'https?://(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)'
     _TESTS = [{
         'url': 'https://chingari.io/share/post?id=612f8f4ce1dc57090e8a7beb',
         'info_dict': {
@@ -102,7 +102,7 @@ def _real_extract(self, url):
 
 
 class ChingariUserIE(ChingariBaseIE):
-    _VALID_URL = r'(?:https?://)(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)'
+    _VALID_URL = r'https?://(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)'
     _TESTS = [{
         'url': 'https://chingari.io/dada1023',
         'playlist_mincount': 3,
index 868d8d27da30d20704c4c138339c27ab9a94dd74..d49f1ca7442f5ba0d2ff9536fe262d26165c6475 100644 (file)
@@ -6,7 +6,7 @@
 
 
 class CozyTVIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?cozy\.tv/(?P<uploader>[^/]+)/replays/(?P<id>[^/$#&?]+)'
+    _VALID_URL = r'https?://(?:www\.)?cozy\.tv/(?P<uploader>[^/]+)/replays/(?P<id>[^/$#&?]+)'
 
     _TESTS = [{
         'url': 'https://cozy.tv/beardson/replays/2021-11-19_1',
index b4e544d4f67c059dae9a3c90d6c2b4cb59ac3b76..cd19325bc7296c1e1924bddbf75811f60c9f0466 100644 (file)
@@ -8,7 +8,7 @@
 
 
 class EpiconIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?epicon\.in/(?:documentaries|movies|tv-shows/[^/?#]+/[^/?#]+)/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?epicon\.in/(?:documentaries|movies|tv-shows/[^/?#]+/[^/?#]+)/(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'https://www.epicon.in/documentaries/air-battle-of-srinagar',
         'info_dict': {
@@ -84,7 +84,7 @@ def _real_extract(self, url):
 
 
 class EpiconSeriesIE(InfoExtractor):
-    _VALID_URL = r'(?!.*season)(?:https?://)(?:www\.)?epicon\.in/tv-shows/(?P<id>[^/?#]+)'
+    _VALID_URL = r'(?!.*season)https?://(?:www\.)?epicon\.in/tv-shows/(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'https://www.epicon.in/tv-shows/1-of-something',
         'playlist_mincount': 5,
index 3980c2349f26730a9e39ca5c703061241cc91243..2759e7436f22403beeae689b925662f8f74c40f1 100644 (file)
@@ -10,7 +10,7 @@
 
 
 class EUScreenIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?euscreen\.eu/item.html\?id=(?P<id>[^&?$/]+)'
+    _VALID_URL = r'https?://(?:www\.)?euscreen\.eu/item.html\?id=(?P<id>[^&?$/]+)'
 
     _TESTS = [{
         'url': 'https://euscreen.eu/item.html?id=EUS_0EBCBF356BFC4E12A014023BA41BD98C',
index bde6e86248bd5286b669d920a1e7e68432c3ec60..9ba0b1ca1998a9e669301f314eb9fc87da703339 100644 (file)
@@ -15,7 +15,7 @@
 
 
 class GabTVIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)tv.gab.com/channel/[^/]+/view/(?P<id>[a-z0-9-]+)'
+    _VALID_URL = r'https?://tv\.gab\.com/channel/[^/]+/view/(?P<id>[a-z0-9-]+)'
     _TESTS = [{
         'url': 'https://tv.gab.com/channel/wurzelroot/view/why-was-america-in-afghanistan-61217eacea5665de450d0488',
         'info_dict': {
index a7792a5e0e9577adb2867675db40f4817a1c3e03..58cd595113cd364fbdf0a6edfdc1ee1f2c2b4da6 100644 (file)
@@ -6,7 +6,7 @@
 
 
 class GronkhIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?gronkh\.tv/stream/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/stream/(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'https://gronkh.tv/stream/536',
index 0bdf772a19765f43f5c60bf8664e3207e30b3730..de2b30cf7c42ea42ffc41725a105cdc48a6969fa 100644 (file)
@@ -296,7 +296,7 @@ def _real_extract(self, url):
 
 class HotStarSeriesIE(HotStarBaseIE):
     IE_NAME = 'hotstar:series'
-    _VALID_URL = r'(?P<url>(?:https?://)(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))'
     _TESTS = [{
         'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646',
         'info_dict': {
index 1706b28a0df210809af06c112c9574e174e98b41..088db1cb018658704170df199146a983dfffbed9 100644 (file)
@@ -8,7 +8,7 @@
 
 
 class KooIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)'
+    _VALID_URL = r'https?://(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)'
     _TESTS = [{  # Test for video in the comments
         'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde',
         'info_dict': {
index 0f0b09e2c469e2d72710a4435c36e2052c3e77ae..1d6d4b8040deb274f20a75985ed9795a3ffff116 100644 (file)
@@ -6,7 +6,7 @@
 
 class MLSSoccerIE(InfoExtractor):
     _VALID_DOMAINS = r'(?:(?:cfmontreal|intermiamicf|lagalaxy|lafc|houstondynamofc|dcunited|atlutd|mlssoccer|fcdallas|columbuscrew|coloradorapids|fccincinnati|chicagofirefc|austinfc|nashvillesc|whitecapsfc|sportingkc|soundersfc|sjearthquakes|rsl|timbers|philadelphiaunion|orlandocitysc|newyorkredbulls|nycfc)\.com|(?:torontofc)\.ca|(?:revolutionsoccer)\.net)'
-    _VALID_URL = r'(?:https?://)(?:www\.)?%s/video/#?(?P<id>[^/&$#?]+)' % _VALID_DOMAINS
+    _VALID_URL = r'https?://(?:www\.)?%s/video/#?(?P<id>[^/&$#?]+)' % _VALID_DOMAINS
 
     _TESTS = [{
         'url': 'https://www.mlssoccer.com/video/the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986#the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986',
index dcd26388a60d865d6b3577edb034075a70fc94db..09fadf8d906ca47fd816ac5120d80910468af8d9 100644 (file)
@@ -5,7 +5,7 @@
 
 
 class MuseScoreIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?musescore\.com/(?:user/\d+|[^/]+)(?:/scores)?/(?P<id>[^#&?]+)'
+    _VALID_URL = r'https?://(?:www\.)?musescore\.com/(?:user/\d+|[^/]+)(?:/scores)?/(?P<id>[^#&?]+)'
     _TESTS = [{
         'url': 'https://musescore.com/user/73797/scores/142975',
         'info_dict': {
@@ -13,7 +13,7 @@ class MuseScoreIE(InfoExtractor):
             'ext': 'mp3',
             'title': 'WA Mozart Marche Turque (Turkish March fingered)',
             'description': 'md5:7ede08230e4eaabd67a4a98bb54d07be',
-            'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+',
+            'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+',
             'uploader': 'PapyPiano',
             'creator': 'Wolfgang Amadeus Mozart',
         }
@@ -24,7 +24,7 @@ class MuseScoreIE(InfoExtractor):
             'ext': 'mp3',
             'title': 'Sweet Child O\' Mine  – Guns N\' Roses sweet child',
             'description': 'md5:4dca71191c14abc312a0a4192492eace',
-            'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+',
+            'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+',
             'uploader': 'roxbelviolin',
             'creator': 'Guns N´Roses Arr. Roxbel Violin',
         }
@@ -35,7 +35,7 @@ class MuseScoreIE(InfoExtractor):
             'ext': 'mp3',
             'title': 'Für Elise – Beethoven',
             'description': 'md5:49515a3556d5ecaf9fa4b2514064ac34',
-            'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+',
+            'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+',
             'uploader': 'ClassicMan',
             'creator': 'Ludwig van Beethoven (1770–1827)',
         }
index 5874556e346f4dd7449d9509189cdf1f3c63cc1e..3c2afd838d97309fccfffa0a39fdc13e0cb11030 100644 (file)
@@ -180,7 +180,7 @@ def _real_extract(self, url):
 
 
 class MxplayerShowIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?mxplayer\.in/show/(?P<display_id>[-\w]+)-(?P<id>\w+)/?(?:$|[#?])'
+    _VALID_URL = r'https?://(?:www\.)?mxplayer\.in/show/(?P<display_id>[-\w]+)-(?P<id>\w+)/?(?:$|[#?])'
     _TESTS = [{
         'url': 'https://www.mxplayer.in/show/watch-chakravartin-ashoka-samrat-series-online-a8f44e3cc0814b5601d17772cedf5417',
         'playlist_mincount': 440,
index 79501003dbc40acc3a9efc48f73fe010c81752b5..826faadd2efbe8965f4f447c02ec0aa49685b4fd 100644 (file)
@@ -5,7 +5,7 @@
 
 
 class OneFootballIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?onefootball\.com/[a-z]{2}/video/[^/&?#]+-(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?onefootball\.com/[a-z]{2}/video/[^/&?#]+-(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'https://onefootball.com/en/video/highlights-fc-zuerich-3-3-fc-basel-34012334',
index d1d9911f7d51751f7d09b1b9672d2873361e27fd..07ac15b540f3c243c2c19e1e9f15595109b9d56a 100644 (file)
@@ -9,7 +9,7 @@
 
 
 class PlanetMarathiIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?planetmarathi\.com/titles/(?P<id>[^/#&?$]+)'
+    _VALID_URL = r'https?://(?:www\.)?planetmarathi\.com/titles/(?P<id>[^/#&?$]+)'
     _TESTS = [{
         'url': 'https://www.planetmarathi.com/titles/ek-unad-divas',
         'playlist_mincount': 2,
index 1d832a6796a44e0418288b2be7e2038ef070a303..9e9867ba5dbd653bac51d94d65d998bf3ba13333 100644 (file)
@@ -10,7 +10,7 @@
 
 
 class ProjectVeritasIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?projectveritas\.com/(?P<type>news|video)/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?projectveritas\.com/(?P<type>news|video)/(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'https://www.projectveritas.com/news/exclusive-inside-the-new-york-and-new-jersey-hospitals-battling-coronavirus/',
         'info_dict': {
index 142d5dc3a0430a9b9c85eec65de66337f4de51a7..00a5b00cdd4e52de4ad44a854f1b0d3934edd7c9 100644 (file)
@@ -16,7 +16,7 @@
 
 
 class ShemarooMeIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?shemaroome\.com/(?:movies|shows)/(?P<id>[^?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?shemaroome\.com/(?:movies|shows)/(?P<id>[^?#]+)'
     _TESTS = [{
         'url': 'https://www.shemaroome.com/movies/dil-hai-tumhaara',
         'info_dict': {
@@ -78,7 +78,7 @@ def _real_extract(self, url):
         iv = [0] * 16
         m3u8_url = intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv))
         m3u8_url = m3u8_url[:-compat_ord((m3u8_url[-1]))].decode('ascii')
-        formats = self._extract_m3u8_formats(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']})
+        formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']})
         self._sort_formats(formats)
 
         release_date = self._html_search_regex(
@@ -91,6 +91,7 @@ def _real_extract(self, url):
             subtitles.setdefault('EN', []).append({
                 'url': self._proto_relative_url(sub_url),
             })
+        subtitles = self._merge_subtitles(subtitles, m3u8_subs)
         description = self._html_search_regex(r'(?s)>Synopsis(</.+?)</', webpage, 'description', fatal=False)
 
         return {
index b1d77951e7708b8d31338294e0da6b391146aa0d..8e079ee312d3a9797fdbacdb2b5cce67267bdd5c 100644 (file)
@@ -9,7 +9,7 @@
 
 
 class SkyNewsAUIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?skynews\.com\.au/[^/]+/[^/]+/[^/]+/video/(?P<id>[a-z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?skynews\.com\.au/[^/]+/[^/]+/[^/]+/video/(?P<id>[a-z0-9]+)'
 
     _TESTS = [{
         'url': 'https://www.skynews.com.au/world-news/united-states/incredible-vision-shows-lava-overflowing-from-spains-la-palma-volcano/video/0f4c6243d6903502c01251f228b91a71',
index 60e84529d8d19df293e5c855f1580e9f48269504..fe6a9554a95cff8f843f06fe198ac30a9d8a50f0 100644 (file)
@@ -11,7 +11,7 @@
 
 
 class ThreeSpeakIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?3speak\.tv/watch\?v\=[^/]+/(?P<id>[^/$&#?]+)'
+    _VALID_URL = r'https?://(?:www\.)?3speak\.tv/watch\?v\=[^/]+/(?P<id>[^/$&#?]+)'
 
     _TESTS = [{
         'url': 'https://3speak.tv/watch?v=dannyshine/wjgoxyfy',
@@ -75,7 +75,7 @@ def _real_extract(self, url):
 
 
 class ThreeSpeakUserIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?3speak\.tv/user/(?P<id>[^/$&?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?3speak\.tv/user/(?P<id>[^/$&?#]+)'
 
     _TESTS = [{
         'url': 'https://3speak.tv/user/theycallmedan',
index 4a25f0c55c78df3be743a95260b71cfdc793e128..4986635f24d02717a80a272ee7591f517240b06a 100644 (file)
@@ -13,7 +13,7 @@
 
 
 class UtreonIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?utreon.com/v/(?P<id>[a-zA-Z0-9_-]+)'
+    _VALID_URL = r'https?://(?:www\.)?utreon.com/v/(?P<id>[a-zA-Z0-9_-]+)'
     _TESTS = [{
         'url': 'https://utreon.com/v/z_I7ikQbuDw',
         'info_dict': {
index e2944ec63596d1c40c3d57fdbdf9ad6e5982a9a8..a9b66b95c2fc3b62b6af86376d7613face29363b 100644 (file)
@@ -15,7 +15,7 @@ class VootIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     (?:
                         voot:|
-                        (?:https?://)(?:www\.)?voot\.com/?
+                        https?://(?:www\.)?voot\.com/?
                         (?:
                             movies/[^/]+/|
                             (?:shows|kids)/(?:[^/]+/){4}
index 536604167411aac80d7c900cfe8d45ea0cee3c5f..462bc4efe66e70e37496d28bf175f939d24ae317 100644 (file)
@@ -21,7 +21,7 @@ class Zee5IE(InfoExtractor):
     _VALID_URL = r'''(?x)
                      (?:
                         zee5:|
-                        (?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)?
+                        https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
                         (?:
                             (?:tvshows|kids|zee5originals)(?:/[^#/?]+){3}
                             |movies/[^#/?]+
@@ -174,7 +174,7 @@ class Zee5SeriesIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                      (?:
                         zee5:series:|
-                        (?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)?
+                        https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
                         (?:tvshows|kids|zee5originals)(?:/[^#/?]+){2}/
                      )
                      (?P<id>[^#/?]+)/?(?:$|[?#])