jfr.im git - yt-dlp.git/commitdiff
[linuxacademy] Improve regex
author: pukkandan <redacted>
Sun, 21 Mar 2021 15:29:03 +0000 (20:59 +0530)
committer: pukkandan <redacted>
Sun, 21 Mar 2021 15:57:33 +0000 (21:27 +0530)
TODO: We need a more robust, standardized regex for extracting JS objects from HTML.

yt_dlp/extractor/linuxacademy.py

index 7ec4a65573afe2d40a72c20c7f799b3f5ed0baef..70c84c2c1ac17e140c7626e0a0a56bbcbd907f25 100644 (file)
@@ -38,8 +38,8 @@ class LinuxAcademyIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'What Is Data Science',
             'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
-            'timestamp': 1607387907,
-            'upload_date': '20201208',
+            'timestamp': int,  # The timestamp and upload date changes
+            'upload_date': r're:\d+',
             'duration': 304,
         },
         'params': {
@@ -59,6 +59,16 @@ class LinuxAcademyIE(InfoExtractor):
         },
         'playlist_count': 41,
         'skip': 'Requires Linux Academy account credentials',
+    }, {
+        'url': 'https://linuxacademy.com/cp/modules/view/id/39',
+        'info_dict': {
+            'id': '39',
+            'title': 'Red Hat Certified Systems Administrator - RHCSA (EX200) Exam Prep  (legacy)',
+            'description': 'md5:0f1d3369e90c3fb14a79813b863c902f',
+            'duration': 89280,
+        },
+        'playlist_count': 73,
+        'skip': 'Requires Linux Academy account credentials',
     }]
 
     _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
@@ -162,7 +172,7 @@ def _real_extract(self, url):
         if course_id:
             module = self._parse_json(
                 self._search_regex(
-                    r'window\.module\s*=\s*({.+?})\s*;', webpage, 'module'),
+                    r'window\.module\s*=\s*({(?:(?!};)[^"]|"([^"]|\\")*")+})\s*;', webpage, 'module'),
                 item_id)
             entries = []
             chapter_number = None