[extractor/Veoh] Add user extractor (#5242)

author Audrey <redacted>

Fri, 11 Nov 2022 17:58:54 +0000 (12:58 -0500)

committer GitHub <redacted>

Fri, 11 Nov 2022 17:58:54 +0000 (23:28 +0530)
author Audrey <redacted>
Fri, 11 Nov 2022 17:58:54 +0000 (12:58 -0500)
committer GitHub <redacted>
Fri, 11 Nov 2022 17:58:54 +0000 (23:28 +0530)
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py

index 78555c05c02ab6b7e9344054e04fdb2195f0bf73..c1ab5a9640097780b92d2d73eb116c6a887de8e7 100644 (file)
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -2043,7 +2043,10 @@
  from .vbox7 import Vbox7IE
  from .veehd import VeeHDIE
  from .veo import VeoIE
-from .veoh import VeohIE
+from .veoh import (
+    VeohIE,
+    VeohUserIE
+)
  from .vesti import VestiIE
  from .vevo import (
      VevoIE,
diff --git a/yt_dlp/extractor/veoh.py b/yt_dlp/extractor/veoh.py

index 70280ae850ba0bac5a9e5c509f183a6af920932c..a32c2fccb99f3380ac05e4419c0c1e286bf2eaee 100644 (file)
--- a/yt_dlp/extractor/veoh.py
+++ b/yt_dlp/extractor/veoh.py
@@ -1,9 +1,14 @@
+import functools
+import json
+
  from .common import InfoExtractor
  from ..utils import (
+    ExtractorError,
+    OnDemandPagedList,
      int_or_none,
      parse_duration,
      qualities,
-    try_get
+    try_get,
  )
  
  
@@ -123,3 +128,62 @@ def _real_extract(self, url):
              'categories': categories,
              'tags': tags.split(', ') if tags else None,
          }
+
+
+class VeohUserIE(VeohIE):  # extractor for veoh.com user pages; returns the user's videos as a playlist
+    _VALID_URL = r'https?://(?:www\.)?veoh\.com/users/(?P<id>[\w-]+)'  # the "id" group captures the username
+    IE_NAME = 'veoh:user'
+
+    _TESTS = [
+        {
+            'url': 'https://www.veoh.com/users/valentinazoe',
+            'info_dict': {
+                'id': 'valentinazoe',
+                'title': 'valentinazoe (Uploads)'
+            },
+            'playlist_mincount': 75
+        },
+        {
+            'url': 'https://www.veoh.com/users/PiensaLibre',
+            'info_dict': {
+                'id': 'PiensaLibre',
+                'title': 'PiensaLibre (Uploads)'
+            },
+            'playlist_mincount': 2
+        }]
+
+    _PAGE_SIZE = 16  # sent to the API as 'maxResults' and passed to OnDemandPagedList as the page size
+
+    def _fetch_page(self, uploader, page):  # generator: yields one url_result per video listed on the requested page
+        response = self._download_json(
+            'https://www.veoh.com/users/published/videos', uploader,
+            note=f'Downloading videos page {page + 1}',
+            headers={
+                'x-csrf-token': self._TOKEN,  # token scraped from the home page in _real_initialize
+                'content-type': 'application/json;charset=UTF-8'
+            },
+            data=json.dumps({  # request body: JSON document encoded to UTF-8 bytes
+                'username': uploader,
+                'maxResults': self._PAGE_SIZE,
+                'page': page + 1,  # presumably `page` is 0-based and the API counts from 1 - TODO confirm
+                'requestName': 'userPage'
+            }).encode('utf-8'))
+        if not response.get('success'):  # a missing or falsy 'success' field is treated as a failure
+            raise ExtractorError(response['message'])  # NOTE(review): raises KeyError instead if 'message' is absent
+
+        for video in response['videos']:  # each entry must provide 'permalinkId'; 'title' is optional
+            yield self.url_result(f'https://www.veoh.com/watch/{video["permalinkId"]}', VeohIE,
+                                  video['permalinkId'], video.get('title'))
+
+    def _real_initialize(self):  # downloads the veoh.com home page and stores its CSRF token in self._TOKEN
+        webpage = self._download_webpage(
+            'https://www.veoh.com', None, note='Downloading authorization token')
+        self._TOKEN = self._search_regex(
+            r'csrfToken:\s*(["\'])(?P<token>[0-9a-zA-Z]{40})\1', webpage,
+            'request token', group='token')  # token: 40 alphanumeric characters inside matching quotes
+
+    def _real_extract(self, url):  # builds the playlist result for the user named in the URL
+        uploader = self._match_id(url)
+        return self.playlist_result(OnDemandPagedList(  # pages are produced by _fetch_page bound to this uploader
+            functools.partial(self._fetch_page, uploader),
+            self._PAGE_SIZE), uploader, f'{uploader} (Uploads)')  # playlist id is the username; title is '<username> (Uploads)'
author	Audrey <redacted>
	Fri, 11 Nov 2022 17:58:54 +0000 (12:58 -0500)
committer	GitHub <redacted>
	Fri, 11 Nov 2022 17:58:54 +0000 (23:28 +0530)
yt_dlp/extractor/_extractors.py		patch \| blob \| blame \| history
yt_dlp/extractor/veoh.py		patch \| blob \| blame \| history