Merge pull request #3 from blackjack4494/sc-extractor-web_auth

author Tom-Oliver Heidel <redacted>

Mon, 31 Aug 2020 20:13:20 +0000 (22:13 +0200)

committer GitHub <redacted>

Mon, 31 Aug 2020 20:13:20 +0000 (22:13 +0200)
author Tom-Oliver Heidel <redacted>
Mon, 31 Aug 2020 20:13:20 +0000 (22:13 +0200)
committer GitHub <redacted>
Mon, 31 Aug 2020 20:13:20 +0000 (22:13 +0200)
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml

new file mode 100644 (file)

index 0000000..0fa2d18
--- /dev/null
+++ b/.github/workflows/python-publish.yml
@@ -0,0 +1,33 @@
+# This workflow uploads a Python package using Twine when a pull request targets the release branch
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: Upload Python Package
+
+on:
+  pull_request:
+    branches:
+    - release
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: '3.x'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install setuptools wheel twine
+    - name: Build and publish
+      env:
+        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+      run: |
+        rm -rf dist/*
+        python setup.py sdist bdist_wheel
+        twine upload dist/*
diff --git a/.travis.yml b/.travis.yml

index 51afd469afe569df116d0dd5c200c426f36546b6..fb499845e4b652edf2661a724bb9f7e515d4f056 100644 (file)
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,29 +12,18 @@ python:
  dist: trusty
  env:
    - YTDL_TEST_SET=core
-  - YTDL_TEST_SET=download
  jobs:
    include:
      - python: 3.7
        dist: xenial
        env: YTDL_TEST_SET=core
-    - python: 3.7
-      dist: xenial
-      env: YTDL_TEST_SET=download
      - python: 3.8
        dist: xenial
        env: YTDL_TEST_SET=core
-    - python: 3.8
-      dist: xenial
-      env: YTDL_TEST_SET=download
      - python: 3.8-dev
        dist: xenial
        env: YTDL_TEST_SET=core
-    - python: 3.8-dev
-      dist: xenial
-      env: YTDL_TEST_SET=download
      - env: JYTHON=true; YTDL_TEST_SET=core
-    - env: JYTHON=true; YTDL_TEST_SET=download
      - name: flake8
        python: 3.8
        dist: xenial
@@ -44,7 +33,6 @@ jobs:
    allow_failures:
      - env: YTDL_TEST_SET=download
      - env: JYTHON=true; YTDL_TEST_SET=core
-    - env: JYTHON=true; YTDL_TEST_SET=download
  before_install:
    - if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi
  script: ./devscripts/run_tests.sh
diff --git a/.travis.yml.original b/.travis.yml.original

new file mode 100644 (file)

index 0000000..51afd46
--- /dev/null
+++ b/.travis.yml.original
@@ -0,0 +1,50 @@
+language: python
+python:
+  - "2.6"
+  - "2.7"
+  - "3.2"
+  - "3.3"
+  - "3.4"
+  - "3.5"
+  - "3.6"
+  - "pypy"
+  - "pypy3"
+dist: trusty
+env:
+  - YTDL_TEST_SET=core
+  - YTDL_TEST_SET=download
+jobs:
+  include:
+    - python: 3.7
+      dist: xenial
+      env: YTDL_TEST_SET=core
+    - python: 3.7
+      dist: xenial
+      env: YTDL_TEST_SET=download
+    - python: 3.8
+      dist: xenial
+      env: YTDL_TEST_SET=core
+    - python: 3.8
+      dist: xenial
+      env: YTDL_TEST_SET=download
+    - python: 3.8-dev
+      dist: xenial
+      env: YTDL_TEST_SET=core
+    - python: 3.8-dev
+      dist: xenial
+      env: YTDL_TEST_SET=download
+    - env: JYTHON=true; YTDL_TEST_SET=core
+    - env: JYTHON=true; YTDL_TEST_SET=download
+    - name: flake8
+      python: 3.8
+      dist: xenial
+      install: pip install flake8
+      script: flake8 .
+  fast_finish: true
+  allow_failures:
+    - env: YTDL_TEST_SET=download
+    - env: JYTHON=true; YTDL_TEST_SET=core
+    - env: JYTHON=true; YTDL_TEST_SET=download
+before_install:
+  - if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi
+script: ./devscripts/run_tests.sh
diff --git a/README.md b/README.md

index 45326c69ec5bf3fa6665cca18e808a06546ce8ea..8c1a50141b0fc037ecf5a05f2cc725d0277a53cd 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-[![Build Status](https://travis-ci.org/ytdl-org/youtube-dl.svg?branch=master)](https://travis-ci.org/ytdl-org/youtube-dl)
+[![Build Status](https://travis-ci.com/blackjack4494/youtube-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/youtube-dlc)
  
  youtube-dl - download videos from youtube.com or other video platforms
  
diff --git a/setup.py b/setup.py

index af68b485ef787f217fab474fbadbba2408707dc6..23553b88a923ad8203ce5f45ee778ae4d9319ab2 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -1,62 +1,21 @@
  #!/usr/bin/env python
  # coding: utf-8
  
-from __future__ import print_function
-
+from setuptools import setup, Command
  import os.path
  import warnings
  import sys
-
-try:
-    from setuptools import setup, Command
-    setuptools_available = True
-except ImportError:
-    from distutils.core import setup, Command
-    setuptools_available = False
  from distutils.spawn import spawn
  
-try:
-    # This will create an exe that needs Microsoft Visual C++ 2008
-    # Redistributable Package
-    import py2exe
-except ImportError:
-    if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
-        print('Cannot import py2exe', file=sys.stderr)
-        exit(1)
-
-py2exe_options = {
-    'bundle_files': 1,
-    'compressed': 1,
-    'optimize': 2,
-    'dist_dir': '.',
-    'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
-}
-
  # Get the version from youtube_dl/version.py without importing the package
  exec(compile(open('youtube_dl/version.py').read(),
               'youtube_dl/version.py', 'exec'))
  
-DESCRIPTION = 'YouTube video downloader'
-LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites'
-
-py2exe_console = [{
-    'script': './youtube_dl/__main__.py',
-    'dest_base': 'youtube-dl',
-    'version': __version__,
-    'description': DESCRIPTION,
-    'comments': LONG_DESCRIPTION,
-    'product_name': 'youtube-dl',
-    'product_version': __version__,
-}]
-
-py2exe_params = {
-    'console': py2exe_console,
-    'options': {'py2exe': py2exe_options},
-    'zipfile': None
-}
+DESCRIPTION = 'Media downloader supporting various sites such as youtube'
+LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites. Based on a more active community fork.'
  
  if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
-    params = py2exe_params
+    print("inv")
  else:
      files_spec = [
          ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
@@ -78,10 +37,10 @@
      params = {
          'data_files': data_files,
      }
-    if setuptools_available:
-        params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']}
-    else:
-        params['scripts'] = ['bin/youtube-dl']
+    #if setuptools_available:
+    params['entry_points'] = {'console_scripts': ['youtube-dlc = youtube_dl:main']}
+    #else:
+    #    params['scripts'] = ['bin/youtube-dlc']
  
  class build_lazy_extractors(Command):
      description = 'Build the extractor lazy loading module'
@@ -100,49 +59,45 @@ def run(self):
          )
  
  setup(
-    name='youtube_dl',
+    name="youtube_dlc",
      version=__version__,
+    maintainer="Tom-Oliver Heidel",
+    maintainer_email="theidel@uni-bremen.de",
      description=DESCRIPTION,
      long_description=LONG_DESCRIPTION,
-    url='https://github.com/ytdl-org/youtube-dl',
-    author='Ricardo Garcia',
-    author_email='ytdl@yt-dl.org',
-    maintainer='Sergey M.',
-    maintainer_email='dstftw@gmail.com',
-    license='Unlicense',
-    packages=[
+    # long_description_content_type="text/markdown",
+    url="https://github.com/blackjack4494/youtube-dlc",
+    # packages=setuptools.find_packages(),
+       packages=[
          'youtube_dl',
          'youtube_dl.extractor', 'youtube_dl.downloader',
          'youtube_dl.postprocessor'],
-
-    # Provokes warning on most systems (why?!)
-    # test_suite = 'nose.collector',
-    # test_requires = ['nosetest'],
-
      classifiers=[
-        'Topic :: Multimedia :: Video',
-        'Development Status :: 5 - Production/Stable',
-        'Environment :: Console',
-        'License :: Public Domain',
-        'Programming Language :: Python',
-        'Programming Language :: Python :: 2',
-        'Programming Language :: Python :: 2.6',
-        'Programming Language :: Python :: 2.7',
-        'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.2',
-        'Programming Language :: Python :: 3.3',
-        'Programming Language :: Python :: 3.4',
-        'Programming Language :: Python :: 3.5',
-        'Programming Language :: Python :: 3.6',
-        'Programming Language :: Python :: 3.7',
-        'Programming Language :: Python :: 3.8',
-        'Programming Language :: Python :: Implementation',
-        'Programming Language :: Python :: Implementation :: CPython',
-        'Programming Language :: Python :: Implementation :: IronPython',
-        'Programming Language :: Python :: Implementation :: Jython',
-        'Programming Language :: Python :: Implementation :: PyPy',
+           "Topic :: Multimedia :: Video",
+        "Development Status :: 5 - Production/Stable",
+        "Environment :: Console",
+        "Programming Language :: Python",
+        "Programming Language :: Python :: 2",
+        "Programming Language :: Python :: 2.6",
+        "Programming Language :: Python :: 2.7",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.2",
+        "Programming Language :: Python :: 3.3",
+        "Programming Language :: Python :: 3.4",
+        "Programming Language :: Python :: 3.5",
+        "Programming Language :: Python :: 3.6",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: Implementation",
+        "Programming Language :: Python :: Implementation :: CPython",
+        "Programming Language :: Python :: Implementation :: IronPython",
+        "Programming Language :: Python :: Implementation :: Jython",
+        "Programming Language :: Python :: Implementation :: PyPy",
+        "License :: Public Domain",
+        "Operating System :: OS Independent",
      ],
-
-    cmdclass={'build_lazy_extractors': build_lazy_extractors},
+    python_requires='>=2.6',
+       
+       cmdclass={'build_lazy_extractors': build_lazy_extractors},
      **params
-)
+)
+\ No newline at end of file
diff --git a/youtube_dl/extractor/biqle.py b/youtube_dl/extractor/biqle.py

index af21e3ee5e53fbfdfafa5ee219c541ee6ca97de3..17ebbb25766bb500e6401f55b6105c37fcfd25f5 100644 (file)
--- a/youtube_dl/extractor/biqle.py
+++ b/youtube_dl/extractor/biqle.py
@@ -3,10 +3,11 @@
  
  from .common import InfoExtractor
  from .vk import VKIE
-from ..utils import (
-    HEADRequest,
-    int_or_none,
+from ..compat import (
+    compat_b64decode,
+    compat_urllib_parse_unquote,
  )
+from ..utils import int_or_none
  
  
  class BIQLEIE(InfoExtractor):
@@ -47,9 +48,16 @@ def _real_extract(self, url):
          if VKIE.suitable(embed_url):
              return self.url_result(embed_url, VKIE.ie_key(), video_id)
  
-        self._request_webpage(
-            HEADRequest(embed_url), video_id, headers={'Referer': url})
-        video_id, sig, _, access_token = self._get_cookies(embed_url)['video_ext'].value.split('%3A')
+        embed_page = self._download_webpage(
+            embed_url, video_id, headers={'Referer': url})
+        video_ext = self._get_cookies(embed_url).get('video_ext')
+        if video_ext:
+            video_ext = compat_urllib_parse_unquote(video_ext.value)
+        if not video_ext:
+            video_ext = compat_b64decode(self._search_regex(
+                r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
+                embed_page, 'video_ext')).decode()
+        video_id, sig, _, access_token = video_ext.split(':')
          item = self._download_json(
              'https://api.vk.com/method/video.get', video_id,
              headers={'User-Agent': 'okhttp/3.4.1'}, query={
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py

index 4b3092028f46fbfbb7ac8a64d43d10c1d1eff148..5ae0a34aa220e0702ce36846fce4ea428c5f2429 100644 (file)
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -440,6 +440,7 @@
  )
  from .howcast import HowcastIE
  from .howstuffworks import HowStuffWorksIE
+from .hrfensehen import HRFernsehenIE
  from .hrti import (
      HRTiIE,
      HRTiPlaylistIE,
diff --git a/youtube_dl/extractor/hrfensehen.py b/youtube_dl/extractor/hrfensehen.py

new file mode 100644 (file)

index 0000000..2beadef
--- /dev/null
+++ b/youtube_dl/extractor/hrfensehen.py
@@ -0,0 +1,102 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from youtube_dl.utils import int_or_none, unified_timestamp, unescapeHTML
+from .common import InfoExtractor
+
+
+class HRFernsehenIE(InfoExtractor):
+    IE_NAME = 'hrfernsehen'
+    _VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
+
+    _TESTS = [{
+        'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
+        'md5': '5c4e0ba94677c516a2f65a84110fc536',
+        'info_dict': {
+            'id': '130546',
+            'ext': 'mp4',
+            'description': 'Sturmtief Kirsten fegt über Hessen / Die Corona-Pandemie – eine Chronologie / '
+                           'Sterbehilfe: Die Lage in Hessen / Miss Hessen leitet zwei eigene Unternehmen / '
+                           'Pop-Up Museum zeigt Schwarze Unterhaltung und Black Music',
+            'subtitles': {'de': [{
+                'url': 'https://hr-a.akamaihd.net/video/as/hessenschau/2020_08/hrLogo_200826200407_L385592_512x288-25p-500kbit.vtt'
+            }]},
+            'timestamp': 1598470200,
+            'upload_date': '20200826',
+            'thumbnails': [{
+                'url': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9.jpg',
+                'id': '0'
+            }, {
+                'url': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9__medium.jpg',
+                'id': '1'
+            }],
+            'title': 'hessenschau vom 26.08.2020'
+        }
+    }, {
+        'url': 'https://www.hr-fernsehen.de/sendungen-a-z/mex/sendungen/fair-und-gut---was-hinter-aldis-eigenem-guetesiegel-steckt,video-130544.html',
+        'only_matching': True
+    }]
+
+    _GEO_COUNTRIES = ['DE']
+
+    def extract_airdate(self, loader_data):
+        airdate_str = loader_data.get('mediaMetadata', {}).get('agf', {}).get('airdate')
+
+        if airdate_str is None:
+            return None
+
+        return unified_timestamp(airdate_str)
+
+    def extract_formats(self, loader_data):
+        stream_formats = []
+        for stream_obj in loader_data["videoResolutionLevels"]:
+            stream_format = {
+                'format_id': str(stream_obj['verticalResolution']) + "p",
+                'height': stream_obj['verticalResolution'],
+                'url': stream_obj['url'],
+            }
+
+            quality_information = re.search(r'([0-9]{3,4})x([0-9]{3,4})-([0-9]{2})p-([0-9]{3,4})kbit',
+                                            stream_obj['url'])
+            if quality_information:
+                stream_format['width'] = int_or_none(quality_information.group(1))
+                stream_format['height'] = int_or_none(quality_information.group(2))
+                stream_format['fps'] = int_or_none(quality_information.group(3))
+                stream_format['tbr'] = int_or_none(quality_information.group(4))
+
+            stream_formats.append(stream_format)
+
+        self._sort_formats(stream_formats)
+        return stream_formats
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_meta(
+            ['og:title', 'twitter:title', 'name'], webpage)
+        description = self._html_search_meta(
+            ['description'], webpage)
+
+        loader_str = unescapeHTML(self._search_regex(r"data-hr-mediaplayer-loader='([^']*)'", webpage, "ardloader"))
+        loader_data = json.loads(loader_str)
+
+        info = {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'formats': self.extract_formats(loader_data),
+            'timestamp': self.extract_airdate(loader_data)
+        }
+
+        if "subtitle" in loader_data:
+            info["subtitles"] = {"de": [{"url": loader_data["subtitle"]}]}
+
+        thumbnails = list(set([t for t in loader_data.get("previewImageUrl", {}).values()]))
+        if len(thumbnails) > 0:
+            info["thumbnails"] = [{"url": t} for t in thumbnails]
+
+        return info
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py

index b0dcdc0e6baced889541e3307ac8314e73f99522..9e4171237ef44b1b3d749ff23c06ad3ef96f37ca 100644 (file)
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -56,14 +56,14 @@ def _prepare_call(self, path, timestamp=None, post_data=None):
  
      def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
          resp = self._download_json(
-            self._prepare_call(path, timestamp, post_data), video_id, note)
+            self._prepare_call(path, timestamp, post_data), video_id, note, headers={'x-viki-app-ver': '2.2.5.1428709186'}, expected_status=[200, 400, 404])
  
          error = resp.get('error')
          if error:
              if error == 'invalid timestamp':
                  resp = self._download_json(
                      self._prepare_call(path, int(resp['current_timestamp']), post_data),
-                    video_id, '%s (retry)' % note)
+                    video_id, '%s (retry)' % note, headers={'x-viki-app-ver': '2.2.5.1428709186'}, expected_status=[200, 400, 404])
                  error = resp.get('error')
              if error:
                  self._raise_error(resp['error'])
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py

index 0f7be6a7d93adc3a4fea8c6995cd8b58a084b9b4..902a3ed338e914c9e20edc3c643d0e5274d71fe7 100644 (file)
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -20,13 +20,13 @@
  
  
  class XHamsterIE(InfoExtractor):
-    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster[27]\.com)'
+    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com)'
      _VALID_URL = r'''(?x)
                      https?://
                          (?:.+?\.)?%s/
                          (?:
-                            movies/(?P<id>\d+)/(?P<display_id>[^/]*)\.html|
-                            videos/(?P<display_id_2>[^/]*)-(?P<id_2>\d+)
+                            movies/(?P<id>[\dA-Za-z]+)/(?P<display_id>[^/]*)\.html|
+                            videos/(?P<display_id_2>[^/]*)-(?P<id_2>[\dA-Za-z]+)
                          )
                      ''' % _DOMAINS
      _TESTS = [{
@@ -99,12 +99,21 @@ class XHamsterIE(InfoExtractor):
      }, {
          'url': 'https://xhamster2.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
          'only_matching': True,
+    }, {
+        'url': 'https://xhamster11.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+        'only_matching': True,
+    }, {
+        'url': 'https://xhamster26.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+        'only_matching': True,
      }, {
          'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
          'only_matching': True,
      }, {
          'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
          'only_matching': True,
+    }, {
+        'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):
@@ -129,7 +138,7 @@ def get_height(s):
  
          initials = self._parse_json(
              self._search_regex(
-                r'window\.initials\s*=\s*({.+?})\s*;\s*\n', webpage, 'initials',
+                r'window\.initials\s*=\s*({.+?})\s*;', webpage, 'initials',
                  default='{}'),
              video_id, fatal=False)
          if initials:
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index 17101fa47501d9bae1d6f223e35d7cb4dd3f8d5e..b50bd2b3b72afad86326cf8b5e7d8a7c11dcd400 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
  from __future__ import unicode_literals
  
-__version__ = '2020.07.28'
+__version__ = '2020.08.31'
author	Tom-Oliver Heidel <redacted>
	Mon, 31 Aug 2020 20:13:20 +0000 (22:13 +0200)
committer	GitHub <redacted>
	Mon, 31 Aug 2020 20:13:20 +0000 (22:13 +0200)
.github/workflows/python-publish.yml	[new file with mode: 0644]	patch \| blob
.travis.yml		patch \| blob \| blame \| history
.travis.yml.original	[new file with mode: 0644]	patch \| blob
README.md		patch \| blob \| blame \| history
setup.py		patch \| blob \| blame \| history
youtube_dl/extractor/biqle.py		patch \| blob \| blame \| history
youtube_dl/extractor/extractors.py		patch \| blob \| blame \| history
youtube_dl/extractor/hrfensehen.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/viki.py		patch \| blob \| blame \| history
youtube_dl/extractor/xhamster.py		patch \| blob \| blame \| history
youtube_dl/version.py		patch \| blob \| blame \| history