]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/pr0gramm.py
[ie/media.ccc.de:lists] Fix extraction (#8144)
[yt-dlp.git] / yt_dlp / extractor / pr0gramm.py
1 import re
2
3 from .common import InfoExtractor
4 from ..utils import merge_dicts
5
6
class Pr0grammStaticIE(InfoExtractor):
    """Extractor for the "static" pages of pr0gramm.com.

    The static page is scanned for HTML5 media elements; the uploader name
    and an approximate upload date are scraped from the same markup.
    """
    # Possible urls:
    # https://pr0gramm.com/static/5466437
    _VALID_URL = r'https?://pr0gramm\.com/static/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://pr0gramm.com/static/5466437',
        'md5': '52fa540d70d3edc286846f8ca85938aa',
        'info_dict': {
            'id': '5466437',
            'ext': 'mp4',
            'title': 'pr0gramm-5466437 by g11st',
            'uploader': 'g11st',
            'upload_date': '20221221',
        }
    }

    @staticmethod
    def _extract_upload_date(webpage):
        """Return an approximate upload date as ``YYYYMMDD``, or None.

        The date is read from the first media path found in *webpage*,
        e.g. ``//img.pr0gramm.com/2022/12/21/62ae8aa5e2da0ebf.mp4``.
        Accuracy of one day is considered sufficient.
        """
        mobj = re.search(
            r'//img\.pr0gramm\.com/(?P<year>[\d]+)/(?P<mon>[\d]+)/(?P<day>[\d]+)/\w+\.\w{,4}',
            webpage)
        if not mobj:
            return None
        # The pattern accepts any run of digits, so zero-pad each component
        # instead of concatenating the raw strings; otherwise a path such as
        # /2022/1/5/ would yield the malformed date "202215".
        return '%04d%02d%02d' % (
            int(mobj.group('year')), int(mobj.group('mon')), int(mobj.group('day')))

    def _real_extract(self, url):
        """Build the info dict for a static pr0gramm page.

        :param url: URL matching ``_VALID_URL``.
        :returns: the first HTML5 media entry of the page, merged with the
            id, title, uploader and upload date extracted here.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Fetch media sources
        entries = self._parse_html5_media_entries(url, webpage, video_id)
        if not entries:
            # Report a proper "no formats" error rather than letting
            # entries[0] fail with a bare IndexError.
            self.raise_no_formats(
                'No media found on the page', expected=True, video_id=video_id)
        # raise_no_formats may only warn (depending on user options), so keep
        # a safe fallback for the merge below.
        media_info = entries[0] if entries else {}

        # Fetch author. The title below already copes with a missing
        # uploader, so a failed match must not abort the whole extraction.
        uploader = self._html_search_regex(
            r'by\W+([\w-]+)\W+', webpage, 'uploader', default=None)

        return merge_dicts({
            'id': video_id,
            'title': 'pr0gramm-%s%s' % (video_id, (' by ' + uploader) if uploader else ''),
            'uploader': uploader,
            'upload_date': self._extract_upload_date(webpage),
        }, media_info)
56
57
# This extractor handles the primary URL (the one used for sharing, which
# also appears in the location bar). Since that page builds its DOM via
# JavaScript, no video information can be found in its HTML. So we redirect
# to a compatibility version of the site, which contains the <video> element
# directly, without requiring JavaScript to be run.
class Pr0grammIE(InfoExtractor):
    """Extractor for regular (non-static) pr0gramm.com post URLs.

    Only the numeric post id is taken from the URL; the actual extraction is
    delegated to Pr0grammStaticIE through the ``/static/<id>`` page.
    """
    # Possible urls:
    # https://pr0gramm.com/new/546637
    # https://pr0gramm.com/new/video/546637
    # https://pr0gramm.com/top/546637
    # https://pr0gramm.com/top/video/546637
    # https://pr0gramm.com/user/g11st/uploads/5466437
    # https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290
    # https://pr0gramm.com/user/froschler/reinziehen-1elf/5232030
    # https://pr0gramm.com/user/froschler/1elf/5232030
    # https://pr0gramm.com/new/5495710:comment62621020 <- this is not the id!
    # https://pr0gramm.com/top/fruher war alles damals/5498175

    # The negative lookahead leaves /static/<id> URLs to Pr0grammStaticIE;
    # the id is the last numeric path segment, optionally followed by a
    # ":comment..." suffix.
    _VALID_URL = r'https?:\/\/pr0gramm\.com\/(?!static/\d+).+?\/(?P<id>[\d]+)(:|$)'
    _TEST = {
        'url': 'https://pr0gramm.com/new/video/5466437',
        'info_dict': {
            'id': '5466437',
            'ext': 'mp4',
            'title': 'pr0gramm-5466437 by g11st',
            'uploader': 'g11st',
            'upload_date': '20221221',
        }
    }

    # NOTE: a leftover debug stub (`def _generic_title(): return "oof"`) was
    # removed here. It took no `self`, so calling it on an instance raised
    # TypeError, and nothing in this module used it.

    def _real_extract(self, url):
        """Redirect extraction to the static page of the same post.

        :param url: URL matching ``_VALID_URL``.
        :returns: a URL result pointing at ``https://pr0gramm.com/static/<id>``
            and pinned to Pr0grammStaticIE.
        """
        video_id = self._match_id(url)

        return self.url_result(
            'https://pr0gramm.com/static/' + video_id,
            video_id=video_id,
            ie=Pr0grammStaticIE.ie_key())