yt_dlp/extractor/bannedvideo.py

   1 from __future__ import unicode_literals
   2
   3 import json
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     try_get,
   8     int_or_none,
   9     url_or_none,
  10     float_or_none,
  11     unified_timestamp,
  12 )
  13
  14
  15 class BannedVideoIE(InfoExtractor):
  16     _VALID_URL = r'https?://(?:www\.)?banned\.video/watch\?id=(?P<id>[0-f]{24})'
  17     _TESTS = [{
  18         'url': 'https://banned.video/watch?id=5e7a859644e02200c6ef5f11',
  19         'md5': '14b6e81d41beaaee2215cd75c6ed56e4',
  20         'info_dict': {
  21             'id': '5e7a859644e02200c6ef5f11',
  22             'ext': 'mp4',
  23             'title': 'China Discovers Origin of Corona Virus: Issues Emergency Statement',
  24             'thumbnail': r're:^https?://(?:www\.)?assets\.infowarsmedia.com/images/',
  25             'description': 'md5:560d96f02abbebe6c6b78b47465f6b28',
  26             'upload_date': '20200324',
  27             'timestamp': 1585087895,
  28         }
  29     }]
  30
  31     _GRAPHQL_GETMETADATA_QUERY = '''
  32 query GetVideoAndComments($id: String!) {
  33     getVideo(id: $id) {
  34         streamUrl
  35         directUrl
  36         unlisted
  37         live
  38         tags {
  39             name
  40         }
  41         title
  42         summary
  43         playCount
  44         largeImage
  45         videoDuration
  46         channel {
  47             _id
  48             title
  49         }
  50         createdAt
  51     }
  52     getVideoComments(id: $id, limit: 999999, offset: 0) {
  53         _id
  54         content
  55         user {
  56             _id
  57             username
  58         }
  59         voteCount {
  60             positive
  61         }
  62         createdAt
  63         replyCount
  64     }
  65 }'''
  66
  67     _GRAPHQL_GETCOMMENTSREPLIES_QUERY = '''
  68 query GetCommentReplies($id: String!) {
  69     getCommentReplies(id: $id, limit: 999999, offset: 0) {
  70         _id
  71         content
  72         user {
  73             _id
  74             username
  75         }
  76         voteCount {
  77             positive
  78         }
  79         createdAt
  80         replyCount
  81     }
  82 }'''
  83
  84     _GRAPHQL_QUERIES = {
  85         'GetVideoAndComments': _GRAPHQL_GETMETADATA_QUERY,
  86         'GetCommentReplies': _GRAPHQL_GETCOMMENTSREPLIES_QUERY,
  87     }
  88
  89     def _call_api(self, video_id, id, operation, note):
  90         return self._download_json(
  91             'https://api.infowarsmedia.com/graphql', video_id, note=note,
  92             headers={
  93                 'Content-Type': 'application/json; charset=utf-8'
  94             }, data=json.dumps({
  95                 'variables': {'id': id},
  96                 'operationName': operation,
  97                 'query': self._GRAPHQL_QUERIES[operation]
  98             }).encode('utf8')).get('data')
  99
 100     def _get_comments(self, video_id, comments, comment_data):
 101         yield from comments
 102         for comment in comment_data.copy():
 103             comment_id = comment.get('_id')
 104             if comment.get('replyCount') > 0:
 105                 reply_json = self._call_api(
 106                     video_id, comment_id, 'GetCommentReplies',
 107                     f'Downloading replies for comment {comment_id}')
 108                 for reply in reply_json.get('getCommentReplies'):
 109                     yield self._parse_comment(reply, comment_id)
 110
 111     @staticmethod
 112     def _parse_comment(comment_data, parent):
 113         return {
 114             'id': comment_data.get('_id'),
 115             'text': comment_data.get('content'),
 116             'author': try_get(comment_data, lambda x: x['user']['username']),
 117             'author_id': try_get(comment_data, lambda x: x['user']['_id']),
 118             'timestamp': unified_timestamp(comment_data.get('createdAt')),
 119             'parent': parent,
 120             'like_count': try_get(comment_data, lambda x: x['voteCount']['positive']),
 121         }
 122
 123     def _real_extract(self, url):
 124         video_id = self._match_id(url)
 125         video_json = self._call_api(video_id, video_id, 'GetVideoAndComments', 'Downloading video metadata')
 126         video_info = video_json['getVideo']
 127         is_live = video_info.get('live')
 128         comments = [self._parse_comment(comment, 'root') for comment in video_json.get('getVideoComments')]
 129
 130         formats = [{
 131             'format_id': 'direct',
 132             'quality': 1,
 133             'url': video_info.get('directUrl'),
 134             'ext': 'mp4',
 135         }] if url_or_none(video_info.get('directUrl')) else []
 136         if video_info.get('streamUrl'):
 137             formats.extend(self._extract_m3u8_formats(
 138                 video_info.get('streamUrl'), video_id, 'mp4',
 139                 entry_protocol='m3u8_native', m3u8_id='hls', live=True))
 140         self._sort_formats(formats)
 141
 142         return {
 143             'id': video_id,
 144             'title': video_info.get('title')[:-1],
 145             'formats': formats,
 146             'is_live': is_live,
 147             'description': video_info.get('summary'),
 148             'channel': try_get(video_info, lambda x: x['channel']['title']),
 149             'channel_id': try_get(video_info, lambda x: x['channel']['_id']),
 150             'view_count': int_or_none(video_info.get('playCount')),
 151             'thumbnail': url_or_none(video_info.get('largeImage')),
 152             'duration': float_or_none(video_info.get('videoDuration')),
 153             'timestamp': unified_timestamp(video_info.get('createdAt')),
 154             'tags': [tag.get('name') for tag in video_info.get('tags')],
 155             'availability': self._availability(is_unlisted=video_info.get('unlisted')),
 156             'comments': comments,
 157             '__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments'))
 158         }