youtube_dl/extractor/noco.py

   1 # encoding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5 import time
   6 import hashlib
   7
   8 from .common import InfoExtractor
   9 from ..compat import (
  10     compat_str,
  11     compat_urllib_parse,
  12     compat_urllib_request,
  13 )
  14 from ..utils import (
  15     clean_html,
  16     ExtractorError,
  17     unified_strdate,
  18 )
  19
  20
  21 class NocoIE(InfoExtractor):
  22     _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
  23     _LOGIN_URL = 'http://noco.tv/do.php'
  24     _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
  25     _SUB_LANG_TEMPLATE = '&sub_lang=%s'
  26     _NETRC_MACHINE = 'noco'
  27
  28     _TESTS = [
  29         {
  30             'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
  31             'md5': '0a993f0058ddbcd902630b2047ef710e',
  32             'info_dict': {
  33                 'id': '11538',
  34                 'ext': 'mp4',
  35                 'title': 'Ami Ami Idol - Hello! France',
  36                 'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
  37                 'upload_date': '20140412',
  38                 'uploader': 'Nolife',
  39                 'uploader_id': 'NOL',
  40                 'duration': 2851.2,
  41             },
  42             'skip': 'Requires noco account',
  43         },
  44         {
  45             'url': 'http://noco.tv/emission/12610/lbl42/the-guild/s01e01-wake-up-call',
  46             'md5': 'c190f1f48e313c55838f1f412225934d',
  47             'info_dict': {
  48                 'id': '12610',
  49                 'ext': 'mp4',
  50                 'title': 'The Guild #1 - Wake-Up Call',
  51                 'description': '',
  52                 'upload_date': '20140627',
  53                 'uploader': 'LBL42',
  54                 'uploader_id': 'LBL',
  55                 'duration': 233.023,
  56             },
  57             'skip': 'Requires noco account',
  58         }
  59     ]
  60
  61     def _real_initialize(self):
  62         self._login()
  63
  64     def _login(self):
  65         (username, password) = self._get_login_info()
  66         if username is None:
  67             return
  68
  69         login_form = {
  70             'a': 'login',
  71             'cookie': '1',
  72             'username': username,
  73             'password': password,
  74         }
  75         request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
  76         request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
  77
  78         login = self._download_json(request, None, 'Logging in as %s' % username)
  79
  80         if 'erreur' in login:
  81             raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
  82
  83     def _call_api(self, path, video_id, note, sub_lang=None):
  84         ts = compat_str(int(time.time() * 1000))
  85         tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
  86         url = self._API_URL_TEMPLATE % (path, ts, tk)
  87         if sub_lang:
  88             url += self._SUB_LANG_TEMPLATE % sub_lang
  89
  90         resp = self._download_json(url, video_id, note)
  91
  92         if isinstance(resp, dict) and resp.get('error'):
  93             self._raise_error(resp['error'], resp['description'])
  94
  95         return resp
  96
  97     def _raise_error(self, error, description):
  98         raise ExtractorError(
  99             '%s returned error: %s - %s' % (self.IE_NAME, error, description),
 100             expected=True)
 101
 102     def _real_extract(self, url):
 103         mobj = re.match(self._VALID_URL, url)
 104         video_id = mobj.group('id')
 105
 106         options = self._call_api('users/init', None, 'Downloading user options JSON')['options']
 107         audio_lang = options.get('audio_language', 'fr')
 108
 109         medias = self._call_api(
 110             'shows/%s/medias' % video_id,
 111             video_id, 'Downloading video JSON')
 112
 113         show = self._call_api(
 114             'shows/by_id/%s' % video_id,
 115             video_id, 'Downloading show JSON')[0]
 116
 117         if audio_lang == 'original':
 118             audio_lang = show['original_lang']
 119         if len(medias) == 1:
 120             audio_lang = list(medias.keys())[0]
 121         elif not audio_lang in medias:
 122             audio_lang = 'fr'
 123
 124         qualities = self._call_api(
 125             'qualities',
 126             video_id, 'Downloading qualities JSON')
 127
 128         formats = []
 129
 130         for lang, lang_dict in medias[audio_lang]['video_list'].items():
 131             for format_id, fmt in lang_dict['quality_list'].items():
 132                 format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id
 133
 134                 video = self._call_api(
 135                     'shows/%s/video/%s/%s' % (video_id, format_id.lower(), audio_lang),
 136                     video_id, 'Downloading %s video JSON' % format_id_extended,
 137                     lang if lang != 'none' else None)
 138
 139                 file_url = video['file']
 140                 if not file_url:
 141                     continue
 142
 143                 if file_url in ['forbidden', 'not found']:
 144                     popmessage = video['popmessage']
 145                     self._raise_error(popmessage['title'], popmessage['message'])
 146
 147                 formats.append({
 148                     'url': file_url,
 149                     'format_id': format_id_extended,
 150                     'width': fmt['res_width'],
 151                     'height': fmt['res_lines'],
 152                     'abr': fmt['audiobitrate'],
 153                     'vbr': fmt['videobitrate'],
 154                     'filesize': fmt['filesize'],
 155                     'format_note': qualities[format_id]['quality_name'],
 156                     'preference': qualities[format_id]['priority'],
 157                 })
 158
 159         self._sort_formats(formats)
 160
 161         upload_date = unified_strdate(show['online_date_start_utc'])
 162         uploader = show['partner_name']
 163         uploader_id = show['partner_key']
 164         duration = show['duration_ms'] / 1000.0
 165
 166         thumbnails = []
 167         for thumbnail_key, thumbnail_url in show.items():
 168             m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key)
 169             if not m:
 170                 continue
 171             thumbnails.append({
 172                 'url': thumbnail_url,
 173                 'width': int(m.group('width')),
 174                 'height': int(m.group('height')),
 175             })
 176
 177         episode = show.get('show_TT') or show.get('show_OT')
 178         family = show.get('family_TT') or show.get('family_OT')
 179         episode_number = show.get('episode_number')
 180
 181         title = ''
 182         if family:
 183             title += family
 184         if episode_number:
 185             title += ' #' + compat_str(episode_number)
 186         if episode:
 187             title += ' - ' + episode
 188
 189         description = show.get('show_resume') or show.get('family_resume')
 190
 191         return {
 192             'id': video_id,
 193             'title': title,
 194             'description': description,
 195             'thumbnails': thumbnails,
 196             'upload_date': upload_date,
 197             'uploader': uploader,
 198             'uploader_id': uploader_id,
 199             'duration': duration,
 200             'formats': formats,
 201         }