youtube_dl/extractor/liveleak.py

   1 from __future__ import unicode_literals
   2
   3 import json
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     ExtractorError,
   9 )
  10
  11
  12 class LiveLeakIE(InfoExtractor):
  13     _VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
  14     _TEST = {
  15         'url': 'http://www.liveleak.com/view?i=757_1364311680',
  16         'file': '757_1364311680.mp4',
  17         'md5': '0813c2430bea7a46bf13acf3406992f4',
  18         'info_dict': {
  19             'description': 'extremely bad day for this guy..!',
  20             'uploader': 'ljfriel2',
  21             'title': 'Most unlucky car accident'
  22         }
  23     }
  24
  25     def _real_extract(self, url):
  26         mobj = re.match(self._VALID_URL, url)
  27
  28         video_id = mobj.group('video_id')
  29         webpage = self._download_webpage(url, video_id)
  30         sources_raw = self._search_regex(
  31             r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs')
  32         sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
  33         sources = json.loads(sources_json)
  34
  35         formats = [{
  36             'format_note': s.get('label'),
  37             'url': s['file'],
  38         } for s in sources]
  39         self._sort_formats(formats)
  40
  41         video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
  42         video_description = self._og_search_description(webpage)
  43         video_uploader = self._html_search_regex(
  44             r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
  45
  46         return {
  47             'id': video_id,
  48             'title': video_title,
  49             'description': video_description,
  50             'uploader': video_uploader,
  51             'formats': formats,
  52         }