]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/nfhsnetwork.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / nfhsnetwork.py
CommitLineData
384fb069
L
1from .common import InfoExtractor
2
3
4from ..utils import (
5 try_get,
6 unified_strdate,
7 unified_timestamp
8)
9
10
class NFHSNetworkIE(InfoExtractor):
    # Extractor for event pages on nfhsnetwork.com. Metadata is read from the
    # cfunity.nfhsnetwork.com JSON API; the title is scraped from the web page.
    IE_NAME = 'NFHSNetwork'
    _VALID_URL = r'https?://(?:www\.)?nfhsnetwork\.com/events/[\w-]+/(?P<id>(?:gam|evt|dd|)?[\w\d]{0,10})'
    _TESTS = [{
        # Auto-generated two-team sport (pixellot)
        'url': 'https://www.nfhsnetwork.com/events/rockford-high-school-rockford-mi/gamcf7e54cfbc',
        'info_dict': {
            'id': 'gamcf7e54cfbc',
            'ext': 'mp4',
            'title': 'Rockford vs Spring Lake - Girls Varsity Lacrosse 03/27/2021',
            'uploader': 'MHSAA - Michigan: Rockford High School, Rockford, MI',
            'uploader_id': 'cd2622cf76',
            'uploader_url': 'https://www.nfhsnetwork.com/schools/rockford-high-school-rockford-mi',
            'location': 'Rockford, Michigan',
            'timestamp': 1616859000,
            'upload_date': '20210327',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # Non-sport activity with description
        'url': 'https://www.nfhsnetwork.com/events/limon-high-school-limon-co/evt4a30e3726c',
        'info_dict': {
            'id': 'evt4a30e3726c',
            'ext': 'mp4',
            'title': 'Drama Performance Limon High School vs. Limon High School - 12/13/2020',
            'description': 'Join the broadcast of the Limon High School Musical Performance at 2 PM.',
            'uploader': 'CHSAA: Limon High School, Limon, CO',
            'uploader_id': '7d2d121332',
            'uploader_url': 'https://www.nfhsnetwork.com/schools/limon-high-school-limon-co',
            'location': 'Limon, Colorado',
            'timestamp': 1607893200,
            'upload_date': '20201213',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # Postseason game
        'url': 'https://www.nfhsnetwork.com/events/nfhs-network-special-events/dd8de71d45',
        'info_dict': {
            'id': 'dd8de71d45',
            'ext': 'mp4',
            'title': '2015 UA Holiday Classic Tournament: National Division - 12/26/2015',
            'uploader': 'SoCal Sports Productions',
            'uploader_id': '063dba0150',
            'uploader_url': 'https://www.nfhsnetwork.com/affiliates/socal-sports-productions',
            'location': 'San Diego, California',
            'timestamp': 1451187000,
            'upload_date': '20151226',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # Video with no broadcasts object
        'url': 'https://www.nfhsnetwork.com/events/wiaa-wi/9aa2f92f82',
        'info_dict': {
            'id': '9aa2f92f82',
            'ext': 'mp4',
            'title': 'Competitive Equity - 01/21/2015',
            'description': 'Committee members discuss points of their research regarding a competitive equity plan',
            'uploader': 'WIAA - Wisconsin: Wisconsin Interscholastic Athletic Association',
            'uploader_id': 'a49f7d1002',
            'uploader_url': 'https://www.nfhsnetwork.com/associations/wiaa-wi',
            'location': 'Stevens Point, Wisconsin',
            'timestamp': 1421856000,
            'upload_date': '20150121',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a single NFHS Network event page.

        Downloads the event web page (for the title) and the event JSON
        (for publisher/broadcast metadata), resolves the stream key to an
        m3u8 URL through a second JSON request, and returns the metadata
        together with the extracted formats.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        data = self._download_json(
            'https://cfunity.nfhsnetwork.com/v2/game_or_event/' + video_id,
            video_id)
        publisher = data.get('publishers')[0]  # always exists

        # Some (older) videos don't have a broadcasts object, so fall back to
        # the first VOD. An empty dict keeps the .get() calls below safe when
        # neither list is present.
        broadcast = try_get(
            publisher,
            (lambda x: x['broadcasts'][0], lambda x: x['vods'][0]),
            dict) or {}

        uploader = publisher.get('formatted_name') or publisher.get('name')
        uploader_id = publisher.get('publisher_key')

        # The publisher type selects the URL section of the uploader's page.
        # Default to '' so the substring test cannot fail on a missing type.
        pub_type = publisher.get('type') or ''
        if pub_type == 'school':
            uploader_prefix = 'schools'
        elif 'association' in pub_type:
            uploader_prefix = 'associations'
        elif pub_type in ('publisher', 'affiliate'):
            uploader_prefix = 'affiliates'
        else:
            uploader_prefix = 'schools'

        # Only build the uploader URL when a slug is available; otherwise the
        # result would end in a literal "None".
        slug = publisher.get('slug')
        uploader_url = (
            'https://www.nfhsnetwork.com/%s/%s' % (uploader_prefix, slug)
            if slug else None)

        # Join whichever of city/state are present instead of formatting
        # missing values as the string "None".
        location = ', '.join(
            filter(None, (data.get('city'), data.get('state_name')))) or None

        description = broadcast.get('description')
        is_live = bool(
            broadcast.get('on_air') or broadcast.get('status') == 'on_air')

        local_start_time = data.get('local_start_time')
        timestamp = unified_timestamp(local_start_time)
        upload_date = unified_strdate(local_start_time)

        title = (
            self._og_search_title(webpage)
            or self._html_search_regex(r'<h1 class="sr-hidden">(.*?)</h1>', webpage, 'title'))
        # Keep only the part of the page title before the first '|'
        title = title.split('|')[0].strip()

        # Live events are addressed by the broadcast key, everything else by
        # the key of the first VOD.
        video_type = 'broadcasts' if is_live else 'vods'
        key = broadcast.get('key') if is_live else try_get(publisher, lambda x: x['vods'][0]['key'])

        m3u8_url = None
        if key:
            m3u8_url = self._download_json(
                'https://cfunity.nfhsnetwork.com/v2/%s/%s/url' % (video_type, key),
                video_id).get('video_url')

        formats = []
        if m3u8_url:
            formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', live=is_live)
        else:
            # Without a key or stream URL there is nothing to request; report
            # that explicitly instead of querying ".../None/url".
            self.raise_no_formats(
                'No playable broadcast or VOD was found for this event',
                expected=True, video_id=video_id)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
            'location': location,
            'upload_date': upload_date,
            'is_live': is_live,
            '_format_sort_fields': ('res', 'tbr'),
        }