kLbLF6w7

· 5 years ago · Oct 23, 2020, 07:34 PM
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5
6import itertools
7import json
8import os.path
9import random
10import re
11import time
12import traceback
13
14from .common import InfoExtractor, SearchInfoExtractor
15from ..jsinterp import JSInterpreter
16from ..swfinterp import SWFInterpreter
17from ..compat import (
18    compat_chr,
19    compat_HTTPError,
20    compat_kwargs,
21    compat_parse_qs,
22    compat_urllib_parse_unquote,
23    compat_urllib_parse_unquote_plus,
24    compat_urllib_parse_urlencode,
25    compat_urllib_parse_urlparse,
26    compat_urlparse,
27    compat_str,
28)
29from ..utils import (
30    bool_or_none,
31    clean_html,
32    error_to_compat_str,
33    extract_attributes,
34    ExtractorError,
35    float_or_none,
36    get_element_by_attribute,
37    get_element_by_id,
38    int_or_none,
39    mimetype2ext,
40    orderedSet,
41    parse_codecs,
42    parse_duration,
43    remove_quotes,
44    remove_start,
45    smuggle_url,
46    str_or_none,
47    str_to_int,
48    try_get,
49    unescapeHTML,
50    unified_strdate,
51    unsmuggle_url,
52    uppercase_escape,
53    url_or_none,
54    urlencode_postdata,
55)
56
57
58class YoutubeBaseInfoExtractor(InfoExtractor):
59    """Provide base functions for Youtube extractors"""
60    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
61    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
62
63    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
64    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
65    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
66
67    _NETRC_MACHINE = 'youtube'
68    # If True it will raise an error if no login info is provided
69    _LOGIN_REQUIRED = False
70
71    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
72
73    _YOUTUBE_CLIENT_HEADERS = {
74        'x-youtube-client-name': '1',
75        'x-youtube-client-version': '1.20200609.04.02',
76    }
77
78    def _set_language(self):
79        self._set_cookie(
80            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
81            # YouTube sets the expire time to about two months
82            expire_time=time.time() + 2 * 30 * 24 * 3600)
83
84    def _ids_to_results(self, ids):
85        return [
86            self.url_result(vid_id, 'Youtube', video_id=vid_id)
87            for vid_id in ids]
88
89    def _login(self):
90        """
91        Attempt to log in to YouTube.
92        True is returned if successful or skipped.
93        False is returned if login failed.
94        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
95        """
96        username, password = self._get_login_info()
97        # No authentication to be performed
98        if username is None:
99            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
100                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
101            return True
102
103        login_page = self._download_webpage(
104            self._LOGIN_URL, None,
105            note='Downloading login page',
106            errnote='unable to fetch login page', fatal=False)
107        if login_page is False:
108            return
109
110        login_form = self._hidden_inputs(login_page)
111
112        def req(url, f_req, note, errnote):
113            data = login_form.copy()
114            data.update({
115                'pstMsg': 1,
116                'checkConnection': 'youtube',
117                'checkedDomains': 'youtube',
118                'hl': 'en',
119                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
120                'f.req': json.dumps(f_req),
121                'flowName': 'GlifWebSignIn',
122                'flowEntry': 'ServiceLogin',
123                # TODO: reverse actual botguard identifier generation algo
124                'bgRequest': '["identifier",""]',
125            })
126            return self._download_json(
127                url, None, note=note, errnote=errnote,
128                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
129                fatal=False,
130                data=urlencode_postdata(data), headers={
131                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
132                    'Google-Accounts-XSRF': 1,
133                })
134
135        def warn(message):
136            self._downloader.report_warning(message)
137
138        lookup_req = [
139            username,
140            None, [], None, 'US', None, None, 2, False, True,
141            [
142                None, None,
143                [2, 1, None, 1,
144                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
145                 None, [], 4],
146                1, [None, None, []], None, None, None, True
147            ],
148            username,
149        ]
150
151        lookup_results = req(
152            self._LOOKUP_URL, lookup_req,
153            'Looking up account info', 'Unable to look up account info')
154
155        if lookup_results is False:
156            return False
157
158        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
159        if not user_hash:
160            warn('Unable to extract user hash')
161            return False
162
163        challenge_req = [
164            user_hash,
165            None, 1, None, [1, None, None, None, [password, None, True]],
166            [
167                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
168                1, [None, None, []], None, None, None, True
169            ]]
170
171        challenge_results = req(
172            self._CHALLENGE_URL, challenge_req,
173            'Logging in', 'Unable to log in')
174
175        if challenge_results is False:
176            return
177
178        login_res = try_get(challenge_results, lambda x: x[0][5], list)
179        if login_res:
180            login_msg = try_get(login_res, lambda x: x[5], compat_str)
181            warn(
182                'Unable to login: %s' % 'Invalid password'
183                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
184            return False
185
186        res = try_get(challenge_results, lambda x: x[0][-1], list)
187        if not res:
188            warn('Unable to extract result entry')
189            return False
190
191        login_challenge = try_get(res, lambda x: x[0][0], list)
192        if login_challenge:
193            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
194            if challenge_str == 'TWO_STEP_VERIFICATION':
195                # SEND_SUCCESS - TFA code has been successfully sent to phone
196                # QUOTA_EXCEEDED - reached the limit of TFA codes
197                status = try_get(login_challenge, lambda x: x[5], compat_str)
198                if status == 'QUOTA_EXCEEDED':
199                    warn('Exceeded the limit of TFA codes, try later')
200                    return False
201
202                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
203                if not tl:
204                    warn('Unable to extract TL')
205                    return False
206
207                tfa_code = self._get_tfa_info('2-step verification code')
208
209                if not tfa_code:
210                    warn(
211                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
212                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
213                    return False
214
215                tfa_code = remove_start(tfa_code, 'G-')
216
217                tfa_req = [
218                    user_hash, None, 2, None,
219                    [
220                        9, None, None, None, None, None, None, None,
221                        [None, tfa_code, True, 2]
222                    ]]
223
224                tfa_results = req(
225                    self._TFA_URL.format(tl), tfa_req,
226                    'Submitting TFA code', 'Unable to submit TFA code')
227
228                if tfa_results is False:
229                    return False
230
231                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
232                if tfa_res:
233                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
234                    warn(
235                        'Unable to finish TFA: %s' % 'Invalid TFA code'
236                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
237                    return False
238
239                check_cookie_url = try_get(
240                    tfa_results, lambda x: x[0][-1][2], compat_str)
241            else:
242                CHALLENGES = {
243                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
244                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
245                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
246                }
247                challenge = CHALLENGES.get(
248                    challenge_str,
249                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
250                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
251                return False
252        else:
253            check_cookie_url = try_get(res, lambda x: x[2], compat_str)
254
255        if not check_cookie_url:
256            warn('Unable to extract CheckCookie URL')
257            return False
258
259        check_cookie_results = self._download_webpage(
260            check_cookie_url, None, 'Checking cookie', fatal=False)
261
262        if check_cookie_results is False:
263            return False
264
265        if 'https://myaccount.google.com/' not in check_cookie_results:
266            warn('Unable to log in')
267            return False
268
269        return True
270
271    def _download_webpage_handle(self, *args, **kwargs):
272        query = kwargs.get('query', {}).copy()
273        query['disable_polymer'] = 'true'
274        kwargs['query'] = query
275        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
276            *args, **compat_kwargs(kwargs))
277
278    def _real_initialize(self):
279        if self._downloader is None:
280            return
281        self._set_language()
282        if not self._login():
283            return
284
285
286class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
287    # Extract entries from page with "Load more" button
288    def _entries(self, page, playlist_id):
289        more_widget_html = content_html = page
290        for page_num in itertools.count(1):
291            for entry in self._process_page(content_html):
292                yield entry
293
294            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
295            if not mobj:
296                break
297
298            count = 0
299            retries = 3
300            while count <= retries:
301                try:
302                    # Downloading page may result in intermittent 5xx HTTP error
303                    # that is usually worked around with a retry
304                    more = self._download_json(
305                        'https://www.youtube.com/%s' % mobj.group('more'), playlist_id,
306                        'Downloading page #%s%s'
307                        % (page_num, ' (retry #%d)' % count if count else ''),
308                        transform_source=uppercase_escape,
309                        headers=self._YOUTUBE_CLIENT_HEADERS)
310                    break
311                except ExtractorError as e:
312                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
313                        count += 1
314                        if count <= retries:
315                            continue
316                    raise
317
318            content_html = more['content_html']
319            if not content_html.strip():
320                # Some webpages show a "Load more" button but they don't
321                # have more videos
322                break
323            more_widget_html = more['load_more_widget_html']
324
325
326class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
327    def _process_page(self, content):
328        for video_id, video_title in self.extract_videos_from_page(content):
329            yield self.url_result(video_id, 'Youtube', video_id, video_title)
330
331    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
332        for mobj in re.finditer(video_re, page):
333            # The link with index 0 is not the first video of the playlist (not sure if still actual)
334            if 'index' in mobj.groupdict() and mobj.group('id') == '0':
335                continue
336            video_id = mobj.group('id')
337            video_title = unescapeHTML(
338                mobj.group('title')) if 'title' in mobj.groupdict() else None
339            if video_title:
340                video_title = video_title.strip()
341            if video_title == '► Play all':
342                video_title = None
343            try:
344                idx = ids_in_page.index(video_id)
345                if video_title and not titles_in_page[idx]:
346                    titles_in_page[idx] = video_title
347            except ValueError:
348                ids_in_page.append(video_id)
349                titles_in_page.append(video_title)
350
351    def extract_videos_from_page(self, page):
352        ids_in_page = []
353        titles_in_page = []
354        self.extract_videos_from_page_impl(
355            self._VIDEO_RE, page, ids_in_page, titles_in_page)
356        return zip(ids_in_page, titles_in_page)
357
358
359class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
360    def _process_page(self, content):
361        for playlist_id in orderedSet(re.findall(
362                r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
363                content)):
364            yield self.url_result(
365                'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
366
367    def _real_extract(self, url):
368        playlist_id = self._match_id(url)
369        webpage = self._download_webpage(url, playlist_id)
370        title = self._og_search_title(webpage, fatal=False)
371        return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
372
373
374class YoutubeIE(YoutubeBaseInfoExtractor):
375    IE_DESC = 'YouTube.com'
376    _VALID_URL = r"""(?x)^
377                     (
378                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
379                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
380                            (?:www\.)?deturl\.com/www\.youtube\.com/|
381                            (?:www\.)?pwnyoutube\.com/|
382                            (?:www\.)?hooktube\.com/|
383                            (?:www\.)?yourepeat\.com/|
384                            tube\.majestyc\.net/|
385                            # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
386                            (?:(?:www|dev)\.)?invidio\.us/|
387                            (?:(?:www|no)\.)?invidiou\.sh/|
388                            (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
389                            (?:www\.)?invidious\.kabi\.tk/|
390                            (?:www\.)?invidious\.13ad\.de/|
391                            (?:www\.)?invidious\.mastodon\.host/|
392                            (?:www\.)?invidious\.nixnet\.xyz/|
393                            (?:www\.)?invidious\.drycat\.fr/|
394                            (?:www\.)?tube\.poal\.co/|
395                            (?:www\.)?vid\.wxzm\.sx/|
396                            (?:www\.)?yewtu\.be/|
397                            (?:www\.)?yt\.elukerio\.org/|
398                            (?:www\.)?yt\.lelux\.fi/|
399                            (?:www\.)?invidious\.ggc-project\.de/|
400                            (?:www\.)?yt\.maisputain\.ovh/|
401                            (?:www\.)?invidious\.13ad\.de/|
402                            (?:www\.)?invidious\.toot\.koeln/|
403                            (?:www\.)?invidious\.fdn\.fr/|
404                            (?:www\.)?watch\.nettohikari\.com/|
405                            (?:www\.)?kgg2m7yk5aybusll\.onion/|
406                            (?:www\.)?qklhadlycap4cnod\.onion/|
407                            (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
408                            (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
409                            (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
410                            (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
411                            (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
412                            (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
413                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
414                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
415                         (?:                                                  # the various things that can precede the ID:
416                             (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
417                             |(?:                                             # or the v= param in all its forms
418                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
419                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
420                                 (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
421                                 v=
422                             )
423                         ))
424                         |(?:
425                            youtu\.be|                                        # just youtu.be/xxxx
426                            vid\.plus|                                        # or vid.plus/xxxx
427                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
428                         )/
429                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
430                         )
431                     )?                                                       # all until now is optional -> you can pass the naked ID
432                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
433                     (?!.*?\blist=
434                        (?:
435                            %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
436                            WL                                                # WL are handled by the watch later IE
437                        )
438                     )
439                     (?(1).+)?                                                # if we found the ID, everything can follow
440                     $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
441    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
442    _PLAYER_INFO_RE = (
443        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
444        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
445    )
446    _formats = {
447        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
448        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
449        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
450        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
451        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
452        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
453        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
454        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
455        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
456        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
457        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
458        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
459        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
460        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
461        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
462        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
463        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
464        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
465
466
467        # 3D videos
468        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
469        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
470        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
471        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
472        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
473        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
474        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
475
476        # Apple HTTP Live Streaming
477        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
478        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
479        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
480        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
481        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
482        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
483        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
484        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
485
486        # DASH mp4 video
487        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
488        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
489        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
490        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
491        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
492        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
493        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
494        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
495        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
496        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
497        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
498        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
499
500        # Dash mp4 audio
501        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
502        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
503        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
504        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
505        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
506        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
507        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
508
509        # Dash webm
510        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
511        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
512        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
513        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
514        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
515        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
516        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
517        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
518        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
519        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
520        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
521        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
522        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
523        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
524        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
525        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
526        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
527        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
528        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
529        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
530        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
531        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
532
533        # Dash webm audio
534        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
535        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
536
537        # Dash webm audio with opus inside
538        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
539        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
540        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
541
542        # RTMP (unnamed)
543        '_rtmp': {'protocol': 'rtmp'},
544
545        # av01 video only formats sometimes served with "unknown" codecs
546        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
547        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
548        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
549        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
550    }
551    _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
552
553    _GEO_BYPASS = False
554
555    IE_NAME = 'youtube'
556    _TESTS = [
557        {
558            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
559            'info_dict': {
560                'id': 'BaW_jenozKc',
561                'ext': 'mp4',
562                'title': 'youtube-dl test video "\'/\\ä↭?',
563                'uploader': 'Philipp Hagemeister',
564                'uploader_id': 'phihag',
565                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
566                'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
567                'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
568                'upload_date': '20121002',
569                'description': 'test chars:  "\'/\\ä↭?\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
570                'categories': ['Science & Technology'],
571                'tags': ['youtube-dl'],
572                'duration': 10,
573                'view_count': int,
574                'like_count': int,
575                'dislike_count': int,
576                'start_time': 1,
577                'end_time': 9,
578            }
579        },
580        {
581            'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
582            'note': 'Test generic use_cipher_signature video (#897)',
583            'info_dict': {
584                'id': 'UxxajLWwzqY',
585                'ext': 'mp4',
586                'upload_date': '20120506',
587                'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
588                'alt_title': 'I Love It (feat. Charli XCX)',
589                'description': 'md5:19a2f98d9032b9311e686ed039564f63',
590                'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
591                         'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
592                         'iconic ep', 'iconic', 'love', 'it'],
593                'duration': 180,
594                'uploader': 'Icona Pop',
595                'uploader_id': 'IconaPop',
596                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
597                'creator': 'Icona Pop',
598                'track': 'I Love It (feat. Charli XCX)',
599                'artist': 'Icona Pop',
600            }
601        },
602        {
603            'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
604            'note': 'Test VEVO video with age protection (#956)',
605            'info_dict': {
606                'id': '07FYdnEawAQ',
607                'ext': 'mp4',
608                'upload_date': '20130703',
609                'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
610                'alt_title': 'Tunnel Vision',
611                'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
612                'duration': 419,
613                'uploader': 'justintimberlakeVEVO',
614                'uploader_id': 'justintimberlakeVEVO',
615                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
616                'creator': 'Justin Timberlake',
617                'track': 'Tunnel Vision',
618                'artist': 'Justin Timberlake',
619                'age_limit': 18,
620            }
621        },
622        {
623            'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
624            'note': 'Embed-only video (#1746)',
625            'info_dict': {
626                'id': 'yZIXLfi8CZQ',
627                'ext': 'mp4',
628                'upload_date': '20120608',
629                'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
630                'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
631                'uploader': 'SET India',
632                'uploader_id': 'setindia',
633                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
634                'age_limit': 18,
635            }
636        },
637        {
638            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
639            'note': 'Use the first video ID in the URL',
640            'info_dict': {
641                'id': 'BaW_jenozKc',
642                'ext': 'mp4',
643                'title': 'youtube-dl test video "\'/\\ä↭?',
644                'uploader': 'Philipp Hagemeister',
645                'uploader_id': 'phihag',
646                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
647                'upload_date': '20121002',
648                'description': 'test chars:  "\'/\\ä↭?\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
649                'categories': ['Science & Technology'],
650                'tags': ['youtube-dl'],
651                'duration': 10,
652                'view_count': int,
653                'like_count': int,
654                'dislike_count': int,
655            },
656            'params': {
657                'skip_download': True,
658            },
659        },
660        {
661            'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
662            'note': '256k DASH audio (format 141) via DASH manifest',
663            'info_dict': {
664                'id': 'a9LDPn-MO4I',
665                'ext': 'm4a',
666                'upload_date': '20121002',
667                'uploader_id': '8KVIDEO',
668                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
669                'description': '',
670                'uploader': '8KVIDEO',
671                'title': 'UHDTV TEST 8K VIDEO.mp4'
672            },
673            'params': {
674                'youtube_include_dash_manifest': True,
675                'format': '141',
676            },
677            'skip': 'format 141 not served anymore',
678        },
679        # DASH manifest with encrypted signature
680        {
681            'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
682            'info_dict': {
683                'id': 'IB3lcPjvWLA',
684                'ext': 'm4a',
685                'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
686                'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
687                'duration': 244,
688                'uploader': 'AfrojackVEVO',
689                'uploader_id': 'AfrojackVEVO',
690                'upload_date': '20131011',
691            },
692            'params': {
693                'youtube_include_dash_manifest': True,
694                'format': '141/bestaudio[ext=m4a]',
695            },
696        },
697        # JS player signature function name containing $
698        {
699            'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
700            'info_dict': {
701                'id': 'nfWlot6h_JM',
702                'ext': 'm4a',
703                'title': 'Taylor Swift - Shake It Off',
704                'description': 'md5:307195cd21ff7fa352270fe884570ef0',
705                'duration': 242,
706                'uploader': 'TaylorSwiftVEVO',
707                'uploader_id': 'TaylorSwiftVEVO',
708                'upload_date': '20140818',
709            },
710            'params': {
711                'youtube_include_dash_manifest': True,
712                'format': '141/bestaudio[ext=m4a]',
713            },
714        },
715        # Controversy video
716        {
717            'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
718            'info_dict': {
719                'id': 'T4XJQO3qol8',
720                'ext': 'mp4',
721                'duration': 219,
722                'upload_date': '20100909',
723                'uploader': 'Amazing Atheist',
724                'uploader_id': 'TheAmazingAtheist',
725                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
726                'title': 'Burning Everyone\'s Koran',
727                'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
728            }
729        },
730        # Normal age-gate video (No vevo, embed allowed)
731        {
732            'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
733            'info_dict': {
734                'id': 'HtVdAasjOgU',
735                'ext': 'mp4',
736                'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
737                'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
738                'duration': 142,
739                'uploader': 'The Witcher',
740                'uploader_id': 'WitcherGame',
741                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
742                'upload_date': '20140605',
743                'age_limit': 18,
744            },
745        },
746        # Age-gate video with encrypted signature
747        {
748            'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
749            'info_dict': {
750                'id': '6kLq3WMV1nU',
751                'ext': 'mp4',
752                'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
753                'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
754                'duration': 246,
755                'uploader': 'LloydVEVO',
756                'uploader_id': 'LloydVEVO',
757                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
758                'upload_date': '20110629',
759                'age_limit': 18,
760            },
761        },
762        # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
763        # YouTube Red ad is not captured for creator
764        {
765            'url': '__2ABJjxzNo',
766            'info_dict': {
767                'id': '__2ABJjxzNo',
768                'ext': 'mp4',
769                'duration': 266,
770                'upload_date': '20100430',
771                'uploader_id': 'deadmau5',
772                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
773                'creator': 'Dada Life, deadmau5',
774                'description': 'md5:12c56784b8032162bb936a5f76d55360',
775                'uploader': 'deadmau5',
776                'title': 'Deadmau5 - Some Chords (HD)',
777                'alt_title': 'This Machine Kills Some Chords',
778            },
779            'expected_warnings': [
780                'DASH manifest missing',
781            ]
782        },
783        # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
784        {
785            'url': 'lqQg6PlCWgI',
786            'info_dict': {
787                'id': 'lqQg6PlCWgI',
788                'ext': 'mp4',
789                'duration': 6085,
790                'upload_date': '20150827',
791                'uploader_id': 'olympic',
792                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
793                'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
794                'uploader': 'Olympic',
795                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
796            },
797            'params': {
798                'skip_download': 'requires avconv',
799            }
800        },
801        # Non-square pixels
802        {
803            'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
804            'info_dict': {
805                'id': '_b-2C3KPAM0',
806                'ext': 'mp4',
807                'stretched_ratio': 16 / 9.,
808                'duration': 85,
809                'upload_date': '20110310',
810                'uploader_id': 'AllenMeow',
811                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
812                'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
813                'uploader': '孫ᄋᄅ',
814                'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
815            },
816        },
817        # url_encoded_fmt_stream_map is empty string
818        {
819            'url': 'qEJwOuvDf7I',
820            'info_dict': {
821                'id': 'qEJwOuvDf7I',
822                'ext': 'webm',
823                'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
824                'description': '',
825                'upload_date': '20150404',
826                'uploader_id': 'spbelect',
827                'uploader': 'Наблюдатели Петербурга',
828            },
829            'params': {
830                'skip_download': 'requires avconv',
831            },
832            'skip': 'This live event has ended.',
833        },
834        # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
835        {
836            'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
837            'info_dict': {
838                'id': 'FIl7x6_3R5Y',
839                'ext': 'webm',
840                'title': 'md5:7b81415841e02ecd4313668cde88737a',
841                'description': 'md5:116377fd2963b81ec4ce64b542173306',
842                'duration': 220,
843                'upload_date': '20150625',
844                'uploader_id': 'dorappi2000',
845                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
846                'uploader': 'dorappi2000',
847                'formats': 'mincount:31',
848            },
849            'skip': 'not actual anymore',
850        },
851        # DASH manifest with segment_list
852        {
853            'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
854            'md5': '8ce563a1d667b599d21064e982ab9e31',
855            'info_dict': {
856                'id': 'CsmdDsKjzN8',
857                'ext': 'mp4',
858                'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
859                'uploader': 'Airtek',
860                'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
861                'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
862                'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
863            },
864            'params': {
865                'youtube_include_dash_manifest': True,
866                'format': '135',  # bestvideo
867            },
868            'skip': 'This live event has ended.',
869        },
870        {
871            # Multifeed videos (multiple cameras), URL is for Main Camera
872            'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
873            'info_dict': {
874                'id': 'jqWvoWXjCVs',
875                'title': 'teamPGP: Rocket League Noob Stream',
876                'description': 'md5:dc7872fb300e143831327f1bae3af010',
877            },
878            'playlist': [{
879                'info_dict': {
880                    'id': 'jqWvoWXjCVs',
881                    'ext': 'mp4',
882                    'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
883                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
884                    'duration': 7335,
885                    'upload_date': '20150721',
886                    'uploader': 'Beer Games Beer',
887                    'uploader_id': 'beergamesbeer',
888                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
889                    'license': 'Standard YouTube License',
890                },
891            }, {
892                'info_dict': {
893                    'id': '6h8e8xoXJzg',
894                    'ext': 'mp4',
895                    'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
896                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
897                    'duration': 7337,
898                    'upload_date': '20150721',
899                    'uploader': 'Beer Games Beer',
900                    'uploader_id': 'beergamesbeer',
901                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
902                    'license': 'Standard YouTube License',
903                },
904            }, {
905                'info_dict': {
906                    'id': 'PUOgX5z9xZw',
907                    'ext': 'mp4',
908                    'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
909                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
910                    'duration': 7337,
911                    'upload_date': '20150721',
912                    'uploader': 'Beer Games Beer',
913                    'uploader_id': 'beergamesbeer',
914                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
915                    'license': 'Standard YouTube License',
916                },
917            }, {
918                'info_dict': {
919                    'id': 'teuwxikvS5k',
920                    'ext': 'mp4',
921                    'title': 'teamPGP: Rocket League Noob Stream (zim)',
922                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
923                    'duration': 7334,
924                    'upload_date': '20150721',
925                    'uploader': 'Beer Games Beer',
926                    'uploader_id': 'beergamesbeer',
927                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
928                    'license': 'Standard YouTube License',
929                },
930            }],
931            'params': {
932                'skip_download': True,
933            },
934            'skip': 'This video is not available.',
935        },
936        {
937            # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
938            'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
939            'info_dict': {
940                'id': 'gVfLd0zydlo',
941                'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
942            },
943            'playlist_count': 2,
944            'skip': 'Not multifeed anymore',
945        },
946        {
947            'url': 'https://vid.plus/FlRa-iH7PGw',
948            'only_matching': True,
949        },
950        {
951            'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
952            'only_matching': True,
953        },
954        {
955            # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
956            # Also tests cut-off URL expansion in video description (see
957            # https://github.com/ytdl-org/youtube-dl/issues/1892,
958            # https://github.com/ytdl-org/youtube-dl/issues/8164)
959            'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
960            'info_dict': {
961                'id': 'lsguqyKfVQg',
962                'ext': 'mp4',
963                'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
964                'alt_title': 'Dark Walk - Position Music',
965                'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
966                'duration': 133,
967                'upload_date': '20151119',
968                'uploader_id': 'IronSoulElf',
969                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
970                'uploader': 'IronSoulElf',
971                'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
972                'track': 'Dark Walk - Position Music',
973                'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
974                'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
975            },
976            'params': {
977                'skip_download': True,
978            },
979        },
980        {
981            # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
982            'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
983            'only_matching': True,
984        },
985        {
986            # Video with yt:stretch=17:0
987            'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
988            'info_dict': {
989                'id': 'Q39EVAstoRM',
990                'ext': 'mp4',
991                'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
992                'description': 'md5:ee18a25c350637c8faff806845bddee9',
993                'upload_date': '20151107',
994                'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
995                'uploader': 'CH GAMER DROID',
996            },
997            'params': {
998                'skip_download': True,
999            },
1000            'skip': 'This video does not exist.',
1001        },
1002        {
1003            # Video licensed under Creative Commons
1004            'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1005            'info_dict': {
1006                'id': 'M4gD1WSo5mA',
1007                'ext': 'mp4',
1008                'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1009                'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1010                'duration': 721,
1011                'upload_date': '20150127',
1012                'uploader_id': 'BerkmanCenter',
1013                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1014                'uploader': 'The Berkman Klein Center for Internet & Society',
1015                'license': 'Creative Commons Attribution license (reuse allowed)',
1016            },
1017            'params': {
1018                'skip_download': True,
1019            },
1020        },
1021        {
1022            # Channel-like uploader_url
1023            'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1024            'info_dict': {
1025                'id': 'eQcmzGIKrzg',
1026                'ext': 'mp4',
1027                'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1028                'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1029                'duration': 4060,
1030                'upload_date': '20151119',
1031                'uploader': 'Bernie Sanders',
1032                'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1033                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1034                'license': 'Creative Commons Attribution license (reuse allowed)',
1035            },
1036            'params': {
1037                'skip_download': True,
1038            },
1039        },
1040        {
1041            'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1042            'only_matching': True,
1043        },
1044        {
1045            # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1046            'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1047            'only_matching': True,
1048        },
1049        {
1050            # Rental video preview
1051            'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1052            'info_dict': {
1053                'id': 'uGpuVWrhIzE',
1054                'ext': 'mp4',
1055                'title': 'Piku - Trailer',
1056                'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1057                'upload_date': '20150811',
1058                'uploader': 'FlixMatrix',
1059                'uploader_id': 'FlixMatrixKaravan',
1060                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1061                'license': 'Standard YouTube License',
1062            },
1063            'params': {
1064                'skip_download': True,
1065            },
1066            'skip': 'This video is not available.',
1067        },
1068        {
1069            # YouTube Red video with episode data
1070            'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1071            'info_dict': {
1072                'id': 'iqKdEhx-dD4',
1073                'ext': 'mp4',
1074                'title': 'Isolation - Mind Field (Ep 1)',
1075                'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1076                'duration': 2085,
1077                'upload_date': '20170118',
1078                'uploader': 'Vsauce',
1079                'uploader_id': 'Vsauce',
1080                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1081                'series': 'Mind Field',
1082                'season_number': 1,
1083                'episode_number': 1,
1084            },
1085            'params': {
1086                'skip_download': True,
1087            },
1088            'expected_warnings': [
1089                'Skipping DASH manifest',
1090            ],
1091        },
1092        {
1093            # The following content has been identified by the YouTube community
1094            # as inappropriate or offensive to some audiences.
1095            'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1096            'info_dict': {
1097                'id': '6SJNVb0GnPI',
1098                'ext': 'mp4',
1099                'title': 'Race Differences in Intelligence',
1100                'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1101                'duration': 965,
1102                'upload_date': '20140124',
1103                'uploader': 'New Century Foundation',
1104                'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1105                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1106            },
1107            'params': {
1108                'skip_download': True,
1109            },
1110        },
1111        {
1112            # itag 212
1113            'url': '1t24XAntNCY',
1114            'only_matching': True,
1115        },
1116        {
1117            # geo restricted to JP
1118            'url': 'sJL6WA-aGkQ',
1119            'only_matching': True,
1120        },
1121        {
1122            'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1123            'only_matching': True,
1124        },
1125        {
1126            'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1127            'only_matching': True,
1128        },
1129        {
1130            # DRM protected
1131            'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1132            'only_matching': True,
1133        },
1134        {
1135            # Video with unsupported adaptive stream type formats
1136            'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1137            'info_dict': {
1138                'id': 'Z4Vy8R84T1U',
1139                'ext': 'mp4',
1140                'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1141                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1142                'duration': 433,
1143                'upload_date': '20130923',
1144                'uploader': 'Amelia Putri Harwita',
1145                'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1146                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1147                'formats': 'maxcount:10',
1148            },
1149            'params': {
1150                'skip_download': True,
1151                'youtube_include_dash_manifest': False,
1152            },
1153            'skip': 'not actual anymore',
1154        },
1155        {
1156            # Youtube Music Auto-generated description
1157            'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1158            'info_dict': {
1159                'id': 'MgNrAu2pzNs',
1160                'ext': 'mp4',
1161                'title': 'Voyeur Girl',
1162                'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1163                'upload_date': '20190312',
1164                'uploader': 'Stephen - Topic',
1165                'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1166                'artist': 'Stephen',
1167                'track': 'Voyeur Girl',
1168                'album': 'it\'s too much love to know my dear',
1169                'release_date': '20190313',
1170                'release_year': 2019,
1171            },
1172            'params': {
1173                'skip_download': True,
1174            },
1175        },
1176        {
1177            # Youtube Music Auto-generated description
1178            # Retrieve 'artist' field from 'Artist:' in video description
1179            # when it is present on youtube music video
1180            'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1181            'info_dict': {
1182                'id': 'k0jLE7tTwjY',
1183                'ext': 'mp4',
1184                'title': 'Latch Feat. Sam Smith',
1185                'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1186                'upload_date': '20150110',
1187                'uploader': 'Various Artists - Topic',
1188                'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1189                'artist': 'Disclosure',
1190                'track': 'Latch Feat. Sam Smith',
1191                'album': 'Latch Featuring Sam Smith',
1192                'release_date': '20121008',
1193                'release_year': 2012,
1194            },
1195            'params': {
1196                'skip_download': True,
1197            },
1198        },
1199        {
1200            # Youtube Music Auto-generated description
1201            # handle multiple artists on youtube music video
1202            'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1203            'info_dict': {
1204                'id': '74qn0eJSjpA',
1205                'ext': 'mp4',
1206                'title': 'Eastside',
1207                'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1208                'upload_date': '20180710',
1209                'uploader': 'Benny Blanco - Topic',
1210                'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1211                'artist': 'benny blanco, Halsey, Khalid',
1212                'track': 'Eastside',
1213                'album': 'Eastside',
1214                'release_date': '20180713',
1215                'release_year': 2018,
1216            },
1217            'params': {
1218                'skip_download': True,
1219            },
1220        },
1221        {
1222            # Youtube Music Auto-generated description
1223            # handle youtube music video with release_year and no release_date
1224            'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1225            'info_dict': {
1226                'id': '-hcAI0g-f5M',
1227                'ext': 'mp4',
1228                'title': 'Put It On Me',
1229                'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
1230                'upload_date': '20180426',
1231                'uploader': 'Matt Maeson - Topic',
1232                'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1233                'artist': 'Matt Maeson',
1234                'track': 'Put It On Me',
1235                'album': 'The Hearse',
1236                'release_date': None,
1237                'release_year': 2018,
1238            },
1239            'params': {
1240                'skip_download': True,
1241            },
1242        },
1243        {
1244            'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1245            'only_matching': True,
1246        },
1247        {
1248            # invalid -> valid video id redirection
1249            'url': 'DJztXj2GPfl',
1250            'info_dict': {
1251                'id': 'DJztXj2GPfk',
1252                'ext': 'mp4',
1253                'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1254                'description': 'md5:bf577a41da97918e94fa9798d9228825',
1255                'upload_date': '20090125',
1256                'uploader': 'Prochorowka',
1257                'uploader_id': 'Prochorowka',
1258                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1259                'artist': 'Panjabi MC',
1260                'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1261                'album': 'Beware of the Boys (Mundian To Bach Ke)',
1262            },
1263            'params': {
1264                'skip_download': True,
1265            },
1266        },
1267        {
1268            # empty description results in an empty string
1269            'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1270            'info_dict': {
1271                'id': 'x41yOUIvK2k',
1272                'ext': 'mp4',
1273                'title': 'IMG 3456',
1274                'description': '',
1275                'upload_date': '20170613',
1276                'uploader_id': 'ElevageOrVert',
1277                'uploader': 'ElevageOrVert',
1278            },
1279            'params': {
1280                'skip_download': True,
1281            },
1282        },
1283    ]
1284
1285    def __init__(self, *args, **kwargs):
1286        super(YoutubeIE, self).__init__(*args, **kwargs)
1287        self._player_cache = {}
1288
1289    def report_video_info_webpage_download(self, video_id):
1290        """Report attempt to download video info webpage."""
1291        self.to_screen('%s: Downloading video info webpage' % video_id)
1292
1293    def report_information_extraction(self, video_id):
1294        """Report attempt to extract video information."""
1295        self.to_screen('%s: Extracting video information' % video_id)
1296
1297    def report_unavailable_format(self, video_id, format):
1298        """Report extracted video URL."""
1299        self.to_screen('%s: Format %s not available' % (video_id, format))
1300
1301    def report_rtmp_download(self):
1302        """Indicate the download will use the RTMP protocol."""
1303        self.to_screen('RTMP download detected')
1304
1305    def _signature_cache_id(self, example_sig):
1306        """ Return a string representation of a signature """
1307        return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1308
1309    @classmethod
1310    def _extract_player_info(cls, player_url):
1311        for player_re in cls._PLAYER_INFO_RE:
1312            id_m = re.search(player_re, player_url)
1313            if id_m:
1314                break
1315        else:
1316            raise ExtractorError('Cannot identify player %r' % player_url)
1317        return id_m.group('ext'), id_m.group('id')
1318
1319    def _extract_signature_function(self, video_id, player_url, example_sig):
1320        player_type, player_id = self._extract_player_info(player_url)
1321
1322        # Read from filesystem cache
1323        func_id = '%s_%s_%s' % (
1324            player_type, player_id, self._signature_cache_id(example_sig))
1325        assert os.path.basename(func_id) == func_id
1326
1327        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1328        if cache_spec is not None:
1329            return lambda s: ''.join(s[i] for i in cache_spec)
1330
1331        download_note = (
1332            'Downloading player %s' % player_url
1333            if self._downloader.params.get('verbose') else
1334            'Downloading %s player %s' % (player_type, player_id)
1335        )
1336        if player_type == 'js':
1337            code = self._download_webpage(
1338                player_url, video_id,
1339                note=download_note,
1340                errnote='Download of %s failed' % player_url)
1341            res = self._parse_sig_js(code)
1342        elif player_type == 'swf':
1343            urlh = self._request_webpage(
1344                player_url, video_id,
1345                note=download_note,
1346                errnote='Download of %s failed' % player_url)
1347            code = urlh.read()
1348            res = self._parse_sig_swf(code)
1349        else:
1350            assert False, 'Invalid player type %r' % player_type
1351
1352        test_string = ''.join(map(compat_chr, range(len(example_sig))))
1353        cache_res = res(test_string)
1354        cache_spec = [ord(c) for c in cache_res]
1355
1356        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1357        return res
1358
1359    def _print_sig_code(self, func, example_sig):
1360        def gen_sig_code(idxs):
1361            def _genslice(start, end, step):
1362                starts = '' if start == 0 else str(start)
1363                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1364                steps = '' if step == 1 else (':%d' % step)
1365                return 's[%s%s%s]' % (starts, ends, steps)
1366
1367            step = None
1368            # Quelch pyflakes warnings - start will be set when step is set
1369            start = '(Never used)'
1370            for i, prev in zip(idxs[1:], idxs[:-1]):
1371                if step is not None:
1372                    if i - prev == step:
1373                        continue
1374                    yield _genslice(start, prev, step)
1375                    step = None
1376                    continue
1377                if i - prev in [-1, 1]:
1378                    step = i - prev
1379                    start = prev
1380                    continue
1381                else:
1382                    yield 's[%d]' % prev
1383            if step is None:
1384                yield 's[%d]' % i
1385            else:
1386                yield _genslice(start, i, step)
1387
1388        test_string = ''.join(map(compat_chr, range(len(example_sig))))
1389        cache_res = func(test_string)
1390        cache_spec = [ord(c) for c in cache_res]
1391        expr_code = ' + '.join(gen_sig_code(cache_spec))
1392        signature_id_tuple = '(%s)' % (
1393            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1394        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1395                '    return %s\n') % (signature_id_tuple, expr_code)
1396        self.to_screen('Extracted signature function:\n' + code)
1397
1398    def _parse_sig_js(self, jscode):
1399        funcname = self._search_regex(
1400            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1401             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1402             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1403             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1404             # Obsolete patterns
1405             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1406             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1407             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1408             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1409             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1410             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1411             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1412             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1413            jscode, 'Initial JS player signature function name', group='sig')
1414
1415        jsi = JSInterpreter(jscode)
1416        initial_function = jsi.extract_function(funcname)
1417        return lambda s: initial_function([s])
1418
1419    def _parse_sig_swf(self, file_contents):
1420        swfi = SWFInterpreter(file_contents)
1421        TARGET_CLASSNAME = 'SignatureDecipher'
1422        searched_class = swfi.extract_class(TARGET_CLASSNAME)
1423        initial_function = swfi.extract_function(searched_class, 'decipher')
1424        return lambda s: initial_function([s])
1425
1426    def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1427        """Turn the encrypted s field into a working signature"""
1428
1429        if player_url is None:
1430            raise ExtractorError('Cannot decrypt signature without player_url')
1431
1432        if player_url.startswith('//'):
1433            player_url = 'https:' + player_url
1434        elif not re.match(r'https?://', player_url):
1435            player_url = compat_urlparse.urljoin(
1436                'https://www.youtube.com', player_url)
1437        try:
1438            player_id = (player_url, self._signature_cache_id(s))
1439            if player_id not in self._player_cache:
1440                func = self._extract_signature_function(
1441                    video_id, player_url, s
1442                )
1443                self._player_cache[player_id] = func
1444            func = self._player_cache[player_id]
1445            if self._downloader.params.get('youtube_print_sig_code'):
1446                self._print_sig_code(func, s)
1447            return func(s)
1448        except Exception as e:
1449            tb = traceback.format_exc()
1450            raise ExtractorError(
1451                'Signature extraction failed: ' + tb, cause=e)
1452
1453    def _get_subtitles(self, video_id, webpage):
1454        try:
1455            subs_doc = self._download_xml(
1456                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1457                video_id, note=False)
1458        except ExtractorError as err:
1459            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1460            return {}
1461
1462        sub_lang_list = {}
1463        for track in subs_doc.findall('track'):
1464            lang = track.attrib['lang_code']
1465            if lang in sub_lang_list:
1466                continue
1467            sub_formats = []
1468            for ext in self._SUBTITLE_FORMATS:
1469                params = compat_urllib_parse_urlencode({
1470                    'lang': lang,
1471                    'v': video_id,
1472                    'fmt': ext,
1473                    'name': track.attrib['name'].encode('utf-8'),
1474                })
1475                sub_formats.append({
1476                    'url': 'https://www.youtube.com/api/timedtext?' + params,
1477                    'ext': ext,
1478                })
1479            sub_lang_list[lang] = sub_formats
1480        if not sub_lang_list:
1481            self._downloader.report_warning('video doesn\'t have subtitles')
1482            return {}
1483        return sub_lang_list
1484
1485    def _get_ytplayer_config(self, video_id, webpage):
1486        patterns = (
1487            # User data may contain arbitrary character sequences that may affect
1488            # JSON extraction with regex, e.g. when '};' is contained the second
1489            # regex won't capture the whole JSON. Yet working around by trying more
1490            # concrete regex first keeping in mind proper quoted string handling
1491            # to be implemented in future that will replace this workaround (see
1492            # https://github.com/ytdl-org/youtube-dl/issues/7468,
1493            # https://github.com/ytdl-org/youtube-dl/pull/7599)
1494            r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1495            r';ytplayer\.config\s*=\s*({.+?});',
1496        )
1497        config = self._search_regex(
1498            patterns, webpage, 'ytplayer.config', default=None)
1499        if config:
1500            return self._parse_json(
1501                uppercase_escape(config), video_id, fatal=False)
1502
1503    def _get_automatic_captions(self, video_id, webpage):
1504        """We need the webpage for getting the captions url, pass it as an
1505           argument to speed up the process."""
1506        self.to_screen('%s: Looking for automatic captions' % video_id)
1507        player_config = self._get_ytplayer_config(video_id, webpage)
1508        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1509        if not player_config:
1510            self._downloader.report_warning(err_msg)
1511            return {}
1512        try:
1513            args = player_config['args']
1514            caption_url = args.get('ttsurl')
1515            if caption_url:
1516                timestamp = args['timestamp']
1517                # We get the available subtitles
1518                list_params = compat_urllib_parse_urlencode({
1519                    'type': 'list',
1520                    'tlangs': 1,
1521                    'asrs': 1,
1522                })
1523                list_url = caption_url + '&' + list_params
1524                caption_list = self._download_xml(list_url, video_id)
1525                original_lang_node = caption_list.find('track')
1526                if original_lang_node is None:
1527                    self._downloader.report_warning('Video doesn\'t have automatic captions')
1528                    return {}
1529                original_lang = original_lang_node.attrib['lang_code']
1530                caption_kind = original_lang_node.attrib.get('kind', '')
1531
1532                sub_lang_list = {}
1533                for lang_node in caption_list.findall('target'):
1534                    sub_lang = lang_node.attrib['lang_code']
1535                    sub_formats = []
1536                    for ext in self._SUBTITLE_FORMATS:
1537                        params = compat_urllib_parse_urlencode({
1538                            'lang': original_lang,
1539                            'tlang': sub_lang,
1540                            'fmt': ext,
1541                            'ts': timestamp,
1542                            'kind': caption_kind,
1543                        })
1544                        sub_formats.append({
1545                            'url': caption_url + '&' + params,
1546                            'ext': ext,
1547                        })
1548                    sub_lang_list[sub_lang] = sub_formats
1549                return sub_lang_list
1550
1551            def make_captions(sub_url, sub_langs):
1552                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1553                caption_qs = compat_parse_qs(parsed_sub_url.query)
1554                captions = {}
1555                for sub_lang in sub_langs:
1556                    sub_formats = []
1557                    for ext in self._SUBTITLE_FORMATS:
1558                        caption_qs.update({
1559                            'tlang': [sub_lang],
1560                            'fmt': [ext],
1561                        })
1562                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1563                            query=compat_urllib_parse_urlencode(caption_qs, True)))
1564                        sub_formats.append({
1565                            'url': sub_url,
1566                            'ext': ext,
1567                        })
1568                    captions[sub_lang] = sub_formats
1569                return captions
1570
1571            # New captions format as of 22.06.2017
1572            player_response = args.get('player_response')
1573            if player_response and isinstance(player_response, compat_str):
1574                player_response = self._parse_json(
1575                    player_response, video_id, fatal=False)
1576                if player_response:
1577                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1578                    base_url = renderer['captionTracks'][0]['baseUrl']
1579                    sub_lang_list = []
1580                    for lang in renderer['translationLanguages']:
1581                        lang_code = lang.get('languageCode')
1582                        if lang_code:
1583                            sub_lang_list.append(lang_code)
1584                    return make_captions(base_url, sub_lang_list)
1585
1586            # Some videos don't provide ttsurl but rather caption_tracks and
1587            # caption_translation_languages (e.g. 20LmZk1hakA)
1588            # Does not used anymore as of 22.06.2017
1589            caption_tracks = args['caption_tracks']
1590            caption_translation_languages = args['caption_translation_languages']
1591            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1592            sub_lang_list = []
1593            for lang in caption_translation_languages.split(','):
1594                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1595                sub_lang = lang_qs.get('lc', [None])[0]
1596                if sub_lang:
1597                    sub_lang_list.append(sub_lang)
1598            return make_captions(caption_url, sub_lang_list)
1599        # An extractor error can be raise by the download process if there are
1600        # no automatic captions but there are subtitles
1601        except (KeyError, IndexError, ExtractorError):
1602            self._downloader.report_warning(err_msg)
1603            return {}
1604
1605    def _mark_watched(self, video_id, video_info, player_response):
1606        playback_url = url_or_none(try_get(
1607            player_response,
1608            lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1609            video_info, lambda x: x['videostats_playback_base_url'][0]))
1610        if not playback_url:
1611            return
1612        parsed_playback_url = compat_urlparse.urlparse(playback_url)
1613        qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1614
1615        # cpn generation algorithm is reverse engineered from base.js.
1616        # In fact it works even with dummy cpn.
1617        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1618        cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1619
1620        qs.update({
1621            'ver': ['2'],
1622            'cpn': [cpn],
1623        })
1624        playback_url = compat_urlparse.urlunparse(
1625            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1626
1627        self._download_webpage(
1628            playback_url, video_id, 'Marking watched',
1629            'Unable to mark watched', fatal=False)
1630
1631    @staticmethod
1632    def _extract_urls(webpage):
1633        # Embedded YouTube player
1634        entries = [
1635            unescapeHTML(mobj.group('url'))
1636            for mobj in re.finditer(r'''(?x)
1637            (?:
1638                <iframe[^>]+?src=|
1639                data-video-url=|
1640                <embed[^>]+?src=|
1641                embedSWF\(?:\s*|
1642                <object[^>]+data=|
1643                new\s+SWFObject\(
1644            )
1645            (["\'])
1646                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1647                (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1648            \1''', webpage)]
1649
1650        # lazyYT YouTube embed
1651        entries.extend(list(map(
1652            unescapeHTML,
1653            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1654
1655        # Wordpress "YouTube Video Importer" plugin
1656        matches = re.findall(r'''(?x)<div[^>]+
1657            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1658            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1659        entries.extend(m[-1] for m in matches)
1660
1661        return entries
1662
1663    @staticmethod
1664    def _extract_url(webpage):
1665        urls = YoutubeIE._extract_urls(webpage)
1666        return urls[0] if urls else None
1667
1668    @classmethod
1669    def extract_id(cls, url):
1670        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1671        if mobj is None:
1672            raise ExtractorError('Invalid URL: %s' % url)
1673        video_id = mobj.group(2)
1674        return video_id
1675
1676    def _extract_chapters_from_json(self, webpage, video_id, duration):
1677        if not webpage:
1678            return
1679        player = self._parse_json(
1680            self._search_regex(
1681                r'RELATED_PLAYER_ARGS["\']\s*:\s*({.+})\s*,?\s*\n', webpage,
1682                'player args', default='{}'),
1683            video_id, fatal=False)
1684        if not player or not isinstance(player, dict):
1685            return
1686        watch_next_response = player.get('watch_next_response')
1687        if not isinstance(watch_next_response, compat_str):
1688            return
1689        response = self._parse_json(watch_next_response, video_id, fatal=False)
1690        if not response or not isinstance(response, dict):
1691            return
1692        chapters_list = try_get(
1693            response,
1694            lambda x: x['playerOverlays']
1695                       ['playerOverlayRenderer']
1696                       ['decoratedPlayerBarRenderer']
1697                       ['decoratedPlayerBarRenderer']
1698                       ['playerBar']
1699                       ['chapteredPlayerBarRenderer']
1700                       ['chapters'],
1701            list)
1702        if not chapters_list:
1703            return
1704
1705        def chapter_time(chapter):
1706            return float_or_none(
1707                try_get(
1708                    chapter,
1709                    lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1710                    int),
1711                scale=1000)
1712        chapters = []
1713        for next_num, chapter in enumerate(chapters_list, start=1):
1714            start_time = chapter_time(chapter)
1715            if start_time is None:
1716                continue
1717            end_time = (chapter_time(chapters_list[next_num])
1718                        if next_num < len(chapters_list) else duration)
1719            if end_time is None:
1720                continue
1721            title = try_get(
1722                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1723                compat_str)
1724            chapters.append({
1725                'start_time': start_time,
1726                'end_time': end_time,
1727                'title': title,
1728            })
1729        return chapters
1730
1731    @staticmethod
1732    def _extract_chapters_from_description(description, duration):
1733        if not description:
1734            return None
1735        chapter_lines = re.findall(
1736            r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1737            description)
1738        if not chapter_lines:
1739            return None
1740        chapters = []
1741        for next_num, (chapter_line, time_point) in enumerate(
1742                chapter_lines, start=1):
1743            start_time = parse_duration(time_point)
1744            if start_time is None:
1745                continue
1746            if start_time > duration:
1747                break
1748            end_time = (duration if next_num == len(chapter_lines)
1749                        else parse_duration(chapter_lines[next_num][1]))
1750            if end_time is None:
1751                continue
1752            if end_time > duration:
1753                end_time = duration
1754            if start_time > end_time:
1755                break
1756            chapter_title = re.sub(
1757                r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1758            chapter_title = re.sub(r'\s+', ' ', chapter_title)
1759            chapters.append({
1760                'start_time': start_time,
1761                'end_time': end_time,
1762                'title': chapter_title,
1763            })
1764        return chapters
1765
1766    def _extract_chapters(self, webpage, description, video_id, duration):
1767        return (self._extract_chapters_from_json(webpage, video_id, duration)
1768                or self._extract_chapters_from_description(description, duration))
1769
1770    def _real_extract(self, url):
1771        url, smuggled_data = unsmuggle_url(url, {})
1772
1773        proto = (
1774            'http' if self._downloader.params.get('prefer_insecure', False)
1775            else 'https')
1776
1777        start_time = None
1778        end_time = None
1779        parsed_url = compat_urllib_parse_urlparse(url)
1780        for component in [parsed_url.fragment, parsed_url.query]:
1781            query = compat_parse_qs(component)
1782            if start_time is None and 't' in query:
1783                start_time = parse_duration(query['t'][0])
1784            if start_time is None and 'start' in query:
1785                start_time = parse_duration(query['start'][0])
1786            if end_time is None and 'end' in query:
1787                end_time = parse_duration(query['end'][0])
1788
1789        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1790        mobj = re.search(self._NEXT_URL_RE, url)
1791        if mobj:
1792            url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1793        video_id = self.extract_id(url)
1794
1795        # Get video webpage
1796        url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1797        video_webpage, urlh = self._download_webpage_handle(url, video_id)
1798
1799        qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1800        video_id = qs.get('v', [None])[0] or video_id
1801
1802        # Attempt to extract SWF player URL
1803        mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1804        if mobj is not None:
1805            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1806        else:
1807            player_url = None
1808
1809        dash_mpds = []
1810
1811        def add_dash_mpd(video_info):
1812            dash_mpd = video_info.get('dashmpd')
1813            if dash_mpd and dash_mpd[0] not in dash_mpds:
1814                dash_mpds.append(dash_mpd[0])
1815
1816        def add_dash_mpd_pr(pl_response):
1817            dash_mpd = url_or_none(try_get(
1818                pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1819                compat_str))
1820            if dash_mpd and dash_mpd not in dash_mpds:
1821                dash_mpds.append(dash_mpd)
1822
1823        is_live = None
1824        view_count = None
1825
1826        def extract_view_count(v_info):
1827            return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1828
1829        def extract_player_response(player_response, video_id):
1830            pl_response = str_or_none(player_response)
1831            if not pl_response:
1832                return
1833            pl_response = self._parse_json(pl_response, video_id, fatal=False)
1834            if isinstance(pl_response, dict):
1835                add_dash_mpd_pr(pl_response)
1836                return pl_response
1837
1838        player_response = {}
1839
1840        # Get video info
1841        video_info = {}
1842        embed_webpage = None
1843        if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1844                or re.search(r'player-age-gate-content">', video_webpage) is not None):
1845            age_gate = True
1846            # We simulate the access to the video from www.youtube.com/v/{video_id}
1847            # this can be viewed without login into Youtube
1848            url = proto + '://www.youtube.com/embed/%s' % video_id
1849            embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1850            data = compat_urllib_parse_urlencode({
1851                'video_id': video_id,
1852                'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1853                'sts': self._search_regex(
1854                    r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1855            })
1856            video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1857            try:
1858                video_info_webpage = self._download_webpage(
1859                    video_info_url, video_id,
1860                    note='Refetching age-gated info webpage',
1861                    errnote='unable to download video info webpage')
1862            except ExtractorError:
1863                video_info_webpage = None
1864            if video_info_webpage:
1865                video_info = compat_parse_qs(video_info_webpage)
1866                pl_response = video_info.get('player_response', [None])[0]
1867                player_response = extract_player_response(pl_response, video_id)
1868                add_dash_mpd(video_info)
1869                view_count = extract_view_count(video_info)
1870        else:
1871            age_gate = False
1872            # Try looking directly into the video webpage
1873            ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1874            if ytplayer_config:
1875                args = ytplayer_config['args']
1876                if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1877                    # Convert to the same format returned by compat_parse_qs
1878                    video_info = dict((k, [v]) for k, v in args.items())
1879                    add_dash_mpd(video_info)
1880                # Rental video is not rented but preview is available (e.g.
1881                # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1882                # https://github.com/ytdl-org/youtube-dl/issues/10532)
1883                if not video_info and args.get('ypc_vid'):
1884                    return self.url_result(
1885                        args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1886                if args.get('livestream') == '1' or args.get('live_playback') == 1:
1887                    is_live = True
1888                if not player_response:
1889                    player_response = extract_player_response(args.get('player_response'), video_id)
1890            if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1891                add_dash_mpd_pr(player_response)
1892
1893        def extract_unavailable_message():
1894            messages = []
1895            for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1896                msg = self._html_search_regex(
1897                    r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1898                    video_webpage, 'unavailable %s' % kind, default=None)
1899                if msg:
1900                    messages.append(msg)
1901            if messages:
1902                return '\n'.join(messages)
1903
1904        if not video_info and not player_response:
1905            unavailable_message = extract_unavailable_message()
1906            if not unavailable_message:
1907                unavailable_message = 'Unable to extract video data'
1908            raise ExtractorError(
1909                'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1910
1911        if not isinstance(video_info, dict):
1912            video_info = {}
1913
1914        video_details = try_get(
1915            player_response, lambda x: x['videoDetails'], dict) or {}
1916
1917        microformat = try_get(
1918            player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1919
1920        video_title = video_info.get('title', [None])[0] or video_details.get('title')
1921        if not video_title:
1922            self._downloader.report_warning('Unable to extract video title')
1923            video_title = '_'
1924
1925        description_original = video_description = get_element_by_id("eow-description", video_webpage)
1926        if video_description:
1927
1928            def replace_url(m):
1929                redir_url = compat_urlparse.urljoin(url, m.group(1))
1930                parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1931                if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1932                    qs = compat_parse_qs(parsed_redir_url.query)
1933                    q = qs.get('q')
1934                    if q and q[0]:
1935                        return q[0]
1936                return redir_url
1937
1938            description_original = video_description = re.sub(r'''(?x)
1939                <a\s+
1940                    (?:[a-zA-Z-]+="[^"]*"\s+)*?
1941                    (?:title|href)="([^"]+)"\s+
1942                    (?:[a-zA-Z-]+="[^"]*"\s+)*?
1943                    class="[^"]*"[^>]*>
1944                [^<]+\.{3}\s*
1945                </a>
1946            ''', replace_url, video_description)
1947            video_description = clean_html(video_description)
1948        else:
1949            video_description = video_details.get('shortDescription')
1950            if video_description is None:
1951                video_description = self._html_search_meta('description', video_webpage)
1952
1953        if not smuggled_data.get('force_singlefeed', False):
1954            if not self._downloader.params.get('noplaylist'):
1955                multifeed_metadata_list = try_get(
1956                    player_response,
1957                    lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1958                    compat_str) or try_get(
1959                    video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1960                if multifeed_metadata_list:
1961                    entries = []
1962                    feed_ids = []
1963                    for feed in multifeed_metadata_list.split(','):
1964                        # Unquote should take place before split on comma (,) since textual
1965                        # fields may contain comma as well (see
1966                        # https://github.com/ytdl-org/youtube-dl/issues/8536)
1967                        feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1968
1969                        def feed_entry(name):
1970                            return try_get(feed_data, lambda x: x[name][0], compat_str)
1971
1972                        feed_id = feed_entry('id')
1973                        if not feed_id:
1974                            continue
1975                        feed_title = feed_entry('title')
1976                        title = video_title
1977                        if feed_title:
1978                            title += ' (%s)' % feed_title
1979                        entries.append({
1980                            '_type': 'url_transparent',
1981                            'ie_key': 'Youtube',
1982                            'url': smuggle_url(
1983                                '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1984                                {'force_singlefeed': True}),
1985                            'title': title,
1986                        })
1987                        feed_ids.append(feed_id)
1988                    self.to_screen(
1989                        'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1990                        % (', '.join(feed_ids), video_id))
1991                    return self.playlist_result(entries, video_id, video_title, video_description)
1992            else:
1993                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1994
1995        if view_count is None:
1996            view_count = extract_view_count(video_info)
1997        if view_count is None and video_details:
1998            view_count = int_or_none(video_details.get('viewCount'))
1999        if view_count is None and microformat:
2000            view_count = int_or_none(microformat.get('viewCount'))
2001
2002        if is_live is None:
2003            is_live = bool_or_none(video_details.get('isLive'))
2004
2005        # Check for "rental" videos
2006        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
2007            raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
2008
2009        def _extract_filesize(media_url):
2010            return int_or_none(self._search_regex(
2011                r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
2012
2013        streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
2014        streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
2015
2016        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
2017            self.report_rtmp_download()
2018            formats = [{
2019                'format_id': '_rtmp',
2020                'protocol': 'rtmp',
2021                'url': video_info['conn'][0],
2022                'player_url': player_url,
2023            }]
2024        elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
2025            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
2026            if 'rtmpe%3Dyes' in encoded_url_map:
2027                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
2028            formats = []
2029            formats_spec = {}
2030            fmt_list = video_info.get('fmt_list', [''])[0]
2031            if fmt_list:
2032                for fmt in fmt_list.split(','):
2033                    spec = fmt.split('/')
2034                    if len(spec) > 1:
2035                        width_height = spec[1].split('x')
2036                        if len(width_height) == 2:
2037                            formats_spec[spec[0]] = {
2038                                'resolution': spec[1],
2039                                'width': int_or_none(width_height[0]),
2040                                'height': int_or_none(width_height[1]),
2041                            }
2042            for fmt in streaming_formats:
2043                itag = str_or_none(fmt.get('itag'))
2044                if not itag:
2045                    continue
2046                quality = fmt.get('quality')
2047                quality_label = fmt.get('qualityLabel') or quality
2048                formats_spec[itag] = {
2049                    'asr': int_or_none(fmt.get('audioSampleRate')),
2050                    'filesize': int_or_none(fmt.get('contentLength')),
2051                    'format_note': quality_label,
2052                    'fps': int_or_none(fmt.get('fps')),
2053                    'height': int_or_none(fmt.get('height')),
2054                    # bitrate for itag 43 is always 2147483647
2055                    'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2056                    'width': int_or_none(fmt.get('width')),
2057                }
2058
2059            for fmt in streaming_formats:
2060                if fmt.get('drmFamilies') or fmt.get('drm_families'):
2061                    continue
2062                url = url_or_none(fmt.get('url'))
2063
2064                if not url:
2065                    cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2066                    if not cipher:
2067                        continue
2068                    url_data = compat_parse_qs(cipher)
2069                    url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2070                    if not url:
2071                        continue
2072                else:
2073                    cipher = None
2074                    url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2075
2076                stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2077                # Unsupported FORMAT_STREAM_TYPE_OTF
2078                if stream_type == 3:
2079                    continue
2080
2081                format_id = fmt.get('itag') or url_data['itag'][0]
2082                if not format_id:
2083                    continue
2084                format_id = compat_str(format_id)
2085
2086                if cipher:
2087                    if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2088                        ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
2089                        jsplayer_url_json = self._search_regex(
2090                            ASSETS_RE,
2091                            embed_webpage if age_gate else video_webpage,
2092                            'JS player URL (1)', default=None)
2093                        if not jsplayer_url_json and not age_gate:
2094                            # We need the embed website after all
2095                            if embed_webpage is None:
2096                                embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2097                                embed_webpage = self._download_webpage(
2098                                    embed_url, video_id, 'Downloading embed webpage')
2099                            jsplayer_url_json = self._search_regex(
2100                                ASSETS_RE, embed_webpage, 'JS player URL')
2101
2102                        player_url = json.loads(jsplayer_url_json)
2103                        if player_url is None:
2104                            player_url_json = self._search_regex(
2105                                r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2106                                video_webpage, 'age gate player URL')
2107                            player_url = json.loads(player_url_json)
2108
2109                    if 'sig' in url_data:
2110                        url += '&signature=' + url_data['sig'][0]
2111                    elif 's' in url_data:
2112                        encrypted_sig = url_data['s'][0]
2113
2114                        if self._downloader.params.get('verbose'):
2115                            if player_url is None:
2116                                player_desc = 'unknown'
2117                            else:
2118                                player_type, player_version = self._extract_player_info(player_url)
2119                                player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2120                            parts_sizes = self._signature_cache_id(encrypted_sig)
2121                            self.to_screen('{%s} signature length %s, %s' %
2122                                           (format_id, parts_sizes, player_desc))
2123
2124                        signature = self._decrypt_signature(
2125                            encrypted_sig, video_id, player_url, age_gate)
2126                        sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2127                        url += '&%s=%s' % (sp, signature)
2128                if 'ratebypass' not in url:
2129                    url += '&ratebypass=yes'
2130
2131                dct = {
2132                    'format_id': format_id,
2133                    'url': url,
2134                    'player_url': player_url,
2135                }
2136                if format_id in self._formats:
2137                    dct.update(self._formats[format_id])
2138                if format_id in formats_spec:
2139                    dct.update(formats_spec[format_id])
2140
2141                # Some itags are not included in DASH manifest thus corresponding formats will
2142                # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2143                # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2144                mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2145                width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2146
2147                if width is None:
2148                    width = int_or_none(fmt.get('width'))
2149                if height is None:
2150                    height = int_or_none(fmt.get('height'))
2151
2152                filesize = int_or_none(url_data.get(
2153                    'clen', [None])[0]) or _extract_filesize(url)
2154
2155                quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2156                quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2157
2158                tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2159                       or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2160                fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2161
2162                more_fields = {
2163                    'filesize': filesize,
2164                    'tbr': tbr,
2165                    'width': width,
2166                    'height': height,
2167                    'fps': fps,
2168                    'format_note': quality_label or quality,
2169                }
2170                for key, value in more_fields.items():
2171                    if value:
2172                        dct[key] = value
2173                type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2174                if type_:
2175                    type_split = type_.split(';')
2176                    kind_ext = type_split[0].split('/')
2177                    if len(kind_ext) == 2:
2178                        kind, _ = kind_ext
2179                        dct['ext'] = mimetype2ext(type_split[0])
2180                        if kind in ('audio', 'video'):
2181                            codecs = None
2182                            for mobj in re.finditer(
2183                                    r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2184                                if mobj.group('key') == 'codecs':
2185                                    codecs = mobj.group('val')
2186                                    break
2187                            if codecs:
2188                                dct.update(parse_codecs(codecs))
2189                if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2190                    dct['downloader_options'] = {
2191                        # Youtube throttles chunks >~10M
2192                        'http_chunk_size': 10485760,
2193                    }
2194                formats.append(dct)
2195        else:
2196            manifest_url = (
2197                url_or_none(try_get(
2198                    player_response,
2199                    lambda x: x['streamingData']['hlsManifestUrl'],
2200                    compat_str))
2201                or url_or_none(try_get(
2202                    video_info, lambda x: x['hlsvp'][0], compat_str)))
2203            if manifest_url:
2204                formats = []
2205                m3u8_formats = self._extract_m3u8_formats(
2206                    manifest_url, video_id, 'mp4', fatal=False)
2207                for a_format in m3u8_formats:
2208                    itag = self._search_regex(
2209                        r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2210                    if itag:
2211                        a_format['format_id'] = itag
2212                        if itag in self._formats:
2213                            dct = self._formats[itag].copy()
2214                            dct.update(a_format)
2215                            a_format = dct
2216                    a_format['player_url'] = player_url
2217                    # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2218                    a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2219                    formats.append(a_format)
2220            else:
2221                error_message = extract_unavailable_message()
2222                if not error_message:
2223                    error_message = clean_html(try_get(
2224                        player_response, lambda x: x['playabilityStatus']['reason'],
2225                        compat_str))
2226                if not error_message:
2227                    error_message = clean_html(
2228                        try_get(video_info, lambda x: x['reason'][0], compat_str))
2229                if error_message:
2230                    raise ExtractorError(error_message, expected=True)
2231                raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2232
2233        # uploader
2234        video_uploader = try_get(
2235            video_info, lambda x: x['author'][0],
2236            compat_str) or str_or_none(video_details.get('author'))
2237        if video_uploader:
2238            video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2239        else:
2240            self._downloader.report_warning('unable to extract uploader name')
2241
2242        # uploader_id
2243        video_uploader_id = None
2244        video_uploader_url = None
2245        mobj = re.search(
2246            r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2247            video_webpage)
2248        if mobj is not None:
2249            video_uploader_id = mobj.group('uploader_id')
2250            video_uploader_url = mobj.group('uploader_url')
2251        else:
2252            owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2253            if owner_profile_url:
2254                video_uploader_id = self._search_regex(
2255                    r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2256                    default=None)
2257                video_uploader_url = owner_profile_url
2258
2259        channel_id = (
2260            str_or_none(video_details.get('channelId'))
2261            or self._html_search_meta(
2262                'channelId', video_webpage, 'channel id', default=None)
2263            or self._search_regex(
2264                r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2265                video_webpage, 'channel id', default=None, group='id'))
2266        channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2267
2268        thumbnails = []
2269        thumbnails_list = try_get(
2270            video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2271        for t in thumbnails_list:
2272            if not isinstance(t, dict):
2273                continue
2274            thumbnail_url = url_or_none(t.get('url'))
2275            if not thumbnail_url:
2276                continue
2277            thumbnails.append({
2278                'url': thumbnail_url,
2279                'width': int_or_none(t.get('width')),
2280                'height': int_or_none(t.get('height')),
2281            })
2282
2283        if not thumbnails:
2284            video_thumbnail = None
2285            # We try first to get a high quality image:
2286            m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2287                                video_webpage, re.DOTALL)
2288            if m_thumb is not None:
2289                video_thumbnail = m_thumb.group(1)
2290            thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2291            if thumbnail_url:
2292                video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2293            if video_thumbnail:
2294                thumbnails.append({'url': video_thumbnail})
2295
2296        # upload date
2297        upload_date = self._html_search_meta(
2298            'datePublished', video_webpage, 'upload date', default=None)
2299        if not upload_date:
2300            upload_date = self._search_regex(
2301                [r'(?s)id="eow-date.*?>(.*?)</span>',
2302                 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2303                video_webpage, 'upload date', default=None)
2304        if not upload_date:
2305            upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2306        upload_date = unified_strdate(upload_date)
2307
2308        video_license = self._html_search_regex(
2309            r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2310            video_webpage, 'license', default=None)
2311
2312        m_music = re.search(
2313            r'''(?x)
2314                <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2315                <ul[^>]*>\s*
2316                <li>(?P<title>.+?)
2317                by (?P<creator>.+?)
2318                (?:
2319                    \(.+?\)|
2320                    <a[^>]*
2321                        (?:
2322                            \bhref=["\']/red[^>]*>|             # drop possible
2323                            >\s*Listen ad-free with YouTube Red # YouTube Red ad
2324                        )
2325                    .*?
2326                )?</li
2327            ''',
2328            video_webpage)
2329        if m_music:
2330            video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2331            video_creator = clean_html(m_music.group('creator'))
2332        else:
2333            video_alt_title = video_creator = None
2334
2335        def extract_meta(field):
2336            return self._html_search_regex(
2337                r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2338                video_webpage, field, default=None)
2339
2340        track = extract_meta('Song')
2341        artist = extract_meta('Artist')
2342        album = extract_meta('Album')
2343
2344        # Youtube Music Auto-generated description
2345        release_date = release_year = None
2346        if video_description:
2347            mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2348            if mobj:
2349                if not track:
2350                    track = mobj.group('track').strip()
2351                if not artist:
2352                    artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2353                if not album:
2354                    album = mobj.group('album'.strip())
2355                release_year = mobj.group('release_year')
2356                release_date = mobj.group('release_date')
2357                if release_date:
2358                    release_date = release_date.replace('-', '')
2359                    if not release_year:
2360                        release_year = int(release_date[:4])
2361                if release_year:
2362                    release_year = int(release_year)
2363
2364        m_episode = re.search(
2365            r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2366            video_webpage)
2367        if m_episode:
2368            series = unescapeHTML(m_episode.group('series'))
2369            season_number = int(m_episode.group('season'))
2370            episode_number = int(m_episode.group('episode'))
2371        else:
2372            series = season_number = episode_number = None
2373
2374        m_cat_container = self._search_regex(
2375            r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2376            video_webpage, 'categories', default=None)
2377        category = None
2378        if m_cat_container:
2379            category = self._html_search_regex(
2380                r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2381                default=None)
2382        if not category:
2383            category = try_get(
2384                microformat, lambda x: x['category'], compat_str)
2385        video_categories = None if category is None else [category]
2386
2387        video_tags = [
2388            unescapeHTML(m.group('content'))
2389            for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2390        if not video_tags:
2391            video_tags = try_get(video_details, lambda x: x['keywords'], list)
2392
2393        def _extract_count(count_name):
2394            return str_to_int(self._search_regex(
2395                r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2396                % re.escape(count_name),
2397                video_webpage, count_name, default=None))
2398
2399        like_count = _extract_count('like')
2400        dislike_count = _extract_count('dislike')
2401
2402        if view_count is None:
2403            view_count = str_to_int(self._search_regex(
2404                r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2405                'view count', default=None))
2406
2407        average_rating = (
2408            float_or_none(video_details.get('averageRating'))
2409            or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2410
2411        # subtitles
2412        video_subtitles = self.extract_subtitles(video_id, video_webpage)
2413        automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2414
2415        video_duration = try_get(
2416            video_info, lambda x: int_or_none(x['length_seconds'][0]))
2417        if not video_duration:
2418            video_duration = int_or_none(video_details.get('lengthSeconds'))
2419        if not video_duration:
2420            video_duration = parse_duration(self._html_search_meta(
2421                'duration', video_webpage, 'video duration'))
2422
2423        # annotations
2424        video_annotations = None
2425        if self._downloader.params.get('writeannotations', False):
2426            xsrf_token = self._search_regex(
2427                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2428                video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2429            invideo_url = try_get(
2430                player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2431            if xsrf_token and invideo_url:
2432                xsrf_field_name = self._search_regex(
2433                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2434                    video_webpage, 'xsrf field name',
2435                    group='xsrf_field_name', default='session_token')
2436                video_annotations = self._download_webpage(
2437                    self._proto_relative_url(invideo_url),
2438                    video_id, note='Downloading annotations',
2439                    errnote='Unable to download video annotations', fatal=False,
2440                    data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2441
2442        chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2443
2444        # Look for the DASH manifest
2445        if self._downloader.params.get('youtube_include_dash_manifest', True):
2446            dash_mpd_fatal = True
2447            for mpd_url in dash_mpds:
2448                dash_formats = {}
2449                try:
2450                    def decrypt_sig(mobj):
2451                        s = mobj.group(1)
2452                        dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2453                        return '/signature/%s' % dec_s
2454
2455                    mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2456
2457                    for df in self._extract_mpd_formats(
2458                            mpd_url, video_id, fatal=dash_mpd_fatal,
2459                            formats_dict=self._formats):
2460                        if not df.get('filesize'):
2461                            df['filesize'] = _extract_filesize(df['url'])
2462                        # Do not overwrite DASH format found in some previous DASH manifest
2463                        if df['format_id'] not in dash_formats:
2464                            dash_formats[df['format_id']] = df
2465                        # Additional DASH manifests may end up in HTTP Error 403 therefore
2466                        # allow them to fail without bug report message if we already have
2467                        # some DASH manifest succeeded. This is temporary workaround to reduce
2468                        # burst of bug reports until we figure out the reason and whether it
2469                        # can be fixed at all.
2470                        dash_mpd_fatal = False
2471                except (ExtractorError, KeyError) as e:
2472                    self.report_warning(
2473                        'Skipping DASH manifest: %r' % e, video_id)
2474                if dash_formats:
2475                    # Remove the formats we found through non-DASH, they
2476                    # contain less info and it can be wrong, because we use
2477                    # fixed values (for example the resolution). See
2478                    # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2479                    # example.
2480                    formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2481                    formats.extend(dash_formats.values())
2482
2483        # Check for malformed aspect ratio
2484        stretched_m = re.search(
2485            r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2486            video_webpage)
2487        if stretched_m:
2488            w = float(stretched_m.group('w'))
2489            h = float(stretched_m.group('h'))
2490            # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2491            # We will only process correct ratios.
2492            if w > 0 and h > 0:
2493                ratio = w / h
2494                for f in formats:
2495                    if f.get('vcodec') != 'none':
2496                        f['stretched_ratio'] = ratio
2497
2498        if not formats:
2499            if 'reason' in video_info:
2500                if 'The uploader has not made this video available in your country.' in video_info['reason']:
2501                    regions_allowed = self._html_search_meta(
2502                        'regionsAllowed', video_webpage, default=None)
2503                    countries = regions_allowed.split(',') if regions_allowed else None
2504                    self.raise_geo_restricted(
2505                        msg=video_info['reason'][0], countries=countries)
2506                reason = video_info['reason'][0]
2507                if 'Invalid parameters' in reason:
2508                    unavailable_message = extract_unavailable_message()
2509                    if unavailable_message:
2510                        reason = unavailable_message
2511                raise ExtractorError(
2512                    'YouTube said: %s' % reason,
2513                    expected=True, video_id=video_id)
2514            if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2515                raise ExtractorError('This video is DRM protected.', expected=True)
2516
2517        self._sort_formats(formats)
2518
2519        self.mark_watched(video_id, video_info, player_response)
2520
2521        return {
2522            'id': video_id,
2523            'uploader': video_uploader,
2524            'uploader_id': video_uploader_id,
2525            'uploader_url': video_uploader_url,
2526            'channel_id': channel_id,
2527            'channel_url': channel_url,
2528            'upload_date': upload_date,
2529            'license': video_license,
2530            'creator': video_creator or artist,
2531            'title': video_title,
2532            'alt_title': video_alt_title or track,
2533            'thumbnails': thumbnails,
2534            'description': video_description,
2535            'categories': video_categories,
2536            'tags': video_tags,
2537            'subtitles': video_subtitles,
2538            'automatic_captions': automatic_captions,
2539            'duration': video_duration,
2540            'age_limit': 18 if age_gate else 0,
2541            'annotations': video_annotations,
2542            'chapters': chapters,
2543            'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2544            'view_count': view_count,
2545            'like_count': like_count,
2546            'dislike_count': dislike_count,
2547            'average_rating': average_rating,
2548            'formats': formats,
2549            'is_live': is_live,
2550            'start_time': start_time,
2551            'end_time': end_time,
2552            'series': series,
2553            'season_number': season_number,
2554            'episode_number': episode_number,
2555            'track': track,
2556            'artist': artist,
2557            'album': album,
2558            'release_date': release_date,
2559            'release_year': release_year,
2560        }
2561
2562
2563class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2564    IE_DESC = 'YouTube.com playlists'
2565    _VALID_URL = r"""(?x)(?:
2566                        (?:https?://)?
2567                        (?:\w+\.)?
2568                        (?:
2569                            (?:
2570                                youtube(?:kids)?\.com|
2571                                invidio\.us
2572                            )
2573                            /
2574                            (?:
2575                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2576                               \? (?:.*?[&;])*? (?:p|a|list)=
2577                            |  p/
2578                            )|
2579                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2580                        )
2581                        (
2582                            (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2583                            # Top tracks, they can also include dots
2584                            |(?:MC)[\w\.]*
2585                        )
2586                        .*
2587                     |
2588                        (%(playlist_id)s)
2589                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2590    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2591    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2592    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2593    IE_NAME = 'youtube:playlist'
2594    _TESTS = [{
2595        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2596        'info_dict': {
2597            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2598            'uploader': 'Sergey M.',
2599            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2600            'title': 'youtube-dl public playlist',
2601        },
2602        'playlist_count': 1,
2603    }, {
2604        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2605        'info_dict': {
2606            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2607            'uploader': 'Sergey M.',
2608            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2609            'title': 'youtube-dl empty playlist',
2610        },
2611        'playlist_count': 0,
2612    }, {
2613        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2614        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2615        'info_dict': {
2616            'title': '29C3: Not my department',
2617            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2618            'uploader': 'Christiaan008',
2619            'uploader_id': 'ChRiStIaAn008',
2620        },
2621        'playlist_count': 96,
2622    }, {
2623        'note': 'issue #673',
2624        'url': 'PLBB231211A4F62143',
2625        'info_dict': {
2626            'title': '[OLD]Team Fortress 2 (Class-based LP)',
2627            'id': 'PLBB231211A4F62143',
2628            'uploader': 'Wickydoo',
2629            'uploader_id': 'Wickydoo',
2630        },
2631        'playlist_mincount': 26,
2632    }, {
2633        'note': 'Large playlist',
2634        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2635        'info_dict': {
2636            'title': 'Uploads from Cauchemar',
2637            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2638            'uploader': 'Cauchemar',
2639            'uploader_id': 'Cauchemar89',
2640        },
2641        'playlist_mincount': 799,
2642    }, {
2643        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2644        'info_dict': {
2645            'title': 'YDL_safe_search',
2646            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2647        },
2648        'playlist_count': 2,
2649        'skip': 'This playlist is private',
2650    }, {
2651        'note': 'embedded',
2652        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2653        'playlist_count': 4,
2654        'info_dict': {
2655            'title': 'JODA15',
2656            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2657            'uploader': 'milan',
2658            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2659        }
2660    }, {
2661        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2662        'playlist_mincount': 485,
2663        'info_dict': {
2664            'title': '2018 Chinese New Singles (11/6 updated)',
2665            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2666            'uploader': 'LBK',
2667            'uploader_id': 'sdragonfang',
2668        }
2669    }, {
2670        'note': 'Embedded SWF player',
2671        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2672        'playlist_count': 4,
2673        'info_dict': {
2674            'title': 'JODA7',
2675            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2676        },
2677        'skip': 'This playlist does not exist',
2678    }, {
2679        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2680        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2681        'info_dict': {
2682            'title': 'Uploads from Interstellar Movie',
2683            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2684            'uploader': 'Interstellar Movie',
2685            'uploader_id': 'InterstellarMovie1',
2686        },
2687        'playlist_mincount': 21,
2688    }, {
2689        # Playlist URL that does not actually serve a playlist
2690        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2691        'info_dict': {
2692            'id': 'FqZTN594JQw',
2693            'ext': 'webm',
2694            'title': "Smiley's People 01 detective, Adventure Series, Action",
2695            'uploader': 'STREEM',
2696            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2697            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2698            'upload_date': '20150526',
2699            'license': 'Standard YouTube License',
2700            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2701            'categories': ['People & Blogs'],
2702            'tags': list,
2703            'view_count': int,
2704            'like_count': int,
2705            'dislike_count': int,
2706        },
2707        'params': {
2708            'skip_download': True,
2709        },
2710        'skip': 'This video is not available.',
2711        'add_ie': [YoutubeIE.ie_key()],
2712    }, {
2713        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2714        'info_dict': {
2715            'id': 'yeWKywCrFtk',
2716            'ext': 'mp4',
2717            'title': 'Small Scale Baler and Braiding Rugs',
2718            'uploader': 'Backus-Page House Museum',
2719            'uploader_id': 'backuspagemuseum',
2720            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2721            'upload_date': '20161008',
2722            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2723            'categories': ['Nonprofits & Activism'],
2724            'tags': list,
2725            'like_count': int,
2726            'dislike_count': int,
2727        },
2728        'params': {
2729            'noplaylist': True,
2730            'skip_download': True,
2731        },
2732    }, {
2733        # https://github.com/ytdl-org/youtube-dl/issues/21844
2734        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2735        'info_dict': {
2736            'title': 'Data Analysis with Dr Mike Pound',
2737            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2738            'uploader_id': 'Computerphile',
2739            'uploader': 'Computerphile',
2740        },
2741        'playlist_mincount': 11,
2742    }, {
2743        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2744        'only_matching': True,
2745    }, {
2746        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2747        'only_matching': True,
2748    }, {
2749        # music album playlist
2750        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2751        'only_matching': True,
2752    }, {
2753        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2754        'only_matching': True,
2755    }, {
2756        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2757        'only_matching': True,
2758    }]
2759
2760    def _real_initialize(self):
2761        self._login()
2762
2763    def extract_videos_from_page(self, page):
2764        ids_in_page = []
2765        titles_in_page = []
2766
2767        for item in re.findall(
2768                r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
2769            attrs = extract_attributes(item)
2770            video_id = attrs['data-video-id']
2771            video_title = unescapeHTML(attrs.get('data-title'))
2772            if video_title:
2773                video_title = video_title.strip()
2774            ids_in_page.append(video_id)
2775            titles_in_page.append(video_title)
2776
2777        # Fallback with old _VIDEO_RE
2778        self.extract_videos_from_page_impl(
2779            self._VIDEO_RE, page, ids_in_page, titles_in_page)
2780
2781        # Relaxed fallbacks
2782        self.extract_videos_from_page_impl(
2783            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
2784            ids_in_page, titles_in_page)
2785        self.extract_videos_from_page_impl(
2786            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
2787            ids_in_page, titles_in_page)
2788
2789        return zip(ids_in_page, titles_in_page)
2790
2791    def _extract_mix(self, playlist_id):
2792        # The mixes are generated from a single video
2793        # the id of the playlist is just 'RD' + video_id
2794        ids = []
2795        last_id = playlist_id[-11:]
2796        for n in itertools.count(1):
2797            url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2798            webpage = self._download_webpage(
2799                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2800            new_ids = orderedSet(re.findall(
2801                r'''(?xs)data-video-username=".*?".*?
2802                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
2803                webpage))
2804            # Fetch new pages until all the videos are repeated, it seems that
2805            # there are always 51 unique videos.
2806            new_ids = [_id for _id in new_ids if _id not in ids]
2807            if not new_ids:
2808                break
2809            ids.extend(new_ids)
2810            last_id = ids[-1]
2811
2812        url_results = self._ids_to_results(ids)
2813
2814        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2815        title_span = (
2816            search_title('playlist-title')
2817            or search_title('title long-title')
2818            or search_title('title'))
2819        title = clean_html(title_span)
2820
2821        return self.playlist_result(url_results, playlist_id, title)
2822
2823    def _extract_playlist(self, playlist_id):
2824        url = self._TEMPLATE_URL % playlist_id
2825        page = self._download_webpage(url, playlist_id)
2826
2827        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2828        for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2829            match = match.strip()
2830            # Check if the playlist exists or is private
2831            mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2832            if mobj:
2833                reason = mobj.group('reason')
2834                message = 'This playlist %s' % reason
2835                if 'private' in reason:
2836                    message += ', use --username or --netrc to access it'
2837                message += '.'
2838                raise ExtractorError(message, expected=True)
2839            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2840                raise ExtractorError(
2841                    'Invalid parameters. Maybe URL is incorrect.',
2842                    expected=True)
2843            elif re.match(r'[^<]*Choose your language[^<]*', match):
2844                continue
2845            else:
2846                self.report_warning('Youtube gives an alert message: ' + match)
2847
2848        playlist_title = self._html_search_regex(
2849            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2850            page, 'title', default=None)
2851
2852        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2853        uploader = self._html_search_regex(
2854            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2855            page, 'uploader', default=None)
2856        mobj = re.search(
2857            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2858            page)
2859        if mobj:
2860            uploader_id = mobj.group('uploader_id')
2861            uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2862        else:
2863            uploader_id = uploader_url = None
2864
2865        has_videos = True
2866
2867        if not playlist_title:
2868            try:
2869                # Some playlist URLs don't actually serve a playlist (e.g.
2870                # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2871                next(self._entries(page, playlist_id))
2872            except StopIteration:
2873                has_videos = False
2874
2875        playlist = self.playlist_result(
2876            self._entries(page, playlist_id), playlist_id, playlist_title)
2877        playlist.update({
2878            'uploader': uploader,
2879            'uploader_id': uploader_id,
2880            'uploader_url': uploader_url,
2881        })
2882
2883        return has_videos, playlist
2884
2885    def _check_download_just_video(self, url, playlist_id):
2886        # Check if it's a video-specific URL
2887        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2888        video_id = query_dict.get('v', [None])[0] or self._search_regex(
2889            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2890            'video id', default=None)
2891        if video_id:
2892            if self._downloader.params.get('noplaylist'):
2893                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2894                return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2895            else:
2896                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2897                return video_id, None
2898        return None, None
2899
2900    def _real_extract(self, url):
2901        # Extract playlist id
2902        mobj = re.match(self._VALID_URL, url)
2903        if mobj is None:
2904            raise ExtractorError('Invalid URL: %s' % url)
2905        playlist_id = mobj.group(1) or mobj.group(2)
2906
2907        video_id, video = self._check_download_just_video(url, playlist_id)
2908        if video:
2909            return video
2910
2911        if playlist_id.startswith(('RD', 'UL', 'PU')):
2912            # Mixes require a custom extraction process
2913            return self._extract_mix(playlist_id)
2914
2915        has_videos, playlist = self._extract_playlist(playlist_id)
2916        if has_videos or not video_id:
2917            return playlist
2918
2919        # Some playlist URLs don't actually serve a playlist (see
2920        # https://github.com/ytdl-org/youtube-dl/issues/10537).
2921        # Fallback to plain video extraction if there is a video id
2922        # along with playlist id.
2923        return self.url_result(video_id, 'Youtube', video_id=video_id)
2924
2925
2926class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2927    IE_DESC = 'YouTube.com channels'
2928    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
2929    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2930    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2931    IE_NAME = 'youtube:channel'
2932    _TESTS = [{
2933        'note': 'paginated channel',
2934        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2935        'playlist_mincount': 91,
2936        'info_dict': {
2937            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2938            'title': 'Uploads from lex will',
2939            'uploader': 'lex will',
2940            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2941        }
2942    }, {
2943        'note': 'Age restricted channel',
2944        # from https://www.youtube.com/user/DeusExOfficial
2945        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2946        'playlist_mincount': 64,
2947        'info_dict': {
2948            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2949            'title': 'Uploads from Deus Ex',
2950            'uploader': 'Deus Ex',
2951            'uploader_id': 'DeusExOfficial',
2952        },
2953    }, {
2954        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2955        'only_matching': True,
2956    }, {
2957        'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
2958        'only_matching': True,
2959    }]
2960
2961    @classmethod
2962    def suitable(cls, url):
2963        return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
2964                else super(YoutubeChannelIE, cls).suitable(url))
2965
2966    def _build_template_url(self, url, channel_id):
2967        return self._TEMPLATE_URL % channel_id
2968
2969    def _real_extract(self, url):
2970        channel_id = self._match_id(url)
2971
2972        url = self._build_template_url(url, channel_id)
2973
2974        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
2975        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
2976        # otherwise fallback on channel by page extraction
2977        channel_page = self._download_webpage(
2978            url + '?view=57', channel_id,
2979            'Downloading channel page', fatal=False)
2980        if channel_page is False:
2981            channel_playlist_id = False
2982        else:
2983            channel_playlist_id = self._html_search_meta(
2984                'channelId', channel_page, 'channel id', default=None)
2985            if not channel_playlist_id:
2986                channel_url = self._html_search_meta(
2987                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2988                    channel_page, 'channel url', default=None)
2989                if channel_url:
2990                    channel_playlist_id = self._search_regex(
2991                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2992                        channel_url, 'channel id', default=None)
2993        if channel_playlist_id and channel_playlist_id.startswith('UC'):
2994            playlist_id = 'UU' + channel_playlist_id[2:]
2995            return self.url_result(
2996                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2997
2998        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2999        autogenerated = re.search(r'''(?x)
3000                class="[^"]*?(?:
3001                    channel-header-autogenerated-label|
3002                    yt-channel-title-autogenerated
3003                )[^"]*"''', channel_page) is not None
3004
3005        if autogenerated:
3006            # The videos are contained in a single page
3007            # the ajax pages can't be used, they are empty
3008            entries = [
3009                self.url_result(
3010                    video_id, 'Youtube', video_id=video_id,
3011                    video_title=video_title)
3012                for video_id, video_title in self.extract_videos_from_page(channel_page)]
3013            return self.playlist_result(entries, channel_id)
3014
3015        try:
3016            next(self._entries(channel_page, channel_id))
3017        except StopIteration:
3018            alert_message = self._html_search_regex(
3019                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
3020                channel_page, 'alert', default=None, group='alert')
3021            if alert_message:
3022                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
3023
3024        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
3025
3026
3027class YoutubeUserIE(YoutubeChannelIE):
3028    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
3029    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_%-]+)'
3030    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
3031    IE_NAME = 'youtube:user'
3032
3033    _TESTS = [{
3034        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
3035        'playlist_mincount': 320,
3036        'info_dict': {
3037            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
3038            'title': 'Uploads from The Linux Foundation',
3039            'uploader': 'The Linux Foundation',
3040            'uploader_id': 'TheLinuxFoundation',
3041        }
3042    }, {
3043        # Only available via https://www.youtube.com/c/12minuteathlete/videos
3044        # but not https://www.youtube.com/user/12minuteathlete/videos
3045        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
3046        'playlist_mincount': 249,
3047        'info_dict': {
3048            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
3049            'title': 'Uploads from 12 Minute Athlete',
3050            'uploader': '12 Minute Athlete',
3051            'uploader_id': 'the12minuteathlete',
3052        }
3053    }, {
3054        'url': 'ytuser:phihag',
3055        'only_matching': True,
3056    }, {
3057        'url': 'https://www.youtube.com/c/gametrailers',
3058        'only_matching': True,
3059    }, {
3060        'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak',
3061        'only_matching': True,
3062    }, {
3063        'url': 'https://www.youtube.com/gametrailers',
3064        'only_matching': True,
3065    }, {
3066        # This channel is not available, geo restricted to JP
3067        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
3068        'only_matching': True,
3069    }]
3070
3071    @classmethod
3072    def suitable(cls, url):
3073        # Don't return True if the url can be extracted with other youtube
3074        # extractor, the regex would is too permissive and it would match.
3075        other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
3076        if any(ie.suitable(url) for ie in other_yt_ies):
3077            return False
3078        else:
3079            return super(YoutubeUserIE, cls).suitable(url)
3080
3081    def _build_template_url(self, url, channel_id):
3082        mobj = re.match(self._VALID_URL, url)
3083        return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
3084
3085
3086class YoutubeLiveIE(YoutubeBaseInfoExtractor):
3087    IE_DESC = 'YouTube.com live streams'
3088    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
3089    IE_NAME = 'youtube:live'
3090
3091    _TESTS = [{
3092        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3093        'info_dict': {
3094            'id': 'a48o2S1cPoo',
3095            'ext': 'mp4',
3096            'title': 'The Young Turks - Live Main Show',
3097            'uploader': 'The Young Turks',
3098            'uploader_id': 'TheYoungTurks',
3099            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3100            'upload_date': '20150715',
3101            'license': 'Standard YouTube License',
3102            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3103            'categories': ['News & Politics'],
3104            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3105            'like_count': int,
3106            'dislike_count': int,
3107        },
3108        'params': {
3109            'skip_download': True,
3110        },
3111    }, {
3112        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3113        'only_matching': True,
3114    }, {
3115        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3116        'only_matching': True,
3117    }, {
3118        'url': 'https://www.youtube.com/TheYoungTurks/live',
3119        'only_matching': True,
3120    }]
3121
3122    def _real_extract(self, url):
3123        mobj = re.match(self._VALID_URL, url)
3124        channel_id = mobj.group('id')
3125        base_url = mobj.group('base_url')
3126        webpage = self._download_webpage(url, channel_id, fatal=False)
3127        if webpage:
3128            page_type = self._og_search_property(
3129                'type', webpage, 'page type', default='')
3130            video_id = self._html_search_meta(
3131                'videoId', webpage, 'video id', default=None)
3132            if page_type.startswith('video') and video_id and re.match(
3133                    r'^[0-9A-Za-z_-]{11}$', video_id):
3134                return self.url_result(video_id, YoutubeIE.ie_key())
3135        return self.url_result(base_url)
3136
3137
3138class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
3139    IE_DESC = 'YouTube.com user/channel playlists'
3140    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'
3141    IE_NAME = 'youtube:playlists'
3142
3143    _TESTS = [{
3144        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3145        'playlist_mincount': 4,
3146        'info_dict': {
3147            'id': 'ThirstForScience',
3148            'title': 'ThirstForScience',
3149        },
3150    }, {
3151        # with "Load more" button
3152        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3153        'playlist_mincount': 70,
3154        'info_dict': {
3155            'id': 'igorkle1',
3156            'title': 'Игорь Клейнер',
3157        },
3158    }, {
3159        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
3160        'playlist_mincount': 17,
3161        'info_dict': {
3162            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
3163            'title': 'Chem Player',
3164        },
3165        'skip': 'Blocked',
3166    }, {
3167        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3168        'only_matching': True,
3169    }]
3170
3171
3172class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
3173    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3174
3175
3176class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
3177    IE_DESC = 'YouTube.com searches'
3178    # there doesn't appear to be a real limit, for example if you search for
3179    # 'python' you get more than 8.000.000 results
3180    _MAX_RESULTS = float('inf')
3181    IE_NAME = 'youtube:search'
3182    _SEARCH_KEY = 'ytsearch'
3183    _SEARCH_PARAMS = None
3184    _TESTS = []
3185
3186    def _entries(self, query, n):
3187        data = {
3188            'context': {
3189                'client': {
3190                    'clientName': 'WEB',
3191                    'clientVersion': '2.20201021.03.00',
3192                }
3193            },
3194            'query': query,
3195        }
3196        if self._SEARCH_PARAMS:
3197            data['params'] = self._SEARCH_PARAMS
3198        total = 0
3199        for page_num in itertools.count(1):
3200            search = self._download_json(
3201                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
3202                video_id='query "%s"' % query,
3203                note='Downloading page %s' % page_num,
3204                errnote='Unable to download API page', fatal=False,
3205                data=json.dumps(data).encode('utf8'),
3206                headers={'content-type': 'application/json'})
3207            if not search:
3208                break
3209            slr_contents = try_get(
3210                search,
3211                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
3212                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
3213                list)
3214            if not slr_contents:
3215                break
3216            isr_contents = try_get(
3217                slr_contents,
3218                lambda x: x[0]['itemSectionRenderer']['contents'],
3219                list)
3220            if not isr_contents:
3221                break
3222            for content in isr_contents:
3223                if not isinstance(content, dict):
3224                    continue
3225                video = content.get('videoRenderer')
3226                if not isinstance(video, dict):
3227                    continue
3228                video_id = video.get('videoId')
3229                if not video_id:
3230                    continue
3231                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
3232                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
3233                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
3234                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
3235                view_count = int_or_none(self._search_regex(
3236                    r'^(\d+)', re.sub(r'\s', '', view_count_text),
3237                    'view count', default=None))
3238                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
3239                total += 1
3240                yield {
3241                    '_type': 'url_transparent',
3242                    'ie_key': YoutubeIE.ie_key(),
3243                    'id': video_id,
3244                    'url': video_id,
3245                    'title': title,
3246                    'description': description,
3247                    'duration': duration,
3248                    'view_count': view_count,
3249                    'uploader': uploader,
3250                }
3251                if total == n:
3252                    return
3253            token = try_get(
3254                slr_contents,
3255                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
3256                compat_str)
3257            if not token:
3258                break
3259            data['continuation'] = token
3260
3261    def _get_n_results(self, query, n):
3262        """Get a specified number of results for a query"""
3263        return self.playlist_result(self._entries(query, n), query)
3264
3265
3266class YoutubeSearchDateIE(YoutubeSearchIE):
3267    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
3268    _SEARCH_KEY = 'ytsearchdate'
3269    IE_DESC = 'YouTube.com searches, newest videos first'
3270    _SEARCH_PARAMS = 'CAI%3D'
3271
3272
3273class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
3274    IE_DESC = 'YouTube.com search URLs'
3275    IE_NAME = 'youtube:search_url'
3276    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
3277    _TESTS = [{
3278        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
3279        'playlist_mincount': 5,
3280        'info_dict': {
3281            'title': 'youtube-dl test video',
3282        }
3283    }, {
3284        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
3285        'only_matching': True,
3286    }]
3287
3288    def _real_extract(self, url):
3289        mobj = re.match(self._VALID_URL, url)
3290        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
3291        webpage = self._download_webpage(url, query)
3292        return self.playlist_result(self._process_page(webpage), playlist_title=query)
3293
3294
3295class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
3296    IE_DESC = 'YouTube.com (multi-season) shows'
3297    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
3298    IE_NAME = 'youtube:show'
3299    _TESTS = [{
3300        'url': 'https://www.youtube.com/show/airdisasters',
3301        'playlist_mincount': 5,
3302        'info_dict': {
3303            'id': 'airdisasters',
3304            'title': 'Air Disasters',
3305        }
3306    }]
3307
3308    def _real_extract(self, url):
3309        playlist_id = self._match_id(url)
3310        return super(YoutubeShowIE, self)._real_extract(
3311            'https://www.youtube.com/show/%s/playlists' % playlist_id)
3312
3313
3314class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
3315    """
3316    Base class for feed extractors
3317    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
3318    """
3319    _LOGIN_REQUIRED = True
3320
3321    @property
3322    def IE_NAME(self):
3323        return 'youtube:%s' % self._FEED_NAME
3324
3325    def _real_initialize(self):
3326        self._login()
3327
3328    def _entries(self, page):
3329        # The extraction process is the same as for playlists, but the regex
3330        # for the video ids doesn't contain an index
3331        ids = []
3332        more_widget_html = content_html = page
3333        for page_num in itertools.count(1):
3334            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
3335
3336            # 'recommended' feed has infinite 'load more' and each new portion spins
3337            # the same videos in (sometimes) slightly different order, so we'll check
3338            # for unicity and break when portion has no new videos
3339            new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
3340            if not new_ids:
3341                break
3342
3343            ids.extend(new_ids)
3344
3345            for entry in self._ids_to_results(new_ids):
3346                yield entry
3347
3348            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
3349            if not mobj:
3350                break
3351
3352            more = self._download_json(
3353                'https://www.youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
3354                'Downloading page #%s' % page_num,
3355                transform_source=uppercase_escape,
3356                headers=self._YOUTUBE_CLIENT_HEADERS)
3357            content_html = more['content_html']
3358            more_widget_html = more['load_more_widget_html']
3359
3360    def _real_extract(self, url):
3361        page = self._download_webpage(
3362            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
3363            self._PLAYLIST_TITLE)
3364        return self.playlist_result(
3365            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3366
3367
3368class YoutubeWatchLaterIE(YoutubePlaylistIE):
3369    IE_NAME = 'youtube:watchlater'
3370    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
3371    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
3372
3373    _TESTS = [{
3374        'url': 'https://www.youtube.com/playlist?list=WL',
3375        'only_matching': True,
3376    }, {
3377        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
3378        'only_matching': True,
3379    }]
3380
3381    def _real_extract(self, url):
3382        _, video = self._check_download_just_video(url, 'WL')
3383        if video:
3384            return video
3385        _, playlist = self._extract_playlist('WL')
3386        return playlist
3387
3388
3389class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
3390    IE_NAME = 'youtube:favorites'
3391    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
3392    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
3393    _LOGIN_REQUIRED = True
3394
3395    def _real_extract(self, url):
3396        webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
3397        playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
3398        return self.url_result(playlist_id, 'YoutubePlaylist')
3399
3400
3401class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
3402    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
3403    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
3404    _FEED_NAME = 'recommended'
3405    _PLAYLIST_TITLE = 'Youtube Recommended videos'
3406
3407
3408class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
3409    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
3410    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
3411    _FEED_NAME = 'subscriptions'
3412    _PLAYLIST_TITLE = 'Youtube Subscriptions'
3413
3414
3415class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
3416    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
3417    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3418    _FEED_NAME = 'history'
3419    _PLAYLIST_TITLE = 'Youtube History'
3420
3421
3422class YoutubeTruncatedURLIE(InfoExtractor):
3423    IE_NAME = 'youtube:truncated_url'
3424    IE_DESC = False  # Do not list
3425    _VALID_URL = r'''(?x)
3426        (?:https?://)?
3427        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
3428        (?:watch\?(?:
3429            feature=[a-z_]+|
3430            annotation_id=annotation_[^&]+|
3431            x-yt-cl=[0-9]+|
3432            hl=[^&]*|
3433            t=[0-9]+
3434        )?
3435        |
3436            attribution_link\?a=[^&]+
3437        )
3438        $
3439    '''
3440
3441    _TESTS = [{
3442        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
3443        'only_matching': True,
3444    }, {
3445        'url': 'https://www.youtube.com/watch?',
3446        'only_matching': True,
3447    }, {
3448        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
3449        'only_matching': True,
3450    }, {
3451        'url': 'https://www.youtube.com/watch?feature=foo',
3452        'only_matching': True,
3453    }, {
3454        'url': 'https://www.youtube.com/watch?hl=en-GB',
3455        'only_matching': True,
3456    }, {
3457        'url': 'https://www.youtube.com/watch?t=2372',
3458        'only_matching': True,
3459    }]
3460
3461    def _real_extract(self, url):
3462        raise ExtractorError(
3463            'Did you forget to quote the URL? Remember that & is a meta '
3464            'character in most shells, so you want to put the URL in quotes, '
3465            'like  youtube-dl '
3466            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
3467            ' or simply  youtube-dl BaW_jenozKc  .',
3468            expected=True)
3469
3470
3471class YoutubeTruncatedIDIE(InfoExtractor):
3472    IE_NAME = 'youtube:truncated_id'
3473    IE_DESC = False  # Do not list
3474    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
3475
3476    _TESTS = [{
3477        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
3478        'only_matching': True,
3479    }]
3480
3481    def _real_extract(self, url):
3482        video_id = self._match_id(url)
3483        raise ExtractorError(
3484            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
3485            expected=True)