# Archived snapshot — Feb 07, 2021, 11:28 PM (web-scrape metadata, not part of the original source)
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5
6import itertools
7import json
8import os.path
9import random
10import re
11import time
12import traceback
13
14from .common import InfoExtractor, SearchInfoExtractor
15from ..jsinterp import JSInterpreter
16from ..swfinterp import SWFInterpreter
17from ..compat import (
18 compat_chr,
19 compat_HTTPError,
20 compat_parse_qs,
21 compat_urllib_parse_unquote,
22 compat_urllib_parse_unquote_plus,
23 compat_urllib_parse_urlencode,
24 compat_urllib_parse_urlparse,
25 compat_urlparse,
26 compat_str,
27)
28from ..utils import (
29 bool_or_none,
30 clean_html,
31 error_to_compat_str,
32 ExtractorError,
33 float_or_none,
34 get_element_by_id,
35 int_or_none,
36 mimetype2ext,
37 parse_codecs,
38 parse_duration,
39 remove_quotes,
40 remove_start,
41 smuggle_url,
42 str_or_none,
43 str_to_int,
44 try_get,
45 unescapeHTML,
46 unified_strdate,
47 unsmuggle_url,
48 update_url_query,
49 uppercase_escape,
50 url_or_none,
51 urlencode_postdata,
52 urljoin,
53)
54
55
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints of Google's multi-step JSON sign-in flow:
    # account lookup -> password challenge -> (optional) TFA challenge.
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches playlist IDs of the various known prefixes (plus the bare RDMM
    # radio-mix ID); used by subclasses to tell list URLs from video URLs.
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)'

    def _set_language(self):
        """Force the English YouTube UI via the PREF cookie so that the
        regex-based scraping below sees stable, language-independent markup."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Convert a list of video IDs into url_result dicts that will be
        handled by the 'Youtube' extractor."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Keep the documented contract: False means login failed
            # (previously this path returned a bare None).
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST one step of the sign-in flow. The response carries a
            # non-JSON prefix before the first '[' which transform_source
            # strips so the payload parses as JSON.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Positional request payloads below mirror Google's undocumented
        # wire format; the meaning of most slots is unknown — do not reorder.
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Consistent with the docstring: failure returns False.
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # NOTE: the conditional must be parenthesized. Without the
            # parentheses, '%' binds tighter than the ternary, so any
            # message other than INCORRECT_ANSWER_ENTERED was emitted
            # without the 'Unable to login:' prefix.
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                # TL is an opaque token echoed back in the TFA endpoint URL.
                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Users often paste codes with the 'G-' prefix from SMS.
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence fix as the password failure above.
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Any other challenge cannot be solved non-interactively;
                # point the user at the browser flow.
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # A successful login redirects through a page referencing myaccount.
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _real_initialize(self):
        # Without a downloader there is nothing to configure (and _login
        # would need it for warnings/options anyway).
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return

    # Minimal innertube client context sent with every _call_api request.
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _call_api(self, ep, query, video_id):
        """Call the innertube (youtubei/v1) API endpoint *ep* with *query*
        merged into the default client context and return the parsed JSON."""
        # NOTE(review): .copy() is shallow — the nested 'context' dict is
        # shared between calls; callers must not mutate it in place.
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        response = self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'},
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

        return response

    def _extract_yt_initial_data(self, video_id, webpage):
        """Extract and parse the ytInitialData JSON blob embedded in a watch
        page; tries the bounded pattern first, then the bare one."""
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_ytcfg(self, video_id, webpage):
        """Extract the ytcfg.set({...}) configuration object from a page;
        returns {} (non-fatally) when absent."""
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False)
310
311
312class YoutubeIE(YoutubeBaseInfoExtractor):
313 IE_DESC = 'YouTube.com'
314 _VALID_URL = r"""(?x)^
315 (
316 (?:https?://|//) # http(s):// or protocol-independent URL
317 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
318 (?:www\.)?deturl\.com/www\.youtube\.com/|
319 (?:www\.)?pwnyoutube\.com/|
320 (?:www\.)?hooktube\.com/|
321 (?:www\.)?yourepeat\.com/|
322 tube\.majestyc\.net/|
323 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
324 (?:(?:www|dev)\.)?invidio\.us/|
325 (?:(?:www|no)\.)?invidiou\.sh/|
326 (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
327 (?:www\.)?invidious\.kabi\.tk/|
328 (?:www\.)?invidious\.13ad\.de/|
329 (?:www\.)?invidious\.mastodon\.host/|
330 (?:www\.)?invidious\.zapashcanon\.fr/|
331 (?:www\.)?invidious\.kavin\.rocks/|
332 (?:www\.)?invidious\.tube/|
333 (?:www\.)?invidiou\.site/|
334 (?:www\.)?invidious\.site/|
335 (?:www\.)?invidious\.xyz/|
336 (?:www\.)?invidious\.nixnet\.xyz/|
337 (?:www\.)?invidious\.drycat\.fr/|
338 (?:www\.)?tube\.poal\.co/|
339 (?:www\.)?tube\.connect\.cafe/|
340 (?:www\.)?vid\.wxzm\.sx/|
341 (?:www\.)?vid\.mint\.lgbt/|
342 (?:www\.)?yewtu\.be/|
343 (?:www\.)?yt\.elukerio\.org/|
344 (?:www\.)?yt\.lelux\.fi/|
345 (?:www\.)?invidious\.ggc-project\.de/|
346 (?:www\.)?yt\.maisputain\.ovh/|
347 (?:www\.)?invidious\.13ad\.de/|
348 (?:www\.)?invidious\.toot\.koeln/|
349 (?:www\.)?invidious\.fdn\.fr/|
350 (?:www\.)?watch\.nettohikari\.com/|
351 (?:www\.)?kgg2m7yk5aybusll\.onion/|
352 (?:www\.)?qklhadlycap4cnod\.onion/|
353 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
354 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
355 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
356 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
357 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
358 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
359 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
360 (?:.*?\#/)? # handle anchor (#/) redirect urls
361 (?: # the various things that can precede the ID:
362 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
363 |(?: # or the v= param in all its forms
364 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
365 (?:\?|\#!?) # the params delimiter ? or # or #!
366 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
367 v=
368 )
369 ))
370 |(?:
371 youtu\.be| # just youtu.be/xxxx
372 vid\.plus| # or vid.plus/xxxx
373 zwearz\.com/watch| # or zwearz.com/watch/xxxx
374 )/
375 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
376 )
377 )? # all until now is optional -> you can pass the naked ID
378 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
379 (?!.*?\blist=
380 (?:
381 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
382 WL # WL are handled by the watch later IE
383 )
384 )
385 (?(1).+)? # if we found the ID, everything can follow
386 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
387 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
388 _PLAYER_INFO_RE = (
389 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
390 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
391 )
392 _formats = {
393 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
394 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
395 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
396 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
397 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
398 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
399 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
400 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
401 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
402 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
403 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
404 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
405 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
406 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
407 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
408 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
409 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
410 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
411
412
413 # 3D videos
414 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
415 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
416 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
417 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
418 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
419 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
420 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
421
422 # Apple HTTP Live Streaming
423 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
424 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
425 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
426 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
427 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
428 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
429 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
430 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
431
432 # DASH mp4 video
433 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
434 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
435 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
436 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
437 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
438 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
439 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
440 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
441 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
442 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
443 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
444 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
445
446 # Dash mp4 audio
447 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
448 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
449 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
450 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
451 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
452 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
453 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
454
455 # Dash webm
456 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
457 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
458 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
459 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
460 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
461 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
462 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
463 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
464 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
465 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
466 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
467 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
468 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
469 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
470 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
471 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
472 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
473 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
474 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
475 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
476 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
477 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
478
479 # Dash webm audio
480 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
481 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
482
483 # Dash webm audio with opus inside
484 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
485 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
486 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
487
488 # RTMP (unnamed)
489 '_rtmp': {'protocol': 'rtmp'},
490
491 # av01 video only formats sometimes served with "unknown" codecs
492 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
493 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
494 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
495 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
496 }
497 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
498
499 _GEO_BYPASS = False
500
501 IE_NAME = 'youtube'
502 _TESTS = [
503 {
504 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
505 'info_dict': {
506 'id': 'BaW_jenozKc',
507 'ext': 'mp4',
508 'title': 'youtube-dl test video "\'/\\äâ†ð•',
509 'uploader': 'Philipp Hagemeister',
510 'uploader_id': 'phihag',
511 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
512 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
513 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
514 'upload_date': '20121002',
515 'description': 'test chars: "\'/\\äâ†ð•\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
516 'categories': ['Science & Technology'],
517 'tags': ['youtube-dl'],
518 'duration': 10,
519 'view_count': int,
520 'like_count': int,
521 'dislike_count': int,
522 'start_time': 1,
523 'end_time': 9,
524 }
525 },
526 {
527 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
528 'note': 'Embed-only video (#1746)',
529 'info_dict': {
530 'id': 'yZIXLfi8CZQ',
531 'ext': 'mp4',
532 'upload_date': '20120608',
533 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
534 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
535 'uploader': 'SET India',
536 'uploader_id': 'setindia',
537 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
538 'age_limit': 18,
539 }
540 },
541 {
542 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
543 'note': 'Use the first video ID in the URL',
544 'info_dict': {
545 'id': 'BaW_jenozKc',
546 'ext': 'mp4',
547 'title': 'youtube-dl test video "\'/\\äâ†ð•',
548 'uploader': 'Philipp Hagemeister',
549 'uploader_id': 'phihag',
550 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
551 'upload_date': '20121002',
552 'description': 'test chars: "\'/\\äâ†ð•\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
553 'categories': ['Science & Technology'],
554 'tags': ['youtube-dl'],
555 'duration': 10,
556 'view_count': int,
557 'like_count': int,
558 'dislike_count': int,
559 },
560 'params': {
561 'skip_download': True,
562 },
563 },
564 {
565 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
566 'note': '256k DASH audio (format 141) via DASH manifest',
567 'info_dict': {
568 'id': 'a9LDPn-MO4I',
569 'ext': 'm4a',
570 'upload_date': '20121002',
571 'uploader_id': '8KVIDEO',
572 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
573 'description': '',
574 'uploader': '8KVIDEO',
575 'title': 'UHDTV TEST 8K VIDEO.mp4'
576 },
577 'params': {
578 'youtube_include_dash_manifest': True,
579 'format': '141',
580 },
581 'skip': 'format 141 not served anymore',
582 },
583 # DASH manifest with encrypted signature
584 {
585 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
586 'info_dict': {
587 'id': 'IB3lcPjvWLA',
588 'ext': 'm4a',
589 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
590 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
591 'duration': 244,
592 'uploader': 'AfrojackVEVO',
593 'uploader_id': 'AfrojackVEVO',
594 'upload_date': '20131011',
595 },
596 'params': {
597 'youtube_include_dash_manifest': True,
598 'format': '141/bestaudio[ext=m4a]',
599 },
600 },
601 # Controversy video
602 {
603 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
604 'info_dict': {
605 'id': 'T4XJQO3qol8',
606 'ext': 'mp4',
607 'duration': 219,
608 'upload_date': '20100909',
609 'uploader': 'Amazing Atheist',
610 'uploader_id': 'TheAmazingAtheist',
611 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
612 'title': 'Burning Everyone\'s Koran',
613 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
614 }
615 },
616 # Normal age-gate video (No vevo, embed allowed), available via embed page
617 {
618 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
619 'info_dict': {
620 'id': 'HtVdAasjOgU',
621 'ext': 'mp4',
622 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
623 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
624 'duration': 142,
625 'uploader': 'The Witcher',
626 'uploader_id': 'WitcherGame',
627 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
628 'upload_date': '20140605',
629 'age_limit': 18,
630 },
631 },
632 {
633 # Age-gated video only available with authentication (unavailable
634 # via embed page workaround)
635 'url': 'XgnwCQzjau8',
636 'only_matching': True,
637 },
638 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
639 # YouTube Red ad is not captured for creator
640 {
641 'url': '__2ABJjxzNo',
642 'info_dict': {
643 'id': '__2ABJjxzNo',
644 'ext': 'mp4',
645 'duration': 266,
646 'upload_date': '20100430',
647 'uploader_id': 'deadmau5',
648 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
649 'creator': 'Dada Life, deadmau5',
650 'description': 'md5:12c56784b8032162bb936a5f76d55360',
651 'uploader': 'deadmau5',
652 'title': 'Deadmau5 - Some Chords (HD)',
653 'alt_title': 'This Machine Kills Some Chords',
654 },
655 'expected_warnings': [
656 'DASH manifest missing',
657 ]
658 },
659 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
660 {
661 'url': 'lqQg6PlCWgI',
662 'info_dict': {
663 'id': 'lqQg6PlCWgI',
664 'ext': 'mp4',
665 'duration': 6085,
666 'upload_date': '20150827',
667 'uploader_id': 'olympic',
668 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
669 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
670 'uploader': 'Olympic',
671 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
672 },
673 'params': {
674 'skip_download': 'requires avconv',
675 }
676 },
677 # Non-square pixels
678 {
679 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
680 'info_dict': {
681 'id': '_b-2C3KPAM0',
682 'ext': 'mp4',
683 'stretched_ratio': 16 / 9.,
684 'duration': 85,
685 'upload_date': '20110310',
686 'uploader_id': 'AllenMeow',
687 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
688 'description': 'made by Wacom from Korea | å—幕&åŠ æ²¹æ·»é†‹ by TY\'s Allen | 感è¬heylisa00cavey1001åŒå¸ç†±æƒ…æä¾›æ¢—åŠç¿»è¯',
689 'uploader': 'å«á„‹á„…',
690 'title': '[A-made] 變態å¦å—幕版 å¤ªå¦ æˆ‘å°±æ˜¯é€™æ¨£çš„äºº',
691 },
692 },
693 # url_encoded_fmt_stream_map is empty string
694 {
695 'url': 'qEJwOuvDf7I',
696 'info_dict': {
697 'id': 'qEJwOuvDf7I',
698 'ext': 'webm',
699 'title': 'ОбÑуждение Ñудебной практики по выборам 14 ÑентÑÐ±Ñ€Ñ 2014 года в Санкт-Петербурге',
700 'description': '',
701 'upload_date': '20150404',
702 'uploader_id': 'spbelect',
703 'uploader': 'Ðаблюдатели Петербурга',
704 },
705 'params': {
706 'skip_download': 'requires avconv',
707 },
708 'skip': 'This live event has ended.',
709 },
710 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
711 {
712 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
713 'info_dict': {
714 'id': 'FIl7x6_3R5Y',
715 'ext': 'webm',
716 'title': 'md5:7b81415841e02ecd4313668cde88737a',
717 'description': 'md5:116377fd2963b81ec4ce64b542173306',
718 'duration': 220,
719 'upload_date': '20150625',
720 'uploader_id': 'dorappi2000',
721 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
722 'uploader': 'dorappi2000',
723 'formats': 'mincount:31',
724 },
725 'skip': 'not actual anymore',
726 },
727 # DASH manifest with segment_list
728 {
729 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
730 'md5': '8ce563a1d667b599d21064e982ab9e31',
731 'info_dict': {
732 'id': 'CsmdDsKjzN8',
733 'ext': 'mp4',
734 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
735 'uploader': 'Airtek',
736 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
737 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
738 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
739 },
740 'params': {
741 'youtube_include_dash_manifest': True,
742 'format': '135', # bestvideo
743 },
744 'skip': 'This live event has ended.',
745 },
746 {
747 # Multifeed videos (multiple cameras), URL is for Main Camera
748 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
749 'info_dict': {
750 'id': 'jqWvoWXjCVs',
751 'title': 'teamPGP: Rocket League Noob Stream',
752 'description': 'md5:dc7872fb300e143831327f1bae3af010',
753 },
754 'playlist': [{
755 'info_dict': {
756 'id': 'jqWvoWXjCVs',
757 'ext': 'mp4',
758 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
759 'description': 'md5:dc7872fb300e143831327f1bae3af010',
760 'duration': 7335,
761 'upload_date': '20150721',
762 'uploader': 'Beer Games Beer',
763 'uploader_id': 'beergamesbeer',
764 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
765 'license': 'Standard YouTube License',
766 },
767 }, {
768 'info_dict': {
769 'id': '6h8e8xoXJzg',
770 'ext': 'mp4',
771 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
772 'description': 'md5:dc7872fb300e143831327f1bae3af010',
773 'duration': 7337,
774 'upload_date': '20150721',
775 'uploader': 'Beer Games Beer',
776 'uploader_id': 'beergamesbeer',
777 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
778 'license': 'Standard YouTube License',
779 },
780 }, {
781 'info_dict': {
782 'id': 'PUOgX5z9xZw',
783 'ext': 'mp4',
784 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
785 'description': 'md5:dc7872fb300e143831327f1bae3af010',
786 'duration': 7337,
787 'upload_date': '20150721',
788 'uploader': 'Beer Games Beer',
789 'uploader_id': 'beergamesbeer',
790 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
791 'license': 'Standard YouTube License',
792 },
793 }, {
794 'info_dict': {
795 'id': 'teuwxikvS5k',
796 'ext': 'mp4',
797 'title': 'teamPGP: Rocket League Noob Stream (zim)',
798 'description': 'md5:dc7872fb300e143831327f1bae3af010',
799 'duration': 7334,
800 'upload_date': '20150721',
801 'uploader': 'Beer Games Beer',
802 'uploader_id': 'beergamesbeer',
803 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
804 'license': 'Standard YouTube License',
805 },
806 }],
807 'params': {
808 'skip_download': True,
809 },
810 'skip': 'This video is not available.',
811 },
812 {
813 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
814 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
815 'info_dict': {
816 'id': 'gVfLd0zydlo',
817 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
818 },
819 'playlist_count': 2,
820 'skip': 'Not multifeed anymore',
821 },
822 {
823 'url': 'https://vid.plus/FlRa-iH7PGw',
824 'only_matching': True,
825 },
826 {
827 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
828 'only_matching': True,
829 },
830 {
831 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
832 # Also tests cut-off URL expansion in video description (see
833 # https://github.com/ytdl-org/youtube-dl/issues/1892,
834 # https://github.com/ytdl-org/youtube-dl/issues/8164)
835 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
836 'info_dict': {
837 'id': 'lsguqyKfVQg',
838 'ext': 'mp4',
839 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
840 'alt_title': 'Dark Walk - Position Music',
841 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
842 'duration': 133,
843 'upload_date': '20151119',
844 'uploader_id': 'IronSoulElf',
845 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
846 'uploader': 'IronSoulElf',
847 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
848 'track': 'Dark Walk - Position Music',
849 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
850 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
851 },
852 'params': {
853 'skip_download': True,
854 },
855 },
856 {
857 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
858 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
859 'only_matching': True,
860 },
861 {
862 # Video with yt:stretch=17:0
863 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
864 'info_dict': {
865 'id': 'Q39EVAstoRM',
866 'ext': 'mp4',
867 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
868 'description': 'md5:ee18a25c350637c8faff806845bddee9',
869 'upload_date': '20151107',
870 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
871 'uploader': 'CH GAMER DROID',
872 },
873 'params': {
874 'skip_download': True,
875 },
876 'skip': 'This video does not exist.',
877 },
878 {
879 # Video licensed under Creative Commons
880 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
881 'info_dict': {
882 'id': 'M4gD1WSo5mA',
883 'ext': 'mp4',
884 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
885 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
886 'duration': 721,
887 'upload_date': '20150127',
888 'uploader_id': 'BerkmanCenter',
889 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
890 'uploader': 'The Berkman Klein Center for Internet & Society',
891 'license': 'Creative Commons Attribution license (reuse allowed)',
892 },
893 'params': {
894 'skip_download': True,
895 },
896 },
897 {
898 # Channel-like uploader_url
899 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
900 'info_dict': {
901 'id': 'eQcmzGIKrzg',
902 'ext': 'mp4',
903 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
904 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
905 'duration': 4060,
906 'upload_date': '20151119',
907 'uploader': 'Bernie Sanders',
908 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
909 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
910 'license': 'Creative Commons Attribution license (reuse allowed)',
911 },
912 'params': {
913 'skip_download': True,
914 },
915 },
916 {
917 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
918 'only_matching': True,
919 },
920 {
921 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
922 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
923 'only_matching': True,
924 },
925 {
926 # Rental video preview
927 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
928 'info_dict': {
929 'id': 'uGpuVWrhIzE',
930 'ext': 'mp4',
931 'title': 'Piku - Trailer',
932 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
933 'upload_date': '20150811',
934 'uploader': 'FlixMatrix',
935 'uploader_id': 'FlixMatrixKaravan',
936 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
937 'license': 'Standard YouTube License',
938 },
939 'params': {
940 'skip_download': True,
941 },
942 'skip': 'This video is not available.',
943 },
944 {
945 # YouTube Red video with episode data
946 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
947 'info_dict': {
948 'id': 'iqKdEhx-dD4',
949 'ext': 'mp4',
950 'title': 'Isolation - Mind Field (Ep 1)',
951 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
952 'duration': 2085,
953 'upload_date': '20170118',
954 'uploader': 'Vsauce',
955 'uploader_id': 'Vsauce',
956 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
957 'series': 'Mind Field',
958 'season_number': 1,
959 'episode_number': 1,
960 },
961 'params': {
962 'skip_download': True,
963 },
964 'expected_warnings': [
965 'Skipping DASH manifest',
966 ],
967 },
968 {
969 # The following content has been identified by the YouTube community
970 # as inappropriate or offensive to some audiences.
971 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
972 'info_dict': {
973 'id': '6SJNVb0GnPI',
974 'ext': 'mp4',
975 'title': 'Race Differences in Intelligence',
976 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
977 'duration': 965,
978 'upload_date': '20140124',
979 'uploader': 'New Century Foundation',
980 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
981 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
982 },
983 'params': {
984 'skip_download': True,
985 },
986 },
987 {
988 # itag 212
989 'url': '1t24XAntNCY',
990 'only_matching': True,
991 },
992 {
993 # geo restricted to JP
994 'url': 'sJL6WA-aGkQ',
995 'only_matching': True,
996 },
997 {
998 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
999 'only_matching': True,
1000 },
1001 {
1002 # DRM protected
1003 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1004 'only_matching': True,
1005 },
1006 {
1007 # Video with unsupported adaptive stream type formats
1008 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1009 'info_dict': {
1010 'id': 'Z4Vy8R84T1U',
1011 'ext': 'mp4',
1012 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1013 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1014 'duration': 433,
1015 'upload_date': '20130923',
1016 'uploader': 'Amelia Putri Harwita',
1017 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1018 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1019 'formats': 'maxcount:10',
1020 },
1021 'params': {
1022 'skip_download': True,
1023 'youtube_include_dash_manifest': False,
1024 },
1025 'skip': 'not actual anymore',
1026 },
1027 {
1028 # Youtube Music Auto-generated description
1029 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1030 'info_dict': {
1031 'id': 'MgNrAu2pzNs',
1032 'ext': 'mp4',
1033 'title': 'Voyeur Girl',
1034 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1035 'upload_date': '20190312',
1036 'uploader': 'Stephen - Topic',
1037 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1038 'artist': 'Stephen',
1039 'track': 'Voyeur Girl',
1040 'album': 'it\'s too much love to know my dear',
1041 'release_date': '20190313',
1042 'release_year': 2019,
1043 },
1044 'params': {
1045 'skip_download': True,
1046 },
1047 },
1048 {
1049 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1050 'only_matching': True,
1051 },
1052 {
1053 # invalid -> valid video id redirection
1054 'url': 'DJztXj2GPfl',
1055 'info_dict': {
1056 'id': 'DJztXj2GPfk',
1057 'ext': 'mp4',
1058 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1059 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1060 'upload_date': '20090125',
1061 'uploader': 'Prochorowka',
1062 'uploader_id': 'Prochorowka',
1063 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1064 'artist': 'Panjabi MC',
1065 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1066 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1067 },
1068 'params': {
1069 'skip_download': True,
1070 },
1071 },
1072 {
1073 # empty description results in an empty string
1074 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1075 'info_dict': {
1076 'id': 'x41yOUIvK2k',
1077 'ext': 'mp4',
1078 'title': 'IMG 3456',
1079 'description': '',
1080 'upload_date': '20170613',
1081 'uploader_id': 'ElevageOrVert',
1082 'uploader': 'ElevageOrVert',
1083 },
1084 'params': {
1085 'skip_download': True,
1086 },
1087 },
1088 {
1089 # with '};' inside yt initial data (see [1])
1090 # see [2] for an example with '};' inside ytInitialPlayerResponse
1091 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1092 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1093 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1094 'info_dict': {
1095 'id': 'CHqg6qOn4no',
1096 'ext': 'mp4',
1097 'title': 'Part 77 Sort a list of simple types in c#',
1098 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1099 'upload_date': '20130831',
1100 'uploader_id': 'kudvenkat',
1101 'uploader': 'kudvenkat',
1102 },
1103 'params': {
1104 'skip_download': True,
1105 },
1106 },
1107 {
1108 # another example of '};' in ytInitialData
1109 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1110 'only_matching': True,
1111 },
1112 {
1113 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1114 'only_matching': True,
1115 },
1116 ]
1117
    def __init__(self, *args, **kwargs):
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Per-instance cache mapping (player_url, signature cache id) to the
        # signature-decryption function extracted from that player
        # (populated and read by _decrypt_signature).
        self._player_cache = {}
1121
1122 def report_video_info_webpage_download(self, video_id):
1123 """Report attempt to download video info webpage."""
1124 self.to_screen('%s: Downloading video info webpage' % video_id)
1125
1126 def report_information_extraction(self, video_id):
1127 """Report attempt to extract video information."""
1128 self.to_screen('%s: Extracting video information' % video_id)
1129
1130 def report_unavailable_format(self, video_id, format):
1131 """Report extracted video URL."""
1132 self.to_screen('%s: Format %s not available' % (video_id, format))
1133
    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self.to_screen('RTMP download detected')
1137
1138 def _signature_cache_id(self, example_sig):
1139 """ Return a string representation of a signature """
1140 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1141
1142 @classmethod
1143 def _extract_player_info(cls, player_url):
1144 for player_re in cls._PLAYER_INFO_RE:
1145 id_m = re.search(player_re, player_url)
1146 if id_m:
1147 break
1148 else:
1149 raise ExtractorError('Cannot identify player %r' % player_url)
1150 return id_m.group('ext'), id_m.group('id')
1151
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Download the player and build a signature-decryption function.

        The learned permutation is cached on disk ('youtube-sigfuncs') keyed
        by player type/id and the signature layout of example_sig, so a
        player is only downloaded once per layout.
        """
        player_type, player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id doubles as a cache filename component; it must not contain
        # any path separators.
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a list of source-character indices; replay it.
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        # Run the function on a probe string of distinct characters to learn
        # which source index ends up at each output position, then cache that
        # permutation.  NOTE(review): this assumes the function only
        # rearranges/drops characters (no substitution) -- confirm against
        # the player's sig algorithm.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
1191
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Probes func with a string of distinct characters, recovers the index
        permutation it performs and renders it as a '+'-joined mix of slice
        expressions (for runs with stride +/-1) and single-index lookups.
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render s[start:end+step:step], omitting parts that match
                # Python's slice defaults.
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: keep extending while the stride matches,
                    # otherwise emit the finished slice and fall through to
                    # re-examine the current pair on the next iteration.
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Adjacent indices start a new run with stride +1 or -1.
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element / unfinished run.
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1230
    def _parse_sig_js(self, jscode):
        """Locate the signature function in the JS player code and wrap it.

        Returns a callable mapping an encrypted signature string to its
        decrypted form, backed by JSInterpreter.
        """
        # Patterns are ordered from current player layouts down to obsolete
        # ones; the first match wins.
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The interpreter's extracted function takes its argument list as a
        # Python list.
        return lambda s: initial_function([s])
1251
1252 def _parse_sig_swf(self, file_contents):
1253 swfi = SWFInterpreter(file_contents)
1254 TARGET_CLASSNAME = 'SignatureDecipher'
1255 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1256 initial_function = swfi.extract_function(searched_class, 'decipher')
1257 return lambda s: initial_function([s])
1258
1259 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1260 """Turn the encrypted s field into a working signature"""
1261
1262 if player_url is None:
1263 raise ExtractorError('Cannot decrypt signature without player_url')
1264
1265 if player_url.startswith('//'):
1266 player_url = 'https:' + player_url
1267 elif not re.match(r'https?://', player_url):
1268 player_url = compat_urlparse.urljoin(
1269 'https://www.youtube.com', player_url)
1270 try:
1271 player_id = (player_url, self._signature_cache_id(s))
1272 if player_id not in self._player_cache:
1273 func = self._extract_signature_function(
1274 video_id, player_url, s
1275 )
1276 self._player_cache[player_id] = func
1277 func = self._player_cache[player_id]
1278 if self._downloader.params.get('youtube_print_sig_code'):
1279 self._print_sig_code(func, s)
1280 return func(s)
1281 except Exception as e:
1282 tb = traceback.format_exc()
1283 raise ExtractorError(
1284 'Signature extraction failed: ' + tb, cause=e)
1285
1286 def _get_subtitles(self, video_id, webpage):
1287 try:
1288 subs_doc = self._download_xml(
1289 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1290 video_id, note=False)
1291 except ExtractorError as err:
1292 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1293 return {}
1294
1295 sub_lang_list = {}
1296 for track in subs_doc.findall('track'):
1297 lang = track.attrib['lang_code']
1298 if lang in sub_lang_list:
1299 continue
1300 sub_formats = []
1301 for ext in self._SUBTITLE_FORMATS:
1302 params = compat_urllib_parse_urlencode({
1303 'lang': lang,
1304 'v': video_id,
1305 'fmt': ext,
1306 'name': track.attrib['name'].encode('utf-8'),
1307 })
1308 sub_formats.append({
1309 'url': 'https://www.youtube.com/api/timedtext?' + params,
1310 'ext': ext,
1311 })
1312 sub_lang_list[lang] = sub_formats
1313 if not sub_lang_list:
1314 self._downloader.report_warning('video doesn\'t have subtitles')
1315 return {}
1316 return sub_lang_list
1317
1318 def _get_ytplayer_config(self, video_id, webpage):
1319 patterns = (
1320 # User data may contain arbitrary character sequences that may affect
1321 # JSON extraction with regex, e.g. when '};' is contained the second
1322 # regex won't capture the whole JSON. Yet working around by trying more
1323 # concrete regex first keeping in mind proper quoted string handling
1324 # to be implemented in future that will replace this workaround (see
1325 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1326 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1327 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1328 r';ytplayer\.config\s*=\s*({.+?});',
1329 )
1330 config = self._search_regex(
1331 patterns, webpage, 'ytplayer.config', default=None)
1332 if config:
1333 return self._parse_json(
1334 uppercase_escape(config), video_id, fatal=False)
1335
    def _get_automatic_captions(self, video_id, player_response, player_config):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.

        Returns a dict mapping caption language codes to format lists, or {}
        (after a warning) when no automatic captions can be found.  Tries, in
        order: the legacy ttsurl API, the post-22.06.2017 player_response
        caption tracks, and the legacy caption_tracks args.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not (player_response or player_config):
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config.get('args') if player_config else {}
            caption_url = args.get('ttsurl')
            if caption_url:
                # Legacy path: a ttsurl endpoint lists the original track and
                # the languages it can be auto-translated into.
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build one URL per (language, format) by rewriting the query
                # string of the base caption URL.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            if player_response:
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                base_url = renderer['captionTracks'][0]['baseUrl']
                sub_lang_list = []
                for lang in renderer['translationLanguages']:
                    lang_code = lang.get('languageCode')
                    if lang_code:
                        sub_lang_list.append(lang_code)
                return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Does not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1432
    def _mark_watched(self, video_id, video_info, player_response):
        """Ping YouTube's playback-stats URL so the video is marked as watched.

        The URL is taken from player_response when available, falling back to
        the legacy video_info field; a no-op when neither provides one.
        """
        playback_url = url_or_none(try_get(
            player_response,
            lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0]))
        if not playback_url:
            return
        parsed_playback_url = compat_urlparse.urlparse(playback_url)
        qs = compat_urlparse.parse_qs(parsed_playback_url.query)

        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        # NOTE(review): randint(0, 256) spans 257 values, so the '& 63' mask
        # keeps indexing in range but slightly biases towards index 0.
        # Harmless here since any dummy cpn is accepted (see above).
        cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))

        qs.update({
            'ver': ['2'],
            'cpn': [cpn],
        })
        playback_url = compat_urlparse.urlunparse(
            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

        # Best-effort ping; failures are not fatal to extraction.
        self._download_webpage(
            playback_url, video_id, 'Marking watched',
            'Unable to mark watched', fatal=False)
1458
1459 @staticmethod
1460 def _extract_urls(webpage):
1461 # Embedded YouTube player
1462 entries = [
1463 unescapeHTML(mobj.group('url'))
1464 for mobj in re.finditer(r'''(?x)
1465 (?:
1466 <iframe[^>]+?src=|
1467 data-video-url=|
1468 <embed[^>]+?src=|
1469 embedSWF\(?:\s*|
1470 <object[^>]+data=|
1471 new\s+SWFObject\(
1472 )
1473 (["\'])
1474 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1475 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1476 \1''', webpage)]
1477
1478 # lazyYT YouTube embed
1479 entries.extend(list(map(
1480 unescapeHTML,
1481 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1482
1483 # Wordpress "YouTube Video Importer" plugin
1484 matches = re.findall(r'''(?x)<div[^>]+
1485 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1486 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1487 entries.extend(m[-1] for m in matches)
1488
1489 return entries
1490
1491 @staticmethod
1492 def _extract_url(webpage):
1493 urls = YoutubeIE._extract_urls(webpage)
1494 return urls[0] if urls else None
1495
1496 @classmethod
1497 def extract_id(cls, url):
1498 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1499 if mobj is None:
1500 raise ExtractorError('Invalid URL: %s' % url)
1501 video_id = mobj.group(2)
1502 return video_id
1503
    def _extract_chapters_from_json(self, webpage, video_id, duration):
        """Extract chapter markers from the ytInitialData player bar.

        Returns a list of {'start_time', 'end_time', 'title'} dicts, or None
        when the webpage, initial data or chapter list is unavailable.
        """
        if not webpage:
            return
        data = self._extract_yt_initial_data(video_id, webpage)
        if not data or not isinstance(data, dict):
            return
        chapters_list = try_get(
            data,
            lambda x: x['playerOverlays']
                ['playerOverlayRenderer']
                ['decoratedPlayerBarRenderer']
                ['decoratedPlayerBarRenderer']
                ['playerBar']
                ['chapteredPlayerBarRenderer']
                ['chapters'],
            list)
        if not chapters_list:
            return

        def chapter_time(chapter):
            # Start time is given in milliseconds; convert to seconds.
            return float_or_none(
                try_get(
                    chapter,
                    lambda x: x['chapterRenderer']['timeRangeStartMillis'],
                    int),
                scale=1000)
        chapters = []
        for next_num, chapter in enumerate(chapters_list, start=1):
            start_time = chapter_time(chapter)
            if start_time is None:
                continue
            # A chapter ends where the next one begins; the last chapter ends
            # at the video duration.
            end_time = (chapter_time(chapters_list[next_num])
                        if next_num < len(chapters_list) else duration)
            if end_time is None:
                continue
            title = try_get(
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str)
            chapters.append({
                'start_time': start_time,
                'end_time': end_time,
                'title': title,
            })
        return chapters
1548
    @staticmethod
    def _extract_chapters_from_description(description, duration):
        """Parse chapter markers from seekTo links in the video description.

        Returns a list of {'start_time', 'end_time', 'title'} dicts, or None
        when the description contains no recognizable chapter lines.
        """
        if not description:
            return None
        # A chapter line is a <br/>-delimited line containing an onclick
        # seekTo anchor whose text is a mm:ss or hh:mm:ss timestamp.
        chapter_lines = re.findall(
            r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
            description)
        if not chapter_lines:
            return None
        chapters = []
        for next_num, (chapter_line, time_point) in enumerate(
                chapter_lines, start=1):
            start_time = parse_duration(time_point)
            if start_time is None:
                continue
            if start_time > duration:
                break
            # A chapter ends where the next one begins; the last chapter
            # ends at the video duration.
            end_time = (duration if next_num == len(chapter_lines)
                        else parse_duration(chapter_lines[next_num][1]))
            if end_time is None:
                continue
            if end_time > duration:
                end_time = duration
            if start_time > end_time:
                break
            # Strip the seekTo anchor, surrounding dashes/whitespace, and
            # collapse runs of whitespace to obtain the chapter title.
            chapter_title = re.sub(
                r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
            chapter_title = re.sub(r'\s+', ' ', chapter_title)
            chapters.append({
                'start_time': start_time,
                'end_time': end_time,
                'title': chapter_title,
            })
        return chapters
1583
1584 def _extract_chapters(self, webpage, description, video_id, duration):
1585 return (self._extract_chapters_from_json(webpage, video_id, duration)
1586 or self._extract_chapters_from_description(description, duration))
1587
1588 def _real_extract(self, url):
1589 url, smuggled_data = unsmuggle_url(url, {})
1590
1591 proto = (
1592 'http' if self._downloader.params.get('prefer_insecure', False)
1593 else 'https')
1594
1595 start_time = None
1596 end_time = None
1597 parsed_url = compat_urllib_parse_urlparse(url)
1598 for component in [parsed_url.fragment, parsed_url.query]:
1599 query = compat_parse_qs(component)
1600 if start_time is None and 't' in query:
1601 start_time = parse_duration(query['t'][0])
1602 if start_time is None and 'start' in query:
1603 start_time = parse_duration(query['start'][0])
1604 if end_time is None and 'end' in query:
1605 end_time = parse_duration(query['end'][0])
1606
1607 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1608 mobj = re.search(self._NEXT_URL_RE, url)
1609 if mobj:
1610 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1611 video_id = self.extract_id(url)
1612
1613 # Check url is youtube music
1614 is_music = 1 #re.match(r'^https?:\/\/music\.youtube\.com\/.+', url) is not None
1615
1616 # Get video webpage
1617 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1618 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1619
1620 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1621 video_id = qs.get('v', [None])[0] or video_id
1622
1623 # Attempt to extract SWF player URL
1624 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1625 if mobj is not None:
1626 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1627 else:
1628 player_url = None
1629
1630 dash_mpds = []
1631
1632 def add_dash_mpd(video_info):
1633 dash_mpd = video_info.get('dashmpd')
1634 if dash_mpd and dash_mpd[0] not in dash_mpds:
1635 dash_mpds.append(dash_mpd[0])
1636
1637 def add_dash_mpd_pr(pl_response):
1638 dash_mpd = url_or_none(try_get(
1639 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1640 compat_str))
1641 if dash_mpd and dash_mpd not in dash_mpds:
1642 dash_mpds.append(dash_mpd)
1643
1644 is_live = None
1645 view_count = None
1646
1647 def extract_view_count(v_info):
1648 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1649
1650 def extract_player_response(player_response, video_id):
1651 pl_response = str_or_none(player_response)
1652 if not pl_response:
1653 return
1654 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1655 if isinstance(pl_response, dict):
1656 add_dash_mpd_pr(pl_response)
1657 return pl_response
1658
1659 player_response = {}
1660
1661 # Get video info
1662 video_info = {}
1663 embed_webpage = None
1664 ytplayer_config = None
1665
1666 # Youtube music should be parsed from get_video_info
1667 # instead of youtube for 256kbps aac codec
1668 if is_music or re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None:
1669 age_gate = True
1670 # We simulate the access to the video from www.youtube.com/v/{video_id}
1671 # this can be viewed without login into Youtube
1672 url = proto + '://www.youtube.com/embed/%s' % video_id
1673 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1674
1675 if is_music:
1676 # el, c, cver, cplayer field required for 141(aac 256kbps) codec
1677 # maybe paramter of youtube music player?
1678 data = compat_urllib_parse_urlencode({
1679 'video_id': video_id,
1680 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1681 'el': 'detailpage',
1682 'c': 'WEB_REMIX',
1683 'cver': '0.1',
1684 'cplayer': 'UNIPLAYER',
1685 'sts': self._search_regex(
1686 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1687 })
1688 else:
1689 # Remove youtube music parameter for normal video
1690 data = compat_urllib_parse_urlencode({
1691 'video_id': video_id,
1692 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1693 'sts': self._search_regex(
1694 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1695 })
1696 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1697 try:
1698 video_info_webpage = self._download_webpage(
1699 video_info_url, video_id,
1700 note='Refetching age-gated info webpage',
1701 errnote='unable to download video info webpage')
1702 except ExtractorError:
1703 video_info_webpage = None
1704 if video_info_webpage:
1705 video_info = compat_parse_qs(video_info_webpage)
1706 pl_response = video_info.get('player_response', [None])[0]
1707 player_response = extract_player_response(pl_response, video_id)
1708 add_dash_mpd(video_info)
1709 view_count = extract_view_count(video_info)
1710 else:
1711 age_gate = False
1712 # Try looking directly into the video webpage
1713 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1714 if ytplayer_config:
1715 args = ytplayer_config['args']
1716 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1717 # Convert to the same format returned by compat_parse_qs
1718 video_info = dict((k, [v]) for k, v in args.items())
1719 add_dash_mpd(video_info)
1720 # Rental video is not rented but preview is available (e.g.
1721 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1722 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1723 if not video_info and args.get('ypc_vid'):
1724 return self.url_result(
1725 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1726 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1727 is_live = True
1728 if not player_response:
1729 player_response = extract_player_response(args.get('player_response'), video_id)
1730 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1731 add_dash_mpd_pr(player_response)
1732
1733 if not video_info and not player_response:
1734 player_response = extract_player_response(
1735 self._search_regex(
1736 (r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
1737 self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
1738 'initial player response', default='{}'),
1739 video_id)
1740
1741 def extract_unavailable_message():
1742 messages = []
1743 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1744 msg = self._html_search_regex(
1745 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1746 video_webpage, 'unavailable %s' % kind, default=None)
1747 if msg:
1748 messages.append(msg)
1749 if messages:
1750 return '\n'.join(messages)
1751
1752 if not video_info and not player_response:
1753 unavailable_message = extract_unavailable_message()
1754 if not unavailable_message:
1755 unavailable_message = 'Unable to extract video data'
1756 raise ExtractorError(
1757 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1758
1759 if not isinstance(video_info, dict):
1760 video_info = {}
1761
1762 video_details = try_get(
1763 player_response, lambda x: x['videoDetails'], dict) or {}
1764
1765 microformat = try_get(
1766 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1767
1768 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1769 if not video_title:
1770 self._downloader.report_warning('Unable to extract video title')
1771 video_title = '_'
1772
1773 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1774 if video_description:
1775
1776 def replace_url(m):
1777 redir_url = compat_urlparse.urljoin(url, m.group(1))
1778 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1779 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1780 qs = compat_parse_qs(parsed_redir_url.query)
1781 q = qs.get('q')
1782 if q and q[0]:
1783 return q[0]
1784 return redir_url
1785
1786 description_original = video_description = re.sub(r'''(?x)
1787 <a\s+
1788 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1789 (?:title|href)="([^"]+)"\s+
1790 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1791 class="[^"]*"[^>]*>
1792 [^<]+\.{3}\s*
1793 </a>
1794 ''', replace_url, video_description)
1795 video_description = clean_html(video_description)
1796 else:
1797 video_description = video_details.get('shortDescription')
1798 if video_description is None:
1799 video_description = self._html_search_meta('description', video_webpage)
1800
1801 if not smuggled_data.get('force_singlefeed', False):
1802 if not self._downloader.params.get('noplaylist'):
1803 multifeed_metadata_list = try_get(
1804 player_response,
1805 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1806 compat_str) or try_get(
1807 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1808 if multifeed_metadata_list:
1809 entries = []
1810 feed_ids = []
1811 for feed in multifeed_metadata_list.split(','):
1812 # Unquote should take place before split on comma (,) since textual
1813 # fields may contain comma as well (see
1814 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1815 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1816
1817 def feed_entry(name):
1818 return try_get(feed_data, lambda x: x[name][0], compat_str)
1819
1820 feed_id = feed_entry('id')
1821 if not feed_id:
1822 continue
1823 feed_title = feed_entry('title')
1824 title = video_title
1825 if feed_title:
1826 title += ' (%s)' % feed_title
1827 entries.append({
1828 '_type': 'url_transparent',
1829 'ie_key': 'Youtube',
1830 'url': smuggle_url(
1831 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1832 {'force_singlefeed': True}),
1833 'title': title,
1834 })
1835 feed_ids.append(feed_id)
1836 self.to_screen(
1837 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1838 % (', '.join(feed_ids), video_id))
1839 return self.playlist_result(entries, video_id, video_title, video_description)
1840 else:
1841 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1842
1843 if view_count is None:
1844 view_count = extract_view_count(video_info)
1845 if view_count is None and video_details:
1846 view_count = int_or_none(video_details.get('viewCount'))
1847 if view_count is None and microformat:
1848 view_count = int_or_none(microformat.get('viewCount'))
1849
1850 if is_live is None:
1851 is_live = bool_or_none(video_details.get('isLive'))
1852
1853 # Check for "rental" videos
1854 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1855 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1856
1857 def _extract_filesize(media_url):
1858 return int_or_none(self._search_regex(
1859 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1860
1861 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1862 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1863
1864 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1865 self.report_rtmp_download()
1866 formats = [{
1867 'format_id': '_rtmp',
1868 'protocol': 'rtmp',
1869 'url': video_info['conn'][0],
1870 'player_url': player_url,
1871 }]
1872 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1873 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1874 if 'rtmpe%3Dyes' in encoded_url_map:
1875 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1876 formats = []
1877 formats_spec = {}
1878 fmt_list = video_info.get('fmt_list', [''])[0]
1879 if fmt_list:
1880 for fmt in fmt_list.split(','):
1881 spec = fmt.split('/')
1882 if len(spec) > 1:
1883 width_height = spec[1].split('x')
1884 if len(width_height) == 2:
1885 formats_spec[spec[0]] = {
1886 'resolution': spec[1],
1887 'width': int_or_none(width_height[0]),
1888 'height': int_or_none(width_height[1]),
1889 }
1890 for fmt in streaming_formats:
1891 itag = str_or_none(fmt.get('itag'))
1892 if not itag:
1893 continue
1894 quality = fmt.get('quality')
1895 quality_label = fmt.get('qualityLabel') or quality
1896 formats_spec[itag] = {
1897 'asr': int_or_none(fmt.get('audioSampleRate')),
1898 'filesize': int_or_none(fmt.get('contentLength')),
1899 'format_note': quality_label,
1900 'fps': int_or_none(fmt.get('fps')),
1901 'height': int_or_none(fmt.get('height')),
1902 # bitrate for itag 43 is always 2147483647
1903 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1904 'width': int_or_none(fmt.get('width')),
1905 }
1906
1907 for fmt in streaming_formats:
1908 if fmt.get('drmFamilies') or fmt.get('drm_families'):
1909 continue
1910 url = url_or_none(fmt.get('url'))
1911
1912 if not url:
1913 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
1914 if not cipher:
1915 continue
1916 url_data = compat_parse_qs(cipher)
1917 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1918 if not url:
1919 continue
1920 else:
1921 cipher = None
1922 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1923
1924 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1925 # Unsupported FORMAT_STREAM_TYPE_OTF
1926 if stream_type == 3:
1927 continue
1928
1929 format_id = fmt.get('itag') or url_data['itag'][0]
1930 if not format_id:
1931 continue
1932 format_id = compat_str(format_id)
1933
1934 if cipher:
1935 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1936 ASSETS_RE = (
1937 r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
1938 r'"jsUrl"\s*:\s*("[^"]+")',
1939 r'"assets":.+?"js":\s*("[^"]+")')
1940 jsplayer_url_json = self._search_regex(
1941 ASSETS_RE,
1942 embed_webpage if age_gate else video_webpage,
1943 'JS player URL (1)', default=None)
1944 if not jsplayer_url_json and not age_gate:
1945 # We need the embed website after all
1946 if embed_webpage is None:
1947 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1948 embed_webpage = self._download_webpage(
1949 embed_url, video_id, 'Downloading embed webpage')
1950 jsplayer_url_json = self._search_regex(
1951 ASSETS_RE, embed_webpage, 'JS player URL')
1952
1953 player_url = json.loads(jsplayer_url_json)
1954 if player_url is None:
1955 player_url_json = self._search_regex(
1956 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1957 video_webpage, 'age gate player URL')
1958 player_url = json.loads(player_url_json)
1959
1960 if 'sig' in url_data:
1961 url += '&signature=' + url_data['sig'][0]
1962 elif 's' in url_data:
1963 encrypted_sig = url_data['s'][0]
1964
1965 if self._downloader.params.get('verbose'):
1966 if player_url is None:
1967 player_desc = 'unknown'
1968 else:
1969 player_type, player_version = self._extract_player_info(player_url)
1970 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
1971 parts_sizes = self._signature_cache_id(encrypted_sig)
1972 self.to_screen('{%s} signature length %s, %s' %
1973 (format_id, parts_sizes, player_desc))
1974
1975 signature = self._decrypt_signature(
1976 encrypted_sig, video_id, player_url, age_gate)
1977 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
1978 url += '&%s=%s' % (sp, signature)
1979 if 'ratebypass' not in url:
1980 url += '&ratebypass=yes'
1981
1982 dct = {
1983 'format_id': format_id,
1984 'url': url,
1985 'player_url': player_url,
1986 }
1987 if format_id in self._formats:
1988 dct.update(self._formats[format_id])
1989 if format_id in formats_spec:
1990 dct.update(formats_spec[format_id])
1991
1992 # Some itags are not included in DASH manifest thus corresponding formats will
1993 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
1994 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1995 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1996 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1997
1998 if width is None:
1999 width = int_or_none(fmt.get('width'))
2000 if height is None:
2001 height = int_or_none(fmt.get('height'))
2002
2003 filesize = int_or_none(url_data.get(
2004 'clen', [None])[0]) or _extract_filesize(url)
2005
2006 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2007 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2008
2009 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2010 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2011 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2012
2013 more_fields = {
2014 'filesize': filesize,
2015 'tbr': tbr,
2016 'width': width,
2017 'height': height,
2018 'fps': fps,
2019 'format_note': quality_label or quality,
2020 }
2021 for key, value in more_fields.items():
2022 if value:
2023 dct[key] = value
2024 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2025 if type_:
2026 type_split = type_.split(';')
2027 kind_ext = type_split[0].split('/')
2028 if len(kind_ext) == 2:
2029 kind, _ = kind_ext
2030 dct['ext'] = mimetype2ext(type_split[0])
2031 if kind in ('audio', 'video'):
2032 codecs = None
2033 for mobj in re.finditer(
2034 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2035 if mobj.group('key') == 'codecs':
2036 codecs = mobj.group('val')
2037 break
2038 if codecs:
2039 dct.update(parse_codecs(codecs))
2040 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2041 dct['downloader_options'] = {
2042 # Youtube throttles chunks >~10M
2043 'http_chunk_size': 10485760,
2044 }
2045 formats.append(dct)
2046 else:
2047 manifest_url = (
2048 url_or_none(try_get(
2049 player_response,
2050 lambda x: x['streamingData']['hlsManifestUrl'],
2051 compat_str))
2052 or url_or_none(try_get(
2053 video_info, lambda x: x['hlsvp'][0], compat_str)))
2054 if manifest_url:
2055 formats = []
2056 m3u8_formats = self._extract_m3u8_formats(
2057 manifest_url, video_id, 'mp4', fatal=False)
2058 for a_format in m3u8_formats:
2059 itag = self._search_regex(
2060 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2061 if itag:
2062 a_format['format_id'] = itag
2063 if itag in self._formats:
2064 dct = self._formats[itag].copy()
2065 dct.update(a_format)
2066 a_format = dct
2067 a_format['player_url'] = player_url
2068 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2069 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2070 formats.append(a_format)
2071 else:
2072 error_message = extract_unavailable_message()
2073 if not error_message:
2074 reason_list = try_get(
2075 player_response,
2076 lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
2077 list) or []
2078 for reason in reason_list:
2079 if not isinstance(reason, dict):
2080 continue
2081 reason_text = try_get(reason, lambda x: x['text'], compat_str)
2082 if reason_text:
2083 if not error_message:
2084 error_message = ''
2085 error_message += reason_text
2086 if error_message:
2087 error_message = clean_html(error_message)
2088 if not error_message:
2089 error_message = clean_html(try_get(
2090 player_response, lambda x: x['playabilityStatus']['reason'],
2091 compat_str))
2092 if not error_message:
2093 error_message = clean_html(
2094 try_get(video_info, lambda x: x['reason'][0], compat_str))
2095 if error_message:
2096 raise ExtractorError(error_message, expected=True)
2097 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2098
2099 # uploader
2100 video_uploader = try_get(
2101 video_info, lambda x: x['author'][0],
2102 compat_str) or str_or_none(video_details.get('author'))
2103 if video_uploader:
2104 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2105 else:
2106 self._downloader.report_warning('unable to extract uploader name')
2107
2108 # uploader_id
2109 video_uploader_id = None
2110 video_uploader_url = None
2111 mobj = re.search(
2112 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2113 video_webpage)
2114 if mobj is not None:
2115 video_uploader_id = mobj.group('uploader_id')
2116 video_uploader_url = mobj.group('uploader_url')
2117 else:
2118 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2119 if owner_profile_url:
2120 video_uploader_id = self._search_regex(
2121 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2122 default=None)
2123 video_uploader_url = owner_profile_url
2124
2125 channel_id = (
2126 str_or_none(video_details.get('channelId'))
2127 or self._html_search_meta(
2128 'channelId', video_webpage, 'channel id', default=None)
2129 or self._search_regex(
2130 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2131 video_webpage, 'channel id', default=None, group='id'))
2132 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2133
2134 thumbnails = []
2135 thumbnails_list = try_get(
2136 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2137 for t in thumbnails_list:
2138 if not isinstance(t, dict):
2139 continue
2140 thumbnail_url = url_or_none(t.get('url'))
2141 if not thumbnail_url:
2142 continue
2143 thumbnails.append({
2144 'url': thumbnail_url,
2145 'width': int_or_none(t.get('width')),
2146 'height': int_or_none(t.get('height')),
2147 })
2148
2149 if not thumbnails:
2150 video_thumbnail = None
2151 # We try first to get a high quality image:
2152 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2153 video_webpage, re.DOTALL)
2154 if m_thumb is not None:
2155 video_thumbnail = m_thumb.group(1)
2156 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2157 if thumbnail_url:
2158 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2159 if video_thumbnail:
2160 thumbnails.append({'url': video_thumbnail})
2161
2162 # upload date
2163 upload_date = self._html_search_meta(
2164 'datePublished', video_webpage, 'upload date', default=None)
2165 if not upload_date:
2166 upload_date = self._search_regex(
2167 [r'(?s)id="eow-date.*?>(.*?)</span>',
2168 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2169 video_webpage, 'upload date', default=None)
2170 if not upload_date:
2171 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2172 upload_date = unified_strdate(upload_date)
2173
2174 video_license = self._html_search_regex(
2175 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2176 video_webpage, 'license', default=None)
2177
2178 m_music = re.search(
2179 r'''(?x)
2180 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2181 <ul[^>]*>\s*
2182 <li>(?P<title>.+?)
2183 by (?P<creator>.+?)
2184 (?:
2185 \(.+?\)|
2186 <a[^>]*
2187 (?:
2188 \bhref=["\']/red[^>]*>| # drop possible
2189 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2190 )
2191 .*?
2192 )?</li
2193 ''',
2194 video_webpage)
2195 if m_music:
2196 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2197 video_creator = clean_html(m_music.group('creator'))
2198 else:
2199 video_alt_title = video_creator = None
2200
2201 def extract_meta(field):
2202 return self._html_search_regex(
2203 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2204 video_webpage, field, default=None)
2205
2206 track = extract_meta('Song')
2207 artist = extract_meta('Artist')
2208 album = extract_meta('Album')
2209
2210 # Youtube Music Auto-generated description
2211 release_date = release_year = None
2212 if video_description:
2213 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2214 if mobj:
2215 if not track:
2216 track = mobj.group('track').strip()
2217 if not artist:
2218 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2219 if not album:
2220 album = mobj.group('album'.strip())
2221 release_year = mobj.group('release_year')
2222 release_date = mobj.group('release_date')
2223 if release_date:
2224 release_date = release_date.replace('-', '')
2225 if not release_year:
2226 release_year = int(release_date[:4])
2227 if release_year:
2228 release_year = int(release_year)
2229
2230 yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
2231 contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2232 for content in contents:
2233 rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
2234 multiple_songs = False
2235 for row in rows:
2236 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2237 multiple_songs = True
2238 break
2239 for row in rows:
2240 mrr = row.get('metadataRowRenderer') or {}
2241 mrr_title = try_get(
2242 mrr, lambda x: x['title']['simpleText'], compat_str)
2243 mrr_contents = try_get(
2244 mrr, lambda x: x['contents'][0], dict) or {}
2245 mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
2246 if not (mrr_title and mrr_contents_text):
2247 continue
2248 if mrr_title == 'License':
2249 video_license = mrr_contents_text
2250 elif not multiple_songs:
2251 if mrr_title == 'Album':
2252 album = mrr_contents_text
2253 elif mrr_title == 'Artist':
2254 artist = mrr_contents_text
2255 elif mrr_title == 'Song':
2256 track = mrr_contents_text
2257
2258 m_episode = re.search(
2259 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2260 video_webpage)
2261 if m_episode:
2262 series = unescapeHTML(m_episode.group('series'))
2263 season_number = int(m_episode.group('season'))
2264 episode_number = int(m_episode.group('episode'))
2265 else:
2266 series = season_number = episode_number = None
2267
2268 m_cat_container = self._search_regex(
2269 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2270 video_webpage, 'categories', default=None)
2271 category = None
2272 if m_cat_container:
2273 category = self._html_search_regex(
2274 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2275 default=None)
2276 if not category:
2277 category = try_get(
2278 microformat, lambda x: x['category'], compat_str)
2279 video_categories = None if category is None else [category]
2280
2281 video_tags = [
2282 unescapeHTML(m.group('content'))
2283 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2284 if not video_tags:
2285 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2286
2287 def _extract_count(count_name):
2288 return str_to_int(self._search_regex(
2289 (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
2290 r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
2291 video_webpage, count_name, default=None))
2292
2293 like_count = _extract_count('like')
2294 dislike_count = _extract_count('dislike')
2295
2296 if view_count is None:
2297 view_count = str_to_int(self._search_regex(
2298 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2299 'view count', default=None))
2300
2301 average_rating = (
2302 float_or_none(video_details.get('averageRating'))
2303 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2304
2305 # subtitles
2306 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2307 automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
2308
2309 video_duration = try_get(
2310 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2311 if not video_duration:
2312 video_duration = int_or_none(video_details.get('lengthSeconds'))
2313 if not video_duration:
2314 video_duration = parse_duration(self._html_search_meta(
2315 'duration', video_webpage, 'video duration'))
2316
2317 # annotations
2318 video_annotations = None
2319 if self._downloader.params.get('writeannotations', False):
2320 xsrf_token = None
2321 ytcfg = self._extract_ytcfg(video_id, video_webpage)
2322 if ytcfg:
2323 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2324 if not xsrf_token:
2325 xsrf_token = self._search_regex(
2326 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2327 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2328 invideo_url = try_get(
2329 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2330 if xsrf_token and invideo_url:
2331 xsrf_field_name = None
2332 if ytcfg:
2333 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2334 if not xsrf_field_name:
2335 xsrf_field_name = self._search_regex(
2336 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2337 video_webpage, 'xsrf field name',
2338 group='xsrf_field_name', default='session_token')
2339 video_annotations = self._download_webpage(
2340 self._proto_relative_url(invideo_url),
2341 video_id, note='Downloading annotations',
2342 errnote='Unable to download video annotations', fatal=False,
2343 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2344
2345 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2346
2347 # Look for the DASH manifest
2348 if self._downloader.params.get('youtube_include_dash_manifest', True):
2349 dash_mpd_fatal = True
2350 for mpd_url in dash_mpds:
2351 dash_formats = {}
2352 try:
2353 def decrypt_sig(mobj):
2354 s = mobj.group(1)
2355 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2356 return '/signature/%s' % dec_s
2357
2358 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2359
2360 for df in self._extract_mpd_formats(
2361 mpd_url, video_id, fatal=dash_mpd_fatal,
2362 formats_dict=self._formats):
2363 if not df.get('filesize'):
2364 df['filesize'] = _extract_filesize(df['url'])
2365 # Do not overwrite DASH format found in some previous DASH manifest
2366 if df['format_id'] not in dash_formats:
2367 dash_formats[df['format_id']] = df
2368 # Additional DASH manifests may end up in HTTP Error 403 therefore
2369 # allow them to fail without bug report message if we already have
2370 # some DASH manifest succeeded. This is temporary workaround to reduce
2371 # burst of bug reports until we figure out the reason and whether it
2372 # can be fixed at all.
2373 dash_mpd_fatal = False
2374 except (ExtractorError, KeyError) as e:
2375 self.report_warning(
2376 'Skipping DASH manifest: %r' % e, video_id)
2377 if dash_formats:
2378 # Remove the formats we found through non-DASH, they
2379 # contain less info and it can be wrong, because we use
2380 # fixed values (for example the resolution). See
2381 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2382 # example.
2383 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2384 formats.extend(dash_formats.values())
2385
2386 # Check for malformed aspect ratio
2387 stretched_m = re.search(
2388 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2389 video_webpage)
2390 if stretched_m:
2391 w = float(stretched_m.group('w'))
2392 h = float(stretched_m.group('h'))
2393 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2394 # We will only process correct ratios.
2395 if w > 0 and h > 0:
2396 ratio = w / h
2397 for f in formats:
2398 if f.get('vcodec') != 'none':
2399 f['stretched_ratio'] = ratio
2400
2401 if not formats:
2402 if 'reason' in video_info:
2403 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2404 regions_allowed = self._html_search_meta(
2405 'regionsAllowed', video_webpage, default=None)
2406 countries = regions_allowed.split(',') if regions_allowed else None
2407 self.raise_geo_restricted(
2408 msg=video_info['reason'][0], countries=countries)
2409 reason = video_info['reason'][0]
2410 if 'Invalid parameters' in reason:
2411 unavailable_message = extract_unavailable_message()
2412 if unavailable_message:
2413 reason = unavailable_message
2414 raise ExtractorError(
2415 'YouTube said: %s' % reason,
2416 expected=True, video_id=video_id)
2417 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2418 raise ExtractorError('This video is DRM protected.', expected=True)
2419
2420 self._sort_formats(formats)
2421
2422 self.mark_watched(video_id, video_info, player_response)
2423
2424 return {
2425 'id': video_id,
2426 'uploader': video_uploader,
2427 'uploader_id': video_uploader_id,
2428 'uploader_url': video_uploader_url,
2429 'channel_id': channel_id,
2430 'channel_url': channel_url,
2431 'upload_date': upload_date,
2432 'license': video_license,
2433 'creator': video_creator or artist,
2434 'title': video_title,
2435 'alt_title': video_alt_title or track,
2436 'thumbnails': thumbnails,
2437 'description': video_description,
2438 'categories': video_categories,
2439 'tags': video_tags,
2440 'subtitles': video_subtitles,
2441 'automatic_captions': automatic_captions,
2442 'duration': video_duration,
2443 'age_limit': 18 if age_gate else 0,
2444 'annotations': video_annotations,
2445 'chapters': chapters,
2446 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2447 'view_count': view_count,
2448 'like_count': like_count,
2449 'dislike_count': dislike_count,
2450 'average_rating': average_rating,
2451 'formats': formats,
2452 'is_live': is_live,
2453 'start_time': start_time,
2454 'end_time': end_time,
2455 'series': series,
2456 'season_number': season_number,
2457 'episode_number': episode_number,
2458 'track': track,
2459 'artist': artist,
2460 'album': album,
2461 'release_date': release_date,
2462 'release_year': release_year,
2463 }
2464
2465
2466class YoutubeTabIE(YoutubeBaseInfoExtractor):
2467 IE_DESC = 'YouTube.com tab'
2468 _VALID_URL = r'''(?x)
2469 https?://
2470 (?:\w+\.)?
2471 (?:
2472 youtube(?:kids)?\.com|
2473 invidio\.us
2474 )/
2475 (?:
2476 (?:channel|c|user|feed)/|
2477 (?:playlist|watch)\?.*?\blist=|
2478 (?!(?:watch|embed|v|e)\b)
2479 )
2480 (?P<id>[^/?\#&]+)
2481 '''
2482 IE_NAME = 'youtube:tab'
2483
2484 _TESTS = [{
2485 # playlists, multipage
2486 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2487 'playlist_mincount': 94,
2488 'info_dict': {
2489 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2490 'title': 'Игорь Клейнер - Playlists',
2491 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2492 },
2493 }, {
2494 # playlists, multipage, different order
2495 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2496 'playlist_mincount': 94,
2497 'info_dict': {
2498 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2499 'title': 'Игорь Клейнер - Playlists',
2500 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2501 },
2502 }, {
2503 # playlists, singlepage
2504 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2505 'playlist_mincount': 4,
2506 'info_dict': {
2507 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2508 'title': 'ThirstForScience - Playlists',
2509 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2510 }
2511 }, {
2512 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2513 'only_matching': True,
2514 }, {
2515 # basic, single video playlist
2516 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2517 'info_dict': {
2518 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2519 'uploader': 'Sergey M.',
2520 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2521 'title': 'youtube-dl public playlist',
2522 },
2523 'playlist_count': 1,
2524 }, {
2525 # empty playlist
2526 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2527 'info_dict': {
2528 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2529 'uploader': 'Sergey M.',
2530 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2531 'title': 'youtube-dl empty playlist',
2532 },
2533 'playlist_count': 0,
2534 }, {
2535 # Home tab
2536 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
2537 'info_dict': {
2538 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2539 'title': 'lex will - Home',
2540 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2541 },
2542 'playlist_mincount': 2,
2543 }, {
2544 # Videos tab
2545 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
2546 'info_dict': {
2547 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2548 'title': 'lex will - Videos',
2549 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2550 },
2551 'playlist_mincount': 975,
2552 }, {
2553 # Videos tab, sorted by popular
2554 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
2555 'info_dict': {
2556 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2557 'title': 'lex will - Videos',
2558 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2559 },
2560 'playlist_mincount': 199,
2561 }, {
2562 # Playlists tab
2563 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
2564 'info_dict': {
2565 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2566 'title': 'lex will - Playlists',
2567 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2568 },
2569 'playlist_mincount': 17,
2570 }, {
2571 # Community tab
2572 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
2573 'info_dict': {
2574 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2575 'title': 'lex will - Community',
2576 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2577 },
2578 'playlist_mincount': 18,
2579 }, {
2580 # Channels tab
2581 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
2582 'info_dict': {
2583 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2584 'title': 'lex will - Channels',
2585 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2586 },
2587 'playlist_mincount': 138,
2588 }, {
2589 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2590 'only_matching': True,
2591 }, {
2592 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2593 'only_matching': True,
2594 }, {
2595 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2596 'only_matching': True,
2597 }, {
2598 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2599 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2600 'info_dict': {
2601 'title': '29C3: Not my department',
2602 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2603 'uploader': 'Christiaan008',
2604 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2605 },
2606 'playlist_count': 96,
2607 }, {
2608 'note': 'Large playlist',
2609 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2610 'info_dict': {
2611 'title': 'Uploads from Cauchemar',
2612 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2613 'uploader': 'Cauchemar',
2614 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
2615 },
2616 'playlist_mincount': 1123,
2617 }, {
2618 # even larger playlist, 8832 videos
2619 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2620 'only_matching': True,
2621 }, {
2622 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2623 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2624 'info_dict': {
2625 'title': 'Uploads from Interstellar Movie',
2626 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2627 'uploader': 'Interstellar Movie',
2628 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
2629 },
2630 'playlist_mincount': 21,
2631 }, {
2632 # https://github.com/ytdl-org/youtube-dl/issues/21844
2633 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2634 'info_dict': {
2635 'title': 'Data Analysis with Dr Mike Pound',
2636 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2637 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2638 'uploader': 'Computerphile',
2639 },
2640 'playlist_mincount': 11,
2641 }, {
2642 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2643 'only_matching': True,
2644 }, {
2645 # Playlist URL that does not actually serve a playlist
2646 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2647 'info_dict': {
2648 'id': 'FqZTN594JQw',
2649 'ext': 'webm',
2650 'title': "Smiley's People 01 detective, Adventure Series, Action",
2651 'uploader': 'STREEM',
2652 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2653 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2654 'upload_date': '20150526',
2655 'license': 'Standard YouTube License',
2656 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2657 'categories': ['People & Blogs'],
2658 'tags': list,
2659 'view_count': int,
2660 'like_count': int,
2661 'dislike_count': int,
2662 },
2663 'params': {
2664 'skip_download': True,
2665 },
2666 'skip': 'This video is not available.',
2667 'add_ie': [YoutubeIE.ie_key()],
2668 }, {
2669 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2670 'only_matching': True,
2671 }, {
2672 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
2673 'only_matching': True,
2674 }, {
2675 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2676 'info_dict': {
2677 'id': '9Auq9mYxFEE',
2678 'ext': 'mp4',
2679 'title': 'Watch Sky News live',
2680 'uploader': 'Sky News',
2681 'uploader_id': 'skynews',
2682 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2683 'upload_date': '20191102',
2684 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
2685 'categories': ['News & Politics'],
2686 'tags': list,
2687 'like_count': int,
2688 'dislike_count': int,
2689 },
2690 'params': {
2691 'skip_download': True,
2692 },
2693 }, {
2694 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2695 'info_dict': {
2696 'id': 'a48o2S1cPoo',
2697 'ext': 'mp4',
2698 'title': 'The Young Turks - Live Main Show',
2699 'uploader': 'The Young Turks',
2700 'uploader_id': 'TheYoungTurks',
2701 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2702 'upload_date': '20150715',
2703 'license': 'Standard YouTube License',
2704 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2705 'categories': ['News & Politics'],
2706 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2707 'like_count': int,
2708 'dislike_count': int,
2709 },
2710 'params': {
2711 'skip_download': True,
2712 },
2713 'only_matching': True,
2714 }, {
2715 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2716 'only_matching': True,
2717 }, {
2718 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2719 'only_matching': True,
2720 }, {
2721 'url': 'https://www.youtube.com/feed/trending',
2722 'only_matching': True,
2723 }, {
2724 # needs auth
2725 'url': 'https://www.youtube.com/feed/library',
2726 'only_matching': True,
2727 }, {
2728 # needs auth
2729 'url': 'https://www.youtube.com/feed/history',
2730 'only_matching': True,
2731 }, {
2732 # needs auth
2733 'url': 'https://www.youtube.com/feed/subscriptions',
2734 'only_matching': True,
2735 }, {
2736 # needs auth
2737 'url': 'https://www.youtube.com/feed/watch_later',
2738 'only_matching': True,
2739 }, {
2740 # no longer available?
2741 'url': 'https://www.youtube.com/feed/recommended',
2742 'only_matching': True,
2743 }, {
2744 # inline playlist with not always working continuations
2745 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2746 'only_matching': True,
2747 }, {
2748 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2749 'only_matching': True,
2750 }, {
2751 'url': 'https://www.youtube.com/course',
2752 'only_matching': True,
2753 }, {
2754 'url': 'https://www.youtube.com/zsecurity',
2755 'only_matching': True,
2756 }, {
2757 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2758 'only_matching': True,
2759 }, {
2760 'url': 'https://www.youtube.com/TheYoungTurks/live',
2761 'only_matching': True,
2762 }]
2763
2764 @classmethod
2765 def suitable(cls, url):
2766 return False if YoutubeIE.suitable(url) else super(
2767 YoutubeTabIE, cls).suitable(url)
2768
2769 def _extract_channel_id(self, webpage):
2770 channel_id = self._html_search_meta(
2771 'channelId', webpage, 'channel id', default=None)
2772 if channel_id:
2773 return channel_id
2774 channel_url = self._html_search_meta(
2775 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2776 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2777 'twitter:app:url:googleplay'), webpage, 'channel url')
2778 return self._search_regex(
2779 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2780 channel_url, 'channel id')
2781
2782 @staticmethod
2783 def _extract_grid_item_renderer(item):
2784 for item_kind in ('Playlist', 'Video', 'Channel'):
2785 renderer = item.get('grid%sRenderer' % item_kind)
2786 if renderer:
2787 return renderer
2788
2789 def _extract_video(self, renderer):
2790 video_id = renderer.get('videoId')
2791 title = try_get(
2792 renderer,
2793 (lambda x: x['title']['runs'][0]['text'],
2794 lambda x: x['title']['simpleText']), compat_str)
2795 description = try_get(
2796 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
2797 compat_str)
2798 duration = parse_duration(try_get(
2799 renderer, lambda x: x['lengthText']['simpleText'], compat_str))
2800 view_count_text = try_get(
2801 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
2802 view_count = str_to_int(self._search_regex(
2803 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
2804 'view count', default=None))
2805 uploader = try_get(
2806 renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
2807 return {
2808 '_type': 'url_transparent',
2809 'ie_key': YoutubeIE.ie_key(),
2810 'id': video_id,
2811 'url': video_id,
2812 'title': title,
2813 'description': description,
2814 'duration': duration,
2815 'view_count': view_count,
2816 'uploader': uploader,
2817 }
2818
    def _grid_entries(self, grid_renderer):
        """Yield entries for the items of a gridRenderer.

        Each grid item normally carries exactly one of playlistId, videoId
        or channelId; every id that is present produces one entry.
        """
        for item in grid_renderer['items']:
            if not isinstance(item, dict):
                continue
            renderer = self._extract_grid_item_renderer(item)
            if not isinstance(renderer, dict):
                continue
            title = try_get(
                renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
            # playlist
            playlist_id = renderer.get('playlistId')
            if playlist_id:
                yield self.url_result(
                    'https://www.youtube.com/playlist?list=%s' % playlist_id,
                    ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                    video_title=title)
            # video
            video_id = renderer.get('videoId')
            if video_id:
                yield self._extract_video(renderer)
            # channel
            channel_id = renderer.get('channelId')
            if channel_id:
                # Channel titles use simpleText rather than runs.
                title = try_get(
                    renderer, lambda x: x['title']['simpleText'], compat_str)
                yield self.url_result(
                    'https://www.youtube.com/channel/%s' % channel_id,
                    ie=YoutubeTabIE.ie_key(), video_title=title)
2847
2848 def _shelf_entries_from_content(self, shelf_renderer):
2849 content = shelf_renderer.get('content')
2850 if not isinstance(content, dict):
2851 return
2852 renderer = content.get('gridRenderer')
2853 if renderer:
2854 # TODO: add support for nested playlists so each shelf is processed
2855 # as separate playlist
2856 # TODO: this includes only first N items
2857 for entry in self._grid_entries(renderer):
2858 yield entry
2859 renderer = content.get('horizontalListRenderer')
2860 if renderer:
2861 # TODO
2862 pass
2863
    def _shelf_entries(self, shelf_renderer, skip_channels=False):
        """Yield entries for a shelfRenderer.

        If the shelf links to its own page, that page is yielded as a
        playlist entry; entries embedded in the shelf content are yielded
        as well (the shelf URL may be absent entirely).
        """
        ep = try_get(
            shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str)
        shelf_url = urljoin('https://www.youtube.com', ep)
        if shelf_url:
            # Skipping links to another channels, note that checking for
            # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
            # will not work
            if skip_channels and '/channels?' in shelf_url:
                return
            title = try_get(
                shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
            yield self.url_result(shelf_url, video_title=title)
        # Shelf may not contain shelf URL, fallback to extraction from content
        for entry in self._shelf_entries_from_content(shelf_renderer):
            yield entry
2881
2882 def _playlist_entries(self, video_list_renderer):
2883 for content in video_list_renderer['contents']:
2884 if not isinstance(content, dict):
2885 continue
2886 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2887 if not isinstance(renderer, dict):
2888 continue
2889 video_id = renderer.get('videoId')
2890 if not video_id:
2891 continue
2892 yield self._extract_video(renderer)
2893
2894 def _video_entry(self, video_renderer):
2895 video_id = video_renderer.get('videoId')
2896 if video_id:
2897 return self._extract_video(video_renderer)
2898
2899 def _post_thread_entries(self, post_thread_renderer):
2900 post_renderer = try_get(
2901 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
2902 if not post_renderer:
2903 return
2904 # video attachment
2905 video_renderer = try_get(
2906 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
2907 video_id = None
2908 if video_renderer:
2909 entry = self._video_entry(video_renderer)
2910 if entry:
2911 yield entry
2912 # inline video links
2913 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
2914 for run in runs:
2915 if not isinstance(run, dict):
2916 continue
2917 ep_url = try_get(
2918 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
2919 if not ep_url:
2920 continue
2921 if not YoutubeIE.suitable(ep_url):
2922 continue
2923 ep_video_id = YoutubeIE._match_id(ep_url)
2924 if video_id == ep_video_id:
2925 continue
2926 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
2927
2928 def _post_thread_continuation_entries(self, post_thread_continuation):
2929 contents = post_thread_continuation.get('contents')
2930 if not isinstance(contents, list):
2931 return
2932 for content in contents:
2933 renderer = content.get('backstagePostThreadRenderer')
2934 if not isinstance(renderer, dict):
2935 continue
2936 for entry in self._post_thread_entries(renderer):
2937 yield entry
2938
2939 @staticmethod
2940 def _build_continuation_query(continuation, ctp=None):
2941 query = {
2942 'ctoken': continuation,
2943 'continuation': continuation,
2944 }
2945 if ctp:
2946 query['itct'] = ctp
2947 return query
2948
2949 @staticmethod
2950 def _extract_next_continuation_data(renderer):
2951 next_continuation = try_get(
2952 renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
2953 if not next_continuation:
2954 return
2955 continuation = next_continuation.get('continuation')
2956 if not continuation:
2957 return
2958 ctp = next_continuation.get('clickTrackingParams')
2959 return YoutubeTabIE._build_continuation_query(continuation, ctp)
2960
    @classmethod
    def _extract_continuation(cls, renderer):
        """Return a continuation query for *renderer*, or None.

        Tries the old-style nextContinuationData layout first, then the
        newer continuationItemRenderer/continuationEndpoint layout found
        in the renderer's contents.
        """
        next_continuation = cls._extract_next_continuation_data(renderer)
        if next_continuation:
            return next_continuation
        contents = renderer.get('contents')
        if not isinstance(contents, list):
            return
        for content in contents:
            if not isinstance(content, dict):
                continue
            continuation_ep = try_get(
                content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                dict)
            if not continuation_ep:
                continue
            continuation = try_get(
                continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
            if not continuation:
                continue
            ctp = continuation_ep.get('clickTrackingParams')
            return YoutubeTabIE._build_continuation_query(continuation, ctp)
2983
    def _entries(self, tab, identity_token):
        """Generate all entries for the selected tab.

        First walks the sectionListRenderer contents embedded in the page,
        collecting a continuation token along the way, then keeps fetching
        /browse_ajax continuation pages until no further token is found.
        """
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        slr_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        if not slr_renderer:
            return
        is_channels_tab = tab.get('title') == 'Channels'
        continuation = None
        slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or []
        for slr_content in slr_contents:
            if not isinstance(slr_content, dict):
                continue
            is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue
                # Exactly one of the renderer kinds below is expected per item.
                renderer = isr_content.get('playlistVideoListRenderer')
                if renderer:
                    for entry in self._playlist_entries(renderer):
                        yield entry
                    continuation = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('gridRenderer')
                if renderer:
                    for entry in self._grid_entries(renderer):
                        yield entry
                    continuation = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('shelfRenderer')
                if renderer:
                    # On non-Channels tabs, shelves linking to other
                    # channels are skipped.
                    for entry in self._shelf_entries(renderer, not is_channels_tab):
                        yield entry
                    continue
                renderer = isr_content.get('backstagePostThreadRenderer')
                if renderer:
                    for entry in self._post_thread_entries(renderer):
                        yield entry
                    continuation = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('videoRenderer')
                if renderer:
                    entry = self._video_entry(renderer)
                    if entry:
                        yield entry

            if not continuation:
                continuation = self._extract_continuation(is_renderer)

        if not continuation:
            continuation = self._extract_continuation(slr_renderer)

        headers = {
            'x-youtube-client-name': '1',
            'x-youtube-client-version': '2.20201112.04.01',
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token

        for page_num in itertools.count(1):
            if not continuation:
                break
            count = 0
            retries = 3
            while count <= retries:
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    browse = self._download_json(
                        'https://www.youtube.com/browse_ajax', None,
                        'Downloading page %d%s'
                        % (page_num, ' (retry #%d)' % count if count else ''),
                        headers=headers, query=continuation)
                    break
                except ExtractorError as e:
                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                        count += 1
                        if count <= retries:
                            continue
                    raise
            if not browse:
                break
            response = try_get(browse, lambda x: x[1]['response'], dict)
            if not response:
                break

            # Old-style continuation payloads.
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict)
            if continuation_contents:
                continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
                if continuation_renderer:
                    for entry in self._playlist_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('gridContinuation')
                if continuation_renderer:
                    for entry in self._grid_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('itemSectionContinuation')
                if continuation_renderer:
                    for entry in self._post_thread_continuation_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue

            # New-style appendContinuationItemsAction payloads.
            continuation_items = try_get(
                response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
            if continuation_items:
                continuation_item = continuation_items[0]
                if not isinstance(continuation_item, dict):
                    # NOTE(review): this re-requests the SAME continuation
                    # token on the next iteration; if the first item is
                    # persistently not a dict this could loop — confirm
                    # upstream intent before changing.
                    continue
                renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
                if renderer:
                    video_list_renderer = {'contents': continuation_items}
                    for entry in self._playlist_entries(video_list_renderer):
                        yield entry
                    continuation = self._extract_continuation(video_list_renderer)
                    continue

            break
3110
3111 @staticmethod
3112 def _extract_selected_tab(tabs):
3113 for tab in tabs:
3114 if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
3115 return tab['tabRenderer']
3116 else:
3117 raise ExtractorError('Unable to find selected tab')
3118
    @staticmethod
    def _extract_uploader(data):
        """Extract uploader name/id/url from the playlist sidebar.

        Returns a (possibly empty) dict suitable for merging into a
        playlist result. If several sidebar items carry owner info, the
        last one wins.
        """
        uploader = {}
        sidebar_renderer = try_get(
            data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
        if sidebar_renderer:
            for item in sidebar_renderer:
                if not isinstance(item, dict):
                    continue
                renderer = item.get('playlistSidebarSecondaryInfoRenderer')
                if not isinstance(renderer, dict):
                    continue
                owner = try_get(
                    renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
                if owner:
                    uploader['uploader'] = owner.get('text')
                    uploader['uploader_id'] = try_get(
                        owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
                    uploader['uploader_url'] = urljoin(
                        'https://www.youtube.com/',
                        try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
        return uploader
3141
3142 @staticmethod
3143 def _extract_alert(data):
3144 alerts = []
3145 for alert in try_get(data, lambda x: x['alerts'], list) or []:
3146 if not isinstance(alert, dict):
3147 continue
3148 alert_text = try_get(
3149 alert, lambda x: x['alertRenderer']['text'], dict)
3150 if not alert_text:
3151 continue
3152 text = try_get(
3153 alert_text,
3154 (lambda x: x['simpleText'], lambda x: x['runs'][0]['text']),
3155 compat_str)
3156 if text:
3157 alerts.append(text)
3158 return '\n'.join(alerts)
3159
    def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
        """Build a playlist result for a tab-style page (channel or playlist).

        Title/description come from channelMetadataRenderer first;
        playlistMetadataRenderer, when present, overrides them.
        """
        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        playlist_id = title = description = None
        if renderer:
            channel_title = renderer.get('title') or item_id
            tab_title = selected_tab.get('title')
            title = channel_title or item_id
            if tab_title:
                # e.g. "lex will - Videos"
                title += ' - %s' % tab_title
            description = renderer.get('description')
            playlist_id = renderer.get('externalId')
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
        if renderer:
            title = renderer.get('title')
            description = None
            playlist_id = item_id
        playlist = self.playlist_result(
            self._entries(selected_tab, identity_token),
            playlist_id=playlist_id, playlist_title=title,
            playlist_description=description)
        playlist.update(self._extract_uploader(data))
        return playlist
3185
    def _extract_from_playlist(self, item_id, url, data, playlist):
        """Extract an inline (watch-page) playlist rendition.

        Prefers delegating to the regular tab-based playlist URL when one
        is advertised; otherwise extracts the inline entries directly.
        """
        title = playlist.get('title') or try_get(
            data, lambda x: x['titleText']['simpleText'], compat_str)
        playlist_id = playlist.get('playlistId') or item_id
        # Inline playlist rendition continuation does not always work
        # at Youtube side, so delegating regular tab-based playlist URL
        # processing whenever possible.
        playlist_url = urljoin(url, try_get(
            playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str))
        if playlist_url and playlist_url != url:
            return self.url_result(
                playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        return self.playlist_result(
            self._playlist_entries(playlist), playlist_id=playlist_id,
            playlist_title=title)
3203
3204 def _extract_identity_token(self, webpage, item_id):
3205 ytcfg = self._extract_ytcfg(item_id, webpage)
3206 if ytcfg:
3207 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
3208 if token:
3209 return token
3210 return self._search_regex(
3211 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
3212 'identity token', default=None)
3213
    def _real_extract(self, url):
        """Dispatch a tab/playlist/watch URL to the appropriate extraction path.

        Order matters: tabs page, then inline watch-page playlist, then
        plain video fallback, then alert reporting.
        """
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # Handle both video/playlist URLs
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]
        if video_id and playlist_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        webpage = self._download_webpage(url, item_id)
        identity_token = self._extract_identity_token(webpage, item_id)
        data = self._extract_yt_initial_data(item_id, webpage)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist)
        # Fallback to video extraction if no playlist alike page is recognized.
        # First check for the current video then try the v attribute of URL query.
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        # Capture and output alerts
        alert = self._extract_alert(data)
        if alert:
            raise ExtractorError(alert, expected=True)
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')
3251
3252
class YoutubePlaylistIE(InfoExtractor):
    """Thin wrapper that forwards playlist URLs/ids to YoutubeTabIE."""
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Defer to YoutubeTabIE when it already matches the URL."""
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        # A bare playlist id has no query string; synthesize one.
        return self.url_result(
            update_url_query(
                'https://www.youtube.com/playlist',
                qs or {'list': playlist_id}),
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3327
3328
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that carry a list= parameter."""
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Rebuild an equivalent full watch URL and hand it to the tab extractor.
        video_id, playlist_id = re.match(self._VALID_URL, url).group(
            'id', 'playlist_id')
        return self.url_result(
            update_url_query('https://www.youtube.com/watch', {
                'v': video_id,
                'list': playlist_id,
                'feature': 'youtu.be',
            }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3366
3367
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the internal 'ytuser:NAME' scheme to a user channel URL."""
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
3380
3381
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve ':ytfav' to the authenticated user's liked-videos playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # LL is the special id of the logged-in user's liked-videos list.
        liked_list_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_list_url, ie=YoutubeTabIE.ie_key())
3399
3400
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor backing the 'ytsearch<N>:<query>' pseudo-URLs."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional filter parameter sent to the API (URL-encoded protobuf);
    # subclasses override it, None means an unfiltered search.
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n url_transparent video results for query.

        Pages through the youtubei/v1/search API, following continuation
        tokens until n results have been yielded, a page fails to download,
        or no further continuation token is present.
        """
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            # The key in the URL is the public API key of the regular web
            # client, not a user credential.
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # The first response and continuation responses nest the
            # section list differently; probe both layouts.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break
            isr_contents = try_get(
                slr_contents,
                lambda x: x[0]['itemSectionRenderer']['contents'],
                list)
            if not isr_contents:
                break
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                # Non-video entries (channels, playlists, ads) have no
                # videoRenderer and are skipped.
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                # viewCountText is human-formatted (e.g. '1,234 views').
                # Match digits together with thousands separators and let
                # str_to_int strip them; the previous r'^(\d+)' stopped at
                # the first comma and truncated counts >= 1000 (e.g.
                # '1,234 views' -> 1).
                view_count = str_to_int(self._search_regex(
                    r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                if total == n:
                    return
            # The continuation token for the next page, when present, sits
            # in the second element of the section list.
            token = try_get(
                slr_contents,
                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                compat_str)
            if not token:
                break
            data['continuation'] = token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3489
3490
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Same search as YoutubeSearchIE, but results sorted newest-first."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # URL-encoded filter value ('CAI=') passed as the API 'params' field to
    # request sorting by upload date.
    _SEARCH_PARAMS = 'CAI%3D'
3496
3497
3498r"""
3499class YoutubeSearchURLIE(YoutubeSearchIE):
3500 IE_DESC = 'YouTube.com search URLs'
3501 IE_NAME = 'youtube:search_url'
3502 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
3503 _TESTS = [{
3504 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
3505 'playlist_mincount': 5,
3506 'info_dict': {
3507 'title': 'youtube-dl test video',
3508 }
3509 }, {
3510 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
3511 'only_matching': True,
3512 }]
3513
3514 def _real_extract(self, url):
3515 mobj = re.match(self._VALID_URL, url)
3516 query = compat_urllib_parse_unquote_plus(mobj.group('query'))
3517 webpage = self._download_webpage(url, query)
3518 return self.playlist_result(self._process_page(webpage), playlist_title=query)
3519"""
3520
3521
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """Shared implementation for the personal feed extractors.

    Subclasses only need to define _FEED_NAME; login handling and the
    delegation to the tab extractor live here.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        # Derive the extractor name from the feed, e.g. 'youtube:history'.
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds are per-account, so authenticate before extraction starts.
        self._login()

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
3540
3541
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ':ytwatchlater' shorthand to the 'WL' playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # 'WL' is YouTube's reserved playlist ID for Watch Later.
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3554
3555
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """':ytrec' feed — the signed-in user's recommended videos."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r':ytrec(?:ommended)?'
    # Maps to https://www.youtube.com/feed/recommended via the base class.
    _FEED_NAME = 'recommended'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }]
3567
3568
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """':ytsubs' feed — the signed-in user's subscriptions feed."""
    # Fixed IE_DESC: it previously read '"ytsubs" keyword', omitting the
    # leading colon that _VALID_URL requires and breaking consistency with
    # the other shorthand extractors (':ytfav', ':ytrec', ':ythistory').
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsubs(?:criptions)?'
    # Maps to https://www.youtube.com/feed/subscriptions via the base class.
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
3580
3581
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """':ythistory' feed — the signed-in user's watch history."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r':ythistory'
    # Maps to https://www.youtube.com/feed/history via the base class.
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
3590
3591
class YoutubeTruncatedURLIE(InfoExtractor):
    """Match watch/attribution URLs whose video ID is missing entirely.

    These typically result from an unquoted URL on the command line, where
    the shell split it at '&' and only the first query parameter survived.
    Extraction always fails with an explanatory error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False # Do not list
    # Matches a watch URL containing only one of the non-essential query
    # parameters (or an attribution link without a 'u' target) and nothing
    # else — i.e. the 'v=<id>' part is gone.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
        attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Refuse extraction outright — there is no video ID to work with.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
3639
3640
class YoutubeTruncatedIDIE(InfoExtractor):
    """Match watch URLs whose video ID is shorter than the required length."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # An ID this short was most likely cut off while copying the URL;
        # fail with a hint instead of querying YouTube.
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (
            video_id, url)
        raise ExtractorError(message, expected=True)
3656