o
    {#iE                     @   s@  d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	m
Z
mZmZmZ ddlmZmZmZmZ ddlmZ ddlmZmZmZ dd	lmZ dd
lmZ ddlmZmZ ee Z!de"fddZ#dd Z$dd Z%de"de&fddZ'de"de"e"ffddZ(de"de"fddZ)de"de"fddZ*de"de"fddZ+de"de"de"fd d!Z,de"d"e"de"fd#d$Z-d%ede"fd&d'Z.d(e"de"fd)d*Z/d+e"dee"ee" f fd,d+Z0d(e"de	fd-d.Z1d(e"de	fd/d0Z2d(e"de"fd1d2Z3d3e
d4e
d5e"ddfd6d7Z4d8e
ddfd9d:Z5de"de"fd;d<Z6de"de"fd=d>Z7dee fd?d@Z8dS )AzBThis module contains all non-cipher related data extraction logic.    N)OrderedDict)datetime)AnyDictListOptionalTuple)parse_qsquote	urlencodeurlparse)Cipher)HTMLParseErrorLiveStreamErrorRegexMatchErrorregex_search)YouTubeMetadata)parse_for_objectparse_for_all_objects
watch_htmlc                 C   s4   z	t d| dd}W n
 ty   Y dS w t|dS )zExtract publish date
    :param str watch_html:
        The html contents of the watch page.
    :rtype: str
    :returns:
        Publish date of the video.
    z;(?<=itemprop=\"datePublished\" content=\")\d{4}-\d{2}-\d{2}r   groupNz%Y-%m-%d)r   r   r   strptime)r   result r   U/var/www/GraceOrthoHospital/venv_grace/lib/python3.10/site-packages/pytube/extract.pypublish_date   s   
r   c                 C   s"   dg}|D ]	}|| v r dS qdS )zCheck if live stream recording is available.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Whether or not the content is private.
    z,This live stream recording is not available.FTr   )r   unavailable_stringsstringr   r   r   recording_available&   s   
r    c                 C   s$   g d}|D ]	}|| v r dS qdS )zCheck if content is private.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Whether or not the content is private.
    )zFThis is a private video. Please sign in to verify that you may see it.z"simpleText":"Private video"zThis video is private.TFr   )r   private_stringsr   r   r   r   
is_private8   s   	r"   returnc                 C   s*   z
t d| dd W dS  ty   Y dS w )zCheck if content is age restricted.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Whether or not the content is age restricted.
    zog:restrictions:ager   r   FT)r   r   )r   r   r   r   is_age_restrictedL   s   	r$   c                 C   sd   t | }|di }d|v rdS d|v r-d|v r!|d |d gfS d|v r-|d |d fS ddgfS )a  Return the playability status and status explanation of a video.

    For example, a video may have a status of LOGIN_REQUIRED, and an explanation
    of "This is a private video. Please sign in to verify that you may see it."

    This explanation is what gets incorporated into the media player overlay.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Playability status and reason of the video.
    playabilityStatusliveStreamability)LIVE_STREAMzVideo is a live stream.statusreasonmessagesN)initial_player_responseget)r   player_responsestatus_dictr   r   r   playability_status\   s   
r/   urlc                 C   s   t d| ddS )ar  Extract the ``video_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/watch?v={video_id}`
    - :samp:`https://youtube.com/embed/{video_id}`
    - :samp:`https://youtu.be/{video_id}`

    :param str url:
        A YouTube url containing a video id.
    :rtype: str
    :returns:
        YouTube video id.
    z(?:v=|\/)([0-9A-Za-z_-]{11}).*   r   r   )r0   r   r   r   video_idv   s   r2   c                 C   s   t j| }t|jd d S )ao  Extract the ``playlist_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/playlist?list={playlist_id}`
    - :samp:`https://youtube.com/watch?v={video_id}&list={playlist_id}`

    :param str url:
        A YouTube url containing a playlist id.
    :rtype: str
    :returns:
        YouTube playlist id.
    listr   )urllibparser   r	   query)r0   parsedr   r   r   playlist_id   s   r8   c                 C   sj   g d}|D ](}t |}|| }|r.td| |d}|d}d| d|   S qtddd)	a  Extract the ``channel_name`` or ``channel_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/c/{channel_name}/*`
    - :samp:`https://youtube.com/channel/{channel_id}/*
    - :samp:`https://youtube.com/u/{channel_name}/*`
    - :samp:`https://youtube.com/user/{channel_id}/*

    :param str url:
        A YouTube url containing a channel name.
    :rtype: str
    :returns:
        YouTube channel name.
    )z(?:\/(c)\/([%\d\w_\-]+)(\/.*)?)z%(?:\/(channel)\/([%\w\d_\-]+)(\/.*)?)z(?:\/(u)\/([%\d\w_\-]+)(\/.*)?)z"(?:\/(user)\/([%\w\d_\-]+)(\/.*)?)"finished regex search, matched: %sr1      /channel_namepatternscallerpatternrecompilesearchloggerdebugr   r   )r0   r=   r@   regexfunction_match	uri_styleuri_identifierr   r   r   r<      s   



r<   	watch_urlc                 C   s*   t d| fddt|fddddg}t|S )a  Construct the video_info url.

    :param str video_id:
        A YouTube video identifier.
    :param str watch_url:
        A YouTube watch url.
    :rtype: str
    :returns:
        :samp:`https://youtube.com/get_video_info` with necessary GET
        parameters.
    r2   )psdefaulteurl)hlen_UShtml51cTVHTML5cverz
7.20201028)r   r
   _video_info_url)r2   rK   paramsr   r   r   video_info_url   s   
r[   
embed_htmlc                 C   s\   z	t d|dd}W n ty   d}Y nw d|  }td| fd|fd|fd	d
dg}t|S )a<  Construct the video_info url.

    :param str video_id:
        A YouTube video identifier.
    :param str embed_html:
        The html contents of the embed page (for age restricted videos).
    :rtype: str
    :returns:
        :samp:`https://youtube.com/get_video_info` with necessary GET
        parameters.
    z"sts"\s*:\s*(\d+)r1   r    z!https://youtube.googleapis.com/v/r2   rN   stsrQ   rT   rW   )r   r   r   rY   )r2   r\   r^   rN   rZ   r   r   r   video_info_url_age_restricted   s    

r_   rZ   c                 C   s   dt |  S )Nz'https://www.youtube.com/get_video_info?)r   )rZ   r   r   r   rY      s   rY   htmlc              	   C   s@   zt | d d }W d| S  ttfy   t| }Y d| S w )zGet the base JavaScript url.

    Construct the base JavaScript url, which contains the decipher
    "transforms".

    :param str html:
        The html contents of the watch page.
    assetsjszhttps://youtube.com)get_ytplayer_configKeyErrorr   get_ytplayer_js)r`   base_jsr   r   r   js_url   s   	
rg   mime_type_codecc                 C   sL   d}t |}|| }|std|d| \}}|dd |dD fS )a  Parse the type data.

    Breaks up the data in the ``type`` key of the manifest, which contains the
    mime type and codecs serialized together, and splits them into separate
    elements.

    **Example**:

    mime_type_codec('audio/webm; codecs="opus"') -> ('audio/webm', ['opus'])

    :param str mime_type_codec:
        String containing mime type and codecs.
    :rtype: tuple
    :returns:
        The mime type and a list of codecs.

    z,(\w+\/\w+)\;\scodecs=\"([a-zA-Z-0-9.,\s]*)\"rh   r>   c                 S   s   g | ]}|  qS r   )strip).0rU   r   r   r   
<listcomp>$      z#mime_type_codec.<locals>.<listcomp>,)rB   rC   rD   r   groupssplit)rh   r@   rG   results	mime_typecodecsr   r   r   rh     s   

c                 C   sR   dg}|D ]}t |}|| }|r"td| |d}|  S qtddd)zGet the YouTube player base JavaScript path.

    :param str html
        The html contents of the watch page.
    :rtype: str
    :returns:
        Path to YouTube's base.js file.
    z'(/s/player/[\w\d]+/[\w\d_/.]+/base\.js)r9   r1   re   js_url_patternsr>   rA   )r`   rs   r@   rG   rH   yt_player_jsr   r   r   re   '  s   



re   c                 C   s   t d ddg}|D ])}zt| |W   S  ty4 } zt d|  t | W Y d}~qd}~ww dg}|D ]}zt| |W   S  tyM   Y q:w tddd	)
a  Get the YouTube player configuration data from the watch html.

    Extract the ``ytplayer_config``, which is json data embedded within the
    watch html and serves as the primary source of obtaining the stream
    manifest data.

    :param str html:
        The html contents of the watch page.
    :rtype: str
    :returns:
        Substring of the html containing the encoded manifest data.
    zfinding initial function namezytplayer\.config\s*=\s*ytInitialPlayerResponse\s*=\s*zPattern failed: Nz,yt\.setConfig\(.*['\"]PLAYER_CONFIG['\"]:\s*rc   z#config_patterns, setconfig_patternsr>   )rE   rF   r   r   r   )r`   config_patternsr@   esetconfig_patternsr   r   r   rc   @  s0   


rc   c              	   C   sf   i }ddg}|D ]}zt | |}|D ]}|| qW q ty$   Y qw t|dkr-|S tddd)a;  Get the entirety of the ytcfg object.

    This is built over multiple pieces, so we have to find all matches and
    combine the dicts together.

    :param str html:
        The html contents of the watch page.
    :rtype: str
    :returns:
        Substring of the html containing the encoded manifest data.
    z
ytcfg\s=\szytcfg\.set\(r   	get_ytcfgytcfg_pattenrsr>   )r   updater   lenr   )r`   ytcfgytcfg_patternsr@   found_objectsobjr   r   r   ry   n  s$   
ry   stream_manifestvid_inforb   c              	   C   s2  t |d}t| D ]\}}z|d }W n ty+   |di d}|r)tdY nw d|v s<d|vrBd|v s<d	|v rBtd
 q	|j|d d}td|d  t|}	t	t|j
}
dd |
 D }
||
d< d|
 vrt|
d }||}||
d< |	j d|	j |	j dt|
 }|| | d< q	dS )zApply the decrypted signature to the stream manifest.

    :param dict stream_manifest:
        Details of the media streams available.
    :param str js:
        The contents of the base.js asset file.

    )rb   r0   r%   r&   UNKNOWN	signaturesz&sig=z&lsig=zsignature found, skip decipher)ciphered_signaturez+finished descrambling signature for itag=%sitagc                 S   s   i | ]	\}}||d  qS )r   r   )rj   kvr   r   r   
<dictcomp>  s    z#apply_signature.<locals>.<dictcomp>sig
ratebypassnz://?N)r   	enumeraterd   r,   r   rE   rF   get_signaturer   r	   r6   itemskeysr3   calculate_nschemenetlocpathr   )r   r   rb   cipheristreamr0   live_streamr   
parsed_urlquery_params	initial_nnew_nr   r   r   apply_signature  sB   
	


"r   stream_datac                 C   s   d| v rdS g }d|   v r|| d  d|   v r"|| d  |D ])}d|vrDd|v rDt|d }|d d |d< |d d |d< |dd	k|d
< q$td |S )a-  Apply various in-place transforms to YouTube's media stream data.

    Creates a ``list`` of dictionaries by string splitting on commas, then
    taking each list item, parsing it as a query string, converting it to a
    ``dict`` and unquoting the value.

    :param dict stream_data:
        Dictionary containing query string encoded values.

    **Example**:

    >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
    >>> apply_descrambler(d, 'foo')
    >>> print(d)
    {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}

    r0   NformatsadaptiveFormatssignatureCipherr   r   typeFORMAT_STREAM_TYPE_OTFis_otfzapplying descrambler)r   extendr	   r,   rE   rF   )r   r   data
cipher_urlr   r   r   apply_descrambler  s    
r   c              	   C   @   ddg}|D ]}zt | |W   S  ty   Y qw tddd)zExtract the ytInitialData json from the watch_html page.

    This mostly contains metadata necessary for rendering the page on-load,
    such as video information, copyright notices, etc.

    @param watch_html: Html of the watch page
    @return:
    z'window\[['\"]ytInitialData['\"]]\s*=\s*zytInitialData\s*=\s*initial_datainitial_data_patternr>   r   r   r   r   r=   r@   r   r   r   r     s   
r   c              	   C   r   )a  Extract the ytInitialPlayerResponse json from the watch_html page.

    This mostly contains metadata necessary for rendering the page on-load,
    such as video information, copyright notices, etc.

    @param watch_html: Html of the watch page
    @return:
    z1window\[['\"]ytInitialPlayerResponse['\"]]\s*=\s*ru   r+   initial_player_response_patternr>   r   r   r   r   r   r+     s   
r+   c              	   C   st   z| d d d d d d d d d d }W n t tfy'   tg  Y S w td	d
 |}dd |D }t|S )u<  Get the informational metadata for the video.

    e.g.:
    [
        {
            'Song': '강남스타일(Gangnam Style)',
            'Artist': 'PSY',
            'Album': 'PSY SIX RULES Pt.1',
            'Licensed to YouTube by': 'YG Entertainment Inc. [...]'
        }
    ]

    :rtype: YouTubeMetadata
    contentstwoColumnWatchNextResultsrp   r1   videoSecondaryInfoRenderermetadataRowContainermetadataRowContainerRendererrowsc                 S   s   d|   v S )NmetadataRowRenderer)r   )xr   r   r   <lambda>;  s    zmetadata.<locals>.<lambda>c                 S   s   g | ]}|d  qS )r   r   )rj   r   r   r   r   rk   A  rl   zmetadata.<locals>.<listcomp>)rd   
IndexErrorr   filter)r   metadata_rowsr   r   r   metadata!  s6   
r   )9__doc__loggingurllib.parser4   rB   collectionsr   r   typingr   r   r   r   r   r	   r
   r   r   pytube.cipherr   pytube.exceptionsr   r   r   pytube.helpersr   pytube.metadatar   pytube.parserr   r   	getLogger__name__rE   strr   r    r"   boolr$   r/   r2   r8   r<   r[   r_   rY   rg   rh   re   rc   ry   r   r   r   r+   r   r   r   r   r   <module>   sF    
$ ."9)