
    yf,                        d Z ddlZddlZddlmZmZ  G d d      Z G d de      Z G d	 d
e      Z	 ej                  d      Z ej                  d      Z ej                  d      Z ej                  d      Z ej                  d      Zd Zd Z G d d      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Zd  Zy)!u  
A partial parser for WebVTT segments. Interprets enough of the WebVTT stream
to be able to assemble a single stand-alone subtitle file, suitably adjusting
timestamps on the way, while everything else is passed through unmodified.

Regular expressions based on the W3C WebVTT specification
<https://www.w3.org/TR/webvtt1/>. The X-TIMESTAMP-MAP extension is described
in RFC 8216 §3.5 <https://tools.ietf.org/html/rfc8216#section-3.5>.
    N   )int_or_nonetimetuple_from_msecc                   .    e Zd ZdZd Zd Zd Zd Zd Zy)_MatchParserz
    An object that maintains the current parsing position and allows
    conveniently advancing it as syntax elements are successfully parsed.
    c                      || _         d| _        y Nr   )_data_pos)selfstrings     //usr/lib/python3/dist-packages/yt_dlp/webvtt.py__init__z_MatchParser.__init__   s    
	    c                    t        |t        j                        r&|j                  | j                  | j
                        S t        |t              r2| j                  j                  || j
                        rt        |      S y t        |      N)

isinstancerePatternmatchr
   r   str
startswithlen
ValueErrorr   rs     r   r   z_MatchParser.match   sa    a$774::tyy11azz$$Q		21vmr   c                    |d}not        |t        j                        rt        |j	                  d            }n:t        |t
              rt        |      }nt        |t              r|}nt        |      | xj                  |z  c_        |S r	   )	r   r   Matchr   groupr   intr   r   )r   byamts      r   advancez_MatchParser.advance$   sk    :CBHH%bhhqk"CC b'CC CR. 		S		r   c                 B    | j                  | j                  |            S r   )r#   r   r   s     r   consumez_MatchParser.consume2   s    ||DJJqM**r   c                     t        |       S r   )_MatchChildParserr   s    r   childz_MatchParser.child5   s     &&r   N)	__name__
__module____qualname____doc__r   r   r#   r%   r)    r   r   r   r      s     
+'r   r   c                   (     e Zd ZdZ fdZd Z xZS )r'   z
    A child parser state, which advances through the same data as
    its parent, but has an independent position. This is useful when
    advancing through syntax elements we might later want to backtrack
    from.
    c                 h    t         |   |j                         || _        |j                  | _        y r   )superr   r
   _MatchChildParser__parentr   )r   parent	__class__s     r   r   z_MatchChildParser.__init__A   s&    &KK	r   c                 P    | j                   | j                  _         | j                  S )zW
        Advance the parent state to the current position of this child state.
        )r   r2   r(   s    r   commitz_MatchChildParser.commitF   s     "YY}}r   )r*   r+   r,   r-   r   r6   __classcell__r4   s   @r   r'   r'   9   s     
r   r'   c                        e Zd Z fdZ xZS )
ParseErrorc                     t         |   d|j                  |j                  |j                  |j                  dz    fz         y )Nz$Parse error at position %u (near %r)d   )r1   r   r   r
   )r   parserr4   s     r   r   zParseError.__init__O   s?    ?KKfkk&++2CDC
 
 	r   )r*   r+   r,   r   r7   r8   s   @r   r:   r:   N   s     r   r:   zL(?x)
    (?:([0-9]{1,}):)?
    ([0-9]{2}):
    ([0-9]{2})\.
    ([0-9]{3})?
z\Zz(?:\r\n|[\r\n]|$)z(?:\r\n|[\r\n])+z[ \t]*c                 \    dt        d t        | j                         d      D              z  S )z
    Convert a parsed WebVTT timestamp (a re.Match obtained from _REGEX_TS)
    into an MPEG PES timestamp: a tick counter at 90 kHz resolution.
    Z   c              3   F   K   | ]  \  }}t        |xs d       |z    yw)r   N)r    ).0partmults      r   	<genexpr>z_parse_ts.<locals>.<genexpr>j   s(      _",$DIA_s   !)i6 i`  i  r   )sumzipgroupstss    r   	_parse_tsrJ   e   s8    
  _03BIIKA\0]_ _ _ _r   c                 <    dt        t        | dz   dz              z  S )zn
    Convert an MPEG PES timestamp into a WebVTT timestamp.
    This will lose sub-millisecond precision.
    z%02u:%02u:%02u.%03u-   r?   )r   r    rH   s    r   
_format_tsrM   n   s"    
 !#6sBG?7K#LLLr   c                   ,    e Zd ZdZd Zed        Zd Zy)Blockz#
    An abstract WebVTT block.
    c                 N    |j                         D ]  \  }}t        | ||        y r   )itemssetattr)r   kwargskeyvals       r   r   zBlock.__init__{   s'     	$HCD#s#	$r   c                     |j                  | j                        }|sy |j                  |        | |j                  d            S )Nr   )raw)r   _REGEXr#   r   )clsr=   ms      r   parsezBlock.parse   s9    LL$qqwwqz""r   c                 :    |j                  | j                         y r   )writerW   r   streams     r   
write_intozBlock.write_into   s    TXXr   N)r*   r+   r,   r-   r   classmethodr[   r`   r.   r   r   rO   rO   v   s%    $ # #r   rO   c                       e Zd ZdZy)HeaderBlockzm
    A WebVTT block that may only appear in the header part of the file,
    i.e. before any cue blocks.
    N)r*   r+   r,   r-   r.   r   r   rc   rc      s     	r   rc   c                   
   e Zd Z ej                  d      Z ej                  d      Z ej                  d      Z ej                  d      Z ej                  d      Z	 ej                  d      Z
ed        Zed        Zd	 Zy
)Magicz,\ufeff?WEBVTT([ \t][^\r\n]*)?(?:\r\n|[\r\n])zX-TIMESTAMP-MAP=zLOCAL:zMPEGTS:([0-9]+)z[ \t]*,[ \t]*z6(?:(?!-->)[^\r\n])+:(?:(?!-->)[^\r\n])+(?:\r\n|[\r\n])c                    |j                         }	 |j                  | j                        }|r:|j                  t              }|t	        |      t        |      }|Zt	        |      |j                  | j                        }|r't        |j                  d            }|t	        |      t	        |      |j                  | j                        r|j                  t              rnt	        |      |j                          fS )Nr   )r)   r%   _REGEX_TSMAP_LOCAL	_REGEX_TSr:   rJ   _REGEX_TSMAP_MPEGTSr   r   _REGEX_TSMAP_SEP	_REGEX_NLr6   )rY   r=   rZ   localmpegtss        r   __parse_tsmapzMagic.__parse_tsmap   s    s556ANN9-9$V,,!!=$V,,NN3#:#:;(4F~(00$V,,~~c223~~i(V$$f}r   c                    |j                         }|j                  | j                        }|st        |      |j	                  d      }d\  }}}|j                  t
              sm|j                  | j                        r| j                  |      \  }}E|j                  | j                        }|r||j	                  d      z  }wt        |      |j                           | ||||      S )Nr   )NN r   )extrarm   rl   meta)
r)   r%   rX   r:   r   rk   _REGEX_TSMAP_Magic__parse_tsmap_REGEX_METAr6   )rY   r=   rZ   rq   rl   rm   rr   s          r   r[   zMagic.parse   s    NN3::&V$$
,vt..+~~c../ # 1 1& 9vs/A
"V$$vUFFr   c                 d   |j                  d       | j                  |j                  | j                         |j                  d       | j                  s| j                  r|j                  d       |j                  t	        | j                  | j                  nd             |j                  d       |j                  t        | j                  | j                  nd             |j                  d       | j                  r|j                  | j                         |j                  d       y )NWEBVTT
zX-TIMESTAMP-MAP=LOCAL:r   z,MPEGTS:)r]   rq   rl   rm   rM   r   rr   r^   s     r   r`   zMagic.write_into   s    X::!LL$T::LL12LL$**2HDJJaPQLL$LLDKK,CT[[KLLL99LL#Tr   N)r*   r+   r,   r   compilerX   rs   rg   ri   rj   ru   ra   rt   r[   r`   r.   r   r   re   re      s    RZZGHF 2::12L#I.$"**%78!rzz"23 "**VWK : G G*r   re   c                   0    e Zd Z ej                  d      Zy)
StyleBlockzs(?x)
        STYLE[\ \t]*(?:\r\n|[\r\n])
        ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
        (?:\r\n|[\r\n])
    Nr*   r+   r,   r   ry   rX   r.   r   r   r{   r{          RZZ  	Fr   r{   c                   0    e Zd Z ej                  d      Zy)RegionBlockze(?x)
        REGION[\ \t]*
        ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
        (?:\r\n|[\r\n])
    Nr|   r.   r   r   r   r      r}   r   r   c                   0    e Zd Z ej                  d      Zy)CommentBlockzo(?x)
        NOTE(?:\r\n|[\ \t\r\n])
        ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
        (?:\r\n|[\r\n])
    Nr|   r.   r   r   r   r      r}   r   r   c                       e Zd ZdZ ej
                  d      Z ej
                  d      Z ej
                  d      Z ej
                  d      Z	e
d        Zd Zed        Zd	 Ze
d
        Zd Zy)CueBlockz6
    A cue block. The payload is not interpreted.
    z$((?:(?!-->)[^\r\n])+)(?:\r\n|[\r\n])z[ \t]+-->[ \t]+z[ \t]+((?:(?!-->)[^\r\n])+)z[^\r\n]+(?:\r\n|[\r\n])?c                    |j                         }d }|j                  | j                        }|r|j                  d      }|j                  t              }|sy |j                  | j
                        sy |j                  t              }|sy |j                  | j                        }|j                  t               |j                  t              sy t        |      }t        |      }||j                  d      nd }	t        j                         }
	 |j                  | j                        }|sn!|
j                  |j                  d             ?|j                           | ||||	|
j                               S )Nr   r   )idstartendsettingstext)r)   r%   	_REGEX_IDr   rh   _REGEX_ARROW_REGEX_SETTINGS_REGEX_OPTIONAL_WHITESPACErk   rJ   ioStringIO_REGEX_PAYLOADr]   r6   getvalue)rY   r=   r   rZ   m0m1m2r   r   r   r   s              r   r[   zCueBlock.parse  s2   NN3==)B^^I&~~c../^^I&^^C//012~~i("m"$.288A;d{{}s112AJJqwwqz"	  	S8
 	
r   c                    | j                   ,|j                  | j                          |j                  d       |j                  t        | j                               |j                  d       |j                  t        | j                               | j
                  ,|j                  d       |j                  | j
                         |j                  d       |j                  | j                         |j                  d       y )Nrx   z -->  )r   r]   rM   r   r   r   r   r^   s     r   r`   zCueBlock.write_into8  s    77LL!LLZ

+,WZ)*==$LLLL'TTYYTr   c                 v    | j                   | j                  | j                  | j                  | j                  dS )Nr   r   r   r   r   r   r(   s    r   as_jsonzCueBlock.as_jsonF  s1     ''ZZ88II
 	
r   c                 4    | j                   |j                   k(  S r   )r   r   others     r   __eq__zCueBlock.__eq__P  s    ||u}},,r   c                 :     | |d   |d   |d   |d   |d         S )Nr   r   r   r   r   r   r.   )rY   jsons     r   	from_jsonzCueBlock.from_jsonS  s3    Dzw-Uf*%
 	
r   c                     | j                   |j                   k7  ry| j                  |j                  k7  ry| j                  | j                  cxk  xr! |j                  cxk(  xr |j                  k  S c S )NF)r   r   r   r   r   s     r   hingeszCueBlock.hinges]  sS    99

"==ENN*zzTXXAA		AAAAr   N)r*   r+   r,   r-   r   ry   r   r   r   r   ra   r[   r`   propertyr   r   r   r   r.   r   r   r   r     s     

BCI2::01L bjj!?@ORZZ ;<N%
 %
N 
 
- 
 
Br   r   c              #   H  K   t        | j                               }t        j                  |       |j	                  t
              sk|j                  t              r+t        j                  |      }|r| Gt        j                  |      }|r| ct        j                  |      }|r| 	 |j	                  t
              sY|j                  t              r+t        j                  |      }|r| Gt        j                  |      }|r| ct        |      yw)z
    A generator that yields (partially) parsed WebVTT blocks when given
    a bytes object containing the raw contents of a WebVTT file.
    N)r   decodere   r[   r   
_REGEX_EOFr%   _REGEX_BLANKr   r{   r   r   r:   )frag_contentr=   blocks      r   parse_fragmentr   e  s      ,--/0F
++f
ll:&>>,'!!&)K  (K""6*Kll:&>>,'""6*Kv&K   's   D D")r-   r   r   utilsr   r   r   r'   	Exceptionr:   ry   rh   r   rk   r   r   rJ   rM   rO   rc   re   r{   r   r   r   r   r.   r   r   <module>r      s    
 	 3%' %'P *  BJJ  	 RZZ
BJJ+,	rzz-.'RZZ	2 _M *	% 	XK Xv	 		+ 		5 	\Bu \B~*!r   