MOON
Server: Apache
System: Linux server30c.hostingraja.org 3.10.0-962.3.2.lve1.5.63.el7.x86_64 #1 SMP Fri Oct 8 12:03:35 UTC 2021 x86_64
User: jibhires (1887)
PHP: 8.1.30
Disabled: show_source, system, shell_exec, passthru, exec, popen, proc_open, allow_url_fopen, symlink, escapeshellcmd, pcntl_exec
Upload Files
File: //opt/alt/python37/lib/python3.7/site-packages/html5lib/__pycache__/_inputstream.cpython-37.pyc
B

63)Z�~�!@sddlmZmZmZddlmZmZddlmZm	Z	ddl
Z
ddlZddlZddl
mZmZmZmZddl
mZddlmZdd	lmZydd
lmZWnek
r�eZYnXedd�eD��Zed
d�eD��Zedd�eD��Zeeddg�BZdZej�rFeddk�r"e� d�dk�s&t!�e�"edd�e#d�d�Z$n
e�"e�Z$e%dddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4g �Z&e�"d5�Z'iZ(Gd6d7�d7e)�Z*d8d9�Z+Gd:d;�d;e)�Z,Gd<d=�d=e,�Z-Gd>d?�d?e.�Z/Gd@dA�dAe)�Z0GdBdC�dCe)�Z1dDdE�Z2dS)F�)�absolute_import�division�unicode_literals)�	text_type�binary_type)�http_client�urllibN�)�EOF�spaceCharacters�asciiLetters�asciiUppercase)�_ReparseException)�_utils)�StringIO)�BytesIOcCsg|]}|�d��qS)�ascii)�encode)�.0�item�r�F/opt/alt/python37/lib/python3.7/site-packages/html5lib/_inputstream.py�
<listcomp>srcCsg|]}|�d��qS)r)r)rrrrrrscCsg|]}|�d��qS)r)r)rrrrrrs�>�<u�[---Ÿ﷐-﷯￾￿🿾🿿𯿾𯿿𿿾𿿿񏿾񏿿񟿾񟿿񯿾񯿿񿿾񿿿򏿾򏿿򟿾򟿿򯿾򯿿򿿾򿿿󏿾󏿿󟿾󟿿󯿾󯿿󿿾󿿿􏿾􏿿]����]z"\uD800-\uDFFF"i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��	i��	i��
i��
i��i��i��i��i��
i��
i��i��i��i��i��i��z[	-
 -/:-@\[-`{-~]c@sHeZdZdZdd�Zdd�Zdd�Zdd	�Zd
d�Zdd
�Z	dd�Z
dS)�BufferedStreamz�Buffering for streams that do not have buffering of their own

    The buffer is implemented as a list of chunks on the assumption that
    joining many strings will be slow since it is O(n**2)
    cCs||_g|_ddg|_dS)Nrr)�stream�buffer�position)�selfrrrr�__init__@szBufferedStream.__init__cCs@d}x(|jd|jd�D]}|t|�7}qW||jd7}|S)Nrr	)rr �len)r!�pos�chunkrrr�tellEs
zBufferedStream.tellcCsX||��kst�|}d}x0t|j|�|krH|t|j|�8}|d7}qW||g|_dS)Nrr	)�_bufferedBytes�AssertionErrorr#rr )r!r$�offset�irrr�seekLszBufferedStream.seekcCsT|js|�|�S|jdt|j�krF|jdt|jd�krF|�|�S|�|�SdS)Nrr	r)r�_readStreamr r#�_readFromBuffer)r!�bytesrrr�readUs

zBufferedStream.readcCstdd�|jD��S)NcSsg|]}t|��qSr)r#)rrrrrr_sz1BufferedStream._bufferedBytes.<locals>.<listcomp>)�sumr)r!rrrr'^szBufferedStream._bufferedBytescCs<|j�|�}|j�|�|jdd7<t|�|jd<|S)Nrr	)rr/r�appendr r#)r!r.�datarrrr,as
zBufferedStream._readStreamcCs�|}g}|jd}|jd}x�|t|j�kr�|dkr�|dks@t�|j|}|t|�|krn|}|||g|_n"t|�|}|t|�g|_|d7}|�||||��||8}d}qW|r�|�|�|��d�|�S)Nrr	�)r r#rr(r1r,�join)r!r.ZremainingBytes�rvZbufferIndexZbufferOffsetZbufferedDataZbytesToReadrrrr-hs&


zBufferedStream._readFromBufferN)�__name__�
__module__�__qualname__�__doc__r"r&r+r/r'r,r-rrrrr9s		rcKs�t|tj�s(t|tjj�r.t|jtj�r.d}n&t|d�rJt|�d�t	�}n
t|t	�}|r�dd�|D�}|rvt
d|��t|f|�St|f|�SdS)NFr/rcSsg|]}|�d�r|�qS)Z	_encoding)�endswith)r�xrrrr�sz#HTMLInputStream.<locals>.<listcomp>z3Cannot set an encoding with a unicode input, set %r)
�
isinstancerZHTTPResponserZresponseZaddbase�fp�hasattrr/r�	TypeError�HTMLUnicodeInputStream�HTMLBinaryInputStream)�source�kwargsZ	isUnicodeZ	encodingsrrr�HTMLInputStream�s

rDc@speZdZdZdZdd�Zdd�Zdd�Zd	d
�Zdd�Z	d
d�Z
ddd�Zdd�Zdd�Z
ddd�Zdd�ZdS)r@z�Provides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    i(cCsZtjsd|_ntd�dkr$|j|_n|j|_dg|_td�df|_|�	|�|_
|��dS)a�Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        Nu􏿿r	rzutf-8�certain)r�supports_lone_surrogates�reportCharacterErrorsr#�characterErrorsUCS4�characterErrorsUCS2ZnewLines�lookupEncoding�charEncoding�
openStream�
dataStream�reset)r!rBrrrr"�s
zHTMLUnicodeInputStream.__init__cCs.d|_d|_d|_g|_d|_d|_d|_dS)N�r)r%�	chunkSize�chunkOffset�errors�prevNumLines�prevNumCols�_bufferedCharacter)r!rrrrN�szHTMLUnicodeInputStream.resetcCst|d�r|}nt|�}|S)zvProduces a file object from source.

        source can be either a file object, local filename or a string.

        r/)r>r)r!rBrrrrrL�s
z!HTMLUnicodeInputStream.openStreamcCsT|j}|�dd|�}|j|}|�dd|�}|dkr@|j|}n||d}||fS)N�
rrr	)r%�countrS�rfindrT)r!r)r%ZnLinesZpositionLineZlastLinePosZpositionColumnrrr�	_position�s
z HTMLUnicodeInputStream._positioncCs|�|j�\}}|d|fS)z:Returns (line, col) of the current position in the stream.r	)rYrQ)r!�line�colrrrr �szHTMLUnicodeInputStream.positioncCs6|j|jkr|��stS|j}|j|}|d|_|S)zo Read one character from the stream or queue if available. Return
            EOF when EOF is reached.
        r	)rQrP�	readChunkr
r%)r!rQ�charrrrr]�s

zHTMLUnicodeInputStream.charNcCs�|dkr|j}|�|j�\|_|_d|_d|_d|_|j�|�}|j	rX|j	|}d|_	n|s`dSt
|�dkr�t|d�}|dks�d|kr�dkr�nn|d|_	|dd�}|jr�|�|�|�
d	d
�}|�
dd
�}||_t
|�|_dS)
NrOrFr	r�
i�i��z
rV�
T)�_defaultChunkSizerYrPrSrTr%rQrMr/rUr#�ordrG�replace)r!rPr2Zlastvrrrr\�s0
 


z HTMLUnicodeInputStream.readChunkcCs,x&ttt�|���D]}|j�d�qWdS)Nzinvalid-codepoint)�ranger#�invalid_unicode_re�findallrRr1)r!r2�_rrrrH%sz*HTMLUnicodeInputStream.characterErrorsUCS4cCs�d}x�t�|�D]�}|rqt|���}|��}t�|||d��rtt�|||d��}|tkrn|j	�
d�d}q|dkr�|dkr�|t|�dkr�|j	�
d�qd}|j	�
d�qWdS)NF�zinvalid-codepointTi�i��r	)rd�finditerra�group�startrZisSurrogatePairZsurrogatePairToCodepoint�non_bmp_invalid_codepointsrRr1r#)r!r2�skip�matchZ	codepointr$Zchar_valrrrrI)s z*HTMLUnicodeInputStream.characterErrorsUCS2Fc
Csyt||f}Wnltk
r|x|D]}t|�dks&t�q&Wd�dd�|D��}|s^d|}t�d|�}t||f<YnXg}x||�|j|j	�}|dkr�|j	|j
kr�Pn0|��}||j
kr�|�|j|j	|��||_	P|�|j|j	d��|�
�s�Pq�Wd�|�}	|	S)z� Returns a string of characters from the stream up to but not
        including any character in 'characters' or EOF. 'characters' must be
        a container that supports the 'in' method and iteration over its
        characters.
        �rOcSsg|]}dt|��qS)z\x%02x)ra)r�crrrrNsz5HTMLUnicodeInputStream.charsUntil.<locals>.<listcomp>z^%sz[%s]+N)�charsUntilRegEx�KeyErrorrar(r4�re�compilermr%rQrP�endr1r\)
r!Z
charactersZopposite�charsroZregexr5�mrt�rrrr�
charsUntil@s2
 

z!HTMLUnicodeInputStream.charsUntilcCsT|dk	rP|jdkr.||j|_|jd7_n"|jd8_|j|j|ksPt�dS)Nrr	)rQr%rPr()r!r]rrr�ungetos
zHTMLUnicodeInputStream.unget)N)F)r6r7r8r9r`r"rNrLrYr r]r\rHrIrxryrrrrr@�s 
&
/r@c@sLeZdZdZddd�Zdd�Zd	d
�Zddd�Zd
d�Zdd�Z	dd�Z
dS)rAz�Provides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    N�windows-1252TcCsn|�|�|_t�||j�d|_d|_||_||_||_||_	||_
|�|�|_|jddk	sbt
�|��dS)a�Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        i�drN)rL�	rawStreamr@r"�numBytesMeta�numBytesChardet�override_encoding�transport_encoding�same_origin_parent_encoding�likely_encoding�default_encoding�determineEncodingrKr(rN)r!rBrr�r�r�r�Z
useChardetrrrr"�szHTMLBinaryInputStream.__init__cCs&|jdj�|jd�|_t�|�dS)Nrrb)rKZ
codec_info�streamreaderr|rMr@rN)r!rrrrN�szHTMLBinaryInputStream.resetcCsDt|d�r|}nt|�}y|�|���Wnt|�}YnX|S)zvProduces a file object from source.

        source can be either a file object, local filename or a string.

        r/)r>rr+r&r)r!rBrrrrrL�s
z HTMLBinaryInputStream.openStreamcCs�|��df}|ddk	r|St|j�df}|ddk	r:|St|j�df}|ddk	rX|S|��df}|ddk	rt|St|j�df}|ddk	r�|dj�d�s�|St|j�df}|ddk	r�|S|�rryddl	m
}Wntk
r�Yn�Xg}|�}xF|j�s<|j
�|j�}t|t��st�|�s&P|�|�|�|�q�W|��t|jd�}|j
�d�|dk	�rr|dfSt|j�df}|ddk	�r�|Std�dfS)NrErZ	tentativezutf-16)�UniversalDetector�encodingzwindows-1252)�	detectBOMrJrr��detectEncodingMetar��name�
startswithr�Zchardet.universaldetectorr��ImportErrorZdoner|r/r~r<r.r(r1Zfeed�close�resultr+r�)r!ZchardetrKr�ZbuffersZdetectorrr�rrrr��sR


z'HTMLBinaryInputStream.determineEncodingcCs�|jddkst�t|�}|dkr&dS|jdkrFtd�}|dk	s�t�nT||jdkrf|jddf|_n4|j�d�|df|_|��td|jd|f��dS)Nr	rE)zutf-16bezutf-16lezutf-8rzEncoding changed from %s to %s)rKr(rJr�r|r+rNr)r!ZnewEncodingrrr�changeEncodings

z$HTMLBinaryInputStream.changeEncodingc
Cs�tjdtjdtjdtjdtjdi}|j�d�}t|t	�s<t
�|�|dd��}d}|s~|�|�}d}|s~|�|dd	��}d	}|r�|j�|�t
|�S|j�d
�dSdS)z�Attempts to detect at BOM at the start of the stream. If
        an encoding can be determined from the BOM return the name of the
        encoding otherwise return Nonezutf-8zutf-16lezutf-16bezutf-32lezutf-32be�N�rgr)�codecs�BOM_UTF8�BOM_UTF16_LE�BOM_UTF16_BE�BOM_UTF32_LE�BOM_UTF32_BEr|r/r<r.r(�getr+rJ)r!ZbomDict�stringr�r+rrrr�s$
zHTMLBinaryInputStream.detectBOMcCsV|j�|j�}t|t�st�t|�}|j�d�|��}|dk	rR|j	dkrRt
d�}|S)z9Report the encoding declared by the meta element
        rN)zutf-16bezutf-16lezutf-8)r|r/r}r<r.r(�EncodingParserr+�getEncodingr�rJ)r!r�parserr�rrrr�9sz(HTMLBinaryInputStream.detectEncodingMeta)NNNNrzT)T)r6r7r8r9r"rNrLr�r�r�r�rrrrrA�s
(
>"rAc@s�eZdZdZdd�Zdd�Zdd�Zdd	�Zd
d�Zdd
�Z	dd�Z
dd�Zeee
�Z
dd�Zee�Zefdd�Zdd�Zdd�Zdd�ZdS)�
EncodingBytesz�String-like object with an associated position and various extra methods
    If the position is ever greater than the string length then an exception is
    raisedcCst|t�st�t�||���S)N)r<r.r(�__new__�lower)r!�valuerrrr�LszEncodingBytes.__new__cCs
d|_dS)Nr)rY)r!r�rrrr"PszEncodingBytes.__init__cCs|S)Nr)r!rrr�__iter__TszEncodingBytes.__iter__cCs>|jd}|_|t|�kr"t�n|dkr.t�|||d�S)Nr	r)rYr#�
StopIterationr?)r!�prrr�__next__WszEncodingBytes.__next__cCs|��S)N)r�)r!rrr�next_szEncodingBytes.nextcCsB|j}|t|�krt�n|dkr$t�|d|_}|||d�S)Nrr	)rYr#r�r?)r!r�rrr�previouscszEncodingBytes.previouscCs|jt|�krt�||_dS)N)rYr#r�)r!r rrr�setPositionlszEncodingBytes.setPositioncCs*|jt|�krt�|jdkr"|jSdSdS)Nr)rYr#r�)r!rrr�getPositionqs

zEncodingBytes.getPositioncCs||j|jd�S)Nr	)r )r!rrr�getCurrentByte{szEncodingBytes.getCurrentBytecCsL|j}x:|t|�kr@|||d�}||kr6||_|S|d7}qW||_dS)zSkip past a list of charactersr	N)r r#rY)r!rur�rorrrrl�szEncodingBytes.skipcCsL|j}x:|t|�kr@|||d�}||kr6||_|S|d7}qW||_dS)Nr	)r r#rY)r!rur�rorrr�	skipUntil�szEncodingBytes.skipUntilcCs>|j}|||t|��}|�|�}|r:|jt|�7_|S)z�Look for a sequence of bytes at the start of a string. If the bytes
        are found return True and advance the position to the byte after the
        match. Otherwise return False and leave the position alone)r r#r�)r!r.r�r2r5rrr�
matchBytes�s
zEncodingBytes.matchBytescCsR||jd��|�}|dkrJ|jdkr,d|_|j|t|�d7_dSt�dS)z�Look for the next sequence of bytes matching a given sequence. If
        a match is found advance the position to the last byte of the matchNrrr	T)r �findrYr#r�)r!r.ZnewPositionrrr�jumpTo�s
zEncodingBytes.jumpToN)r6r7r8r9r�r"r�r�r�r�r�r��propertyr r��currentByte�spaceCharactersBytesrlr�r�r�rrrrr�Hs 	
r�c@sXeZdZdZdd�Zdd�Zdd�Zdd	�Zd
d�Zdd
�Z	dd�Z
dd�Zdd�ZdS)r�z?Mini parser for detecting character encoding from meta elementscCst|�|_d|_dS)z3string - the data to work on for encoding detectionN)r�r2r�)r!r2rrrr"�s
zEncodingParser.__init__c
Cs�d|jfd|jfd|jfd|jfd|jfd|jff}x^|jD]T}d}xD|D]<\}}|j�|�rJy|�}PWqJtk
r�d}PYqJXqJW|s<Pq<W|jS)	Ns<!--s<metas</s<!s<?rTF)	�
handleComment�
handleMeta�handlePossibleEndTag�handleOther�handlePossibleStartTagr2r�r�r�)r!ZmethodDispatchrfZkeepParsing�key�methodrrrr��s&zEncodingParser.getEncodingcCs|j�d�S)zSkip over commentss-->)r2r�)r!rrrr��szEncodingParser.handleCommentcCs�|jjtkrdSd}d}x�|��}|dkr.dS|ddkr^|ddk}|r�|dk	r�||_dSq|ddkr�|d}t|�}|dk	r�||_dSq|ddkrtt|d��}|��}|dk	rt|�}|dk	r|r�||_dS|}qWdS)	NTFrs
http-equivr	scontent-typescharsetscontent)	r2r�r��getAttributer�rJ�ContentAttrParserr��parse)r!Z	hasPragmaZpendingEncoding�attrZtentativeEncoding�codecZ
contentParserrrrr��s:zEncodingParser.handleMetacCs
|�d�S)NF)�handlePossibleTag)r!rrrr��sz%EncodingParser.handlePossibleStartTagcCst|j�|�d�S)NT)r�r2r�)r!rrrr��s
z#EncodingParser.handlePossibleEndTagcCsf|j}|jtkr(|r$|��|��dS|�t�}|dkrD|��n|��}x|dk	r`|��}qNWdS)NTr)r2r��asciiLettersBytesr�r�r��spacesAngleBracketsr�)r!ZendTagr2ror�rrrr��s



z EncodingParser.handlePossibleTagcCs|j�d�S)Nr)r2r�)r!rrrr�szEncodingParser.handleOthercCs|j}|�ttdg�B�}|dks2t|�dks2t�|dkr>dSg}g}xt|dkrX|rXPnX|tkrl|��}PnD|dkr�d�|�dfS|tkr�|�|�	��n|dkr�dS|�|�t
|�}qHW|dkr�|��d�|�dfSt
|�|��}|dk�rR|}x�t
|�}||k�r(t
|�d�|�d�|�fS|tk�rB|�|�	��q�|�|�q�WnJ|d	k�rjd�|�dfS|tk�r�|�|�	��n|dk�r�dS|�|�x^t
|�}|tk�r�d�|�d�|�fS|tk�r�|�|�	��n|dk�r�dS|�|��q�WdS)
z_Return a name,value pair for the next attribute in the stream,
        if one is found, or None�/Nr	)rN�=)r�rr3)�'�"r)
r2rlr��	frozensetr#r(r4�asciiUppercaseBytesr1r�r�r�r�)r!r2roZattrNameZ	attrValueZ	quoteCharrrrr�sh










zEncodingParser.getAttributeN)
r6r7r8r9r"r�r�r�r�r�r�r�r�rrrrr��s$r�c@seZdZdd�Zdd�ZdS)r�cCst|t�st�||_dS)N)r<r.r(r2)r!r2rrrr"fszContentAttrParser.__init__cCsy�|j�d�|jjd7_|j��|jjdks8dS|jjd7_|j��|jjdkr�|jj}|jjd7_|jj}|j�|�r�|j||jj�SdSnF|jj}y|j�t�|j||jj�Stk
r�|j|d�SXWntk
�rdSXdS)Nscharsetr	r�)r�r�)r2r�r rlr�r�r�r�)r!Z	quoteMarkZoldPositionrrrr�js.

zContentAttrParser.parseN)r6r7r8r"r�rrrrr�esr�cCs`t|t�r.y|�d�}Wntk
r,dSX|dk	rXy
t�|�Stk
rTdSXndSdS)z{Return the python codec name corresponding to an encoding or None if the
    string doesn't correspond to a valid encoding.rN)r<r�decode�UnicodeDecodeError�webencodings�lookup�AttributeError)r�rrrrJ�s

rJ)3Z
__future__rrrZsixrrZ	six.movesrrr�rrr�Z	constantsr
rrr
rrOr�iorrr�r�r�r�r�r�Zinvalid_unicode_no_surrogaterFrWr(rs�evalrd�setrkZascii_punctuation_rerp�objectrrDr@rAr.r�r�r�rJrrrr�<module>sX
"








JgIh6'