fBdZddlZddlZddlmZdgZej dZej dZej dZ ej dZ ej d Z ej d Z ej d Z ej d Zej d Zej dej Zej d Zej dZGddej(Zy)zA parser for HTML and XHTML.N)unescape HTMLParserz[&<]z &[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z <[a-zA-Z]>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF <[a-zA-Z][^\t\n\r\f />\x00]* # tag name (?:[\s/]* # optional whitespace before attribute name (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name (?:\s*=+\s* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\s]* # bare value ) \s* # possibly followed by a space )?(?:\s|/(?!>))* )* )? \s* # trailing whitespace z#ceZdZdZdZddfd ZfdZdZdZd Z d Z d Z d Z d Z dZddZdZdZdZdZdZdZdZdZdZdZdZdZdZdZxZS)raEFind tags and other markup and call handler functions. Usage: p = HTMLParser() p.feed(data) ... p.close() Start tags are handled by calling self.handle_starttag() or self.handle_startendtag(); end tags by self.handle_endtag(). The data between tags is passed from the parser to the derived class by calling self.handle_data() with the data as argument (the data may be split up in arbitrary chunks). If convert_charrefs is True the character references are converted automatically to the corresponding Unicode character (and self.handle_data() is no longer split in chunks), otherwise they are passed by calling self.handle_entityref() or self.handle_charref() with the string containing respectively the named or numeric reference as the argument. )scriptstyleT)convert_charrefscPt|||_|jy)zInitialize and reset this instance. If convert_charrefs is True (the default), all character references are automatically converted to the corresponding Unicode characters. N)super__init__r reset)selfr __class__s "/usr/lib/python3.12/html/parser.pyr zHTMLParser.__init__Vs!  0 cbd|_d|_t|_d|_t |y)z1Reset this instance. Loses all unprocessed data.z???N)rawdatalasttaginteresting_normal interesting cdata_elemr r )rrs rr zHTMLParser.reset`s)  -  rcN|j|z|_|jdy)zFeed data to the parser. Call this as often as you want, with as little or as much text as you want (may include '\n'). rN)rgoaheadrdatas rfeedzHTMLParser.feedhs ||d*  Qrc&|jdy)zHandle any buffered data.N)rrs rclosezHTMLParser.closeqs  QrNc|jS)z)Return full source of start tag: '<...>'.)_HTMLParser__starttag_textr s rget_starttag_textzHTMLParser.get_starttag_textws###rc|j|_tjd|jztj|_y)Nz )lowerrrecompileIr)relems rset_cdata_modezHTMLParser.set_cdata_mode{s/**,::nt&FMrc(t|_d|_yN)rrrr s rclear_cdata_modezHTMLParser.clear_cdata_modes-rc |j}d}t|}||krW|jrq|jse|j d|}|dkr|j dt ||dz }|dk\r'tjdj||sn|}n?|jj||}|r|j}n|jrn|}||krJ|jr*|js|jt|||n|j||||j||}||k(rn3|j}|d|r[t j#||r|j%|} n|d|r|j'|} nr|d|r|j)|} nW|d|r|j+|} n<|d |r|j-|} n!|d z|kr|jd|d z} nnh| dkr|sn_|j d |d z} | dkr |j d|d z} | dkr |d z} n| d z } |jr*|js|jt||| n|j||| |j|| }n|d |rt.j#||}|rY|j1d d} |j3| |j5} |d| d z s| d z } |j|| }d||dvr,|j|||d z|j||d z}n|d|rt6j#||}|rW|j1d } |j9| |j5} |d| d z s| d z } |j|| }t:j#||}|rE|rB|j1||dk(r,|j5} | |kr|} |j||d z}n>|d z|kr'|jd|j||d z}nnJd||krW|rm||krh|js\|jr*|js|jt|||n|j||||j||}||d|_y)Nr<&"z[\s;]D))$/ A%c1Q3/Eq!,A"((!4u{{} ;!IIK6 !A NN1a!e4!eq[$$S)q!a%0A555qS!eV 1q5$$T__  '!A,!78  1.q!$Aqr{ rcp|j}|||dzdk(sJd|||dzdk(r|j|S|||dzdk(r|j|S|||dzjd k(r7|j d |dz}|d k(ry |j ||dz||d zS|j |S) Nr7r6z+unexpected call to parse_html_declaration()r4zV #%%a( ( Qqs^u $,,Q/ / Qqs^ ! ! #{ 2LLac*E{   WQqS/ 07N++A. .rc|j}|||dzdvsJd|jd|dz}|dk(ry|r|j||dz||dzS)Nr7)r6r3z"unexpected call to parse_comment()rr8r)rr;handle_comment)rrQreportrposs rr]zHTMLParser.parse_bogus_commentsu,,q1~- C1B C-ll3!$ "9    !C 0 1Qwrc|j}|||dzdk(sJdtj||dz}|sy|j}|j ||dz||j }|S)Nr7r5zunexpected call to parse_pi()r8)rpicloser>r? handle_pirM)rrQrrDrSs rrHzHTMLParser.parse_pi!st,,q1~%F'FF%w!, KKM wqsA' IIKrc~d|_|j|}|dkr|S|j}||||_g}tj ||dz}|sJd|j }|j djx|_}||krtj ||}|sn|j ddd\} } } | sd} n,| dddcxk(r| ddk(sn| dddcxk(r| ddk(rnn| dd} | r t| } |j| j| f|j }||kr|||j} | d vr|j||||S| jd r|j|||S|j!||||j"vr|j%||S) Nrrz#unexpected call to parse_starttag()r7rY'r8")r/>ri)r#check_for_whole_start_tagrtagfind_tolerantrDrMrKr&rattrfind_tolerantrappendstripr@endswithhandle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSr+) rrQendposrattrsrDrUtagmattrnamerest attrvaluerMs rrEzHTMLParser.parse_starttag-s#//2 A:M,,&q0 &&w!4;;;u IIK"[[^1133 s&j!''3A()1a(8 %HdI 2A$8)BC.82A#7237%aO $Y/ LL(..*I6 7A&ja%%' k !   WQv. /M <<   # #C /    e ,d111##C( rcH|j}tj||}|rt|j}|||dz}|dk(r|dzS|dk(r6|j d|r|dzS|j d|ry||kDr|S|dzS|dk(ry|dvry||kDr|S|dzSt d ) Nrr/rir7r8rz6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!)rlocatestarttagend_tolerantrDrMrBAssertionError)rrQrrvrSnexts rrjz$HTMLParser.check_for_whole_start_tagYs,, & , ,Wa 8 A1QqS>Ds{1u s{%%dA.q5L%%c1-q5Hq5Lrz561u1u 677rc|j}|||dzdk(sJdtj||dz}|sy|j}tj ||}|s|j |j||||Stj ||dz}|s!|||dzdk(r|dzS|j|S|jdj}|jd|j}|j||dzS|jdj}|j %||j k7r|j||||S|j||j|S) Nr7r3zunexpected call to parse_endtagrr8rYzr)r endendtagr>rM endtagfindrDrr@rkr]rKr&r; handle_endtagr.)rrQrrDr^ namematchtagnamer*s rrFzHTMLParser.parse_endtag{sy,,q1~%H'HH%  !A#.   !,*  5!12 (..w!U*Q3J33A66ooa(..0G LLimmo6E   w '7N{{1~##% ?? &t&  5!12  4   rcJ|j|||j|yr-)rqrrrurts rrpzHTMLParser.handle_startendtags  S%( 3rcyr-rs rrqzHTMLParser.handle_starttag rcyr-r)rrus rrzHTMLParser.handle_endtagrrcyr-rrrVs rrLzHTMLParser.handle_charrefrrcyr-rrs rrOzHTMLParser.handle_entityrefrrcyr-rrs rr@zHTMLParser.handle_datarrcyr-rrs rr`zHTMLParser.handle_commentrrcyr-r)rdecls rr\zHTMLParser.handle_declrrcyr-rrs rrezHTMLParser.handle_pirrcyr-rrs r unknown_declzHTMLParser.unknown_declrr)r)__name__ __module__ __qualname____doc__rrr r rr!r#r$r+r.rrIr]rHrErjrFrprqrrLrOr@r`r\rer __classcell__)rs@rrr>s*1+/O$Nu#t/*  (X8D%P          r)rr' _markupbasehtmlr__all__r(rrPrNrJrCrd commentcloserkrlVERBOSEr|rr ParserBaserrrrrs"  . RZZ' RZZ % BJJ> ? "**@ Arzz+& "**S/rzz)$ 2::LMBJJ=>(RZZ)ZZ BJJsO RZZ> ? J ''J r