
    /Vh=                    J   d Z ddlmZ ddlZddlZddlZddlmZm	Z	 erddl
mZ ej                  j                  d      Zej                  j                  e      Zej"                  j%                  e       eej&                  d<    ej(                  d      e_         ej(                  d	      e_         ej(                  d
      e_        ej.                  e_         ej(                  dej2                        e_         ej(                  d      Z G d dej8                        Zy)a  
This module imports a copy of [`html.parser.HTMLParser`][] and modifies it heavily through monkey-patches.
A copy is imported rather than the module being directly imported as this ensures that the user can import
and  use the unmodified library for their own needs.
    )annotationsN)TYPE_CHECKINGSequence)Markdownzhtml.parser
htmlparserz<[a-zA-Z]|</>z\?>z&([a-zA-Z][-.a-zA-Z0-9]*);a  
  <[a-zA-Z][^`\t\n\r\f />\x00]*       # tag name <= added backtick here
  (?:[\s/]*                           # optional whitespace before attribute name
    (?:(?<=['"\s/])[^`\s/>][^\s/=>]*  # attribute name <= added backtick here
      (?:\s*=+\s*                     # value indicator
        (?:'[^']*'                    # LITA-enclosed value
          |"[^"]*"                    # LIT-enclosed value
          |(?!['"])[^`>\s]*           # bare value <= added backtick here
         )
         (?:\s*,)*                    # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                 # trailing whitespace
z^([ ]*\n){2}c                      e Zd ZU dZd fdZ fdZ fdZedd       ZddZ	ddZ
d dZd!d	Zd"d
Zd#dZd!dZd$dZd$dZd"dZd% fdZd"dZd"dZd"dZd& fdZd& fdZd'd( fdZdZded<   d)dZd&dZ xZS )*HTMLExtractorz
    Extract raw HTML from text.

    The raw HTML is stored in the [`htmlStash`][markdown.util.HtmlStash] of the
    [`Markdown`][markdown.Markdown] instance passed to `md` and the remaining text
    is stored in `cleandoc` as a list of strings.
    c                    d|vrd|d<   t        dg      | _        dg| _        d| _        t	        |   |i | || _        y )Nconvert_charrefsFhrr   )set
empty_tagslineno_start_cacheoverride_comment_updatesuper__init__md)selfr   argskwargs	__class__s       C/home/dcms/DCMS/lib/python3.12/site-packages/markdown/htmlparser.pyr   zHTMLExtractor.__init__W   sR    V+).F%& tf+#$#',$ 	$)&)    c                x    d| _         d| _        g | _        g | _        g | _        dg| _        t        |           y)z1Reset this instance.  Loses all unprocessed data.Fr   N)inrawintailstack_cachecleandocr   r   resetr   r   s    r   r    zHTMLExtractor.resetf   s9    
 "
!##%#$#r   c                   t         |           t        | j                        rb| j                  r;| j
                  s/| j                  t        j                  | j                               n| j                  | j                         t        | j                        r_| j                  j                  | j                  j                  j                  dj                  | j                                     g | _	        yy)zHandle any buffered data. N)r   closelenrawdatar   
cdata_elemhandle_datar   unescaper   r   appendr   	htmlStashstorejoinr!   s    r   r$   zHTMLExtractor.closeq   s    t|| $$T__  !4!4T\\!BC  .t{{MM  !2!2!8!89M!NODK r   c                h   t        t        | j                        dz
  | j                  dz
        D ]e  }| j                  |   }| j                  j                  d|      }|dk(  rt        | j                        }| j                  j                  |dz          g | j                  | j                  dz
     S )zHReturns char index in `self.rawdata` for the start of the current line.    
)ranger%   r   linenor&   findr*   )r   iilast_line_start_poslf_poss       r   line_offsetzHTMLExtractor.line_offset   s     D334Q6AF 	5B"&"9"9""=\\&&t-@AF|T\\*##**6!84	5 &&t{{1}55r   c                    | j                   dk(  ry| j                   dkD  ry| j                  | j                  | j                  | j                   z    j                         dk(  S )z
        Returns True if current position is at start of line.

        Allows for up to three blank spaces at start of line.
        r   T   Fr#   )offsetr&   r8   stripr   s    r   at_line_startzHTMLExtractor.at_line_start   sV     ;;!;;?||D,,T-=-=-KLRRTXZZZr   c                    | j                   | j                  z   }t        j                  j	                  | j
                  |      }|r| j
                  ||j                          S dj                  |      S )z
        Returns the text of the end tag.

        If it fails to extract the actual text from the raw data, it builds a closing tag with `tag`.
        z</{}>)r8   r;   r   	endendtagsearchr&   endformat)r   tagstartms       r   get_endtag_textzHTMLExtractor.get_endtag_text   s_       4;;.  ''e<<<aeeg.. >>#&&r   c                *   || j                   v r| j                  ||       y | j                  j                  |      rJ| j                  s| j                         r.| j                  s"d| _        | j                  j                  d       | j                         }| j                  r7| j                  j                  |       | j                  j                  |       y | j                  j                  |       || j                  v r| j                          y y )NTr0   )r   handle_startendtagr   is_block_levelr   r>   r   r   r*   get_starttag_textr   r   CDATA_CONTENT_ELEMENTSclear_cdata_mode)r   rD   attrstexts       r   handle_starttagzHTMLExtractor.handle_starttag   s    $//!##C/77!!#&DKKD<N<N<PY]YcYcDJMM  &%%'::JJc"KKt$MM  &d111%%' 2r   c                :   | j                  |      }| j                  rb| j                  j                  |       || j                  v r7| j                  r+| j                  j                         |k(  rn| j                  r+t        | j                        dk(  rt        j                  | j                  | j                  | j                  z   t        |      z   d        r| j                  j                  d       nd| _        d| _        | j                  j                  | j                  j                  j!                  dj#                  | j                                     | j                  j                  d       g | _        y y | j                  j                  |       y )Nr   r0   TFr#   

)rG   r   r   r*   r   popr%   blank_line_rematchr&   r8   r;   r   r   r   r+   r,   r-   )r   rD   rO   s      r   handle_endtagzHTMLExtractor.handle_endtag   s,   ##C(::KKt$djj jjzz~~'3. jj 4::!# &&t||D4D4Dt{{4RUXY]U^4^4_'`aKK&&t, #'DK"
$$TWW%6%6%<%<RWWT[[=Q%RS$$V,  $ MM  &r   c                    | j                   rd|v rd| _         | j                  r| j                  j                  |       y | j                  j                  |       y )Nr0   F)r   r   r   r*   r   r   datas     r   r(   zHTMLExtractor.handle_data   sA    ;;44<DK::KKt$MM  &r   c                   | j                   s| j                  r| j                  j                  |       y| j	                         r	|rt
        j                  | j                  | j                  | j                  z   t        |      z   d       r|dz  }nd| _        | j                  r| j                  d   nd}|j                  d      s,|j                  d      r| j                  j                  d       | j                  j                  | j                  j                  j                  |             | j                  j                  d       y| j                  j                  |       y)z Handle empty tags (`<data>`). Nr0   Tr1   r#   rR   )r   r   r   r*   r>   rT   rU   r&   r8   r;   r%   r   endswithr   r+   r,   )r   rY   is_blockitems       r   handle_empty_tagzHTMLExtractor.handle_empty_tag   s    ::KKt$!h""4<<0@0@4;;0NQTUYQZ0Z0[#\] #(,4==$2D==(T]]4-@$$T*MM  !2!2!8!8!>?MM  (MM  &r   c                x    | j                  | j                         | j                  j                  |             y )Nr\   )r^   rK   r   rJ   )r   rD   rN   s      r   rI   z HTMLExtractor.handle_startendtag   s.    d446AWAWX[A\]r   c                H    | j                  dj                  |      d       y )Nz&#{};Fr`   r^   rC   r   names     r   handle_charrefzHTMLExtractor.handle_charref   s    gnnT2UCr   c                H    | j                  dj                  |      d       y )Nz&{};Fr`   rb   rc   s     r   handle_entityrefzHTMLExtractor.handle_entityref   s    fmmD1EBr   c                    | j                   | j                  z   t        |      z   dz   }| j                  ||dz    dk7  r| j	                  d       d| _        y | j                  dj                  |      d       y )N   r:   z--><Tz	<!--{}-->r`   )r8   r;   r%   r&   r(   r   r^   rC   )r   rY   is      r   handle_commentzHTMLExtractor.handle_comment  sr    t{{*SY6:<<!a% E)S!+/D(k006Fr   c                R    | j                   rd| _         d}d}t        | 	  ||      S )NFr   r/   )r   r   	updatepos)r   rk   jr   s      r   rn   zHTMLExtractor.updatepos
  s0    ''+0D(AAw A&&r   c                H    | j                  dj                  |      d       y )Nz<!{}>Tr`   rb   rX   s     r   handle_declzHTMLExtractor.handle_decl  s    gnnT2TBr   c                H    | j                  dj                  |      d       y )Nz<?{}?>Tr`   rb   rX   s     r   	handle_pizHTMLExtractor.handle_pi  s    hood3dCr   c                t    |j                  d      rdnd}| j                  dj                  ||      d       y )NzCDATA[z]]>z]>z<![{}{}Tr`   )
startswithr^   rC   )r   rY   rB   s      r   unknown_declzHTMLExtractor.unknown_decl  s4    x0edi..tS9DIr   c                    | j                         s| j                  rt        |   |      S | j	                  d       |dz   S )Nz<?   )r>   r   r   parse_pir(   )r   rk   r   s     r   ry   zHTMLExtractor.parse_pi  s>    4;;7#A&& 	1ur   c                V   | j                         s| j                  rw| j                  ||dz    dk(  rS| j                  ||dz    dk(  s>| j                  |      }|dk(  r&| j	                  | j                  ||dz           |dz   S |S t
        |   |      S | j	                  d       |dz   S )	Nr:   z<![	   z	<![CDATA[r1   r/   z<!rx   )r>   r   r&   parse_bogus_commentr(   r   parse_html_declaration)r   rk   resultr   s      r   r}   z$HTMLExtractor.parse_html_declaration#  s    4;;||Aac"e+DLL1Q34G;4V 11!4R<$$T\\!AE%:;q5L71!44 	1ur   c                t    t         |   ||      }|dk(  ry| j                  | j                  || d       |S )Nr1   Fr`   )r   r|   r^   r&   )r   rk   reportposr   s       r   r|   z!HTMLExtractor.parse_bogus_comment3  sC     g)!V4"9dll1S1EB
r   Nz
str | None_HTMLExtractor__starttag_textc                    | j                   S )z)Return full source of start tag: `<...>`.)r   r=   s    r   rK   zHTMLExtractor.get_starttag_textB  s    ###r   c                   | j                   ||dz    dk(  r&| j                  | j                   ||dz           |dz   S d | _        | j                  |      }|dk  r&| j                  | j                   ||dz           |dz   S | j                   }||| | _        g }t        j
                  j                  ||dz         }|sJ d       |j                         }|j                  d      j                         x| _
        }||k  rt        j                  j                  ||      }|sn|j                  ddd      \  }	}
}|
sd }n,|d d dcxk(  r|dd  k(  sn |d d d	cxk(  r|dd  k(  rn n|dd }|rt        j                  |      }|j                  |	j                         |f       |j                         }||k  r||| j                         }|d
vr| j                         \  }}d| j                  v rP|| j                  j!                  d      z   }t#        | j                        | j                  j%                  d      z
  }n|t#        | j                        z   }| j                  |||        |S |j'                  d      r| j)                  ||       |S || j*                  v r| j-                  |       | j/                  ||       |S )Nr:   z</>r   r/   z#unexpected call to parse_starttag()rx   'r1   ")>/>r0   r   )r&   r(   r   check_for_whole_start_tagr   tagfind_tolerantrU   rB   grouplowerlasttagattrfind_tolerantr)   r*   r<   getposcountr%   rfindr[   rI   rL   set_cdata_moderP   )r   rk   endposr&   rN   rU   krD   rF   attrnamerest	attrvaluerB   r3   r;   s                  r   parse_starttagzHTMLExtractor.parse_starttagF  s   <<!a% E)T\\!AE23q5L#//2A:T\\!AE23q5L,,&q0 ++11'1Q3?;;;uIIK"[[^1133s&j,,227A>A()1a(8%HdI 	2A$8)BC.82A#7237%aO	&//	:	LL(..*I67A &j a%%'k!![[]NFFt+++$"6"6"<"<T"BBT112//55d;<  #d&:&:";;WQv./M<<##C/  d111##C(  e,r   )r   r   )returnint)r   bool)rD   strr   r   )rD   r   rN   zSequence[tuple[str, str]])rD   r   )rY   r   )rY   r   r\   r   )rd   r   )rk   r   ro   r   r   r   )rk   r   r   r   )r   )rk   r   r   r   r   r   )r   r   )__name__
__module____qualname____doc__r   r    r$   propertyr8   r>   rG   rP   rV   r(   r^   rI   re   rg   rl   rn   rq   rs   rv   ry   r}   r|   r   __annotations__rK   r   __classcell__)r   s   @r   r	   r	   N   s    	 
6 
6['(*'6''.^DCG'CDJ  #'OZ&$6r   r	   )r   
__future__r   reimportlib.util	importlibsystypingr   r   markdownr   util	find_specspecmodule_from_specr   loaderexec_modulemodulescompilestarttagopenpiclose	entityref
incompleteVERBOSElocatestarttagend_tolerantrT   
HTMLParserr	    r   r   <module>r      s   ( # 	  
 *!
 ~~.^^,,T2
   
 #&L  %"**_5
   RZZ'
 !rzz"?@
  #,,
 (2

 4 ZZ)
 %$ 

?+nJ)) nr   