
    Vh"                     l    d dl Z d dlZd dl d dl d dl d dlmZ d dlmZ d dl	m
Z
  G d de      Zy)    N)*ElementWrapper)map_tag)LazyConcatenationc                   `    e Zd ZddZddZddZddZddZddZd Z	d	 Z
dd
Zed        Zy)NPSChatCorpusReaderNc                 B    t         j                  | |||       || _        y N)XMLCorpusReader__init___tagset)selfrootfileids
wrap_etreetagsets        K/home/dcms/DCMS/lib/python3.12/site-packages/nltk/corpus/reader/nps_chat.pyr   zNPSChatCorpusReader.__init__   s      tWjA    c           
         | j                   r=t        | j                  |      D cg c]  }t        |d| j                         c}      S t        | j                  |      D cg c]  }t        |d       c}      S c c}w c c}w )NzSession/Posts/Post)_wrap_etreeconcatabspathsXMLCorpusView	_wrap_eltr   r   fileids      r   	xml_postszNPSChatCorpusReader.xml_posts   s     #'--"8 "&*>O   #'--"8 "&*>? s   A<"Bc           
          t        | j                  |      D cg c]  }t        |d| j                         c}      S c c}w )NSession/Posts/Post/terminals)r   r   r   _elt_to_wordsr   s      r   postszNPSChatCorpusReader.posts(   sJ    
 #mmG4	  :D<N<N
 	
s   >c           
            fd}t         j                  |      D cg c]  }t        |d|       c}      S c c}w )Nc                 *    j                  | |      S r   )_elt_to_tagged_words)elthandlerr   r   s     r   readerz0NPSChatCorpusReader.tagged_posts.<locals>.reader3   s    ,,S'6BBr   r    )r   r   r   )r   r   r   r(   r   s   ` `  r   tagged_postsz NPSChatCorpusReader.tagged_posts2   sH    	C  #mmG4 f&DfM
 	
s   <c                 6    t        | j                  |            S r   )r   r"   )r   r   s     r   wordszNPSChatCorpusReader.words=   s     G!455r   c                 8    t        | j                  ||            S r   )r   r)   )r   r   r   s      r   tagged_wordsz NPSChatCorpusReader.tagged_words@   s     !2!27F!CDDr   c                     t        |      S r   r   )r   r&   r'   s      r   r   zNPSChatCorpusReader._wrap_eltC   s    c""r   c                     |j                  d      D cg c]   }| j                  |j                  d         " c}S c c}w )Ntword)findall_simplify_usernameattrib)r   r&   r'   r0   s       r   r!   z!NPSChatCorpusReader._elt_to_wordsF   s3    CF;;sCSTa''(89TTTs   %<c           
      &   |j                  d      D cg c]/  }| j                  |j                  d         |j                  d   f1 }}|r:|| j                  k7  r+|D cg c]  \  }}|t	        | j                  ||      f  }}}|S c c}w c c}}w )Nr0   r1   pos)r2   r3   r4   r   r   )r   r&   r'   r   r0   tagged_postws          r   r%   z(NPSChatCorpusReader._elt_to_tagged_wordsI   s     [[%
 $$QXXf%56H
 
 f,DO:@1aGDLL&!45K  

s   4B #Bc                     d| v rd| j                  dd      d   z   } | S t        | t              r| j                  d      } | S )NUserU   ascii)split
isinstancebytesdecode)r1   s    r   r3   z&NPSChatCorpusReader._simplify_usernameT   sI    T>FA.q11D  e$;;w'Dr   )FNr   )NN)__name__
__module____qualname__r   r   r"   r)   r+   r-   r   r!   r%   staticmethodr3    r   r   r	   r	      sE     
	
6E#U	  r   r	   )retextwrapnltk.corpus.reader.apinltk.corpus.reader.utilnltk.corpus.reader.xmldocsnltk.internalsr   nltk.tagr   	nltk.utilr   r   r	   rF   r   r   <module>rO      s-    
  $ % ( )  'G/ Gr   