
    VhB              
       b   U d Z ddlZddl ddlmZ ddlmZ  ededdd	g
      Zee	d<    ede
d      Ze
e	d<    eded      Zee	d<    ededddd      Zee	d<    ededdd      Zee	d<    ededdd      Zee	d<    ededg      Zee	d<    eded      Zee	d<    ededd
      Zee	d<    eded d!gd"d#d      Zee	d<    ed$ed%d&d'
      Zee	d$<    ed(ed%d)d*g
      Zee	d(<    ed+ed,      Zee	d+<    ed-ed.d
      Z ee	d-<    ed/e!d0d1
      Z"e!e	d/<    ed2ed3d4dd      Z#ee	d2<    ed5e$g d6      Z%e$e	d7<    ed8e$g d6      Z&e$e	d9<    ed:e'd;d<
      Z(e'e	d:<    ed=edg d>
      Z)ee	d=<    ed?edd@
      Z*ee	d?<    edAe+dB      Z,e+e	dA<    edCedd@
      Z-ee	dC<    edDe.dEdd1      Z/e.e	dD<    edFe0dGd'
      Z1e0e	dF<    edHe2dIdJ
      Z3e2e	dK<    edLe4dM      Z5e4e	dL<    edNe6ddd      Z7e6e	dN<    edOe8ddPdQ      Z9e8e	dO<    edRedSdTd#d'dUV      Z:ee	dR<    edWe;ddXdQ      Z<e;e	dW<    edYe=dZd'
      Z>e=e	d[<    ed\e'dd
      Z?e'e	d\<    ed]e@d^d#      ZAe@e	d]<    ed_eBd`d<
      ZCeBe	d_<    edaeDg db      ZEeDe	da<    edceFddd1
      ZGeFe	dc<    edeeFddd1
      ZHeFe	de<    edfeIdgdgd<Q      ZJeIe	df<    edheKdidjd#k      ZLeKe	dh<    edleMd d!gd<
      ZNeMe	dl<    edme;dndd<o      ZOe;e	dm<    edpePdq      ZQePe	dp<    edreRdE      ZSeRe	dr<    edseTdtdtd'Q      ZUeTe	ds<    edueVdvd'
      ZWeVe	du<    edweXdq      ZYeXe	dw<    edxeZdygdd'      Z[eZe	dx<    edzedd<
      Z\ee	dz<    ed{e'dBd1
      Z]e'e	d{<    ed|eTd}d~gdgdd      Z^eTe	d|<    ede_dBd1
      Z`e_e	d<    edeadd1
      Zbeae	d<    edeadd1
      Zceae	d<    ededd#      Zeede	d<    edef      Zgefe	d<    edehdd#d      Ziehe	d<    edejd      Zkeje	d<    ededd#d      Zlee	d<    edemd edd      end#d      Zoeme	d<    ededd<
      Zpee	d<    edeqd      Zreqe	d<    edes      Ztese	d<    eded,d1
      Zuee	d<    edevdd      Zweve	d<    edexdq      Zyexe	d<    ededd<
      Zzee	d<    ede{ ede!dd1
            Z|e{e	d<    ede{ ede!dd1
            Z}e{e	d<    ede{ ede!dd1
            Z~e{e	d<    ede{ ede!dd1
            Ze{e	d<    eded      Zee	d<    ede'dBd
      Ze'e	d<    ededddd el      Zee	d<    ededddd el      Zee	d<    ededddd eL      Zee	d<    ededddd eL      Zee	d<    edede|      Zee	d<    ededBd1
      Zee	d<    ededBdd1«      Zee	d<   dÄ Zedk(  ryy)a  
NLTK corpus readers.  The modules in this package provide functions
that can be used to read corpus files in a variety of formats.  These
functions can be used to read both the corpus files that are
distributed in the NLTK corpus package, and corpus files that are part
of external corpora.

Available Corpora
=================

Please see https://www.nltk.org/nltk_data/ for a complete list.
Install corpora using nltk.download().

Corpus Reader Functions
=======================
Each corpus module defines one or more "corpus reader functions",
which can be used to read documents from that corpus.  These functions
take an argument, ``item``, which is used to indicate which document
should be read from the corpus:

- If ``item`` is one of the unique identifiers listed in the corpus
  module's ``items`` variable, then the corresponding document will
  be loaded from the NLTK corpus package.
- If ``item`` is a filename, then that file will be read.

Additionally, corpus reader functions can be given lists of item
names; in which case, they will return a concatenation of the
corresponding documents.

Corpus reader functions are named based on the type of information
they return.  Some common examples, and their return types, are:

- words(): list of str
- sents(): list of (list of str)
- paras(): list of (list of (list of str))
- tagged_words(): list of (str,str) tuple
- tagged_sents(): list of (list of (str,str))
- tagged_paras(): list of (list of (list of (str,str)))
- chunked_sents(): list of (Tree w/ (str,str) leaves)
- parsed_sents(): list of (Tree with str leaves)
- parsed_paras(): list of (list of (Tree with str leaves))
- xml(): A single xml ElementTree
- raw(): unprocessed corpus contents

For example, to read a list of the words in the Brown Corpus, use
``nltk.corpus.brown.words()``:

    >>> from nltk.corpus import brown
    >>> print(", ".join(brown.words())) # doctest: +ELLIPSIS
    The, Fulton, County, Grand, Jury, said, ...

    N)*)LazyCorpusLoader)RegexpTokenizerabcz(?!\.).*\.txt)sciencelatin_1)ruralutf8)encodingalpino)tagsetbcp47z(cldr|iana)/*brownz
c[a-z]\d\dzcats.txtascii)cat_filer   r   cess_catz(?!\.).*\.tbfunknownzISO-8859-15)r   r   cess_espcmudictcomtranscomparative_sentenceszlabeledSentences\.txtzlatin-1	conll2000z	train.txtztest.txt)NPVPPPwsj	conll2002z.*\.(test|train).*)LOCPERORGMISCzutf-8	conll2007)eus
ISO-8859-2)espr
   crubadanz.*\.txtdependency_treebankz.*\.dpextended_omwz.*/wn-[a-z\-]*\.tabr
   florestaz(?!\.).*\.ptb#framenet_v15)zfrRelation.xmlzframeIndex.xmlzfulltextIndex.xmlzluIndex.xmlzsemTypes.xml
framenet15framenet_v17framenet
gazetteersz(?!LICENSE|\.).*\.txtr$   genesis))zfinnish|french|germanr   )swedishcp865)z.*utf_8	gutenberglatin1ieerz(?!README|\.).*	inauguralindianz(?!\.).*\.posjeitaz
.*\.chasenzknbc/corpus1z.*/KN.*zeuc-jpknbclin_thesaurusz.*\.lsp
mac_morphomachadoz([a-z]*)/.*)cat_patternr   masc_taggedz(spoken|written)/.*\.txtzcategories.txt_)r   r   r   sepmovie_reviewsz(neg|pos)/.*	mte_teip5z(oana).*\.xmlmultext_eastnamesnps_chatz(?!README|\.).*\.xmlopinion_lexiconz(\w+)\-words\.txtppattach)trainingtestdevsetproduct_reviews_1z^(?!Readme).*\.txtproduct_reviews_2	pros_conszIntegrated(Cons|Pros)\.txtptbz/(WSJ/\d\d/WSJ_\d\d|BROWN/C[A-Z]/C[A-Z])\d\d.MRGzallcats.txt)r   r   qcreutersz(training|test).*)r   r   rtez(?!\.).*\.xmlsensevalsentence_polarityzrt-polarity\.(neg|pos)sentiwordnetzSentiWordNet_3.0.0.txtshakespearesinica_treebankparsedstate_union	stopwordssubjectivityz"(quote.tok.gt9|plot.tok.gt9)\.5000subjobj)zquote.tok.gt9.5000zplot.tok.gt9.5000)cat_mapr   swadeshpanlex_swadeshzswadesh110/.*\.txt
swadesh110zswadesh207/.*\.txt
swadesh207switchboardtimitz.+\.tagstimit_taggedtoolboxz(?!.*(README|\.)).*\.(dic|txt)ztreebank/combinedzwsj_.*\.mrgtreebankztreebank/taggedzwsj_.*\.posz(?<=/\.)\s*(?![^\[]*\])T)gaps)sent_tokenizerpara_block_readerr   r   treebank_chunkztreebank/rawzwsj_.*treebank_rawtwitter_samplesz.*\.jsonudhrudhr2universal_treebanks_v20z	.*\.conll)
ignorewordsrq   rq   posrq   rq   rq   rq   rq   )columntypesuniversal_treebanksverbnetwebtextz(?!README|\.).*\.txtwordnetzomw-1.4z.*/wn-data-.*\.tab	wordnet31wordnet2021wordnet2022
wordnet_icz.*\.datrr   propbankzprop.txtzframes/.*\.xmlz	verbs.txtc                 0    t        j                  dd|       S Nz
^wsj/\d\d/ resubfilenames    D/home/dcms/DCMS/lib/python3.12/site-packages/nltk/corpus/__init__.py<lambda>r         RVVM2x8     znombank.1.0znombank.1.0.wordsc                 0    t        j                  dd|       S r   r   r   s    r   r   r     r   r   nombankc                 "    | j                         S Nupperr   s    r   r   r         X^^% r   propbank_ptbc                 "    | j                         S r   r   r   s    r   r   r     r   r   nombank_ptbsemcorzbrown./tagfiles/br-.*\.xmlnonbreaking_prefixesperlunipropsmisc)nltk_data_subdirr   c                     t         j                          t        j                          t        j                          t        j                          t
        j                          t        j                          t        j                          t        j                          t        j                          t        j                          t        j                          t        j                          t        j                          t        j                          t        j                          t         j                          t"        j                          t$        j                          t&        j                          t(        j                          t*        j                          t,        j                          t.        j                          y r   )r   demor   r   r   r   r0   r4   r6   r7   r8   rE   rH   rS   rV   rW   rY   rZ   rd   rf   rg   rn   rw   rr    r   r   r   r     s    HHJ	JJLLLNNNNNLLNNNIIKNN
KKM	JJLMMOMMONN	JJLLLNMMOIIKLLN	JJLr   __main__)__doc__r   nltk.corpus.readernltk.corpus.utilr   nltk.tokenizer   PlaintextCorpusReaderr   __annotations__AlpinoCorpusReaderr   BCP47CorpusReaderr   CategorizedTaggedCorpusReaderr   BracketParseCorpusReaderr   r   CMUDictCorpusReaderr   AlignedCorpusReaderr    ComparativeSentencesCorpusReaderr   ConllChunkCorpusReaderr   r   DependencyCorpusReaderr"   CrubadanCorpusReaderr&   r'   CorpusReaderr(   r)   FramenetCorpusReaderr,   r.   WordListCorpusReaderr/   r0   r4   IEERCorpusReaderr6   r7   IndianCorpusReaderr8   ChasenCorpusReaderr9   KNBCorpusReaderr:   LinThesaurusCorpusReaderr;   MacMorphoCorpusReaderr<   *PortugueseCategorizedPlaintextCorpusReaderr=   r?    CategorizedPlaintextCorpusReaderrB   MTECorpusReaderrD   rE   NPSChatCorpusReaderrF   OpinionLexiconCorpusReaderrG   PPAttachmentCorpusReaderrH   ReviewsCorpusReaderrL   rM   ProsConsCorpusReaderrN   #CategorizedBracketParseCorpusReaderrO   StringCategoryCorpusReaderrP   rQ   RTECorpusReaderrR   SensevalCorpusReaderrS    CategorizedSentencesCorpusReaderrT   SentiWordNetCorpusReaderrU   XMLCorpusReaderrV   SinicaTreebankCorpusReaderrW   rY   rZ   r[   SwadeshCorpusReaderr_   PanlexSwadeshCorpusReaderra   rb   SwitchboardCorpusReaderrc   TimitCorpusReaderrd   TimitTaggedCorpusReaderre   ToolboxCorpusReaderrf   rg   ChunkedCorpusReader!tagged_treebank_para_block_readerrk   rl   TwitterCorpusReaderrm   UdhrCorpusReaderrn   ro   ConllCorpusReaderru   VerbnetCorpusReaderrv   rw   WordNetCorpusReaderrx   ry   rz   r{   WordNetICCorpusReaderr|   rr   PropbankCorpusReaderr}   NombankCorpusReaderr   r   r   SemcorCorpusReaderr   NonbreakingPrefixesCorpusReaderr   UnicharsCorpusReaderr   r   __name__r   r   r   <module>r      s~
  3j 
   - )-	$&78	  .   , 0  (8!($  &6&
"  &6&
"   0"YK 	  !1#%5!
  ;K$	; 7  %5*%	!  %5!%	!  %5#_5	%	!  "2$j"
  /?19w/ +  .L"86l  &6&
"  $4
$
  
 "2
"
 
 $4&(@<$
   "2		"	 	 $4&(88$	   *&2BDVW W#3&(88$	   . "29v  -  )OZ(o  +;-z+'  %5%
!  7G.7	3  .>!.*  3C$3/  !1"2W!o  /!#3g  !1#%<U!
  /?	/+  &6(*H&
"  *:,.Cf* &  *:,.Cf* &  #3!-#	  +: (  "2
${J&?,"  -=$-	)  (@PQ_ Q!1$&6"
  7G$)7 3  *:,.FQX*&   0?$4 _  /?J/+  &6(*:\&"  #3%'9F#	  2B$)$*85'J2.   0"$6 	  )9/1FQW)
%  )9/1FQW)
%  (8*5($  ,G5FG G(8$k%')%   0"$E 	  &6&
"  '7"#=DI7'#  '7)9|'#  (8*K($  *&2BC C/"J   *:	* & "  0"$4 	  "2$&="	   0Y.CfU 	 
 "2Y.CfU"	 
 $4Y.CfU$   $4Y.CfU$  
 %5'%
!  /!#5 
 "28"
   08 	  &6%&"  $4%$   . "?  9I#	9 5  &6&" H< z r   