
    Vhx                     t    d dl Z d dlmZmZ d dl d dl d dlmZ d dlm	Z	  edg d      Z
 G d d	e      Zy)
    N)defaultdict
namedtuple)*)WordListCorpusReader)line_tokenizePanlexLanguage)
panlex_uidiso639iso639_typescriptnamelangvar_uidc                   N     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
d
d	Z xZS )PanlexSwadeshCorpusReaderaq  
    This is a class to read the PanLex Swadesh list from

    David Kamholz, Jonathan Pool, and Susan M. Colowick (2014).
    PanLex: Building a Resource for Panlingual Lexical Translation.
    In LREC. http://www.lrec-conf.org/proceedings/lrec2014/pdf/1029_Paper.pdf

    License: CC0 1.0 Universal
    https://creativecommons.org/publicdomain/zero/1.0/legalcode
    c                 *   t        |   |i | t        j                  d| j	                         d         j                  d      | _        | j                         D ci c]  }|j                  | c}| _	        | j                         | _        y c c}w )Nzswadesh([0-9].*)\/r      )super__init__rematchfileidsgroupswadesh_sizeget_languagesr	   
_languagesget_macrolanguages_macro_langauges)selfargskwargslang	__class__s       Q/home/dcms/DCMS/lib/python3.12/site-packages/nltk/corpus/reader/panlex_swadesh.pyr   z"PanlexSwadeshCorpusReader.__init__+   s{    $)&)HH%:DLLN1<MNTTUVW=A=O=O=QRT4??D0R $ 7 7 9 Ss   Bc                      y)NzCC0 1.0 Universal r   s    r#   licensez!PanlexSwadeshCorpusReader.license2   s    "    c                 6    | j                   j                         S N)r   keysr&   s    r#   language_codesz(PanlexSwadeshCorpusReader.language_codes5   s    ##%%r(   c              #      K   | j                  d| j                   d      j                  d      D ]:  }|j                         st	        |j                         j                  d        < y w)Nlangs.txt
	)rawr   splitstripr   )r   lines     r#   r   z'PanlexSwadeshCorpusReader.get_languages8   sa     HHuT%6%6$7t<=CCDI 	<D::< $**,"4"4T":;;	<s   A-A/c                     t        t              }| j                  j                         D ]*  }||j                     j                  |j                         , |S r*   )r   listr   valuesr
   appendr	   )r   macro_langaugesr!   s      r#   r   z,PanlexSwadeshCorpusReader.get_macrolanguages>   sK    %d+OO**, 	ADDKK(//@	Ar(   c                     d| j                    d| d}| j                  |      D cg c]  }|j                  d       c}S c c}w z.
        :return: a list of list(str)
        swadesh/r/   r1   )r   wordsr3   )r   	lang_codefileidconcepts       r#   words_by_langz'PanlexSwadeshCorpusReader.words_by_langD   sE     4,,-Qyk>37::f3EFd#FFFs   Ac                     | j                   |   D cg c]  }d| j                   d| d }}|D cg c])  }| j                  |      D ]  }|j                  d       + c}}S c c}w c c}}w r<   )r   r   r?   r3   )r   
iso63_coder@   r   rA   rB   s         r#   words_by_iso639z)PanlexSwadeshCorpusReader.words_by_iso639K   s     "22:>
 d''()D9
 

 /6
$*TZZPVEW
:AGMM$

 	
	

s
   A%.A*c                     |s| j                         }|D cg c]  }| j                  |       }}t        t        |       S c c}w )zF
        :return: a tuple of words for the specified fileids.
        )r   r?   r7   zip)r   r   f	wordlistss       r#   entriesz!PanlexSwadeshCorpusReader.entriesW   sA     llnG,34qTZZ]4	4CO$$ 5s   Ar*   )__name__
__module____qualname____doc__r   r'   r,   r   r   rC   rF   rK   __classcell__)r"   s   @r#   r   r      s1    	:#&<G

%r(   r   )r   collectionsr   r   nltk.corpus.reader.apinltk.corpus.reader.utilnltk.corpus.reader.wordlistr   nltk.tokenizer   r   r   r%   r(   r#   <module>rV      s:    
 / $ % < '
@% 4 @%r(   