
    VhHe                       d Z ddlZddlZddlZddlZddlmZmZ ddlm	Z	 ddl
mZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ dZd\  ZZZZZeeeegZdZ ej@                  d      Z! G d de"      Z#e	 G d d             Z$ G d de$      Z% G d de$      Z& G d de      Z' G d de      Z(d$dZ)d$dZ*d$dZ+d%dZ,d%dZ-d%d Z.e&jR                  j                   e)_         e&jT                  j                   e*_         e&jV                  j                   e+_         e&jX                  j                   e,_         e&jZ                  j                   e-_         e&j\                  j                   e._         d%d!Z/d" Z0d# Z1y)&a  
An NLTK interface for WordNet

WordNet is a lexical database of English.
Using synsets, helps find conceptual relationships between words
such as hypernyms, hyponyms, synonyms, antonyms etc.

For details about WordNet see:
https://wordnet.princeton.edu/

This module also allows you to find lemmas in languages
other than English from the Open Multilingual Wordnet
https://omwn.org/

    N)defaultdictdeque)total_ordering)chainislice)
itemgetter)CorpusReader)
deprecated)FreqDist)binary_search_filegu <7~asrnv)(NzSomething %szSomebody %szIt is %singzSomething is %sing PPz%Something %s something Adjective/NounzSomething %s Adjective/NounzSomebody %s AdjectivezSomebody %s somethingzSomebody %s somebodyzSomething %s somebodyzSomething %s somethingzSomething %s to somebodyzSomebody %s on somethingzSomebody %s somebody somethingz!Somebody %s something to somebodyz#Somebody %s something from somebodyz#Somebody %s somebody with somethingz!Somebody %s somebody of somethingz!Somebody %s something on somebodyzSomebody %s somebody PPzSomebody %s something PPzSomebody %s PPzSomebody's (body part) %sz"Somebody %s somebody to INFINITIVEzSomebody %s somebody INFINITIVEzSomebody %s that CLAUSEzSomebody %s to somebodyzSomebody %s to INFINITIVEzSomebody %s whether INFINITIVEz)Somebody %s somebody into V-ing somethingz$Somebody %s something with somethingzSomebody %s INFINITIVEzSomebody %s VERB-ingzIt %s that CLAUSEzSomething %s INFINITIVEzSomebody %s at somethingzSomebody %s for somethingzSomebody %s on somebodyzSomebody %s out of somebodyz	\.[\d]+\.c                       e Zd ZdZy)WordNetErrorz.An exception class for wordnet-related errors.N)__name__
__module____qualname____doc__     J/home/dcms/DCMS/lib/python3.12/site-packages/nltk/corpus/reader/wordnet.pyr   r   ~   s    8r   r   c                       e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zy)_WordNetObjectz+A common base class for lemmas and synsets.c                 $    | j                  d      S N@_relatedselfs    r   	hypernymsz_WordNetObject.hypernyms       }}S!!r   c                 $    | j                  d      S r   r!   r#   s    r   
_hypernymsz_WordNetObject._hypernyms   r&   r   c                 $    | j                  d      S Nz@ir!   r#   s    r   instance_hypernymsz!_WordNetObject.instance_hypernyms       }}T""r   c                 $    | j                  d      S r*   r!   r#   s    r   _instance_hypernymsz"_WordNetObject._instance_hypernyms   r,   r   c                 $    | j                  d      S )N~r!   r#   s    r   hyponymsz_WordNetObject.hyponyms   r&   r   c                 $    | j                  d      S )Nz~ir!   r#   s    r   instance_hyponymsz _WordNetObject.instance_hyponyms   r,   r   c                 $    | j                  d      S )Nz#mr!   r#   s    r   member_holonymsz_WordNetObject.member_holonyms   r,   r   c                 $    | j                  d      S )Nz#sr!   r#   s    r   substance_holonymsz!_WordNetObject.substance_holonyms   r,   r   c                 $    | j                  d      S )Nz#pr!   r#   s    r   part_holonymsz_WordNetObject.part_holonyms   r,   r   c                 $    | j                  d      S )Nz%mr!   r#   s    r   member_meronymsz_WordNetObject.member_meronyms   r,   r   c                 $    | j                  d      S )Nz%sr!   r#   s    r   substance_meronymsz!_WordNetObject.substance_meronyms   r,   r   c                 $    | j                  d      S )Nz%pr!   r#   s    r   part_meronymsz_WordNetObject.part_meronyms   r,   r   c                 $    | j                  d      S )Nz;cr!   r#   s    r   topic_domainsz_WordNetObject.topic_domains   r,   r   c                 $    | j                  d      S )Nz-cr!   r#   s    r   in_topic_domainsz_WordNetObject.in_topic_domains   r,   r   c                 $    | j                  d      S )Nz;rr!   r#   s    r   region_domainsz_WordNetObject.region_domains   r,   r   c                 $    | j                  d      S )Nz-rr!   r#   s    r   in_region_domainsz _WordNetObject.in_region_domains   r,   r   c                 $    | j                  d      S )Nz;ur!   r#   s    r   usage_domainsz_WordNetObject.usage_domains   r,   r   c                 $    | j                  d      S )Nz-ur!   r#   s    r   in_usage_domainsz_WordNetObject.in_usage_domains   r,   r   c                 $    | j                  d      S )N=r!   r#   s    r   
attributesz_WordNetObject.attributes   r&   r   c                 $    | j                  d      S )N*r!   r#   s    r   entailmentsz_WordNetObject.entailments   r&   r   c                 $    | j                  d      S )N>r!   r#   s    r   causesz_WordNetObject.causes   r&   r   c                 $    | j                  d      S )N^r!   r#   s    r   	also_seesz_WordNetObject.also_sees   r&   r   c                 $    | j                  d      S )N$r!   r#   s    r   verb_groupsz_WordNetObject.verb_groups   r&   r   c                 $    | j                  d      S )N&r!   r#   s    r   similar_tosz_WordNetObject.similar_tos   r&   r   c                 ,    t        | j                        S N)hash_namer#   s    r   __hash__z_WordNetObject.__hash__   s    DJJr   c                 4    | j                   |j                   k(  S r_   ra   r$   others     r   __eq__z_WordNetObject.__eq__       zzU[[((r   c                 4    | j                   |j                   k7  S r_   rd   re   s     r   __ne__z_WordNetObject.__ne__   rh   r   c                 4    | j                   |j                   k  S r_   rd   re   s     r   __lt__z_WordNetObject.__lt__   s    zzEKK''r   N) r   r   r   r   r%   r(   r+   r.   r1   r3   r5   r7   r9   r;   r=   r?   rA   rC   rE   rG   rI   rK   rN   rQ   rT   rW   rZ   r]   rb   rg   rj   rl   r   r   r   r   r      s    5""##"#############"""""" ))(r   r   c                   l    e Zd ZdZg dZd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zy)Lemmaa  
    The lexical entry for a single morphological form of a
    sense-disambiguated word.

    Create a Lemma from a "<word>.<pos>.<number>.<lemma>" string where:
    <word> is the morphological stem identifying the synset
    <pos> is one of the module attributes ADJ, ADJ_SAT, ADV, NOUN or VERB
    <number> is the sense number, counting from 0.
    <lemma> is the morphological form of interest

    Note that <word> and <lemma> can be different, e.g. the Synset
    'salt.n.03' has the Lemmas 'salt.n.03.salt', 'salt.n.03.saltiness' and
    'salt.n.03.salinity'.

    Lemma attributes, accessible via methods with the same name:

    - name: The canonical name of this lemma.
    - synset: The synset that this lemma belongs to.
    - syntactic_marker: For adjectives, the WordNet string identifying the
      syntactic position relative modified noun. See:
      https://wordnet.princeton.edu/documentation/wninput5wn
      For all other parts of speech, this attribute is None.
    - count: The frequency of this lemma in wordnet.

    Lemma methods:

    Lemmas have the following methods for retrieving related Lemmas. They
    correspond to the names for the pointer symbols defined here:
    https://wordnet.princeton.edu/documentation/wninput5wn
    These methods all return lists of Lemmas:

    - antonyms
    - hypernyms, instance_hypernyms
    - hyponyms, instance_hyponyms
    - member_holonyms, substance_holonyms, part_holonyms
    - member_meronyms, substance_meronyms, part_meronyms
    - topic_domains, region_domains, usage_domains
    - attributes
    - derivationally_related_forms
    - entailments
    - causes
    - also_sees
    - verb_groups
    - similar_tos
    - pertainyms
    
_wordnet_corpus_readerra   _syntactic_marker_synset_frame_strings
_frame_ids_lexname_index_lex_id_lang_keyc                     || _         || _        || _        || _        g | _        g | _        || _        || _        d| _        d | _	        y Nengro   )r$   wordnet_corpus_readersynsetnamelexname_indexlex_idsyntactic_markers          r   __init__zLemma.__init__  sN     '<#
!1 +
	r   c                     | j                   S r_   rd   r#   s    r   r~   z
Lemma.name-      zzr   c                     | j                   S r_   )rq   r#   s    r   r   zLemma.syntactic_marker0  s    %%%r   c                     | j                   S r_   )rr   r#   s    r   r}   zLemma.synset3      ||r   c                     | j                   S r_   )rs   r#   s    r   frame_stringszLemma.frame_strings6  s    """r   c                     | j                   S r_   rt   r#   s    r   	frame_idszLemma.frame_ids9      r   c                     | j                   S r_   )rw   r#   s    r   langz
Lemma.lang<  r   r   c                     | j                   S r_   )rx   r#   s    r   keyz	Lemma.key?      yyr   c                 x    t        |       j                  | j                  j                  | j                  f}d|z  S )Nz%s('%s.%s'))typer   rr   ra   )r$   tups     r   __repr__zLemma.__repr__B  s1    4j!!4<<#5#5tzzAs""r   c           	      &   | j                   j                  }| j                  |f| j                  j                  vrg S | j                  j                  | j                  |f   D cg c]  \  }}} |||      j
                  |    c}}}S c c}}}w r_   )rp   synset_from_pos_and_offsetra   rr   _lemma_pointers_lemmas)r$   relation_symbol
get_synsetposoffsetlemma_indexs         r   r"   zLemma._relatedF  s    00KK
JJ(0L0LLI -1LL,H,H

O+-
 
(V[ sF#++K8
 	
 
s   &!Bc                 8    | j                   j                  |       S ))Return the frequency count for this Lemma)rp   lemma_countr#   s    r   countzLemma.countQ  s    **66t<<r   c                 $    | j                  d      S )N!r!   r#   s    r   antonymszLemma.antonymsU  r&   r   c                 $    | j                  d      S )N+r!   r#   s    r   derivationally_related_formsz"Lemma.derivationally_related_formsX  r&   r   c                 $    | j                  d      S )N\r!   r#   s    r   
pertainymszLemma.pertainyms[  r,   r   N)r   r   r   r   	__slots__r   r~   r   r}   r   r   r   r   r   r"   r   r   r   r   r   r   r   rn   rn      sU    -^I*&##	
=""#r   rn   c                      e Zd ZdZg dZd Zd Zd Zd Zd Z	d'dZ
d'd	Zd'd
Zd Zd Zd'dZd'dZd Zd Zd Zd(dZddlmZ ddlmZ d)dZd Zd Zd*dZd+dZd Zd,dZ d-dZ!d-dZ"d-d Z#d,d!Z$d,d"Z%d,d#Z&d$ Z'd% Z(d& Z)y).Synsetaj  Create a Synset from a "<lemma>.<pos>.<number>" string where:
    <lemma> is the word's morphological stem
    <pos> is one of the module attributes ADJ, ADJ_SAT, ADV, NOUN or VERB
    <number> is the sense number, counting from 0.

    Synset attributes, accessible via methods with the same name:

    - name: The canonical name of this synset, formed using the first lemma
      of this synset. Note that this may be different from the name
      passed to the constructor if that string used a different lemma to
      identify the synset.
    - pos: The synset's part of speech, matching one of the module level
      attributes ADJ, ADJ_SAT, ADV, NOUN or VERB.
    - lemmas: A list of the Lemma objects for this synset.
    - definition: The definition for this synset.
    - examples: A list of example strings for this synset.
    - offset: The offset in the WordNet dict file of this synset.
    - lexname: The name of the lexicographer file containing this synset.

    Synset methods:

    Synsets have the following methods for retrieving related Synsets.
    They correspond to the names for the pointer symbols defined here:
    https://wordnet.princeton.edu/documentation/wninput5wn
    These methods all return lists of Synsets.

    - hypernyms, instance_hypernyms
    - hyponyms, instance_hyponyms
    - member_holonyms, substance_holonyms, part_holonyms
    - member_meronyms, substance_meronyms, part_meronyms
    - attributes
    - entailments
    - causes
    - also_sees
    - verb_groups
    - similar_tos

    Additionally, Synsets support the following methods specific to the
    hypernym relation:

    - root_hypernyms
    - common_hypernyms
    - lowest_common_hypernyms

    Note that Synsets do not support the following relations because
    these are defined by WordNet as lexical relations:

    - antonyms
    - derivationally_related_forms
    - pertainyms
    )_pos_offsetra   rt   r   _lemma_names_definition	_examples_lexname	_pointersr   
_max_depth
_min_depthc                     || _         d | _        d | _        d | _        g | _        g | _        g | _        d | _        g | _        d | _	        d | _
        t        t              | _        t        t              | _        y r_   )rp   r   r   ra   rt   r   r   r   r   r   _all_hypernymsr   setr   listr   )r$   r|   s     r   r   zSynset.__init__  sn    &;# 	
"$S)*40r   c                     | j                   S r_   )r   r#   s    r   r   z
Synset.pos  r   r   c                     | j                   S r_   )r   r#   s    r   r   zSynset.offset  r   r   c                     | j                   S r_   rd   r#   s    r   r~   zSynset.name  r   r   c                     | j                   S r_   r   r#   s    r   r   zSynset.frame_ids  r   r   c                    | j                   }||j                         vry|dk(  r|S |j                  |       |j                  |       }|j                  j                  |      }||j                  |   |   v r|j                  |   |   |   S y)z7Helper method for Synset.definition and Synset.examplesNr{   )rp   langs_load_lang_datass2oflg_attrsindex
_lang_data)r$   doc_typedefaultr   corpusofis          r   _doczSynset._doc  s    ,,v||~%U]N""4(d#B%%h/AV&&t,Q//((.q1"55r   c                 >    | j                  d| j                  |      S )z'Return definition in specified languagedefr   )r   r   r$   r   s     r   
definitionzSynset.definition  s    yy 0 0ty<<r   c                 >    | j                  d| j                  |      S )z%Return examples in specified languageexer   )r   r   r   s     r   exampleszSynset.examples  s    yyTy::r   c                     | j                   S r_   )r   r#   s    r   lexnamezSynset.lexname  s    }}r   c                 f    | j                   t        k(  r| j                  j                         dk7  ryy)Nz1.6FT)r   NOUNrp   get_versionr#   s    r   _needs_rootzSynset._needs_root  s*    99!<!<!H!H!Je!Sr   c                     |dk(  r| j                   S | j                  }|j                  |       |j                  |       }||j                  |   d   v r|j                  |   d   |   S g S )z5Return all the lemma_names associated with the synsetr{   r   )r   rp   r   r   r   )r$   r   readerr   s       r   lemma_nameszSynset.lemma_names  st    5=$$$00F""4(T"AF%%d+A..((.q1!44	r   c           
      r   |dk(  r| j                   S | j                  r| j                  j                  |       g }| j	                  |      }|D ]e  }t        | j                  | || j                  j                  j                  | j                               dd      }||_	        |j                  |       g |S y)z7Return all the lemma objects associated with the synsetr{   r   N)r   ra   rp   r   r   rn   	_lexnamesr   r   rw   append)r$   r   lemmarklemmylemtemps         r   lemmaszSynset.lemmas  s    5=<<ZZ''77=G$$T*E 
%////99??O "
t$
% N r   c                    g }t               }| g}|rn|j                         }||vrW|j                  |       |j                         |j	                         z   }|s|j                  |       n|j                  |       |rn|S )z4Get the topmost hypernyms of this synset in WordNet.)r   popaddr%   r+   r   extend)r$   resultseentodonext_synsetnext_hypernymss         r   root_hypernymszSynset.root_hypernyms  s     uv((*K$&%))+k.L.L.NN  &MM+.KK/  r   c                     d| j                   vrP| j                         | j                         z   }|sd| _        | j                  S dt	        d |D              z   | _        | j                  S )zl
        :return: The length of the longest hypernym path from this
            synset to the root.
        r   r      c              3   <   K   | ]  }|j                           y wr_   	max_depth.0hs     r   	<genexpr>z#Synset.max_depth.<locals>.<genexpr>,       )KA!++-)K   )__dict__r%   r+   r   maxr$   r%   s     r   r   zSynset.max_depth!  c     t}},(4+B+B+DDI"#  #$c)K)K&K"Kr   c                     d| j                   vrP| j                         | j                         z   }|sd| _        | j                  S dt	        d |D              z   | _        | j                  S )zm
        :return: The length of the shortest hypernym path from this
            synset to the root.
        r   r   r   c              3   <   K   | ]  }|j                           y wr_   	min_depthr   s     r   r   z#Synset.min_depth.<locals>.<genexpr>:  r   r   )r   r%   r+   r   minr   s     r   r  zSynset.min_depth/  r   r   c              #   J   K   ddl m}  || ||      D ]  }|| k7  s	|  yw)a  
        Return the transitive closure of source under the rel
        relationship, breadth-first, discarding cycles:

        >>> from nltk.corpus import wordnet as wn
        >>> computer = wn.synset('computer.n.01')
        >>> topic = lambda s:s.topic_domains()
        >>> print(list(computer.closure(topic)))
        [Synset('computer_science.n.01')]

        UserWarning: Discarded redundant search for Synset('computer.n.01') at depth 2


        Include redundant paths (but only once), avoiding duplicate searches
        (from 'animal.n.01' to 'entity.n.01'):

        >>> dog = wn.synset('dog.n.01')
        >>> hyp = lambda s:sorted(s.hypernyms())
        >>> print(list(dog.closure(hyp)))
        [Synset('canine.n.02'), Synset('domestic_animal.n.01'), Synset('carnivore.n.01'), Synset('animal.n.01'), Synset('placental.n.01'), Synset('organism.n.01'), Synset('mammal.n.01'), Synset('living_thing.n.01'), Synset('vertebrate.n.01'), Synset('whole.n.02'), Synset('chordate.n.01'), Synset('object.n.01'), Synset('physical_entity.n.01'), Synset('entity.n.01')]

        UserWarning: Discarded redundant search for Synset('animal.n.01') at depth 7
        r   )acyclic_breadth_firstN)	nltk.utilr  )r$   reldepthr  r}   s        r   closurezSynset.closure=  s/     : 	4+D#u= 	F~	s   ##r   )acyclic_depth_first) unweighted_minimum_spanning_treeNc                 $    ddl m}  || |||      S )a#  
        Return the full relation tree, including self,
        discarding cycles:

        >>> from nltk.corpus import wordnet as wn
        >>> from pprint import pprint
        >>> computer = wn.synset('computer.n.01')
        >>> topic = lambda s:sorted(s.topic_domains())
        >>> pprint(computer.tree(topic))
        [Synset('computer.n.01'), [Synset('computer_science.n.01')]]

        UserWarning: Discarded redundant search for Synset('computer.n.01') at depth -3


        But keep duplicate branches (from 'animal.n.01' to 'entity.n.01'):

        >>> dog = wn.synset('dog.n.01')
        >>> hyp = lambda s:sorted(s.hypernyms())
        >>> pprint(dog.tree(hyp))
        [Synset('dog.n.01'),
         [Synset('canine.n.02'),
          [Synset('carnivore.n.01'),
           [Synset('placental.n.01'),
            [Synset('mammal.n.01'),
             [Synset('vertebrate.n.01'),
              [Synset('chordate.n.01'),
               [Synset('animal.n.01'),
                [Synset('organism.n.01'),
                 [Synset('living_thing.n.01'),
                  [Synset('whole.n.02'),
                   [Synset('object.n.01'),
                    [Synset('physical_entity.n.01'),
                     [Synset('entity.n.01')]]]]]]]]]]]]],
         [Synset('domestic_animal.n.01'),
          [Synset('animal.n.01'),
           [Synset('organism.n.01'),
            [Synset('living_thing.n.01'),
             [Synset('whole.n.02'),
              [Synset('object.n.01'),
               [Synset('physical_entity.n.01'), [Synset('entity.n.01')]]]]]]]]]
        r   )acyclic_branches_depth_first)r  r  )r$   r  r	  cut_markr  s        r   treezSynset.treef  s    V 	;+D#uhGGr   c                     g }| j                         | j                         z   }t        |      dk(  r| gg}|D ]9  }|j                         D ]$  }|j	                  |        |j	                  |       & ; |S )a%  
        Get the path(s) from this synset to the root, where each path is a
        list of the synset nodes traversed on the way to the root.

        :return: A list of lists, where each list gives the node sequence
           connecting the initial ``Synset`` node and a root node.
        r   )r%   r+   lenhypernym_pathsr   )r$   pathsr%   hypernymancestor_lists        r   r  zSynset.hypernym_paths  s     NN$t'>'>'@@	y>QVHE! 	,H!)!8!8!: ,$$T*]+,	, r   c                 V   | j                   s,| j                         D ch c]  }|D ]  }|  c}}| _         |j                   s,|j                         D ch c]  }|D ]  }|  c}}|_         t        | j                   j                  |j                               S c c}}w c c}}w )z
        Find all synsets that are hypernyms of this synset and the
        other synset.

        :type other: Synset
        :param other: other input synset.
        :return: The synsets that are hypernyms of both synsets.
        )r   _iter_hypernym_listsr   intersection)r$   rf   self_synsetsself_synsetother_synsetsother_synsets         r   common_hypernymszSynset.common_hypernyms  s     "" %)$=$=$?# #/#   ##D
 ## &+%?%?%A$!$1$ ! $$E 
 D''44U5I5IJKK#$s   BB%c                    | j                  |      }|r3t        d      }d|_        d |_        d |_        |j                  |       	 |r6t        d |D              }|D cg c]  }|j                         |k(  s| }}n5t        d |D              }|D cg c]  }|j                         |k(  s| }}t        |      S c c}w c c}w # t        $ r g cY S w xY w)aN  
        Get a list of lowest synset(s) that both synsets have as a hypernym.
        When `use_min_depth == False` this means that the synset which appears
        as a hypernym of both `self` and `other` with the lowest maximum depth
        is returned or if there are multiple such synsets at the same depth
        they are all returned

        However, if `use_min_depth == True` then the synset(s) which has/have
        the lowest minimum depth and appear(s) in both paths is/are returned.

        By setting the use_min_depth flag to True, the behavior of NLTK2 can be
        preserved. This was changed in NLTK3 to give more accurate results in a
        small set of cases, generally with synsets concerning people. (eg:
        'chef.n.01', 'fireman.n.01', etc.)

        This method is an implementation of Ted Pedersen's "Lowest Common
        Subsumer" method from the Perl Wordnet module. It can return either
        "self" or "other" if they are a hypernym of the other.

        :type other: Synset
        :param other: other input synset
        :type simulate_root: bool
        :param simulate_root: The various verb taxonomies do not
            share a single root which disallows this metric from working for
            synsets that are not connected. This flag (False by default)
            creates a fake root that connects all the taxonomies. Set it
            to True to enable this behavior. For the noun taxonomy,
            there is usually a default root except for WordNet version 1.6.
            If you are using wordnet 1.6, a fake root will need to be added
            for nouns as well.
        :type use_min_depth: bool
        :param use_min_depth: This setting mimics older (v2) behavior of NLTK
            wordnet If True, will use the min_depth function to calculate the
            lowest common hypernyms. This is known to give strange results for
            some synset pairs (eg: 'chef.n.01', 'fireman.n.01') but is retained
            for backwards compatibility
        :return: The synsets that are the lowest common hypernyms of both
            synsets
        N*ROOT*c                      g S r_   r   r   r   r   <lambda>z0Synset.lowest_common_hypernyms.<locals>.<lambda>  s    B r   c                      g S r_   r   r   r   r   r"  z0Synset.lowest_common_hypernyms.<locals>.<lambda>  s    R r   c              3   <   K   | ]  }|j                           y wr_   r  r   r   s     r   r   z1Synset.lowest_common_hypernyms.<locals>.<genexpr>       ?!?r   c              3   <   K   | ]  }|j                           y wr_   r   r%  s     r   r   z1Synset.lowest_common_hypernyms.<locals>.<genexpr>  r&  r   )r  r   ra   r%   r+   r   r   r  r   sorted
ValueError)	r$   rf   simulate_rootuse_min_depthsynsetsfake_synsetr   r   unsorted_lchs	            r   lowest_common_hypernymszSynset.lowest_common_hypernyms  s    P ''. ,K (K$.K!-7K*NN;'		?w??	+2Qaakkmy6PQQ?w??	+2Qaakkmy6PQQ,''	  R  R 	I	s<   C
  C 9C =C
 C/C3C
  
C
 
CCc                    | |fh}| j                         | j                         z   D ]  }||j                  |dz   d      z  } |rAt        d      }d|_        t        |t        d            d   }|j                  ||dz   f       |S )a  
        Get the path(s) from this synset to the root, counting the distance
        of each node from the initial node on the way. A set of
        (synset, distance) tuples is returned.

        :type distance: int
        :param distance: the distance (number of edges) from this hypernym to
            the original hypernym ``Synset`` on which this method was called.
        :return: A set of ``(Synset, int)`` tuples where each ``Synset`` is
           a hypernym of the first ``Synset``.
        r   Fr*  Nr   )r   )r(   r.   hypernym_distancesr   ra   r   r   r   )r$   distancer*  	distancesr  r-  fake_synset_distances          r   r2  zSynset.hypernym_distances  s     H%&	)D,D,D,FF 	XH44X\QV4WWI	X ,K (K#&yjm#DQ#G MM;(<q(@ABr   c                    | j                   dk(  r| diS t        | dfg      }i }|ru|j                         \  }||v r||<   dz  |j                  fd|j	                         D               |j                  fd|j                         D               |ru|r1t        d       }d|_         t        |j                               dz   ||<   |S )Nr   r   r   c              3   &   K   | ]  }|f 
 y wr_   r   r   hypr	  s     r   r   z2Synset._shortest_hypernym_paths.<locals>.<genexpr>  s     @##u@   c              3   &   K   | ]  }|f 
 y wr_   r   r8  s     r   r   z2Synset._shortest_hypernym_paths.<locals>.<genexpr>   s     I##uIr:  )	ra   r   popleftr   r(   r.   r   r   values)r$   r*  queuepathr   r-  r	  s         @r   _shortest_hypernym_pathszSynset._shortest_hypernym_paths  s    ::!!9ayk"}}HAuDyDGQJELL@@@LLI1F1F1HII   ,K (K #DKKM 2Q 6Dr   c                    | |k(  ry| j                  |      }|j                  |      }t        d      }|}|j                         D ]&  \  }}|j                  ||      }	t	        |||	z         }( t        j                  |      rdS |S )a  
        Returns the distance of the shortest path linking the two synsets (if
        one exists). For each synset, all the ancestor nodes and their
        distances are recorded and compared. The ancestor node common to both
        synsets that can be reached with the minimum number of traversals is
        used. If no ancestor nodes are common, None is returned. If a node is
        compared with itself 0 is returned.

        :type other: Synset
        :param other: The Synset to which the shortest path will be found.
        :return: The number of edges in the shortest path connecting the two
            nodes, or None if no path exists.
        r   infN)r@  floatitemsgetr  mathisinf)
r$   rf   r*  
dist_dict1
dist_dict2rB  path_distancer}   d1d2s
             r   shortest_path_distancezSynset.shortest_path_distance)  s     5=22=A
33MB

 El$**, 	8JFB,BrBw7M	8 zz-0tCmCr   c                     | j                  ||xr" | j                         xs |j                               }||dk  ryd|dz   z  S )a  
        Path Distance Similarity:
        Return a score denoting how similar two word senses are, based on the
        shortest path that connects the senses in the is-a (hypernym/hypnoym)
        taxonomy. The score is in the range 0 to 1, except in those cases where
        a path cannot be found (will only be true for verbs as there are many
        distinct verb taxonomies), in which case None is returned. A score of
        1 represents identity i.e. comparing a sense with itself will return 1.

        :type other: Synset
        :param other: The ``Synset`` that this ``Synset`` is being compared to.
        :type simulate_root: bool
        :param simulate_root: The various verb taxonomies do not
            share a single root which disallows this metric from working for
            synsets that are not connected. This flag (True by default)
            creates a fake root that connects all the taxonomies. Set it
            to false to disable this behavior. For the noun taxonomy,
            there is usually a default root except for WordNet version 1.6.
            If you are using wordnet 1.6, a fake root will be added for nouns
            as well.
        :return: A score denoting the similarity of the two ``Synset`` objects,
            normally between 0 and 1. None is returned if no connecting path
            could be found. 1 is returned if a ``Synset`` is compared with
            itself.
        r1  Nr         ?r   )rM  r   )r$   rf   verboser*  r3  s        r   path_similarityzSynset.path_similarityJ  s[    6 ..'WT-=-=-?-V5CTCTCV / 
 x!|hl##r   c                    | j                   |j                   k7  rt        d| d|d      | j                         }| j                   | j                  j                  vr&| j                  j                  | j                   |       | j                  j                  | j                      }| j                  ||xr |      }|
|dk  s|dk(  ryt        j                  |dz   d|z  z         S )	ab  
        Leacock Chodorow Similarity:
        Return a score denoting how similar two word senses are, based on the
        shortest path that connects the senses (as above) and the maximum depth
        of the taxonomy in which the senses occur. The relationship is given as
        -log(p/2d) where p is the shortest path length and d is the taxonomy
        depth.

        :type  other: Synset
        :param other: The ``Synset`` that this ``Synset`` is being compared to.
        :type simulate_root: bool
        :param simulate_root: The various verb taxonomies do not
            share a single root which disallows this metric from working for
            synsets that are not connected. This flag (True by default)
            creates a fake root that connects all the taxonomies. Set it
            to false to disable this behavior. For the noun taxonomy,
            there is usually a default root except for WordNet version 1.6.
            If you are using wordnet 1.6, a fake root will be added for nouns
            as well.
        :return: A score denoting the similarity of the two ``Synset`` objects,
            normally greater than 0. None is returned if no connecting path
            could be found. If a ``Synset`` is compared with itself, the
            maximum score is returned, which varies depending on the taxonomy
            depth.
        z&Computing the lch similarity requires  and ! to have the same part of speech.r1  Nr   r          @)	r   r   r   rp   r   _compute_max_depthrM  rF  log)r$   rf   rP  r*  	need_rootr	  r3  s          r   lch_similarityzSynset.lch_similaritym  s    6 99

"@DeM 
 $$&	99D77BBB''::499iP++66tyyA..!<9 / 
 x!|uz(Q,3;7888r   c                 `   | j                         xs |j                         }| j                  ||xr |d      }t        |      dk(  ry| |v r| n|d   }|j                         dz   }| j	                  ||xr |      }|j	                  ||xr |      }	||	y||z  }|	|z  }	d|z  ||	z   z  S )a  
        Wu-Palmer Similarity:
        Return a score denoting how similar two word senses are, based on the
        depth of the two senses in the taxonomy and that of their Least Common
        Subsumer (most specific ancestor node). Previously, the scores computed
        by this implementation did _not_ always agree with those given by
        Pedersen's Perl implementation of WordNet Similarity. However, with
        the addition of the simulate_root flag (see below), the score for
        verbs now almost always agree but not always for nouns.

        The LCS does not necessarily feature in the shortest path connecting
        the two senses, as it is by definition the common ancestor deepest in
        the taxonomy, not closest to the two senses. Typically, however, it
        will so feature. Where multiple candidates for the LCS exist, that
        whose shortest path to the root node is the longest will be selected.
        Where the LCS has multiple paths to the root, the longer path is used
        for the purposes of the calculation.

        :type  other: Synset
        :param other: The ``Synset`` that this ``Synset`` is being compared to.
        :type simulate_root: bool
        :param simulate_root: The various verb taxonomies do not
            share a single root which disallows this metric from working for
            synsets that are not connected. This flag (True by default)
            creates a fake root that connects all the taxonomies. Set it
            to false to disable this behavior. For the noun taxonomy,
            there is usually a default root except for WordNet version 1.6.
            If you are using wordnet 1.6, a fake root will be added for nouns
            as well.
        :return: A float score denoting the similarity of the two ``Synset``
            objects, normally greater than zero. If no connecting path between
            the two senses can be found, None is returned.

        T)r*  r+  r   Nr   r1  rU  )r   r/  r  r   rM  )
r$   rf   rP  r*  rX  	subsumerssubsumerr	  len1len2s
             r   wup_similarityzSynset.wup_similarity  s    F $$&=%*;*;*=	
 00!<9D 1 
	
 y>Q9,4)A, ""$q( **M$?i + 
 ++M$?i , 
 <4<et,,r   c                 (    t        | ||      \  }}}|S )a  
        Resnik Similarity:
        Return a score denoting how similar two word senses are, based on the
        Information Content (IC) of the Least Common Subsumer (most specific
        ancestor node).

        :type  other: Synset
        :param other: The ``Synset`` that this ``Synset`` is being compared to.
        :type ic: dict
        :param ic: an information content object (as returned by
            ``nltk.corpus.wordnet_ic.ic()``).
        :return: A float score denoting the similarity of the two ``Synset``
            objects. Synsets whose LCS is the root node of the taxonomy will
            have a score of 0 (e.g. N['dog'][0] and N['table'][0]).
        _lcs_icr$   rf   icrP  ic1ic2lcs_ics          r   res_similarityzSynset.res_similarity  s    " #43S&r   c                     | |k(  rt         S t        | ||      \  }}}|dk(  s|dk(  ry||z   d|z  z
  }|dk(  rt         S d|z  S )a  
        Jiang-Conrath Similarity:
        Return a score denoting how similar two word senses are, based on the
        Information Content (IC) of the Least Common Subsumer (most specific
        ancestor node) and that of the two input Synsets. The relationship is
        given by the equation 1 / (IC(s1) + IC(s2) - 2 * IC(lcs)).

        :type  other: Synset
        :param other: The ``Synset`` that this ``Synset`` is being compared to.
        :type  ic: dict
        :param ic: an information content object (as returned by
            ``nltk.corpus.wordnet_ic.ic()``).
        :return: A float score denoting the similarity of the two ``Synset``
            objects.
        r      r   )_INFrb  )r$   rf   rd  rP  re  rf  rg  ic_differences           r   jcn_similarityzSynset.jcn_similarity  sa    " 5=K"43S& !8saxc	AJ.AK=  r   c                 :    t        | ||      \  }}}d|z  ||z   z  S )a  
        Lin Similarity:
        Return a score denoting how similar two word senses are, based on the
        Information Content (IC) of the Least Common Subsumer (most specific
        ancestor node) and that of the two input Synsets. The relationship is
        given by the equation 2 * IC(lcs) / (IC(s1) + IC(s2)).

        :type other: Synset
        :param other: The ``Synset`` that this ``Synset`` is being compared to.
        :type ic: dict
        :param ic: an information content object (as returned by
            ``nltk.corpus.wordnet_ic.ic()``).
        :return: A float score denoting the similarity of the two ``Synset``
            objects, in the range 0 to 1.
        rU  ra  rc  s          r   lin_similarityzSynset.lin_similarity   s,    " #43S&fs++r   c              #      K   | g}t               }|r[|D ]  }|j                  |        | |D cg c].  }|j                         |j                         z   D ]  }||vr|
 0 }}}|rZyyc c}}w w)z
        :return: An iterator over ``Synset`` objects that are either proper
        hypernyms or instance of hypernyms of the synset.
        N)r   r   r%   r+   )r$   r   r   r}   r  s        r   r  zSynset._iter_hypernym_lists4  s     
 vu ! !J #!'!1!1!3f6O6O6Q!Q 4' D 	 s   1A43A.&A4,A4c                 L    t        |       j                   d| j                   dS )Nz('z'))r   r   ra   r#   s    r   r   zSynset.__repr__F  s$    t*%%&bB77r   c                     | j                   j                  }|| j                  vrg S | j                  |   }|D cg c]  \  }} |||       }}}|S c c}}w r_   )rp   r   r   )r$   r   r   pointer_tuplesr   r   r   s          r   r"   zSynset._relatedI  s[    00KK
$..0I88FGfZV$GG Hs   Ar{   ))ru  N)FF)r   FFFT)*r   r   r   r   r   r   r   r   r~   r   r   r   r   r   r   r   r   r   r   r  r
  r  r  acyclic_treer  mstr  r  r  r/  r2  r@  rM  rQ  rY  r_  rh  rm  ro  r  r   r"   r   r   r   r   r   _  s    2hI 1& =;*6!F >A
-H^(L.9v,0DB!$F.9`K-Z( !D,($8r   r   c            
           e Zd ZdZdZd\  ZZZZZ	ededede	diZ
ede	d	ed
edediZ ed ej                         D              ZdZ fdZdJdZdKdZdKdZdKdZdKdZdKdZd Zd Zd Zd Zd Zd Zd Zd Zd Zd  Z d! Z!dLd"Z"d# Z#d$ Z$d% Z%d& Z& e'd'      d(        Z(d) Z)d* Z*dMd+Z+dNd,Z,dNd-Z-dOd.Z.dNd/Z/dJd0Z0dLd1Z1dLd2Z2dPd3Z3dLd4Z4dLd5Z5dLd6Z6d7 Z7dQd9Z8e9jp                  j                  e8_        dQd:Z:e9jt                  j                  e:_        dQd;Z;e9jv                  j                  e;_        dRd<Z<e9jx                  j                  e<_        dRd=Z=e9jz                  j                  e=_        dRd>Z>e9j|                  j                  e>_        dSd?Z?eg d@e	g dAeg dBeg iZ@e@e   e@e<   dTdCZAdUdDZBdE ZCdF ZDdG ddHddd8fdIZE xZFS )VWordNetCorpusReaderzA
    A corpus reader used to access wordnet or its variants.
    utf8r   adjadvnounverbr   rj           c              #   ,   K   | ]  }|d d d     y w)Nru  r   )r   r   s     r   r   zWordNetCorpusReader.<genexpr>h  s     @Cc$B$i@s   )cntlist.revlexnamesindex.sensez	index.adjz	index.advz
index.nounz
index.verbzdata.adjzdata.advz	data.nounz	data.verbzadj.exczadv.excznoun.exczverb.excc                    t         	|   || j                  | j                         t	        t
              | _        t	        t
              | _        t	        t
              | _        || _	        d| _
        t	        t              | _        d| j                  d<   | j                  t        j                  d       t               | _        t	        t"              | _        i | _        i | _        g | _        d| _        d| _        | j1                  d      5 }t3        |      D ]D  \  }}|j5                         \  }}}t7        |      |k(  sJ | j*                  j9                  |       F 	 ddd       | j;                          | j=                          i | _        i | _         i | _!        | jE                         | _#        g d| _$        y# 1 sw Y   ]xY w)z_
        Construct a new wordnet corpus reader, with the given root
        directory.
        encodingN r{   zFThe multilingual functions are not available with this Wordnet versionr  )lemmar   r   r   )%superr   _FILES	_ENCODINGr   dict_lemma_pos_offset_map_synset_offset_cacher   _omw_reader_exomw_readerstrprovenanceswarningswarnr   	omw_langsr   r   _data_file_map_exception_mapr   _key_count_file_key_synset_fileopen	enumeratesplitintr   _load_lemma_pos_offset_map_load_exception_mapnomapsplitsmergesmap_wnmap30r   )
r$   root
omw_readerfpr   liner   r   _	__class__s
            r   r   zWordNetCorpusReader.__init__  s    	t{{T^^D &1%6" %0$5! &d+ & "&s+"$#MMX  &d+  # $ YYz" 	/b$R= /4$(JJL!w5zQ&%%g./	/ 	'') 	  "
 [[]
 6)	/ 	/s   AF==GNc                 x   d}|rddl m}m}  |||d|z         }n| }|j                  |      5 }i }|D ]n  }|j	                         j                         }	|	d   }
| j                  t        |
j                  d      d   j                  d      d            }|	d    d| ||
<   p 	 d	d	d	       |S # 1 sw Y   S xY w)
zMRead sense key to synset id mapping from index.sense file in corpus directoryr  r   )r	   LazyCorpusLoaderz.*/%r   :-N)nltk.corpusr	   r  r  stripr  
_pos_namesr  )r$   versionfnr	   r  ixreaderr  sensekey_mapr  fieldssensekeyr   s               r   index_sensezWordNetCorpusReader.index_sense  s    B'v{KHH]]2 	>"L >++-!!9ooc(..*=a*@*F*Fs*KA*N&OP,21I;au)=X&	>	> 	> s   A6B//B9c                 V   | j                  |      }| j                         }i }t        |j                               D ]  }g ||<   	 t        |j                               j	                  t        |j                                     D ]   }||   }||   }||   j                  |       " |S r_   )r  r   r=  keysr  r   )	r$   r  sensekey_map1sensekey_map2synset_to_manysynsetidr  sourcetargets	            r   map_to_manyzWordNetCorpusReader.map_to_many  s    ((1((*M0023 	*H')N8$	*M..01>>""$%
 	2H #8,F"8,F6"))&1	2 r   c                    t               | j                  |<   i | j                  |<   | j                  |      }i }|D ]  }||   }|rt        |      }t	        |      dk(  r|d   }nKg }|D ]$  }	|j                  |j                  |	      |	f       & || j                  |   |<   t        |      d   }|||<   |d   dk(  s|||d d  d<   | j                  |   j                  |        |S )Nr   r   ru  r   r   )	r   r  r  r  r  r   r   r   r   )
r$   r  r  synset_to_oner  candidates_bagcandidates_setr  counts	candidates
             r   
map_to_onezWordNetCorpusReader.map_to_one  s   !e

7!G))'2$ 	0F+F3N!$^!4~&!++A.FF%3 T	~';';I'F	&RST39DKK(0 [^F(.f%":$ 8>MVCR[M"34

7#''/%	0& r   c                 L    | j                         |k(  ry| j                  |      S )zBMapping from Wordnet 'version' to currently loaded Wordnet versionN)r   r  )r$   r  s     r   r  zWordNetCorpusReader.map_wn  s%    (??7++r   c                 ^    || j                   vr| j                  |      }| j                   |   S r_   )r  r  )r$   r  _mymaps      r   split_synsetsz!WordNetCorpusReader.split_synsets  s+    $++%__W-F{{7##r   c                 j   || j                   vrt        t              }| j                  |      j	                         D ]   \  }}|D ]  }||   j                  |        " |j	                         D ci c]  \  }}t        |      dkD  s|| c}}| j                   |<   | j                   |   S c c}}w )Nr   )r  r   r   r  rD  r   r  )r$   r  merger  targetsr  trgsrcs           r   merged_synsetsz"WordNetCorpusReader.merged_synsets	  s    $++%$E#'#3#3G#<#B#B#D .% .F&M%%f-.. */$%S##c(Q,S$DKK  {{7##$s   3B/
B/c                 D    | j                  |d   t        |dd             S )z!take an id and return the synsetsru  N   )r   r  )r$   r   s     r   of2sszWordNetCorpusReader.of2ss  s#    ..r"vs2bq6{CCr   c                 P    |r$|j                         dd|j                          S y)zreturn the ID of the synset08dr  N)r   r   )r$   sss     r   r   zWordNetCorpusReader.ss2of  s*    iik#&az22 r   c           	      
   || j                   v ry| j                  r| j                  s| j                          || j	                         vrt        d      | j                  r|| j                  vr| j                  }n| j                  }| j                  |   }|dv r|}nd}|j                  | d| d|j                  d      d    d	      5 }| j                  ||       ddd       | j                  |       y# 1 sw Y   xY w)
z^load the wordnet data of the requested language from the file to
        the cache, _lang_dataNzLanguage is not supported.cldrwiktdataz/wn-r  r  r   .tab)r   r  r  add_omwr   r   r  r  r  r  custom_lemmasdisable_custom_lemmas)r$   r   r   provprov2r  s         r   r   z#WordNetCorpusReader._load_lang_data   s     4??"DNNLLNtzz|#;<<$dnn"<''F%%F%##EE[[D6eWAdjjoa.@-AFG 	)2r4(	)""4(	) 	)s   C99Dc                 >   |j                         }|D ]  }t        j                  j                  |      \  }}t        j                  j	                  |      \  }}|dk(  sM|j                  d      d   }|| j
                  v s|dv r| d| }|| j
                  |<    y)zDAdd languages from Multilingual Wordnet to the provenance dictionaryr  r  ru  r  r  N)fileidsosr?  r  splitextr  )	r$   r   r  fileidr  langfile	file_namefile_extensionr   s	            r   	add_provszWordNetCorpusReader.add_provs<  s    .." 		.FWW]]62ND((*(8(8(B%I~' s+B/4+++t7G/G #V1TF+D)-  &		.r   c                     | j                  | j                         t        | j                  j	                               | _        y r_   )r  r  r   r  r  r  r#   s    r   r  zWordNetCorpusReader.add_omwJ  s/    t''(T--2245r   c                 t    ddl m} | j                          || _        | j	                  | j                         y)a4  
        Add languages from Extended OMW

        >>> import nltk
        >>> from nltk.corpus import wordnet as wn
        >>> wn.add_exomw()
        >>> print(wn.synset('intrinsically.r.01').lemmas(lang="eng_wikt"))
        [Lemma('intrinsically.r.01.per_se'), Lemma('intrinsically.r.01.as_such')]
        r   )extended_omwN)r  r  r  r  r  )r$   r  s     r   	add_exomwzWordNetCorpusReader.add_exomwN  s*     	-)t))*r   c                 H    t        | j                  j                               S )z<return a list of languages supported by Multilingual Wordnet)r   r  r  r#   s    r   r   zWordNetCorpusReader.langs^  s    D$$))+,,r   c           
      2   | j                   j                         D ]0  }| j                  d|z        5 }t        |      D ]   \  }}|j	                  d      rt        |j                               fd}	  |       } |       }t         |             }|dkD  sJ t         |             }	t        |	      D 
cg c]	  }
 |        c}
 t         |             }||k(  sJ  |        t        |      D 
cg c]  }
t         |              }}
|| j                  |   |<   |t        k(  s|| j                  |   t        <    	 d d d        3 y c c}
w c c}
w # t        t        f$ r}d|z  |dz   |f}t        d|z        |d }~ww xY w# 1 sw Y   zxY w)Nzindex.%s c                      t               S r_   next_iters   r   _next_tokenzCWordNetCorpusReader._load_lemma_pos_offset_map.<locals>._next_tokenl  s    #E{*r   r   r   zfile %s, line %i: %s)_FILEMAPr=  r  r  
startswithiterr  r  rangeAssertionErrorr)  r   r  ADJADJ_SAT)r$   suffixr  r   r  r  r  r   	n_synsets
n_pointersr  n_sensessynset_offsetser   r  s                  @r   r  z.WordNetCorpusReader._load_lemma_pos_offset_mapb  s   mm**, .	TF:./ ,T2(} +TGAts+  .E+P +)m %($6	(1},} &)%7
05j0AB1B $'{}#5(H444 $ GLIFV)W#km*<)W)W >LD..u5c:czES2259'BW+T,T ,T.	T0 C *X +J7 P)F2a!ea?*+AC+GHaOPI,T ,TsU   AF8AE:E
.E6E
EF+F
EF		*F	F			FF	c                 `   | j                   j                         D ]a  \  }}i | j                  |<   | j                  d|z        5 }|D ]*  }|j	                         }|dd  | j                  |   |d   <   , 	 d d d        c | j                  t
           | j                  t        <   y # 1 sw Y   xY w)Nz%s.excr   r   )r  rD  r  r  r  r  r  )r$   r   r  r  r  termss         r   r  z'WordNetCorpusReader._load_exception_map  s    ==..0 	CKC')D$8f,- C CD JJLE9>qrD'',U1X6CC C	C (,':':3'?G$	C Cs   0B$$B-	c                     d}| j                  |      D ]  }	 t        ||j                               } |r|dz  }|| j
                  |<   y# t        $ r t	        |       Y Mw xY w)zy
        Compute the max depth for the given part of speech.  This is
        used by the lch similarity metric.
        r   r   N)all_synsetsr   r   RuntimeErrorprintr   )r$   r   r*  r	  iis        r   rV  z&WordNetCorpusReader._compute_max_depth  sn    
 ""3' 	BE2<<>2	
 QJE$	   b	s   AA"!A"c                     | j                  t              }|j                  d       |D ]@  }t        j                  d|      }||j                  d      }|j                  d       |c S  y )Nr   z#Word[nN]et (\d+|\d+\.\d+) Copyrightr   )
_data_filer  seekresearchgroup)r$   fhr  matchr  s        r   r   zWordNetCorpusReader.get_version  s^    __S!

 	DIIDdKE ++a.
	r   c                     t         j                  |      j                         }|d|dz
   ||d }}| j                  |      }|j	                  |      D ]  }|j
                  |k(  s|c S  t        d|d|      )z)Return lemma object that matches the nameNr   	No lemma z in )SENSENUM_REr  endr}   r   ra   r   )r$   r~   r   	separatorsynset_name
lemma_namer}   r  s           r   r  zWordNetCorpusReader.lemma  s      &&t,002	"&Q"7ij9IZ[)]]4( 	E{{j(	 YznDHIIr   c                    |j                         }|j                  d      \  }}|j                  d      \  }}}}}| j                  t        |         }| j                  | j                  d      | _        t        | j                  |      }	|	st        d|z        t        |	j                         d         }
| j                  ||
      }|j                  D ]  }|j                  |k(  s|c S  t        d|z        )Nr  r  r  zNo synset found for key %rr   zNo lemma found for for key %r)lowerr  r  r  r  r  _binary_search_filer   r   r   rx   )r$   r   r!  	lex_sense
pos_numberr   r   r  r   synset_liner   r}   r  s                r   lemma_from_keyz"WordNetCorpusReader.lemma_from_key  s    iik #		#
I2;//#2F/
M61aooc*o.   ($(IIm$<D! *$*?*?E;cABB[&&(+,00f=^^ 	EzzS 	 :S@AAr   c                    |j                         j                  dd      \  }}}t        |      dz
  }	 | j                  |   |   |   }| j                  ||      }	|dk(  r|	j                  dk(  rd}
t        |
|z        |	j                  |k(  s|dk(  r|	j                  dk(  sJ |	S # t        $ r}t        d|d|      |d }~wt        $ r?}t        | j                  |   |         }t        d|d|d| d|dk(  rd	nd
       |d }~ww xY w)N.rj  r   r  z with part of speech zLemma z
 only has r  sensesensesr   r   zIAdjective satellite requested but only plain adjective found for lemma %r)
r#  rsplitr  r  KeyErrorr   
IndexErrorr  r   r   )r$   r~   r  r   synset_index_strsynset_indexr   r  r	  r}   messages              r   r}   zWordNetCorpusReader.synset  sO   '+zz|':':3'B$s$+,q0		//6s;LIF 00f= #:&++,/  w//{{c!cSjV[[C5GHH -  	Y5)3HPQWXX 	455e<SABH	!6sg >jX]'!IK 	s#   B& &	D/CD:DDc                     |t         k(  rt        }| j                  j                  |      0d| j                  |   z  }| j                  |      | j                  |<   | j                  |   S )ze
        Return an open file pointer for the data file for the given
        part of speech.
        data.%s)r  r  r  rE  r  r  )r$   r   r  s      r   r  zWordNetCorpusReader._data_file  sd    
 '>C""3'/s!33F'+yy'8D$""3''r   c                    || j                   |   v r| j                   |   |   S | j                  |      }|j                  |       |j                         }|dd }|j	                         r`|ddt        t        |            z
  z   t        |       k(  r6| j                  ||      }|j                  |k(  sJ || j                   |   |<   nd}t        j                  d| d| d       |j                  d       |S )a  
        - pos: The synset's part of speech, matching one of the module level
          attributes ADJ, ADJ_SAT, ADV, NOUN or VERB ('a', 's', 'r', 'n', or 'v').
        - offset: The byte offset of this synset in the WordNet dict file
          for this pos.

        >>> from nltk.corpus import wordnet as wn
        >>> print(wn.synset_from_pos_and_offset('n', 1740))
        Synset('entity.n.01')
        Nr  0z No WordNet synset found for pos=z at offset=r*  r   )r  r  r  readlineisalnumr  r  _synset_from_pos_and_liner   r  r  )r$   r   r   	data_filedata_file_lineline_offsetr}   s          r   r   z.WordNetCorpusReader.synset_from_pos_and_offset  s    T..s33,,S1&99OOC(	v"++-$Ra(!#qS[)9'9":!;CK=II33CHF>>V+++5;D%%c*62FMM<SEVHTUVWqr   z6Use public method synset_from_pos_and_offset() insteadc                 &     | j                   |i |S )z
        Hack to help people like the readers of
        https://stackoverflow.com/a/27145655/1709587
        who were using this function before it was officially a public method
        )r   )r$   argskwargss      r   _synset_from_pos_and_offsetz/WordNetCorpusReader._synset_from_pos_and_offset0  s     /t..???r   c           	      2
  ' t        |       }	 |j                         j                  d      \  }}t        j                  dd|      j                         }t        j
                  d|      }|D ]  }|j                  j                  |        |j                  d      |_        t        |j                               ''fd}	t         |	             |_        t         |	             }
| j                  |
   |_         |	       |_        t         |	       d      }t        |      D ]  } |	       }t         |	       d      }t        j                   d|      }|j#                         \  }}t%        | |||
||      }|j&                  j                  |       |j(                  j                  |j*                          t         |	             }t        |      D ]  } |	       }t         |	             } |	       } |	       }|d	k(  r!|j,                  |   j/                  ||f       Nt        |d d
 d      dz
  }t        |d
d  d      dz
  }|j&                  |   j*                  }|j0                  }|||f   }|j                  |||f        	 t         |	             }t        |      D ]  } |	       }|dk(  sJ t         |	             }t2        |   }t         |	       d      }|dk(  rp|j4                  j                  |       |j&                  D ]E  }|j4                  j                  |       |j6                  j                  ||j*                  z         G |j&                  |dz
     }|j4                  j                  |       |j6                  j                  ||j*                  z          	 |j&                  D ]  }|j                  t>        k(  r<|jA                         d   j&                  d   }!|!j*                  }"d|!jB                  z  }#ndx}"}#|j*                  tD        jF                  |j                     |jH                  |jB                  |"|#f}$d|$z  jK                         |_&         |j&                  d   j*                  jK                         }| jN                  |   |j                     }%|%jQ                  |j                        }&||j                  |&dz   f}$d|$z  |_        |S # t8        $ r Y Ew xY w# t:        $ r} t=        d|d|        | d } ~ ww xY w)N|z[\"].*?[\"]r  z	"([^"]*)"z; c                      t               S r_   r  r  s   r   r  zBWordNetCorpusReader._synset_from_pos_and_line.<locals>._next_tokenK  s    E{"r      z(.*?)(\(.*\))?$0000rj  r   r   r   zline z: z%02dz%s%%%d:%02d:%02d:%s:%sz
%s.%s.%02i))r   r  r  r  subfindallr   r   r   r  r  r   r   r   r   r  r  groupsrn   r   r   ra   r   r   r   VERB_FRAME_STRINGSrt   rs   StopIterationr)  r   r  r]   rv   r{  _pos_numbersru   r#  rx   r  r   )(r$   r   r;  r}   columns_strglossr   r   exampler  r   n_lemmasr  r!  r   msyn_markr  r  symbolr   lemma_ids_strsource_indextarget_indexsource_lemma_namelemma_pointerstupsframe_countplusframe_numberframe_string_fmtlemma_numberr  
head_lemma	head_namehead_idr   offsetssense_indexr  s(                                          @r   r9  z-WordNetCorpusReader._synset_from_pos_and_line9  s   V	G!/!5!5!7!=!=c!BKE:@@BJzz,6H# 1  ''01 ",!1!1$!7F **,-E# !/FN  .M"nn];FO &-FK ;="-H8_ 8(]
[]B/HH/<'(xxz$
HdFJvxX%%e,##**5;;78 []+J:& =$[]+!m + F*$$V,00#v?#&}Ra'8"#=#AL#&}QR'8"#=#AL(.|(D(J(J%%+%;%;N)*;V*CDDKKfl ;<= T!+-0 {+ TA&=D3;&;#&{}#5L'9,'G$#&{}b#9L#q())00>%+^^ XE!,,33LA!00778H5;;8VWX
 !'|a/? @((//=,,334Du{{4RS%T4 ^^ 	BE{{g%#//1!4<<Q?
&,,	 :#5#55&((	G#00=$$C 3S8??AEJ	B$ ^^A&,,224
,,Z8EmmFNN3&++{Q6#c)k ! 0  	G~&81#>?QF	Gs=   I9S6 S& DS6 &	S3/S6 2S33S6 6	T?TTc                 @    | j                  |      j                         S )a   
        Retrieves synset based on a given sense_key. Sense keys can be
        obtained from lemma.key()

        From https://wordnet.princeton.edu/documentation/senseidx5wn:
        A sense_key is represented as::

            lemma % lex_sense (e.g. 'dog%1:18:01::')

        where lex_sense is encoded as::

            ss_type:lex_filenum:lex_id:head_word:head_id

        :lemma:       ASCII text of word/collocation, in lower case
        :ss_type:     synset type for the sense (1 digit int)
                      The synset type is encoded as follows::

                          1    NOUN
                          2    VERB
                          3    ADJECTIVE
                          4    ADVERB
                          5    ADJECTIVE SATELLITE
        :lex_filenum: name of lexicographer file containing the synset for the sense (2 digit int)
        :lex_id:      when paired with lemma, uniquely identifies a sense in the lexicographer file (2 digit int)
        :head_word:   lemma of the first word in satellite's head synset
                      Only used if sense is in an adjective satellite synset
        :head_id:     uniquely identifies sense in a lexicographer file when paired with head_word
                      Only used if head_word is present (2 digit int)

        >>> import nltk
        >>> from nltk.corpus import wordnet as wn
        >>> print(wn.synset_from_sense_key("drive%1:04:03::"))
        Synset('drive.n.06')

        >>> print(wn.synset_from_sense_key("driving%1:04:03::"))
        Synset('drive.n.06')
        )r(  r}   )r$   	sense_keys     r   synset_from_sense_keyz)WordNetCorpusReader.synset_from_sense_key  s    L ""9-4466r   c                    |j                         }|dk(  rm| j                  }| j                  }|t        }|D 	cg c]=  }| j	                  |||      D ]%  }||   j                  |g       D ]  }	 |||	       ' ? c}	}}S | j                  |       g }
|| j                  |   d   v rE| j                  |   d   |   D ]-  }|	|d   |k7  r|
j                  | j                  |             / |
S c c}	}}w )a  Load all synsets with a given lemma and part of speech tag.
        If no pos is specified, all synsets for all parts of speech
        will be loaded.
        If lang is specified, all the synsets associated with the lemma name
        of that language will be returned.
        r{   r   ru  )
r#  r   r  POS_LIST_morphyrE  r   r   r   r  )r$   r  r   r   check_exceptionsr   r   pformr   synset_listls               r   r,  zWordNetCorpusReader.synsets  s(    5=88J..E{   LL3CD #Dkooa4	  1f%%%    &K-a00.q1%8 6A1R5C< &&tzz!}56 s   AC1c                    |j                         }|dk(  rX| j                  ||      D cg c]:  }|j                         D ]%  }|j                         j                         |k(  r|' < c}}S | j	                  |       g }| j                  ||      }|D ]b  }||j                         |k7  r|j                  |      D ]5  }|j                         j                         |k(  s%|j                  |       7 d |S c c}}w )zReturn all Lemma objects with a name matching the specified lemma
        name and part of speech tag. Matches any part of speech tag if none is
        specified.r{   r   )r#  r,  r   r~   r   r   r   )	r$   r  r   r   r}   	lemma_objr   synr   s	            r   r   zWordNetCorpusReader.lemmas  s   
 5= #ll5#6!' >>#))+u4     &F,,u4,0C 1?quuw#~!"t!4 1I ~~'--/58i011 M#s   ?C=c                 H    |dk(  r-t         j                        S  fd j                  D        S  j                  |       g } j                  |   d   D ]1  }	|d   k7  r|j	                   j                  |   d   |          3 t        t        |            }|S )zReturn all lemma names for all synsets for the given
        part of speech tag and language or languages. If pos is
        not specified, all synsets for all parts of speech will
        be used.r{   c              3   D   K   | ]  }j                   |   v r|  y wr_   )r  )r   r  r   r$   s     r   r   z6WordNetCorpusReader.all_lemma_names.<locals>.<genexpr>!  s,      d88?? s    r   ru  )r  r  r   r   r   r   )r$   r   r   r  r   s   ``   r   all_lemma_namesz#WordNetCorpusReader.all_lemma_names  s     5={D6677!%!;!;    &E__T*1- :?qu|T__T215a89:
 U$ELr   c              #      K   || j                         vry | j                  |       | j                  |   d   D ]%  }|r	|d   |k(  s| j                  |      }|s"| ' y w)Nr   ru  )r   r   r   r  )r$   r   r   r   r  s        r   all_omw_synsetsz#WordNetCorpusReader.all_omw_synsets1  sd     tzz|#T"//$'* 	B"R&C-ZZ^H		s   AA"A"A"c                 V    |dk(  r| j                  |      S | j                  ||      S )zIterate over all synsets with a given part of speech tag.
        If no pos is specified, all synsets for all parts of speech
        will be loaded.
        r{   )r   )r   r   )all_eng_synsetsru  )r$   r   r   s      r   r  zWordNetCorpusReader.all_synsets?  s5    
 5=''C'00''Cd';;r   c              #     K   || j                   j                         }n|g}| j                  }| j                  }|D ]  }|t        k(  rt
        }n|}d| j                   |   z  }| j                  |      }	 |j                         }	|j                         }
|
r|
d   j                         sO|	||   v r	||   |	   }n |||
      }|||   |	<   |t        k(  r|j                  t        k(  r| n|t        k7  r| |j                         }	|j                         }
|
r|j                           y #  |j                           xY ww)Nr4  r   )r  r  r  r9  r  r  r  tellr7  isspacer   close)r$   r   pos_tagscachefrom_pos_and_linepos_tagpos_filer  r:  r   r  r}   s               r   rw  z#WordNetCorpusReader.all_eng_synsetsI  sP    ;}}))+HuH)) ::   +	"G
 '!"x!88F		&)I"") ))+7??,!U7^3%*7^F%;F &7w%EF5;E'N62
 #g-&++2H"(L %/"(L&^^-F$--/D) 6 !W+	"N!s   A3E6B&D0E0EEc                 &    | j                  |      S )z4return lemmas of the given language as list of wordsr   )rs  r   s     r   wordszWordNetCorpusReader.words  s    ###..r   c                     | j                  ||      D cg c]3  }t        t        t        |j	                  |            |hz
              5 c}S c c}w )zZreturn nested list with the synonyms of the different senses of word in the given languager   )r,  r(  r   r   r   )r$   wordr   r  s       r   synonymszWordNetCorpusReader.synonyms  sR     ll4dl3
 4BNNN56$?@A
 	
 
s   8Ac                 d   |dk(  r| }nL| j                   }|| j                         v r.t        j                  j	                  | j
                  |   |       }	 |j                  |      5 }|j                         cddd       S # 1 sw Y   yxY w#  || j                  v r
d| d| cY S d| dcY S xY w)zxReturn the contents of readme, license or citation file
        use lang=lang to get the file for an individual languager{   NzCannot determine z for z	Language z is not supported.)	r  r   r  r?  joinr  r  readr   )r$   filer   r   r  s        r   doczWordNetCorpusReader.doc  s     5=F%%Ftzz|#'',,t'7'7'=dCD	<T" !bwwy! ! !	<t&*4&dV<<"4&(:;;s0   B 'B7	B B
B 
B B/'B/c                 (    | j                  d|      S )zlReturn the contents of LICENSE (for omw)
        use lang=lang to get the license for an individual languageLICENSEr  r   r  r   s     r   licensezWordNetCorpusReader.license  s     xxYTx22r   c                 (    | j                  d|      S )zjReturn the contents of README (for omw)
        use lang=lang to get the readme for an individual languageREADMEr  r  r   s     r   readmezWordNetCorpusReader.readme  s     xxXDx11r   c                 (    | j                  d|      S )zwReturn the contents of citation.bib file (for omw)
        use lang=lang to get the citation for an individual languagezcitation.bibr  r  r   s     r   citationzWordNetCorpusReader.citation  s     xx^$x77r   c                     |j                   dk7  ry| j                  | j                  d      | _        t        | j                  |j                        }|rt        |j                  dd      d         S y)r   r{   r   r  r  r   ru  )rw   r  r  r$  rx   r  r-  )r$   r  r  s      r   r   zWordNetCorpusReader.lemma_count  sg     ;;%'#'99]#;D "4#7#7Dt{{3*2.//r   Fc                 (    |j                  |||      S r_   rQ  r$   synset1synset2rP  r*  s        r   rQ  z#WordNetCorpusReader.path_similarity  s    &&wGGr   c                 (    |j                  |||      S r_   rY  r  s        r   rY  z"WordNetCorpusReader.lch_similarity      %%gwFFr   c                 (    |j                  |||      S r_   r_  r  s        r   r_  z"WordNetCorpusReader.wup_similarity  r  r   c                 (    |j                  |||      S r_   rh  r$   r  r  rd  rP  s        r   rh  z"WordNetCorpusReader.res_similarity      %%gr7;;r   c                 (    |j                  |||      S r_   rm  r  s        r   rm  z"WordNetCorpusReader.jcn_similarity  r  r   c                 (    |j                  |||      S r_   ro  r  s        r   ro  z"WordNetCorpusReader.lin_similarity  r  r   c                 \    |r|gnt         D ]  }| j                  |||      }|s|d   c S  y)a  
        Find a possible base form for the given form, with the given
        part of speech, by checking WordNet's list of exceptional
        forms, or by substituting suffixes for this part of speech.
        If pos=None, try every part of speech until finding lemmas.
        Return the first form found in WordNet, or eventually None.

        >>> from nltk.corpus import wordnet as wn
        >>> print(wn.morphy('dogs'))
        dog
        >>> print(wn.morphy('churches'))
        church
        >>> print(wn.morphy('aardwolves'))
        aardwolf
        >>> print(wn.morphy('abaci'))
        abacus
        >>> wn.morphy('hardrock', wn.ADV)
        >>> print(wn.morphy('book', wn.NOUN))
        book
        >>> wn.morphy('book', wn.ADJ)
        r   N)rg  rh  )r$   rk  r   ri  analysess        r   morphyzWordNetCorpusReader.morphy  s9    ,  C5X 	#C||D#/?@H{"		#r   )	r   r  )sesr   )vesf)xesx)zesz)chesch)shessh)menmaniesy)r  r  )esr  )r  r  )edr  )r  r  )ingr  )r  r  ))err  )estr  )r  r  )r  r  c                       j                      } j                     fd} fd}|r
||v r||   }n	 ||g      } ||g|z         S )Nc                     | D cg c]/  }D ](  \  }}|j                  |      r|d t        |        |z   * 1 c}}}S c c}}}w r_   )endswithr  )formsrk  oldnewsubstitutionss       r   apply_rulesz0WordNetCorpusReader._morphy.<locals>.apply_rules"  sa     "  - C==% [Cy!C''  s   4Ac                     g }t               }| D ]J  }|j                  v sj                  |   v s$||vs)|j                  |       |j                  |       L |S r_   )r   r  r   r   )r  r   r   rk  r   r$   s       r   filter_formsz1WordNetCorpusReader._morphy.<locals>.filter_forms*  se    F5D +4555d88>>t+"MM$/ HHTN+ Mr   )r  MORPHOLOGICAL_SUBSTITUTIONS)	r$   rk  r   ri  
exceptionsr  r  r  r  s	   ` `     @r   rh  zWordNetCorpusReader._morphy  sf     ((-
88=			 
 2t$E  'E TFUN++r   c                    t               }|j                         D ]  }||xx   dz  cc<    i }t        D ]  }t        t              ||<    |dkD  rUt        D ]
  }|||   d<    | j                         D ]/  }|j                  }	|	t        k(  rt        }	|||	   |j                  <   1 |D ]  }| j                  |      }
t        |
      dk(  r#t	        ||         }|s|t	        t        |
            z  }|
D ]c  }|j                  }	|	t        k(  rt        }	|j                         D ]#  }|D ]  }||	   |j                  xx   |z  cc<    % ||	   dxx   |z  cc<   e  |S )a  
        Creates an information content lookup dictionary from a corpus.

        :type corpus: CorpusReader
        :param corpus: The corpus from which we create an information
            content dictionary.
        :type weight_senses_equally: bool
        :param weight_senses_equally: If this is True, gives all
            possible senses equal weight rather than dividing by the
            number of possible senses.  (If a word has 3 synses, each
            sense gets 0.3333 per appearance when this is False, 1.0 when
            it is true.)
        :param smoothing: How much do we smooth synset counts (default is 1.0)
        :type smoothing: float
        :return: An information content dictionary
        r   g        r   )r   r  rg  r   rC  r  r   r  r  r   r,  r  r  )r$   r   weight_senses_equally	smoothingr  wwrd  ppr  r   possible_synsetsweightlevelhhs                 r   rd  zWordNetCorpusReader.icB  s   " ,,. 	B2J!OJ	  	(B 'BrF	( s? &%2q	&&&( 0gg'>C&/3

#	0  	%B#||B/#$) 6":&F(%$4 566& %gg'>C446 6E# 63

+v5+66 3
f$
%	%& 	r   c           	      *   |j                  d      d   }t        |      dk7  rt        d      t        t              t        t              t        t              t        t              g| j
                  |<   |j                         D ]  }t        |t              r|j                  d      }|j                  d      r7|j                         j                  d      }t        |      dk  re|dd	 \  }}|d
   }| j                  ro|| j                  v r| j                  |   }n|| j                  d   vr?|j                  dd      | j                  d   vrt        j                   | d| d| d       |d
   dk(  r7| j#                  |      }	|	r$|	j%                         dk(  r| j'                  |	      }|j                  d      }
|
d
   }t        |
      dk(  s
|
d   |k(  sZ|dk(  rX|j                         j                  dd      }| j
                  |   d   |j)                            }||vr|j+                  |       || j,                  v s| j
                  |   | j,                  j/                  |         |   }||vs|j+                  |        y)a  
        Reads a custom tab file containing mappings of lemmas in the given
        language to Princeton WordNet 3.0 synset offsets, allowing NLTK's
        WordNet functions to then be used with that language.

        See the "Tab files" section at https://omwn.org/omw1.html for
        documentation on the Multilingual WordNet tab file format.

        :param tab_file: Tab file as a file or file-like object
        :type: lang str
        :param: lang ISO 639-3 code of the language of the tab file
        r  r   r  z-lang should be a (3 character) ISO 639-3 codezutf-8#	Nrj  ru  wordnetr   r   z: invalid offset z in ''r  r   r  r  )r  r  r)  r   r   r   	readlines
isinstancebytesdecoder   r  r  r  replacer  r  r  r   r   r#  r   r   r   )r$   tab_filer   lgr  triple
offset_poslabelvalwnsspairattrlang_offsetslang_lemmass                 r   r  z!WordNetCorpusReader.custom_lemmasz  sa    ZZ_Qr7a<LMM	!
 &&( ,	4D$& {{7+??3'++D1v;?$*2AJ!
ERj::!TZZ/%)ZZ
%;
 'djj.CC * 2 23 <#'::i#8!9 %MM#'&(9*U4&PQ R !^s*::j1D
c 1%)ZZ%5
{{3'Bxt9>T!W]w!iik11#s;'+t'<Q'?		'L%\9(//
;t}},&*ood&;DMM<O<OPT<U&V&' k1'..s3Y,	4r   c                 x    t        t        | j                              D ]  }d| j                  |   |   _         y)z+prevent synsets from being mistakenly addedN)r  r  r   r   default_factory)r$   r   r   s      r   r  z)WordNetCorpusReader.disable_custom_lemmas  s5    s4==)* 	<A7;DOOD!!$4	<r   c                 "    | j                         S r_   )r%   )r   s    r   r"  zWordNetCorpusReader.<lambda>  s    akkm r   ru  c           	         ddl m}m}	 t               t               |s
t	               }|s
t	               }fd}
|D ]c  }t        |      }|t        k(  rj                  |       )|t        k(  r	 |
|       ;|t        k(  sE| j                  ||      D ]
  } |
|        e D ]  }j                   |||||              |	t        t                    ||      }|S )ai  
        Produce a graphical representation from 'inputs' (a list of
        start nodes, which can be a mix of Synsets, Lemmas and/or words),
        and a synset relation, for drawing with the 'dot' graph visualisation
        program from the Graphviz package.

        Return a string in the DOT graph file language, which can then be
        converted to an image by nltk.parse.dependencygraph.dot2img(dot_string).

        Optional Parameters:
        :rel: Wordnet synset relation
        :pos: for words, restricts Part of Speech to 'n', 'v', 'a' or 'r'
        :maxdepth: limit the longest path
        :shapes: dictionary of strings that trigger a specified shape
        :attr: dictionary with global graph attributes
        :verbose: warn about cycles

        >>> from nltk.corpus import wordnet as wn
        >>> print(wn.digraph([wn.synset('dog.n.01')]))
        digraph G {
        "Synset('animal.n.01')" -> "Synset('organism.n.01')";
        "Synset('canine.n.02')" -> "Synset('carnivore.n.01')";
        "Synset('carnivore.n.01')" -> "Synset('placental.n.01')";
        "Synset('chordate.n.01')" -> "Synset('animal.n.01')";
        "Synset('dog.n.01')" -> "Synset('canine.n.02')";
        "Synset('dog.n.01')" -> "Synset('domestic_animal.n.01')";
        "Synset('domestic_animal.n.01')" -> "Synset('animal.n.01')";
        "Synset('living_thing.n.01')" -> "Synset('whole.n.02')";
        "Synset('mammal.n.01')" -> "Synset('vertebrate.n.01')";
        "Synset('object.n.01')" -> "Synset('physical_entity.n.01')";
        "Synset('organism.n.01')" -> "Synset('living_thing.n.01')";
        "Synset('physical_entity.n.01')" -> "Synset('entity.n.01')";
        "Synset('placental.n.01')" -> "Synset('mammal.n.01')";
        "Synset('vertebrate.n.01')" -> "Synset('chordate.n.01')";
        "Synset('whole.n.02')" -> "Synset('object.n.01')";
        }
        <BLANKLINE>
        r   )edge_closure	edges2dotc                 n    | j                         }j                  |       j                  | |f       y r_   )r}   r   )r   r  edgesr,  s     r   	add_lemmaz.WordNetCorpusReader.digraph.<locals>.add_lemma 	  s)    BKKOIIsBi r   )shapesr  )r  r  r  r   r  r   r   r   rn   r  r   unionr(  r   )r$   inputsr  r   maxdepthr  r  rP  r  r  r  nodetypr  r  
dot_stringr  r,  s                   @@r   digraphzWordNetCorpusReader.digraph  s    ` 	6%VF6D	!
  	%Dt*Cf}D!$![[s3 %Ee$%	%  	JBKKRh HIE	Jvd5k26M
r   r_   )r  rt  )Nr{   Trz   )NN)r  r{   rw  rv  )NT)T)FrO  )Gr   r   r   r   r  r  r  ADVr   VERBr  rK  r  rD  r  r  r   r  r  r  r  r  r  r  r   r   r  r  r  r   r  r  rV  r   r  r(  r}   r  r   r
   r@  r9  re  r,  r   rs  ru  r  rw  r  r  r  r  r  r  r   rQ  r   rY  r_  rh  rm  ro  r  r  rh  rd  r  r  r  __classcell__)r  s   @r   r{  r{  W  sv    I %<!C#tT UCfdFCH !T1c1c1gqAL@<+=+=+?@@J
F$B6H$4,$
	$D3
)8.6+ -/Tb@%JB6@
(B HI@ J@wr&7X>44<5"n/
<$3
2
8H %44<<OG $22::NG $22::N< $22::N< $22::N< $22::N#: 	 

 	 	
 	AR/#4 ,Gs+K(%,T6pB4H< $Kr   r{  c                       e Zd ZdZd Zd Zy)WordNetICCorpusReaderzE
    A corpus reader for the WordNet information content corpus.
    c                 6    t        j                  | ||d       y )Nr|  r  )r	   r   )r$   r  r  s      r   r   zWordNetICCorpusReader.__init__	  s    dD'FCr   c                    i }t        t              |t        <   t        t              |t        <   | j	                  |      5 }t        |      D ]|  \  }}|dk(  r|j                         }t        |d   dd       }t        |d         }t        |d         }	t        |      dk(  r|d   dk(  r||	   dxx   |z  cc<   |dk7  su|||	   |<   ~ 	 ddd       |S # 1 sw Y   |S xY w)a  
        Load an information content file from the wordnet_ic corpus
        and return a dictionary.  This dictionary has just two keys,
        NOUN and VERB, whose values are dictionaries that map from
        synsets to information content values.

        :type icfile: str
        :param icfile: The name of the wordnet_ic file (e.g. "ic-brown.dat")
        :return: An information content dictionary
        r   Nru  r   r  rj  ROOT)
r   rC  r   r  r  r  r  r  _get_posr  )
r$   icfilerd  r  numr  r  r   valuer   s
             r   rd  zWordNetICCorpusReader.ic&	  s     u%4u%4YYv 	,"&r] ,	T!8VAYs^,fQi(vay)v;!#q	V(;sGAJ%'JA:&+BsGFO,	, 		, 	s    B C
CC N)r   r   r   r   r   rd  r   r   r   r   r   	  s    Dr   r   c                 *    | j                  |||      S N)rP  r*  r  r  r  rP  r*  s       r   rQ  rQ  O	  s!    "" #  r   c                 *    | j                  |||      S r	  r  r
  s       r   rY  rY  U	      !!'7-!XXr   c                 *    | j                  |||      S r	  r  r
  s       r   r_  r_  Y	  r  r   c                 *    | j                  |||      S N)rP  r  r  r  rd  rP  s       r   rh  rh  ]	      !!'2w!??r   c                 *    | j                  |||      S r  r  r  s       r   rm  rm  a	  r  r   c                 *    | j                  |||      S r  r  r  s       r   ro  ro  e	  r  r   c                    | j                   |j                   k7  rt        d| d|d      t        |       }t        |      }| j                  |      }t	        |      dk(  rd}nt        fd|D              }|rt        d|       |||fS )a  
    Get the information content of the least common subsumer that has
    the highest information content value.  If two nodes have no
    explicit common subsumer, assume that they share an artificial
    root node that is the hypernym of all explicit roots.

    :type synset1: Synset
    :param synset1: First input synset.
    :type synset2: Synset
    :param synset2: Second input synset.  Must be the same part of
    speech as the first synset.
    :type  ic: dict
    :param ic: an information content object (as returned by ``load_ic()``).
    :return: The information content of the two synsets and their most
    informative subsumer
    z-Computing the least common subsumer requires rS  rT  r   c              3   6   K   | ]  }t        |        y wr_   )information_content)r   r   rd  s     r   r   z_lcs_ic.<locals>.<genexpr>	  s     H-a4Hs   z> LCS Subsumer by content:)r   r   r  r  r  r   r  )r  r  rd  rP  re  rf  r[  subsumer_ics     `     r   rb  rb  q	  s    " ||w||#<CWN
 	

 gr
*C
gr
*C((1I
9~HiHH*K8[  r   c                     | j                   }|t        k(  rt        }	 ||   }|| j
                     }|dk(  rt        S t        j                  ||d   z         S # t        $ r}d}t	        ||z        |d }~ww xY w)Nz>Information content file has no entries for part-of-speech: %sr   )	r   r  r  r.  r   r   rk  rF  rW  )r}   rd  r   icposr  msgr  s          r   r  r  	  s    
++C
g~-3
 6>>"F{%(*+++  -N39%1,-s   A 	A7!A22A7c                 Z    | d   dk(  rt         S | d   dk(  rt        S d| z  }t        |      )Nru  r   r   zLUnidentified part of speech in WordNet Information Content file for field %s)r   r  r)  )fieldr  s     r   r  r  	  sB    RyC	rc	"# 	 or   rw  rv  )2r   rF  r  r  r  collectionsr   r   	functoolsr   	itertoolsr   r   operatorr   nltk.corpus.readerr	   nltk.internalsr
   nltk.probabilityr   r  r   r$  rk  r  r  r  r   r  rg  rI  compiler  	Exceptionr   r   rn   r   r{  r   rQ  rY  r_  rh  rm  ro  rb  r  r  r   r   r   <module>r&     s  "   	 	  * $ #  + % % ?(  !8 Wc4 $S!* X bjj&99 9 U( U( U(pA#N A#Hp^ pp{, {F&'L 'jYY@@@ !0088 ..66 ..66 ..66 ..66 ..66 "!P,*
r   