
    2Vh_+                         d Z ddlZddlZddlZddlZddlZddlmZ  ed      	 	 	 dd       Z	 ed      	 	 	 	 dd       Z
 ed      	 	 	 	 	 dd	       Z ed
       G d d             Z ed      d        Zy)z0Deprecated text preprocessing APIs from Keras 1.    N)keras_exportz6keras._legacy.preprocessing.text.text_to_word_sequencec                     |r| j                         } |D ci c]  }|| }}t        j                  |      }| j                  |      } | j	                  |      }|D cg c]  }|s|	 c}S c c}w c c}w )DEPRECATED.)lowerstr	maketrans	translatesplit)	
input_textfiltersr   r
   ctranslate_dicttranslate_mapseqis	            S/home/dcms/DCMS/lib/python3.12/site-packages/keras/src/legacy/preprocessing/text.pytext_to_word_sequencer      sw     %%'
(/01ah0N0MM.1M%%m4J


5
!C !aA   1
 !s   
A-A2&A2z(keras._legacy.preprocessing.text.one_hotc           	      .    t        | |t        ||||      S )r   )hash_functionr   r   r
   analyzer)hashing_trickhash)r   nr   r   r
   r   s         r   one_hotr       s&     	     z.keras._legacy.preprocessing.text.hashing_trickc                     |t         }n|dk(  rd }|t        | |||      }n ||       }|D cg c]  } ||      |dz
  z  dz    c}S c c}w )r   md5c                 x    t        t        j                  | j                               j	                         d      S )N   )inthashlibr   encode	hexdigest)ws    r   r   z$hashing_trick.<locals>.hash_functionD   s'    w{{188:.88:B??r   r   r   r
      )r   r   )	textr   r   r   r   r
   r   r   r$   s	            r   r   r   5   sj     	%		@ #'e
 tn69:]1Q'!+:::s   Az*keras._legacy.preprocessing.text.Tokenizerc                   f    e Zd ZdZ	 	 	 	 	 	 	 ddZd Zd Zd Zd Zd Z	d	 Z
dd
ZddZd Zd Zy)	Tokenizerr   Nc                    d|v r&t        j                  d       |j                  d      }|j                  dd      }	|rt        dt	        |      z         t        j                         | _        t        j                  t              | _
        || _        || _        || _        || _        |	| _        || _        || _        t        j                  t              | _        i | _        i | _        || _        y )Nnb_wordszDThe `nb_words` argument in `Tokenizer` has been renamed `num_words`.document_countr   z Unrecognized keyword arguments: )warningswarnpop	TypeErrorr   collectionsOrderedDictword_countsdefaultdictr    	word_docsr   r
   r   	num_wordsr,   
char_level	oov_token
index_docs
word_index
index_wordr   )
selfr6   r   r   r
   r7   r8   r   kwargsr,   s
             r   __init__zTokenizer.__init__U   s     MM0 

:.I$4a8>VLMM&224$005

",$"%11#6 r   c                    |D ]/  }| xj                   dz  c_         | j                  st        |t              rM| j                  r>t        |t              r|D cg c]  }|j	                          }}n|j	                         }|}nK| j
                  .t        || j                  | j                  | j                        }n| j                  |      }|D ]7  }|| j                  v r| j                  |xx   dz  cc<   )d| j                  |<   9 t        |      D ]  }| j                  |xx   dz  cc<    2 t        | j                  j                               }|j                  d d       | j                  g }n| j                  g}|j                  d |D               t!        t#        |t        t%        dt'        |      dz                           | _        | j(                  j                         D ci c]  \  }}||
 c}}| _        t        | j                  j                               D ]!  \  }}|| j,                  | j(                  |   <   # y c c}w c c}}w )Nr&   r%   c                     | d   S Nr&    )xs    r   <lambda>z(Tokenizer.fit_on_texts.<locals>.<lambda>   s
    1Q4 r   T)keyreversec              3   &   K   | ]	  }|d      yw)r   NrB   ).0wcs     r   	<genexpr>z)Tokenizer.fit_on_texts.<locals>.<genexpr>   s     2B"Q%2s   )r,   r7   
isinstancelistr   r   r   r   r
   r3   setr5   itemssortr8   extenddictziprangelenr:   r;   r9   )	r<   textsr'   	text_elemr   r$   wcounts
sorted_vocr   s	            r   fit_on_textszTokenizer.fit_on_textsy   s    	'D1$*T4"8::!$-CGHi	 1HH#zz|==(/ $"jj"jj	C ---C ,((($$Q'1,'*+D$$Q'	,
 X 'q!Q&!'1	'8 t''--/06>>!J..)J2'22 
Dq#j/A*=!>?@
 -1OO,A,A,CDDAq1a4D--/0 	4DAq23DOODOOA./	4O  IJ Es   I7Ic                     | xj                   t        |      z  c_         |D ]+  }t        |      }|D ]  }| j                  |xx   dz  cc<    - y rA   )r,   rT   rM   r9   )r<   	sequencesr   r   s       r   fit_on_sequenceszTokenizer.fit_on_sequences   sR    s9~- 	(Cc(C ("a'"(	(r   c                 6    t        | j                  |            S N)rL   texts_to_sequences_generator)r<   rU   s     r   texts_to_sequenceszTokenizer.texts_to_sequences   s    D55e<==r   c              #     K   | j                   }| j                  j                  | j                        }|D ]-  }| j                  st        |t              rM| j                  r>t        |t              r|D cg c]  }|j                          }}n|j                         }|}nK| j                  .t        || j                  | j                  | j                        }n| j                  |      }g }|D ]k  }| j                  j                  |      }	|	.|r|	|k\  r|*|j                  |       <|j                  |	       N| j                  [|j                  |       m | 0 y c c}w w)Nr%   )r6   r:   getr8   r7   rK   rL   r   r   r   r   r
   append)
r<   rU   r6   oov_token_indexr'   rV   r   vectr$   r   s
             r   r_   z&Tokenizer.texts_to_sequences_generator   s3    NN	//--dnn= 	D*T4"8::!$-CGHi	 1HH#zz|==(/ $"jj"jj	C ---CD 	1OO''*= Q)^*6 KK8A^^/KK0	1 J;	  Is   A4E-6E(BE-0E- E-c                 6    t        | j                  |            S r^   )rL   sequences_to_texts_generator)r<   r[   s     r   sequences_to_textszTokenizer.sequences_to_texts   s    D55i@AAr   c              #     K   | j                   }| j                  j                  | j                        }|D ]  }g }|D ]  }| j                  j                  |      }|;|r'||k\  r"|*|j                  | j                  |          I|j                  |       [| j                  h|j                  | j                  |           dj                  |      }|  y w)N )r6   r:   rb   r8   r;   rc   join)r<   r[   r6   rd   r   re   numwords           r   rg   z&Tokenizer.sequences_to_texts_generator   s     NN	//--dnn= 	CD 	B**3/# SI%5*6 KK(HID)^^/KK @A	B 88D>DJ	s   A$C'=C%8Cc                 J    | j                  |      }| j                  ||      S )N)mode)r`   sequences_to_matrix)r<   rU   ro   r[   s       r   texts_to_matrixzTokenizer.texts_to_matrix   s(    ++E2	''	'==r   c                 >   | j                   s0| j                  rt        | j                        dz   }nt        d      | j                   }|dk(  r| j                  st        d      t        j                  t        |      |f      }t        |      D ]	  \  }}|s
t        j                  t              }|D ]  }||k\  r	||xx   dz  cc<    t        |j                               D ]  \  }}	|dk(  r	|	||   |<   |dk(  r|	t        |      z  ||   |<   .|dk(  r	d||   |<   <|dk(  rfdt        j                  |	      z   }
t        j                  d| j                  d| j                  j                  |d      z   z  z         }|
|z  ||   |<   t        d	|        |S )
Nr&   zKSpecify a dimension (`num_words` argument), or fit on some text data first.tfidfz7Fit the Tokenizer on some data before using tfidf mode.countfreqbinaryr   zUnknown vectorization mode:)r6   r:   rT   
ValueErrorr,   npzeros	enumerater1   r4   r    rL   rN   logr9   rb   )r<   r[   ro   r6   rC   r   r   countsjr   tfidfs               r   rp   zTokenizer.sequences_to_matrix   s   ~~014	 6 
 I7?4#6#6I  HHc)ni01	* 	JFAs ,,S1F 	>q	Q	 V\\^, J17?AaDGV^#c(lAaDGX%AaDGW_ RVVAYB&&--T__5H5HA5N1NOPC !3hAaDG$%BDII#J	J4 r   c                    t        j                  | j                        }t        j                  | j                        }t        j                  | j                        }t        j                  | j
                        }t        j                  | j                        }| j                  | j                  | j                  | j                  | j                  | j                  | j                  |||||dS )N)r6   r   r   r
   r7   r8   r,   r3   r5   r9   r;   r:   )jsondumpsr3   r5   r9   r:   r;   r6   r   r   r
   r7   r8   r,   )r<   json_word_countsjson_word_docsjson_index_docsjson_word_indexjson_index_words         r   
get_configzTokenizer.get_config  s    ::d&6&67DNN3**T__5**T__5**T__5 ||ZZZZ//"11+')))
 	
r   c                     | j                         }| j                  j                  |d}t        j                  |fi |S )N)
class_nameconfig)r   	__class____name__r   r   )r<   r=   r   tokenizer_configs       r   to_jsonzTokenizer.to_json2  s=    "..11
 zz*5f55r   )N!!"#$%&()*+,-./:;<=>?@[\]^_`{|}~	
Trj   FNN)rv   )r   
__module____qualname____doc__r>   rY   r\   r`   r_   rh   rg   rq   rp   r   r   rB   r   r   r)   r)   Q   sX     6"!H.4`(> DB$>,\
,6r   r)   z4keras._legacy.preprocessing.text.tokenizer_from_jsonc                    t        j                  |       }|j                  d      }t        j                  |j                  d            }t        j                  |j                  d            }t        j                  |j                  d            }|j	                         D ci c]  \  }}t        |      | }}}t        j                  |j                  d            }|j	                         D ci c]  \  }}t        |      | }}}t        j                  |j                  d            }	t        di |}
||
_        ||
_        ||
_	        |	|
_
        ||
_        |
S c c}}w c c}}w )r   r   r3   r5   r9   r;   r:   rB   )r   loadsrb   r/   rN   r    r)   r3   r5   r9   r:   r;   )json_stringr   r   r3   r5   r9   kvr;   r:   	tokenizers              r   tokenizer_from_jsonr   ;  s)    zz+.!!(+F**VZZ67K

6::k23IFJJ|45J(2(8(8(:;1#a&!);J;FJJ|45J(2(8(8(:;1#a&!);J;FJJ|45J#F#I'I#I%I%I%I <;s   &E#6E))r   Trj   )r   Trj   N)Nr   Trj   N)r   r1   r!   r   r-   numpyrx   keras.src.api_exportr   r   r   r   r)   r   rB   r   r   <module>r      s    6      - FG 3

	! H!$ 89 3

 :( >? 2

; @;6 :;f6 f6 <f6R DE Fr   