
from typing import Optional

import torch
import torch.optim._functional as F
from torch import Tensor
from torch.distributed.optim._deprecation_warning import (
    _scripted_functional_optimizer_deprecation_warning,
)

__all__: list[str] = []


# TorchScript-compatible functional Adagrad optimizer: instead of reading
# `param.grad`, the caller passes gradients explicitly to `step()`, so that
# gradients and parameters can be handled separately (used by distributed
# optimizer internals rather than exposed as a public API).
@torch.jit.script
class _FunctionalAdagrad:
    def __init__(
        self,
        params: list[Tensor],
        lr: float = 1e-2,
        lr_decay: float = 0.0,
        weight_decay: float = 0.0,
        initial_accumulator_value: float = 0.0,
        warmup_lr_multiplier: float = 1.0,
        warmup_num_iters: float = 0.0,
        eps: float = 1e-10,
        coalesce_grad: bool = True,
        foreach: bool = False,
        fused: bool = False,
        maximize: bool = False,
        _allow_empty_param_list: bool = False,
    ):
        _scripted_functional_optimizer_deprecation_warning(stacklevel=2)
        self.defaults = {
            "lr": lr,
            "lr_decay": lr_decay,
            "eps": eps,
            "weight_decay": weight_decay,
            "initial_accumulator_value": initial_accumulator_value,
            "warmup_lr_multiplier": warmup_lr_multiplier,
            "warmup_num_iters": warmup_num_iters,
        }
        self.coalesce_grad = coalesce_grad
        self.foreach = foreach
        self.fused = fused
        self.maximize = maximize
        self.state = torch.jit.annotate(
            dict[torch.Tensor, dict[str, torch.Tensor]], {}
        )

        if len(params) == 0 and not _allow_empty_param_list:
            raise ValueError("optimizer got an empty parameter list")

        # There is only a single param_group; additional groups are not supported.
        self.param_group = {"params": params}

        # Per-parameter state: the running sum of squared gradients and a scalar
        # step counter (kept as a tensor for TorchScript compatibility).
        for p in self.param_group["params"]:
            self.state[p] = {
                "sum": torch.full_like(p.data, initial_accumulator_value),
                "step": torch.tensor(0.0),
            }

    def step(self, gradients: list[Optional[Tensor]]):
        params = self.param_group["params"]
        params_with_grad = []
        grads = []
        state_sums = []
        state_steps: list[Tensor] = []

        if len(params) != len(gradients):
            raise ValueError(
                "the gradients passed in does not equal to the size of the parameters!"
                + f"Params length: {len(params)}. "
                + f"Gradients length: {len(gradients)}"
            )

        has_sparse_grad, has_complex = False, False
        for param, gradient in zip(self.param_group["params"], gradients):
            if gradient is not None:
                has_sparse_grad |= gradient.is_sparse
                has_complex |= torch.is_complex(param)
                params_with_grad.append(param)
                grads.append(gradient)
                state = self.state[param]
                state_sums.append(state["sum"])
                state_steps.append(state["step"])

        with torch.no_grad():
            F.adagrad(
                params,
                grads,
                state_sums,
                state_steps,
                lr=self.defaults["lr"],
                weight_decay=self.defaults["weight_decay"],
                lr_decay=self.defaults["lr_decay"],
                eps=self.defaults["eps"],
                has_sparse_grad=has_sparse_grad,
                foreach=self.foreach,
                maximize=self.maximize,
                has_complex=has_complex,
                fused=self.fused,
                grad_scale=None,
                found_inf=None,
            )