
    Vh                         U d dl mZ d dlZd dlmc mZ d dlmZ d dlm	Z	 g Z
ee   ed<   ej                  j                   G d d             Zy)    )OptionalN)Tensor)2_scripted_functional_optimizer_deprecation_warning__all__c                       e Zd Z	 	 	 	 	 	 	 	 	 ddee   dedeeef   dededededed	ed
efdZdede	e   fdZ
dee	e      fdZy)_FunctionalAdamparamslrbetasepsweight_decayamsgradmaximizeforeachfused_allow_empty_param_listc                 f   t        d       d|k  st        d|       d|k  st        d|       d|d   cxk  rdk  sn t        d|d          d|d	   cxk  rdk  sn t        d
|d	          d|k  st        d|       |||d   |d	   |d| _        || _        || _        || _        |	| _        t        j                  j                  t        t        j                  t        t        t        j                  f   f   i       | _        t        |      dk(  r|
st        d      d|i| _        y )N   )
stacklevel        zInvalid learning rate: zInvalid epsilon value: r   g      ?z#Invalid beta parameter at index 0:    z#Invalid beta parameter at index 1: zInvalid weight_decay value: )r
   r   beta1beta2r   z%optimizer got an empty parameter listr	   )r   
ValueErrordefaultsr   r   r   r   torchjitannotatedictr   strstatelenparam_group)selfr	   r
   r   r   r   r   r   r   r   r   s              W/home/dcms/DCMS/lib/python3.12/site-packages/torch/distributed/optim/functional_adam.py__init__z_FunctionalAdam.__init__   sJ    	;aHby6rd;<<cz6se<==eAh$$B58*MNNeAh$$B58*MNNl";L>JKK 1X1X(
  
YY''U\\4U\\@Q;R-R(SUWX
v;!$;DEE %f-    paramgradc                 f   g }g }g }g }g }g }t        j                  |      }	|"|j                  |       |j                  |       || j                  vri | j                  |<   | j                  |   }
t        j                  d      |
d<   t        j
                  |t         j                        |
d<   t        j
                  |t         j                        |
d<   | j                  r(t        j
                  |t         j                        |
d<   | j                  |   }
|j                  |
d          |j                  |
d          | j                  r|j                  |
d          |j                  |
d          t        j                         5  t        j                  ||||||| j                  |	| j                  | j                  d   | j                  d	   | j                  d
   | j                  d   | j                  d   | j                  | j                  dd       ddd       y# 1 sw Y   yxY w)zo
        Similar to step, but operates on a single parameter and optionally a
        gradient tensor.
        Nr   stepmemory_formatexp_avg
exp_avg_sqmax_exp_avg_sqr   r   r
   r   r   r   has_complexr   r   r   r
   r   r   r   r   
grad_scale	found_inf)r   
is_complexappendr!   tensor
zeros_likepreserve_formatr   no_gradFadamr   r   r   r   )r$   r(   r)   params_with_gradgradsexp_avgsexp_avg_sqsmax_exp_avg_sqsstate_stepsr2   r!   s              r%   
step_paramz_FunctionalAdam.step_paramG   s   
 $&&&u-##E*LL

" "DJJuJJu%E!LL-E&M$//U%:%: E) #("2"2U%:%:#E, ||*/*:*:)>)>+&' 

5!i()5./<<""5)9#:;5=)]]_ 	FF 'mmG,mmG,==&!]]>:MM%(jj%	 	 	s   BH''H0	gradientsc                 X   | j                   d   }g }g }g }g }g }g }d}	t        |      t        |      k7  r*t        ddt        |       dz   dt        |       z         t        | j                   d   |      D ]v  \  }
}|
|	t	        j
                  |
      z  }	|j                  |
       |j                  |       |
| j                  vri | j                  |
<   | j                  |
   }t	        j                  d      |d<   t	        j                  |
t        j                  	      |d
<   t	        j                  |
t        j                  	      |d<   | j                  r(t	        j                  |
t        j                  	      |d<   | j                  |
   }|j                  |d
          |j                  |d          | j                  r|j                  |d          |j                  |d          y t	        j                         5  t        j                  ||||||| j                  |	| j                  | j                   d   | j                   d   | j                   d   | j                   d   | j                   d   | j"                  | j$                  d d        d d d        y # 1 sw Y   y xY w)Nr	   FzEthe gradients passed in does not equal to the size of the parameters!zParams length: z. zGradients length: r   r+   r,   r.   r/   r0   r   r   r
   r   r   r1   )r#   r"   r   zipr   r5   r6   r!   r7   r8   r9   r   r:   r;   r<   r   r   r   r   )r$   rD   r	   r=   r>   r?   r@   rA   rB   r2   r(   gradientr!   s                r%   r+   z_FunctionalAdam.step   s   !!(+$&v;#i.(W#CK=34&s9~&678   #4#3#3H#=yI  	2OE8#u//66 ''.X&

*(*DJJu% JJu-E$)LL$5E&M','7'7U-B-B(E)$ +0*:*:U-B-B+E,' ||272B2B!1F1F3./ 

5)i 01""5#67<<#**51A+BC""5=1A 	2D ]]_ 	FF 'mmG,mmG,==&!]]>:MM%(jj%	 	 	s   BJ  J)N)	gMbP?)g?g+?g:0yE>r   FFFFF)__name__
__module____qualname__listr   floattupleboolr&   r   rC   r+    r'   r%   r   r      s    
 %1!(-+.V+. +. UE\"	+.
 +. +. +. +. +. +. "&+.Z: :hv.> :xGd8F#34 Gr'   r   )typingr   r   torch.optim._functionaloptim_functionalr;   r   ,torch.distributed.optim._deprecation_warningr   r   rK   r    __annotations__r   scriptr   rO   r'   r%   <module>rW      sO      # # 
 c  q q qr'   