
    BVh:$                         d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddl	mZ dd	l	mZ dd
l	mZ ddl	mZ  G d dej                         Zy)zNadam optimizer implementation.    )ops)tensor_conversion)backend_config)learning_rate_schedule)optimizer_v2)	array_ops)control_flow_ops)math_ops)	state_ops)	variablesc                   b     e Zd ZdZdZ	 	 	 	 	 d
 fd	Zd Zd Z fdZddZ	ddZ
 fd	Z xZS )Nadama  Optimizer that implements the NAdam algorithm.
  Much like Adam is essentially RMSprop with momentum, Nadam is Adam with
  Nesterov momentum.

  Args:
    learning_rate: A Tensor or a floating point value.  The learning rate.
    beta_1: A float value or a constant float tensor. The exponential decay
      rate for the 1st moment estimates.
    beta_2: A float value or a constant float tensor. The exponential decay
      rate for the exponentially weighted infinity norm.
    epsilon: A small constant for numerical stability.
    name: Optional name for the operations created when applying gradients.
      Defaults to `"Nadam"`.
    **kwargs: Keyword arguments. Allowed to be one of
      `"clipnorm"` or `"clipvalue"`.
      `"clipnorm"` (float) clips gradients by norm; `"clipvalue"` (float) clips
      gradients by value.

  Usage Example:
    >>> opt = tf.keras.optimizers.Nadam(learning_rate=0.2)
    >>> var1 = tf.Variable(10.0)
    >>> loss = lambda: (var1 ** 2) / 2.0
    >>> step_count = opt.minimize(loss, [var1]).numpy()
    >>> "{:.1f}".format(var1.numpy())
    9.8

  Reference:
    - [Dozat, 2015](http://cs229.stanford.edu/proj2015/054_report.pdf).
  Tc                    |j                  dd      |d<   |j                  d|      }t        |t        j                        rt        d      t        t        | "  |fi | | j                  d|j                  d|             | j                  d| j                         | j                  d|       | j                  d|       |xs t        j                         | _        d | _        y )	Nschedule_decaygMbp?decaylrzdThe Nadam optimizer does not support tf.keras.optimizers.LearningRateSchedules as the learning rate.learning_ratebeta_1beta_2)popget
isinstancer   LearningRateSchedule
ValueErrorsuperr   __init__
_set_hyper_initial_decayr   epsilon_m_cache)selfr   r   r   r   namekwargs	__class__s          Z/home/dcms/DCMS/lib/python3.12/site-packages/tensorflow/python/keras/optimizer_v2/nadam.pyr   zNadam.__init__?   s     jj!159F7OJJt]3M-!7!L!LM ( ) ) 
%//OOOVZZm%DEOOGT001OOHf%OOHf%6n446DLDM    c                 d   |d   j                   j                  }| j                  Y| j                  dg |ddt        j
                  j                        | _        | j                  j                  | j                         |D ]  }| j                  |d        |D ]  }| j                  |d        y )Nr   momentum_cacheonesF)shapedtypeinitializer	trainableaggregationmv)
r+   
base_dtyper    
add_weighttf_variablesVariableAggregationONLY_FIRST_REPLICA_weightsappendadd_slot)r!   var_list	var_dtypevars       r%   _create_slotszNadam._create_slotsV   s    !!,,I}}oo
"66II & Kdm mm4==) 
mmC  
mmCr&   c                    t        j                  | j                  d|            }t        j                  | j                  d|            }t        j                  | j                  d|            }t        j                  | j
                  dz   |      }t        j                  | j
                  dz   |      }t        j                  d|      }	|ddt        j                  |	| j                  |z        z  z
  z  }
|ddt        j                  |	| j                  |z        z  z
  z  }t        j                  | j                  |      |
z  }|| j                  j                  u r?t        j                  t        j                  | j                  || j                  	            }||z  }t        || t        j                   | j"                  |      |||
|d|z
  d|z
  d|
z
  d|z
  d|z
  dt        j                  ||      z
  
      |||f<   y )Nr   r   r         gQ?g      ?g      ?use_locking)lr_tneg_lr_tr   beta_1_tbeta_2_tm_tm_t_1one_minus_beta_1_tone_minus_beta_2_tone_minus_m_tone_minus_m_schedule_newone_minus_m_schedule_nextv_t_prime_denominator)r   identity
_get_hyperr
   cast
iterationspowr   _m_cache_readr    r+   r   assign_use_lockingdictr   "convert_to_tensor_v2_with_dispatchr   )r!   
var_devicer:   apply_staterB   rD   rE   
local_step	next_step
decay_baserF   rG   m_schedule_newm_schedule_nexts                 r%   _prepare_localzNadam._prepare_locali   s   doooyIJD!!$//(I"FGH!!$//(I"FGHt2I>Jdoo19=ItY/J
b3Z!4!4z!ABD D ECSZ!4!4y!@AC C DE ]]4#5#5yACGNDMM''' )))*:*:
--T5F5F+H In$u,O+/!DDLL)
 x<x<Ci!$~!5"%"7!HLL:$FF,KY'(r&   c                 r    t        j                  | j                        | _        t        t
        |   |      S N)r   rN   r    rS   r   r   _prepare)r!   r9   r$   s     r%   rb   zNadam._prepare   s,    "++DMM:D&x00r&   c                    |j                   |j                  j                  }}|xs i j                  ||f      xs | j	                  ||      }| j                  |d      }| j                  |d      }||d   z  }	|d   |z  |d   |z  z   }
t        j                  ||
| j                        }
|
|d   z  }|d   |z  |d	   t        j                  |      z  z   }t        j                  ||| j                        }||d
   z  }|d   |	z  |d   |z  z   }||d   |z  t        j                  |      |d   z   z  z
  }t        j                  ||| j                        j                  S )Nr/   r0   rK   rD   rH   r@   rL   rE   rI   rM   rJ   rG   rB   r   )devicer+   r1   r   _fallback_apply_stateget_slotr   rT   rU   r
   squaresqrtop)r!   gradr;   rY   rX   r:   coefficientsr/   r0   g_primerF   	m_t_primev_t	v_t_primem_t_barvar_ts                   r%   _resource_apply_densezNadam._resource_apply_dense   s   JJ		(<(<	J &B++Z,CD I11*iH  	c3Ac3A\"<==G
#a',-45C


1ct/@/@
ACl#>??I
#a',-0EEFC


1ct/@/@
ACl#:;;IO,w6G$y01G,v&0i <	#::< <ECD4E4EFIIIr&   c                    |j                   |j                  j                  }}|xs i j                  ||f      xs | j	                  ||      }| j                  |d      }| j                  |d      }	||d   z  }
||d   z  }t        j                  |||d   z  | j                        }t        j                  |g      5  | j                  |||      }t        j                  ||      }d d d        |d   z  }|d   |
z  |d	   |z  z   }||z  |d
   z  }t        j                  |	|	|d   z  | j                        }t        j                  |g      5  | j                  |	||      }t        j                  ||      }d d d        |d   z  }t        j                  |      |d   z   }| j                  |||d   |z  |z        }t!        j"                  |||g S # 1 sw Y   xY w# 1 sw Y   ixY w)Nr/   r0   rK   rH   rD   r@   rL   rJ   rG   rI   rE   rM   r   rC   )rd   r+   r1   r   re   rf   r   rT   rU   r   control_dependencies_resource_scatter_addr   gatherr
   rh   r	   group)r!   rj   r;   indicesrY   rX   r:   rk   r/   r0   rl   m_scaled_g_valuesrF   	m_t_slicerm   rp   v_scaled_g_valuesrn   	v_t_slicero   v_prime_sqrt_plus_eps
var_updates                         r%   _resource_apply_sparsezNadam._resource_apply_sparse   s3   JJ		(<(<	J &B++Z,CD I11*iH  	c3Ac3A\"<==G |,@AA


1a,z"::'+'8'8:C 
	!	!3%	( 1&&q'3DEc""30i1 L)DEEIO,w6G$y01G 5I(JJ


1a,z"::'+'8'8:C 
	!	!3%	( 1&&q'3DEc""30i1 L)@AAI$MM)4|I7NN++WZ 7*-BBDJ !!J#=>>/1 11 1s    *G*G&G#&G/c                     t         t        |          }|j                  | j	                  d      | j
                  | j	                  d      | j	                  d      | j                  d       |S )Nr   r   r   )r   r   r   r   r   )r   r   
get_configupdate_serialize_hyperparameterr   r   )r!   configr$   s     r%   r   zNadam.get_config   sd    5$*,F
MM77H$$00:00:<<  Mr&   )gMbP?g?g+?gHz>r   ra   )__name__
__module____qualname____doc___HAS_AGGREGATE_GRADr   r<   r_   rb   rr   r   r   __classcell__)r$   s   @r%   r   r      sL    <  #.&$L1
J.&?P	 	r&   r   N)r   tensorflow.python.frameworkr   r   tensorflow.python.kerasr   $tensorflow.python.keras.optimizer_v2r   r   tensorflow.python.opsr   r	   r
   r   r   r3   OptimizerV2r    r&   r%   <module>r      s>    & , 9 2 G = + 2 * + ;~L$$ ~r&   