
    2Vhb                         d dl mZ d dlmZ d dlmZ  ed       G d dej                               Zej                  j                  dej                        e_        y)	    )ops)keras_export)	optimizerzkeras.optimizers.Lambc                   Z     e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fd	Z fdZd Z fdZ xZS )Lamba9  Optimizer that implements the Lamb algorithm.

    Lamb is a stochastic gradient descent method that
    uses layer-wise adaptive moments to adjusts the
    learning rate for each parameter based on the ratio of the
    norm of the weight to the norm of the gradient
    This helps to stabilize the training process and improves convergence
    especially for large batch sizes.

    Args:
        learning_rate: A float, a
            `keras.optimizers.schedules.LearningRateSchedule` instance, or
            a callable that takes no arguments and returns the actual value to
            use. The learning rate. Defaults to `0.001`.
        beta_1: A float value or a constant float tensor, or a callable
            that takes no arguments and returns the actual value to use. The
            exponential decay rate for the 1st moment estimates. Defaults to
            `0.9`.
        beta_2: A float value or a constant float tensor, or a callable
            that takes no arguments and returns the actual value to use. The
            exponential decay rate for the 2nd moment estimates. Defaults to
            `0.999`.
        epsilon: A small constant for numerical stability.
            Defaults to `1e-7`.
        {{base_optimizer_keyword_args}}

    References:
        - [Yang et al.](https://arxiv.org/pdf/1904.00962)
    c                 f    t        |   d|||||||	|
|||d| || _        || _        || _        y )N)learning_ratenameweight_decayclipnorm	clipvalueglobal_clipnormuse_emaema_momentumema_overwrite_frequencyloss_scale_factorgradient_accumulation_steps )super__init__beta_1beta_2epsilon)selfr	   r   r   r   r   r   r   r   r   r   r   r   r   r
   kwargs	__class__s                   I/home/dcms/DCMS/lib/python3.12/site-packages/keras/src/optimizers/lamb.pyr   zLamb.__init__&   sY    $ 	 	
'%+%$;/(C	
 	
     c                     | j                   ryt        | 	  |       | j                  |ddg      \  | _        | _        y)zInitialize optimizer variables.

        Lamb optimizer has 2 types of variables: momentums and velocities

        Args:
            var_list: list of model variables to build Lamb variables on.
        Nmomentumvelocity)builtr   buildadd_optimizer_variables
_momentums_velocities)r   var_listr   s     r   r#   z
Lamb.buildJ   s?     ::h,0,H,Hz:.-
))r   c           
      P   t        j                  ||j                        }t        j                  ||j                        }t        j                  | j                  dz   |j                        }t        j                  t        j                  | j
                  |j                        |      }t        j                  t        j                  | j                  |j                        |      }| j                  | j                  |         }| j                  | j                  |         }	| j                  |t        j                  t        j                  ||      d| j
                  z
               | j                  |	t        j                  t        j                  t        j                  |      |	      d| j                  z
               t        j                  |d|z
        }
t        j                  t        j                   t        j                  |	d|z
              | j"                        }t        j                  |
|      }t        j                   t        j$                  t        j                  |d                  }t        j                   t        j$                  t        j                  |d                  }t        j&                  t        j(                  |d      t        j&                  t        j(                  |d      ||z  d      d      }| j+                  |||z  |z         y)z=Update step given gradient and the associated model variable.   g      ?   r   N)r   castdtype
iterationspowerr   r   r%   _get_variable_indexr&   
assign_addmultiplysubtractsquaredivideaddsqrtr   sumwheregreater
assign_sub)r   gradientvariabler	   lr
local_stepbeta_1_powerbeta_2_powermvm_t_hatv_sqrtupdatew_normg_normratios                   r   update_stepzLamb.update_stepY   s.   XXmX^^488Hhnn5XXdoo18>>B
yyHHT[[(..1:
 yyHHT[[(..1:
 OOD44X>?T55h?@s||CLL15q4;;G	
 	LLSZZ115q4;;	
 **Q|!35HHSZZC,$6894<<
 GV,#''#))Ha"89:#''#))FA"678 		KK"IIckk&!,vE
 	%"*v"56r   c                     t         |          }|j                  | j                  | j                  | j
                  d       |S )N)r   r   r   )r   
get_configrE   r   r   r   )r   configr   s     r   rK   zLamb.get_config   s=    #%++++<<	
 r   )gMbP?g?g+?gHz>NNNNFgGz?NNNlamb)	__name__
__module____qualname____doc__r   r#   rI   rK   __classcell__)r   s   @r   r   r      sP    @  $$("H
+7Z	 	r   r   z{{base_optimizer_keyword_args}}N)	keras.srcr   keras.src.api_exportr   keras.src.optimizersr   	Optimizerr   rQ   replacebase_optimizer_keyword_argsr   r   r   <module>rY      s\     - * %&H9 H 'HV ||##%y'L'Lr   