
from keras.src import ops
from keras.src.api_export import keras_export
from keras.src.optimizers import optimizer


@keras_export(["keras.optimizers.Adamax"])
class Adamax(optimizer.Optimizer):
    """Optimizer that implements the Adamax algorithm.

    Adamax, a variant of Adam based on the infinity norm, is a first-order
    gradient-based optimization method. Due to its capability of adjusting the
    learning rate based on data characteristics, it is suited to learning
    time-variant processes, e.g., speech data with dynamically changing noise
    conditions. Default parameters follow those provided in the paper (see
    references below).

    Initialization:

    ```python
    m = 0  # Initialize initial 1st moment vector
    u = 0  # Initialize the exponentially weighted infinity norm
    t = 0  # Initialize timestep
    ```

    The update rule for parameter `w` with gradient `g` is described at the end
    of section 7.1 of the paper (see the reference section):

    ```python
    t += 1
    m = beta1 * m + (1 - beta1) * g
    u = max(beta2 * u, abs(g))
    current_lr = learning_rate / (1 - beta1 ** t)
    w = w - current_lr * m / (u + epsilon)
    ```
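
    For instance, a single step with illustrative (non-default) values
    `learning_rate = 0.1`, `w = 1.0`, and `g = 0.5` works out as follows:

    ```python
    # Illustrative walk-through of one Adamax step (values chosen for clarity).
    w, g, m, u, t = 1.0, 0.5, 0.0, 0.0, 0

    t += 1                               # t = 1
    m = 0.9 * m + (1 - 0.9) * g          # m = 0.05
    u = max(0.999 * u, abs(g))           # u = 0.5
    current_lr = 0.1 / (1 - 0.9 ** t)    # 0.1 / 0.1 = 1.0
    w = w - current_lr * m / (u + 1e-7)  # w ~= 0.9
    ```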

    Args:
        learning_rate: A float, a
            `keras.optimizers.schedules.LearningRateSchedule` instance, or
            a callable that takes no arguments and returns the actual value to
            use. The learning rate. Defaults to `0.001`.
        beta_1: A float value or a constant float tensor. The exponential decay
            rate for the 1st moment estimates.
        beta_2: A float value or a constant float tensor. The exponential decay
            rate for the exponentially weighted infinity norm.
        epsilon: A small constant for numerical stability.
            {{base_optimizer_keyword_args}}
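
    Example:

    A minimal usage sketch (the small model and random data below are purely
    illustrative):

    ```python
    import numpy as np
    import keras

    model = keras.Sequential([keras.layers.Dense(1)])
    model.compile(optimizer=keras.optimizers.Adamax(learning_rate=0.001),
                  loss="mse")
    x = np.random.rand(32, 4).astype("float32")
    y = np.random.rand(32, 1).astype("float32")
    model.fit(x, y, epochs=1, verbose=0)
    ```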

    Reference:

    - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980)
    """

    def __init__(
        self,
        learning_rate=0.001,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-7,
        weight_decay=None,
        clipnorm=None,
        clipvalue=None,
        global_clipnorm=None,
        use_ema=False,
        ema_momentum=0.99,
        ema_overwrite_frequency=None,
        loss_scale_factor=None,
        gradient_accumulation_steps=None,
        name="adamax",
        **kwargs,
    ):
        super().__init__(
            learning_rate=learning_rate,
            name=name,
            weight_decay=weight_decay,
            clipnorm=clipnorm,
            clipvalue=clipvalue,
            global_clipnorm=global_clipnorm,
            use_ema=use_ema,
            ema_momentum=ema_momentum,
            ema_overwrite_frequency=ema_overwrite_frequency,
            loss_scale_factor=loss_scale_factor,
            gradient_accumulation_steps=gradient_accumulation_steps,
            **kwargs,
        )
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon

    def build(self, var_list):
        """Initialize optimizer variables.

        Adamax optimizer has 2 types of variables: momentums (denoted as m)
        and the exponentially weighted infinity norm (denoted as u).

        Args:
            var_list: list of model variables to build Adamax variables on.
        """
        if self.built:
            return
        super().build(var_list)
        self._m, self._u = self.add_optimizer_variables(
            var_list, ["momentum", "norm"]
        )

    def update_step(self, gradient, variable, learning_rate):
        """Update step given gradient and the associated model variable."""
        lr = ops.cast(learning_rate, variable.dtype)
        gradient = ops.cast(gradient, variable.dtype)
        local_step = ops.cast(self.iterations + 1, variable.dtype)
        beta_1_power = ops.power(
            ops.cast(self.beta_1, variable.dtype), local_step
        )

        m = self._m[self._get_variable_index(variable)]
        u = self._u[self._get_variable_index(variable)]

        # m <- beta_1 * m + (1 - beta_1) * g, written as an in-place update.
        self.assign_add(
            m, ops.multiply(ops.subtract(gradient, m), 1 - self.beta_1)
        )
        # u <- max(beta_2 * u, |g|)
        self.assign(
            u, ops.maximum(ops.multiply(self.beta_2, u), ops.abs(gradient))
        )
        # w <- w - lr * m / ((1 - beta_1^t) * (u + epsilon))
        self.assign_sub(
            variable,
            ops.divide(
                ops.multiply(lr, m),
                ops.multiply((1 - beta_1_power), ops.add(u, self.epsilon)),
            ),
        )

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "beta_1": self.beta_1,
                "beta_2": self.beta_2,
                "epsilon": self.epsilon,
            }
        )
        return config


Adamax.__doc__ = Adamax.__doc__.replace(
    "{{base_optimizer_keyword_args}}", optimizer.base_optimizer_keyword_args
)