
"""Various learning rate schedule functions."""

import math

from keras.src import ops
from keras.src.api_export import keras_export
from keras.src.saving import serialization_lib


@keras_export("keras.optimizers.schedules.LearningRateSchedule")
class LearningRateSchedule:
    """The learning rate schedule base class.

    You can use a learning rate schedule to modulate how the learning rate
    of your optimizer changes over time.

    Several built-in learning rate schedules are available, such as
    `keras.optimizers.schedules.ExponentialDecay` or
    `keras.optimizers.schedules.PiecewiseConstantDecay`:

    ```python
    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=1e-2,
        decay_steps=10000,
        decay_rate=0.9)
    optimizer = keras.optimizers.SGD(learning_rate=lr_schedule)
    ```

    A `LearningRateSchedule` instance can be passed in as the `learning_rate`
    argument of any optimizer.

    To implement your own schedule object, you should implement the `__call__`
    method, which takes a `step` argument (scalar integer tensor, the
    current training step count).
    Like for any other Keras object, you can also optionally
    make your object serializable by implementing the `get_config`
    and `from_config` methods.

    Example:

    ```python
    class MyLRSchedule(keras.optimizers.schedules.LearningRateSchedule):

        def __init__(self, initial_learning_rate):
            self.initial_learning_rate = initial_learning_rate

        def __call__(self, step):
            return self.initial_learning_rate / (step + 1)

    optimizer = keras.optimizers.SGD(learning_rate=MyLRSchedule(0.1))
    ```
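
    For example, a minimal sketch of making the schedule above serializable
    (assuming the same single `initial_learning_rate` constructor argument):

    ```python
    class MyLRSchedule(keras.optimizers.schedules.LearningRateSchedule):

        def __init__(self, initial_learning_rate):
            self.initial_learning_rate = initial_learning_rate

        def __call__(self, step):
            return self.initial_learning_rate / (step + 1)

        def get_config(self):
            # The base class `from_config` calls `cls(**config)`, so
            # returning the constructor arguments is enough here.
            return {"initial_learning_rate": self.initial_learning_rate}
    ```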
    """

    def __call__(self, step):
        raise NotImplementedError(
            f"Learning rate schedule '{self.__class__.__name__}' "
            "must override `__call__(self, step)`."
        )

    def get_config(self):
        raise NotImplementedError(
            f"Learning rate schedule '{self.__class__.__name__}' "
            "must override `get_config()` in order to be serializable."
        )

    @classmethod
    def from_config(cls, config):
        """Instantiates a `LearningRateSchedule` from its config.

        Args:
            config: Output of `get_config()`.

        Returns:
            A `LearningRateSchedule` instance.
        """
        return cls(**config)


@keras_export("keras.optimizers.schedules.ExponentialDecay")
class ExponentialDecay(LearningRateSchedule):
    """A `LearningRateSchedule` that uses an exponential decay schedule.

    When training a model, it is often useful to lower the learning rate as
    the training progresses. This schedule applies an exponential decay function
    to an optimizer step, given a provided initial learning rate.

    The schedule is a 1-arg callable that produces a decayed learning
    rate when passed the current optimizer step. This can be useful for changing
    the learning rate value across different invocations of optimizer functions.
    It is computed as:

    ```python
    def decayed_learning_rate(step):
        return initial_learning_rate * decay_rate ^ (step / decay_steps)
    ```

    If the argument `staircase` is `True`, then `step / decay_steps` is
    an integer division and the decayed learning rate follows a
    staircase function.
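
    For instance (hypothetical values), with `initial_learning_rate=0.1`,
    `decay_steps=1000` and `decay_rate=0.5`:

    ```python
    lr = keras.optimizers.schedules.ExponentialDecay(0.1, 1000, 0.5)
    lr(500)   # ~0.0707, i.e. 0.1 * 0.5 ** 0.5
    lr(1500)  # ~0.0354, i.e. 0.1 * 0.5 ** 1.5

    staircase_lr = keras.optimizers.schedules.ExponentialDecay(
        0.1, 1000, 0.5, staircase=True)
    staircase_lr(500)   # 0.1,  i.e. 0.1 * 0.5 ** 0
    staircase_lr(1500)  # 0.05, i.e. 0.1 * 0.5 ** 1
    ```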

    You can pass this schedule directly into a `keras.optimizers.Optimizer`
    as the learning rate.
    Example: When fitting a Keras model, decay every 100000 steps with a base
    of 0.96:

    ```python
    initial_learning_rate = 0.1
    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate,
        decay_steps=100000,
        decay_rate=0.96,
        staircase=True)

    model.compile(optimizer=keras.optimizers.SGD(learning_rate=lr_schedule),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit(data, labels, epochs=5)
    ```

    The learning rate schedule is also serializable and deserializable using
    `keras.optimizers.schedules.serialize` and
    `keras.optimizers.schedules.deserialize`.

    Args:
        initial_learning_rate: A Python float. The initial learning rate.
        decay_steps: A Python integer. Must be positive. See the decay
            computation above.
        decay_rate: A Python float. The decay rate.
        staircase: Boolean.  If `True` decay the learning rate at discrete
            intervals.
        name: String.  Optional name of the operation.  Defaults to
            `"ExponentialDecay"`.

    Returns:
        A 1-arg callable learning rate schedule that takes the current optimizer
        step and outputs the decayed learning rate, a scalar tensor of the
        same type as `initial_learning_rate`.
    """

    def __init__(
        self,
        initial_learning_rate,
        decay_steps,
        decay_rate,
        staircase=False,
        name="ExponentialDecay",
    ):
        super().__init__()
        self.initial_learning_rate = initial_learning_rate
        self.decay_steps = decay_steps
        self.decay_rate = decay_rate
        self.staircase = staircase
        self.name = name

        if self.decay_steps <= 0:
            raise ValueError(
                "Argument `decay_steps` must be > 0. "
                f"Received: decay_steps={self.decay_steps}"
            )

    def __call__(self, step):
        with ops.name_scope(self.name):
            initial_learning_rate = ops.convert_to_tensor(
                self.initial_learning_rate
            )
            dtype = initial_learning_rate.dtype
            decay_steps = ops.cast(self.decay_steps, dtype)
            decay_rate = ops.cast(self.decay_rate, dtype)

            global_step_recomp = ops.cast(step, dtype)
            p = global_step_recomp / decay_steps
            if self.staircase:
                p = ops.floor(p)
            return ops.multiply(
                initial_learning_rate, ops.power(decay_rate, p)
            )

    def get_config(self):
        return {
            "initial_learning_rate": self.initial_learning_rate,
            "decay_steps": self.decay_steps,
            "decay_rate": self.decay_rate,
            "staircase": self.staircase,
            "name": self.name,
        }


@keras_export("keras.optimizers.schedules.PiecewiseConstantDecay")
class PiecewiseConstantDecay(LearningRateSchedule):
    """A `LearningRateSchedule` that uses a piecewise constant decay schedule.

    The function returns a 1-arg callable to compute the piecewise constant
    when passed the current optimizer step. This can be useful for changing the
    learning rate value across different invocations of optimizer functions.

    Example: use a learning rate that's 1.0 for the first 100001 steps, 0.5
        for the next 10000 steps, and 0.1 for any additional steps.

    ```python
    step = ops.array(0)
    boundaries = [100000, 110000]
    values = [1.0, 0.5, 0.1]
    learning_rate_fn = keras.optimizers.schedules.PiecewiseConstantDecay(
        boundaries, values)

    # Later, whenever we perform an optimization step, we pass in the step.
    learning_rate = learning_rate_fn(step)
    ```

    You can pass this schedule directly into a `keras.optimizers.Optimizer`
    as the learning rate. The learning rate schedule is also serializable and
    deserializable using `keras.optimizers.schedules.serialize` and
    `keras.optimizers.schedules.deserialize`.

    Args:
        boundaries: A list of Python numbers with strictly increasing
            entries, and with all elements having the same type as the
            optimizer step.
        values: A list of Python numbers that specifies the values for the
            intervals defined by `boundaries`. It should have one more
            element than `boundaries`, and all elements should have the same
            type.
        name: A string. Optional name of the operation. Defaults to
            `"PiecewiseConstant"`.

    Returns:
        A 1-arg callable learning rate schedule that takes the current optimizer
        step and outputs the decayed learning rate, a scalar tensor of the
        same type as the boundary tensors.

        The output of the 1-arg function that takes the `step`
        is `values[0]` when `step <= boundaries[0]`,
        `values[1]` when `step > boundaries[0]` and `step <= boundaries[1]`,
        ..., and `values[-1]` when `step > boundaries[-1]`.


    Raises:
        ValueError: if the number of elements in the `boundaries` and `values`
        lists do not match.
    """

    def __init__(self, boundaries, values, name="PiecewiseConstant"):
        super().__init__()

        if len(boundaries) != len(values) - 1:
            raise ValueError(
                "The length of boundaries should be 1 less than the length "
                f"of values. Received: boundaries={boundaries} of length "
                f"{len(boundaries)}, and values={values} of length "
                f"{len(values)}."
            )

        self.boundaries = boundaries
        self.values = values
        self.name = name

    def __call__(self, step):
        with ops.name_scope(self.name):
            boundaries = [ops.convert_to_tensor(x) for x in self.boundaries]
            values = [ops.convert_to_tensor(x) for x in self.values]
            step = ops.convert_to_tensor(step)

            for i, b in enumerate(boundaries):
                if b.dtype != step.dtype:
                    # Cast the boundaries to the dtype of the step.
                    b = ops.cast(b, step.dtype)
                    boundaries[i] = b

            result_dtype = values[0].dtype
            result_value = ops.array(0, dtype=result_dtype)

            # For each interval, add the matching value when `step` is in it;
            # exactly one of the indicators below is nonzero for a given step.
            step_less_than_first_boundary = ops.cast(
                step <= boundaries[0], result_dtype
            )
            result_value += step_less_than_first_boundary * values[0]

            step_greater_than_last_boundary = ops.cast(
                step > boundaries[-1], result_dtype
            )
            result_value += step_greater_than_last_boundary * values[-1]

            for low, high, value in zip(
                boundaries[:-1], boundaries[1:], values[1:-1]
            ):
                # Value for the half-open interval (low, high].
                step_in_range = ops.cast(
                    (step > low) & (step <= high), result_dtype
                )
                result_value += step_in_range * value

            return result_value

    def get_config(self):
        return {
            "boundaries": self.boundaries,
            "values": self.values,
            "name": self.name,
        }


@keras_export("keras.optimizers.schedules.PolynomialDecay")
class PolynomialDecay(LearningRateSchedule):
    """A `LearningRateSchedule` that uses a polynomial decay schedule.

    It is commonly observed that a monotonically decreasing learning rate, whose
    degree of change is carefully chosen, results in a better performing model.
    This schedule applies a polynomial decay function to an optimizer step,
    given a provided `initial_learning_rate`, to reach an `end_learning_rate`
    in the given `decay_steps`.

    It requires a `step` value to compute the decayed learning rate. You
    can just pass a backend variable that you increment at each training
    step.

    The schedule is a 1-arg callable that produces a decayed learning rate
    when passed the current optimizer step. This can be useful for changing the
    learning rate value across different invocations of optimizer functions.
    It is computed as:

    ```python
    def decayed_learning_rate(step):
        step = min(step, decay_steps)
        return ((initial_learning_rate - end_learning_rate) *
                (1 - step / decay_steps) ^ (power)
               ) + end_learning_rate
    ```

    If `cycle` is `True`, then a multiple of `decay_steps` is used, the first
    one that is bigger than `step`.

    ```python
    def decayed_learning_rate(step):
        decay_steps = decay_steps * ceil(step / decay_steps)
        return ((initial_learning_rate - end_learning_rate) *
                (1 - step / decay_steps) ^ (power)
               ) + end_learning_rate
    ```

    You can pass this schedule directly into a `keras.optimizers.Optimizer`
    as the learning rate.
    Example: Fit a model while decaying from 0.1 to 0.01 in 10000 steps using
    sqrt (i.e. power=0.5):

    ```python
    ...
    starter_learning_rate = 0.1
    end_learning_rate = 0.01
    decay_steps = 10000
    learning_rate_fn = keras.optimizers.schedules.PolynomialDecay(
        starter_learning_rate,
        decay_steps,
        end_learning_rate,
        power=0.5)

    model.compile(optimizer=keras.optimizers.SGD(
                      learning_rate=learning_rate_fn),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit(data, labels, epochs=5)
    ```

    The learning rate schedule is also serializable and deserializable using
    `keras.optimizers.schedules.serialize` and
    `keras.optimizers.schedules.deserialize`.

    Args:
        initial_learning_rate: A Python float. The initial learning rate.
        decay_steps: A Python integer. Must be positive. See the decay
            computation above.
        end_learning_rate: A Python float. The minimal end learning rate.
        power: A Python float. The power of the polynomial. Defaults to
            `1.0`.
        cycle: A boolean, whether it should cycle beyond decay_steps.
        name: String.  Optional name of the operation. Defaults to
            `"PolynomialDecay"`.

    Returns:
        A 1-arg callable learning rate schedule that takes the current optimizer
        step and outputs the decayed learning rate, a scalar tensor of the
        same type as `initial_learning_rate`.
    """

    def __init__(
        self,
        initial_learning_rate,
        decay_steps,
        end_learning_rate=0.0001,
        power=1.0,
        cycle=False,
        name="PolynomialDecay",
    ):
        super().__init__()

        self.initial_learning_rate = initial_learning_rate
        self.decay_steps = decay_steps
        self.end_learning_rate = end_learning_rate
        self.power = power
        self.cycle = cycle
        self.name = name

        if self.decay_steps <= 0:
            raise ValueError(
                "Argument `decay_steps` must be > 0. "
                f"Received: decay_steps={self.decay_steps}"
            )

    def __call__(self, step):
        with ops.name_scope(self.name):
            initial_learning_rate = ops.convert_to_tensor(
                self.initial_learning_rate
            )
            dtype = initial_learning_rate.dtype
            end_learning_rate = ops.cast(self.end_learning_rate, dtype)
            power = ops.cast(self.power, dtype)

            global_step_recomp = ops.cast(step, dtype)
            decay_steps_recomp = ops.cast(self.decay_steps, dtype)
            if self.cycle:
                # Find the first multiple of decay_steps that is bigger than
                # the step. If the step is zero, set the multiplier to 1.
                multiplier = ops.where(
                    ops.equal(global_step_recomp, 0),
                    1.0,
                    ops.ceil(global_step_recomp / self.decay_steps),
                )
                decay_steps_recomp = ops.multiply(
                    decay_steps_recomp, multiplier
                )
            else:
                # Make sure the step used is not bigger than decay_steps.
                global_step_recomp = ops.minimum(
                    global_step_recomp, decay_steps_recomp
                )

            p = ops.divide(global_step_recomp, decay_steps_recomp)
            return ops.add(
                ops.multiply(
                    initial_learning_rate - end_learning_rate,
                    ops.power(1 - p, power),
                ),
                end_learning_rate,
            )

    def get_config(self):
        return {
            "initial_learning_rate": self.initial_learning_rate,
            "decay_steps": self.decay_steps,
            "end_learning_rate": self.end_learning_rate,
            "power": self.power,
            "cycle": self.cycle,
            "name": self.name,
        }


@keras_export("keras.optimizers.schedules.InverseTimeDecay")
class InverseTimeDecay(LearningRateSchedule):
    """A `LearningRateSchedule` that uses an inverse time decay schedule.

    When training a model, it is often useful to lower the learning rate as
    the training progresses. This schedule applies the inverse decay function
    to an optimizer step, given a provided initial learning rate.
    It requires a `step` value to compute the decayed learning rate. You can
    just pass a backend variable that you increment at each training step.

    The schedule is a 1-arg callable that produces a decayed learning
    rate when passed the current optimizer step. This can be useful for changing
    the learning rate value across different invocations of optimizer functions.
    It is computed as:

    ```python
    def decayed_learning_rate(step):
        return initial_learning_rate / (1 + decay_rate * step / decay_step)
    ```

    or, if `staircase` is `True`, as:

    ```python
    def decayed_learning_rate(step):
        return initial_learning_rate /
               (1 + decay_rate * floor(step / decay_step))
    ```

    You can pass this schedule directly into a `keras.optimizers.Optimizer`
    as the learning rate.
    Example: Fit a Keras model when decaying 1/t with a rate of 0.5:

    ```python
    ...
    initial_learning_rate = 0.1
    decay_steps = 1.0
    decay_rate = 0.5
    learning_rate_fn = keras.optimizers.schedules.InverseTimeDecay(
        initial_learning_rate, decay_steps, decay_rate)

    model.compile(optimizer=keras.optimizers.SGD(
                      learning_rate=learning_rate_fn),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit(data, labels, epochs=5)
    ```

    Args:
        initial_learning_rate: A Python float. The initial learning rate.
        decay_steps: How often to apply decay.
        decay_rate: A Python number.  The decay rate.
        staircase: Whether to apply decay in a discrete staircase, as
            opposed to continuous, fashion.
        name: String.  Optional name of the operation.  Defaults to
            `"InverseTimeDecay"`.

    Returns:
        A 1-arg callable learning rate schedule that takes the current optimizer
        step and outputs the decayed learning rate, a scalar tensor of the
        same type as `initial_learning_rate`.
    """

    def __init__(
        self,
        initial_learning_rate,
        decay_steps,
        decay_rate,
        staircase=False,
        name="InverseTimeDecay",
    ):
        super().__init__()

        self.initial_learning_rate = initial_learning_rate
        self.decay_steps = decay_steps
        self.decay_rate = decay_rate
        self.staircase = staircase
        self.name = name

        if self.decay_steps <= 0:
            raise ValueError(
                "Argument `decay_steps` must be > 0. "
                f"Received: decay_steps={self.decay_steps}"
            )

    def __call__(self, step):
        with ops.name_scope(self.name):
            initial_learning_rate = ops.convert_to_tensor(
                self.initial_learning_rate
            )
            dtype = initial_learning_rate.dtype
            decay_steps = ops.cast(self.decay_steps, dtype)
            decay_rate = ops.cast(self.decay_rate, dtype)

            global_step_recomp = ops.cast(step, dtype)
            p = global_step_recomp / decay_steps
            if self.staircase:
                p = ops.floor(p)
            const = ops.cast(ops.array(1), dtype)
            denom = ops.add(const, ops.multiply(decay_rate, p))
            return ops.divide(initial_learning_rate, denom)

    def get_config(self):
        return {
            "initial_learning_rate": self.initial_learning_rate,
            "decay_steps": self.decay_steps,
            "decay_rate": self.decay_rate,
            "staircase": self.staircase,
            "name": self.name,
        }


@keras_export("keras.optimizers.schedules.CosineDecay")
class CosineDecay(LearningRateSchedule):
    """A `LearningRateSchedule` that uses a cosine decay with optional warmup.

    See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983),
    SGDR: Stochastic Gradient Descent with Warm Restarts.

    For the idea of a linear warmup of our learning rate,
    see [Goyal et al.](https://arxiv.org/pdf/1706.02677.pdf).

    When we begin training a model, we often want an initial increase in our
    learning rate followed by a decay. If `warmup_target` is not `None`, this
    schedule applies a linear increase per optimizer step to our learning rate
    from `initial_learning_rate` to `warmup_target` for a duration of
    `warmup_steps`. Afterwards, it applies a cosine decay function taking our
    learning rate from `warmup_target` to `alpha` for a duration of
    `decay_steps`. If `warmup_target` is None we skip warmup and our decay
    will take our learning rate from `initial_learning_rate` to `alpha`.
    It requires a `step` value to compute the learning rate. You can
    just pass a backend variable that you increment at each training step.

    The schedule is a 1-arg callable that produces a warmup followed by a
    decayed learning rate when passed the current optimizer step. This can be
    useful for changing the learning rate value across different invocations of
    optimizer functions.

    Our warmup is computed as:

    ```python
    def warmup_learning_rate(step):
        completed_fraction = step / warmup_steps
        total_delta = warmup_target - initial_learning_rate
        return initial_learning_rate + completed_fraction * total_delta
    ```

    And our decay is computed as:

    ```python
    if warmup_target is None:
        initial_decay_lr = initial_learning_rate
    else:
        initial_decay_lr = warmup_target

    def decayed_learning_rate(step):
        step = min(step, decay_steps)
        cosine_decay = 0.5 * (1 + cos(pi * step / decay_steps))
        decayed = (1 - alpha) * cosine_decay + alpha
        return initial_decay_lr * decayed
    ```

    Example usage without warmup:

    ```python
    decay_steps = 1000
    initial_learning_rate = 0.1
    lr_decayed_fn = keras.optimizers.schedules.CosineDecay(
        initial_learning_rate, decay_steps)
    ```

    Example usage with warmup:

    ```python
    decay_steps = 1000
    initial_learning_rate = 0
    warmup_steps = 1000
    target_learning_rate = 0.1
    lr_warmup_decayed_fn = keras.optimizers.schedules.CosineDecay(
        initial_learning_rate, decay_steps, warmup_target=target_learning_rate,
        warmup_steps=warmup_steps
    )
    ```

    You can pass this schedule directly into a `keras.optimizers.Optimizer`
    as the learning rate. The learning rate schedule is also serializable and
    deserializable using `keras.optimizers.schedules.serialize` and
    `keras.optimizers.schedules.deserialize`.

    Args:
        initial_learning_rate: A Python float. The initial learning rate.
        decay_steps: A Python int. Number of steps to decay over.
        alpha: A Python float. Minimum learning rate value for decay as a
            fraction of `initial_learning_rate`.
        name: String. Optional name of the operation.  Defaults to
            `"CosineDecay"`.
        warmup_target: A Python float. The target learning rate for our
            warmup phase. Will be cast to the `initial_learning_rate` dtype.
            Setting to `None` will skip warmup and begin the decay phase from
            `initial_learning_rate`. Otherwise, the schedule will warm up from
            `initial_learning_rate` to `warmup_target`.
        warmup_steps: A Python int. Number of steps to warmup over.

    Returns:
        A 1-arg callable learning rate schedule that takes the current optimizer
        step and outputs the decayed learning rate, a scalar tensor of the
        same type as `initial_learning_rate`.
    """

    def __init__(
        self,
        initial_learning_rate,
        decay_steps,
        alpha=0.0,
        name="CosineDecay",
        warmup_target=None,
        warmup_steps=0,
    ):
        super().__init__()

        self.initial_learning_rate = initial_learning_rate
        self.decay_steps = decay_steps
        self.alpha = alpha
        self.name = name
        self.warmup_steps = warmup_steps
        self.warmup_target = warmup_target

        if self.decay_steps <= 0:
            raise ValueError(
                "Argument `decay_steps` must be > 0. "
                f"Received: decay_steps={self.decay_steps}"
            )

    def _decay_function(self, step, decay_steps, decay_from_lr, dtype):
        with ops.name_scope(self.name):
            completed_fraction = step / decay_steps
            pi = ops.array(math.pi, dtype=dtype)
            cosine_decayed = 0.5 * (1.0 + ops.cos(pi * completed_fraction))
            decayed = (1 - self.alpha) * cosine_decayed + self.alpha
            return ops.multiply(decay_from_lr, decayed)

    def _warmup_function(
        self, step, warmup_steps, warmup_target, initial_learning_rate
    ):
        with ops.name_scope(self.name):
            completed_fraction = step / warmup_steps
            total_step_delta = warmup_target - initial_learning_rate
            return (
                total_step_delta * completed_fraction + initial_learning_rate
            )

    def __call__(self, step):
        with ops.name_scope(self.name):
            initial_learning_rate = ops.convert_to_tensor(
                self.initial_learning_rate
            )
            dtype = initial_learning_rate.dtype
            decay_steps = ops.cast(self.decay_steps, dtype)
            global_step_recomp = ops.cast(step, dtype)

            if self.warmup_target is None:
                # No warmup: plain cosine decay from initial_learning_rate.
                global_step_recomp = ops.minimum(
                    global_step_recomp, decay_steps
                )
                return self._decay_function(
                    global_step_recomp,
                    decay_steps,
                    initial_learning_rate,
                    dtype,
                )

            warmup_target = ops.cast(self.warmup_target, dtype)
            warmup_steps = ops.cast(self.warmup_steps, dtype)

            global_step_recomp = ops.minimum(
                global_step_recomp, decay_steps + warmup_steps
            )

            return ops.cond(
                global_step_recomp < warmup_steps,
                lambda: self._warmup_function(
                    global_step_recomp,
                    warmup_steps,
                    warmup_target,
                    initial_learning_rate,
                ),
                lambda: self._decay_function(
                    global_step_recomp - warmup_steps,
                    decay_steps,
                    warmup_target,
                    dtype,
                ),
            )

    def get_config(self):
        return {
            "initial_learning_rate": self.initial_learning_rate,
            "decay_steps": self.decay_steps,
            "alpha": self.alpha,
            "name": self.name,
            "warmup_target": self.warmup_target,
            "warmup_steps": self.warmup_steps,
        }


@keras_export("keras.optimizers.schedules.CosineDecayRestarts")
class CosineDecayRestarts(LearningRateSchedule):
    """A `LearningRateSchedule` that uses a cosine decay schedule with restarts.

    See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983),
    SGDR: Stochastic Gradient Descent with Warm Restarts.

    When training a model, it is often useful to lower the learning rate as
    the training progresses. This schedule applies a cosine decay function with
    restarts to an optimizer step, given a provided initial learning rate.
    It requires a `step` value to compute the decayed learning rate. You can
    just pass a backend variable that you increment at each training step.

    The schedule is a 1-arg callable that produces a decayed learning
    rate when passed the current optimizer step. This can be useful for changing
    the learning rate value across different invocations of optimizer functions.

    The learning rate multiplier first decays
    from 1 to `alpha` for `first_decay_steps` steps. Then, a warm
    restart is performed. Each new warm restart runs for `t_mul` times more
    steps and with `m_mul` times initial learning rate as the new learning rate.

    Example:
    ```python
    first_decay_steps = 1000
    lr_decayed_fn = (
        keras.optimizers.schedules.CosineDecayRestarts(
            initial_learning_rate,
            first_decay_steps))
    ```
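
    As an illustration of the restart arithmetic (hypothetical values): with
    `first_decay_steps=1000`, `t_mul=2.0` and `m_mul=0.5`, the decay periods
    last 1000, 2000, 4000, ... steps, and each period restarts at 1.0, 0.5,
    0.25, ... times the initial learning rate:

    ```python
    lr_fn = keras.optimizers.schedules.CosineDecayRestarts(
        initial_learning_rate=0.1,
        first_decay_steps=1000,
        t_mul=2.0,
        m_mul=0.5,
    )
    lr_fn(0)     # ~0.1   (start of the first period)
    lr_fn(1000)  # ~0.05  (first restart: 0.5 * 0.1)
    lr_fn(3000)  # ~0.025 (second restart: 0.25 * 0.1)
    ```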

    You can pass this schedule directly into a `keras.optimizers.Optimizer`
    as the learning rate. The learning rate schedule is also serializable and
    deserializable using `keras.optimizers.schedules.serialize` and
    `keras.optimizers.schedules.deserialize`.

    Args:
        initial_learning_rate: A Python float. The initial learning rate.
        first_decay_steps: A Python integer. Number of steps to decay over.
        t_mul: A Python float. Used to derive the number of iterations in
            the i-th period.
        m_mul: A Python float. Used to derive the initial learning rate of
            the i-th period.
        alpha: A Python float. Minimum learning rate value as a fraction of
            the `initial_learning_rate`.
        name: String. Optional name of the operation. Defaults to
            `"SGDRDecay"`.

    Returns:
        A 1-arg callable learning rate schedule that takes the current optimizer
        step and outputs the decayed learning rate, a scalar tensor of the
        same type as `initial_learning_rate`.
    """

    def __init__(
        self,
        initial_learning_rate,
        first_decay_steps,
        t_mul=2.0,
        m_mul=1.0,
        alpha=0.0,
        name="SGDRDecay",
    ):
        super().__init__()

        self.initial_learning_rate = initial_learning_rate
        self.first_decay_steps = first_decay_steps
        self._t_mul = t_mul
        self._m_mul = m_mul
        self.alpha = alpha
        self.name = name

        if self.first_decay_steps <= 0:
            raise ValueError(
                "Argument `first_decay_steps` must be > 0. "
                f"Received: first_decay_steps={self.first_decay_steps}"
            )

    def __call__(self, step):
        with ops.name_scope(self.name):
            initial_learning_rate = ops.convert_to_tensor(
                self.initial_learning_rate
            )
            dtype = initial_learning_rate.dtype
            first_decay_steps = ops.cast(self.first_decay_steps, dtype)
            alpha = ops.cast(self.alpha, dtype)
            t_mul = ops.cast(self._t_mul, dtype)
            m_mul = ops.cast(self._m_mul, dtype)

            global_step_recomp = ops.cast(step, dtype)
            completed_fraction = global_step_recomp / first_decay_steps

            def compute_step(completed_fraction, geometric=False):
                """Helper for `cond` operation."""
                if geometric:
                    # Periods grow geometrically by a factor of t_mul.
                    i_restart = ops.floor(
                        ops.log(
                            ops.cast(
                                1.0 - completed_fraction * (1.0 - t_mul),
                                dtype,
                            )
                        )
                        / ops.log(t_mul)
                    )
                    sum_r = (1.0 - t_mul**i_restart) / (1.0 - t_mul)
                    completed_fraction = (
                        completed_fraction - sum_r
                    ) / t_mul**i_restart
                else:
                    # All periods have the same length (t_mul == 1).
                    i_restart = ops.floor(completed_fraction)
                    completed_fraction -= i_restart

                return i_restart, completed_fraction

            i_restart, completed_fraction = ops.cond(
                ops.equal(t_mul, 1.0),
                lambda: compute_step(completed_fraction, geometric=False),
                lambda: compute_step(completed_fraction, geometric=True),
            )

            m_fac = m_mul**i_restart
            cosine_decayed = (
                0.5
                * m_fac
                * (
                    1.0
                    + ops.cos(
                        ops.array(math.pi, dtype=dtype) * completed_fraction
                    )
                )
            )
            decayed = (1 - alpha) * cosine_decayed + alpha

            return ops.multiply(initial_learning_rate, decayed)

    def get_config(self):
        return {
            "initial_learning_rate": self.initial_learning_rate,
            "first_decay_steps": self.first_decay_steps,
            "t_mul": self._t_mul,
            "m_mul": self._m_mul,
            "alpha": self.alpha,
            "name": self.name,
        }


@keras_export("keras.optimizers.schedules.serialize")
def serialize(learning_rate_schedule):
    """Serializes a `LearningRateSchedule` into a JSON-compatible dict.

    Args:
        learning_rate_schedule: The `LearningRateSchedule` object to serialize.

    Returns:
        A JSON-serializable dict representing the object's config.

    Example:

    >>> lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    ...     0.1, decay_steps=100000, decay_rate=0.96, staircase=True)
    >>> keras.optimizers.schedules.serialize(lr_schedule)
    {'module': 'keras.optimizers.schedules',
    'class_name': 'ExponentialDecay', 'config': {...},
    'registered_name': None}
    """
    return serialization_lib.serialize_keras_object(learning_rate_schedule)


@keras_export("keras.optimizers.schedules.deserialize")
def deserialize(config, custom_objects=None):
    """Instantiates a `LearningRateSchedule` object from a serialized form.

    Args:
        config: The serialized form of the `LearningRateSchedule`. Dictionary of
            the form {'class_name': str, 'config': dict}.
        custom_objects: A dictionary mapping class names (or function names) of
            custom (non-Keras) objects to classes/functions.

    Returns:
        A `LearningRateSchedule` object.

    Example:

    ```python
    # Configuration for PolynomialDecay
    config = {
        'class_name': 'PolynomialDecay',
        'config': {'cycle': False,
            'decay_steps': 10000,
            'end_learning_rate': 0.01,
            'initial_learning_rate': 0.1,
            'name': None,
            'power': 0.5
        }
    }
    lr_schedule = keras.optimizers.schedules.deserialize(config)
    ```
    """
    return serialization_lib.deserialize_keras_object(
        config,
        module_objects=globals(),
        custom_objects=custom_objects,
        printable_module_name="decay",
    )
+ h
 =h
V ABq
1 q
 Cq
h :;X
* X
 <X
v ;<l
+ l
 =l
^ 67{
& {
 8{
| >?Q
. Q
 @Q
h 45L 6L* 67" 8"r   