
    BVh*                     n    d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ  G d dej                        Z	y	)
z'Ftrl-proximal optimizer implementation.    )optimizer_v2)	array_ops)gen_training_ops)init_ops)math_opsc                   ^     e Zd ZdZ	 	 	 	 	 	 	 	 d fd	Zd Z fdZd	dZd	dZ fdZ	 xZ
S )
Ftrla
  Optimizer that implements the FTRL algorithm.

  "Follow The Regularized Leader" (FTRL) is an optimization algorithm developed
  at Google for click-through rate prediction in the early 2010s. It is most
  suitable for shallow models with large and sparse feature spaces.
  The algorithm is described by
  [McMahan et al., 2013](https://research.google.com/pubs/archive/41159.pdf).
  The Keras version has support for both online L2 regularization
  (the L2 regularization described in the paper
  above) and shrinkage-type L2 regularization
  (which is the addition of an L2 penalty to the loss function).

  Initialization:

  ```python
  n = 0
  sigma = 0
  z = 0
  ```

  Update rule for one variable `w`:

  ```python
  prev_n = n
  n = n + g ** 2
  sigma = (sqrt(n) - sqrt(prev_n)) / lr
  z = z + g - sigma * w
  if abs(z) < lambda_1:
    w = 0
  else:
    w = (sgn(z) * lambda_1 - z) / ((beta + sqrt(n)) / alpha + lambda_2)
  ```

  Notation:

  - `lr` is the learning rate
  - `g` is the gradient for the variable
  - `lambda_1` is the L1 regularization strength
  - `lambda_2` is the L2 regularization strength

  Check the documentation for the `l2_shrinkage_regularization_strength`
  parameter for more details when shrinkage is enabled, in which case gradient
  is replaced with a gradient with shrinkage.

  Args:
    learning_rate: A `Tensor`, floating point value, or a schedule that is a
      `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
    learning_rate_power: A float value, must be less or equal to zero.
      Controls how the learning rate decreases during training. Use zero for
      a fixed learning rate.
    initial_accumulator_value: The starting value for accumulators.
      Only zero or positive values are allowed.
    l1_regularization_strength: A float value, must be greater than or
      equal to zero. Defaults to 0.0.
    l2_regularization_strength: A float value, must be greater than or
      equal to zero. Defaults to 0.0.
    name: Optional name prefix for the operations created when applying
      gradients.  Defaults to `"Ftrl"`.
    l2_shrinkage_regularization_strength: A float value, must be greater than
      or equal to zero. This differs from L2 above in that the L2 above is a
      stabilization penalty, whereas this L2 shrinkage is a magnitude penalty.
      When input is sparse shrinkage will only happen on the active weights.
    beta: A float value, representing the beta value from the paper.
      Defaults to 0.0.
    **kwargs: Keyword arguments. Allowed to be one of
      `"clipnorm"` or `"clipvalue"`.
      `"clipnorm"` (float) clips gradients by norm; `"clipvalue"` (float) clips
      gradients by value.

  Reference:
    - [McMahan et al., 2013](
      https://research.google.com/pubs/archive/41159.pdf)
  c	                    t        t        | 
  |fi |	 |dk  rt        d|z        |dkD  rt        d|z        |dk  rt        d|z        |dk  rt        d|z        |dk  rt        d|z        | j	                  d|       | j	                  d| j
                         | j	                  d	|       | j	                  d
|       | j	                  d|       | j	                  d|       || _        || _        y )N        z9initial_accumulator_value %f needs to be positive or zeroz3learning_rate_power %f needs to be negative or zeroz:l1_regularization_strength %f needs to be positive or zeroz:l2_regularization_strength %f needs to be positive or zerozDl2_shrinkage_regularization_strength %f needs to be positive or zerolearning_ratedecaylearning_rate_powerl1_regularization_strengthl2_regularization_strengthbeta)superr	   __init__
ValueError
_set_hyper_initial_decay_initial_accumulator_value%_l2_shrinkage_regularization_strength)selfr   r   initial_accumulator_valuer   r   name$l2_shrinkage_regularization_strengthr   kwargs	__class__s             Y/home/dcms/DCMS/lib/python3.12/site-packages/tensorflow/python/keras/optimizer_v2/ftrl.pyr   zFtrl.__init__d   s6    
$t.v. 3&
E
#$% % S L*+ , ,!C'
F
$%& & "C'
F
$%& & ,c1;<= = 	OOO]3OOGT001OO)+>?OO02LMOO02LMOOFD!&?D#, 	.    c                     |D ]^  }|j                   j                  }t        j                  | j                  |      }| j                  |d|       | j                  |d       ` y )N)dtypeaccumulatorlinear)r"   
base_dtyper   constant_initializerr   add_slot)r   var_listvarr"   inits        r   _create_slotszFtrl._create_slots   sY     #ii""e**

)
)8d
mmC-
mmC"#r    c                    t         t        |   |||       |||f   j                  t	        t        j                  | j                  d|            t        j                  | j                  d|            t        j                  | j                  d|            t        j                  | j                  d|            t        j                  | j                  |                   y )Nr   r   r   r   )r   r   r   r   r   )r   r	   _prepare_localupdatedictr   identity
_get_hyperr   castr   )r   
var_device	var_dtypeapply_stater   s       r   r-   zFtrl._prepare_local   s    	$$ZKHY'(// ) 2 2 5yA!C'0'9'9 <iH(J'0'9'9 <iH(J##DOOFI$FG19::I2G		H
Ir    c                 n   |j                   |j                  j                  }}|xs i j                  ||f      xs | j	                  ||      }|d   |d   d|d   z  z  z   }| j                  |d      }| j                  |d      }	| j                  dk  rOt        j                  |j                  |j                  |	j                  ||d   |d   ||d	   | j                  
	      S t        j                  |j                  |j                  |	j                  ||d   |d   ||d   |d	   | j                  
      S )Nr   r          @lr_tr#   r$   r   r   r   )	r)   accumr$   gradlrl1l2lr_poweruse_lockingr   )
r)   r9   r$   r:   r;   r<   r=   l2_shrinkager>   r?   )devicer"   r%   get_fallback_apply_stateget_slotr   r   ResourceApplyFtrlhandle_use_lockingResourceApplyFtrlV2)
r   r:   r)   r5   r3   r4   coefficients#adjusted_l2_regularization_strengthr9   r$   s
             r   _resource_apply_densezFtrl._resource_apply_dense   sW   JJ		(<(<	J &B++Z,CD I11*iH  	12\&5I	l6"	"6$ 	$ ( MM#}-E]]3)F11S8//jj&!670 56''	) 	) 11jj&!670#$JK 56''
) 
)r    c                 r   |j                   |j                  j                  }}|xs i j                  ||f      xs | j	                  ||      }|d   |d   d|d   z  z  z   }| j                  |d      }	| j                  |d      }
| j                  dk  rPt        j                  |j                  |	j                  |
j                  |||d   |d   ||d	   | j                  

      S t        j                  |j                  |	j                  |
j                  |||d   |d   ||d   |d	   | j                        S )Nr   r   r7   r8   r#   r$   r   r   r   )
r)   r9   r$   r:   indicesr;   r<   r=   r>   r?   r   )r)   r9   r$   r:   rM   r;   r<   r=   r@   r>   r?   )rA   r"   r%   rB   rC   rD   r   r   ResourceSparseApplyFtrlrF   rG   ResourceSparseApplyFtrlV2)r   r:   r)   rM   r5   r3   r4   rI   rJ   r9   r$   s              r   _resource_apply_sparsezFtrl._resource_apply_sparse   s]   JJ		(<(<	J &B++Z,CD I11*iH  	12\&5I	l6"	"6$ 	$ ( MM#}-E]]3)F11S855jj&!670 56''
) 
) 77jj&!670#$JK 56'') )r    c                 2   t         t        |          }|j                  | j	                  d      | j
                  | j                  | j	                  d      | j	                  d      | j	                  d      | j	                  d      | j                  d       |S )Nr   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r	   
get_configr.   _serialize_hyperparameterr   r   r   )r   configr   s     r   rR   zFtrl.get_config   s    4)+F
MM**?;++**+@A**+GH**+GH**6266! $ Mr    )gMbP?g      g?r   r   r	   r   r   )N)__name__
__module____qualname____doc__r   r+   r-   rK   rP   rR   __classcell__)r   s   @r   r	   r	      sK    HV ##'),*-*-47(.T#I$)L&)P r    r	   N)
rX   $tensorflow.python.keras.optimizer_v2r   tensorflow.python.opsr   r   r   r   OptimizerV2r	    r    r   <module>r^      s/    . > + 2 * *n<## nr    