
    2Vh[$                     n    d Z ddlZddlZddlmZ ddlmZ ddlm	Z	  G d dee	j                        Zd Zy)	a	  A class for Tensorflow specific optimizer logic.

The major behavior change for this class is for tf.distribute.

It will override methods from base Keras core Optimizer,
which provide distribute specific functionality, e.g. variable
creation, loss reduction, etc.
    N)backend)KerasAutoTrackable)base_optimizerc                   v     e Zd Z fdZ	 d fd	Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd ZddZ xZS )TFOptimizerc                 j    t        |   |i | t        j                  j	                         | _        y N)super__init__tf
distributeget_strategy_distribution_strategy)selfargskwargs	__class__s      V/home/dcms/DCMS/lib/python3.12/site-packages/keras/src/backend/tensorflow/optimizer.pyr   zTFOptimizer.__init__   s*    $)&)&(mm&@&@&B#    c                     t        |t        j                        r|j                  }n|}| j                  j
                  j                  |      5  t        | !  |||      cd d d        S # 1 sw Y   y xY w)N)nameinitializer)	
isinstancer   Variablevaluer   extendedcolocate_vars_withr
   add_variable_from_reference)r   reference_variabler   r   colocate_varr   s        r   r   z'TFOptimizer.add_variable_from_reference   ss     ('*:*:;-33L-L((11DD
 	 76"; 7 	 	 	s   A,,A5c                     t        d      )Nzhstateless_apply is not supported with the TensorFlow backend (as it is incompatible with tf.distribute).)
ValueError)r   optimizer_variablesgradstrainable_variabless       r   stateless_applyzTFOptimizer.stateless_apply'   s     :
 	
r   c                 
   t        |t        j                        r|j                  }t	        j
                  ||j                        }t        |t        j                        r|j                  |       y |j                  |       y r	   )
r   r   r   r   r   castdtypeIndexedSlicesscatter_updateassignr   variabler   s      r   r,   zTFOptimizer.assign/   sY    h 0 01~~Hx~~.eR--.##E*OOE"r   c                 
   t        |t        j                        r|j                  }t	        j
                  ||j                        }t        |t        j                        r|j                  |       y |j                  |       y r	   )
r   r   r   r   r   r(   r)   r*   scatter_add
assign_addr-   s      r   r1   zTFOptimizer.assign_add8   [    h 0 01~~Hx~~.eR--.  '&r   c                 
   t        |t        j                        r|j                  }t	        j
                  ||j                        }t        |t        j                        r|j                  |       y |j                  |       y r	   )
r   r   r   r   r   r(   r)   r*   scatter_sub
assign_subr-   s      r   r5   zTFOptimizer.assign_subA   r2   r   c                 v   t        |t        j                        r|j                  }t	        |d      r|j                         }|j                  S t        |t        j                  j                        r<t	        |d      r0t	        |j                  d      r|j                  j                         }|j                  S )N_distributed_containerhandle)r   r   r   r   hasattrr7   r   __internal__CompositeTensorr8   
_unique_id)r   r.   s     r   _var_keyzTFOptimizer._var_keyJ   s    h 0 01~~H856668H """ x!@!@A(+)AB  ==?H"""r   c                       j                   y  fd}t        j                  j                  j                  j                  | j                  |       y )Nc                     fd}|D ]F  }t        |t        j                        r|j                  }| j                  j                  ||d       H y )Nc                     j                  |       rlt        j                  j                  | j                        }t        j                  j
                  | j                        }| j                  | |z  |z         y y r	   )_use_weight_decayr   r(   learning_rater)   weight_decayr5   )r.   lrwdr   s      r   weight_decay_fnz`TFOptimizer._apply_weight_decay.<locals>.distributed_apply_weight_decay.<locals>.weight_decay_fn^   sa    ))(3!3!3X^^DB!2!2HNNCB''2(:; 4r   F)group)r   r   r   r   r   update)distribution	variablesr   rF   r.   r   s        r   distributed_apply_weight_decayzGTFOptimizer._apply_weight_decay.<locals>.distributed_apply_weight_decay]   sR    < & h(8(89'~~H%%,,oU - r   )rC   r   r:   r   interimmaybe_merge_callr   )r   rJ   rK   s   `  r   _apply_weight_decayzTFOptimizer._apply_weight_decayY   sF    $	 	""**;;*''	
r   c                 X   |D cg c]*  }t        |t        j                        r|j                  n|, }}t	        t        ||            }| j                  |      }t        j                  j                  j                  j                  | j                  | j                  ||       y c c}w r	   )r   r   r   r   listzip_all_reduce_sum_gradientsr   r:   r   rL   rM   _distributed_tf_update_stepr   )r   r$   r%   rB   vgrads_and_varss         r   _backend_update_stepz TFOptimizer._backend_update_stepq   s     )
 "!W%5%56AGGA=
 
 c%)<=>77G
""**;;,,''		

s   /B'c                 f      fd}|D ]&  \  }}|j                   j                  ||||fd       ( y )Nc                 *    j                  || |      S r	   )update_step)vargradrB   r   s      r   apply_grad_to_update_varzITFOptimizer._distributed_tf_update_step.<locals>.apply_grad_to_update_var   s    ##D#}==r   Fr   rG   )r   rH   )r   rI   rU   rB   r\   r[   rZ   s   `      r   rS   z'TFOptimizer._distributed_tf_update_step   sG    	> ( 	ID#!!(((M*	 ) 	r   c                    t         j                  j                         }|s|S t        |      }t	        |      }|rc|D cg c]  }|d   	 }}t         j                  j                         j                  t         j                  j                  j                  |      }ng }g }d}|D ]6  \  }	}
|	|j                  d|
f       |j                  ||   |
f       |dz  }8 |t        |      k(  sJ d       |S c c}w )a  Returns all-reduced gradients aggregated via summation.

        Args:
            grads_and_vars: List of (gradient, variable) pairs.

        Returns:
            List of (gradient, variable) pairs
            where gradients have been all-reduced.
        r   N   zFailed to add all gradients)
r   r   get_replica_contextrP   filter_empty_gradients
all_reduceReduceOpSUMappendlen)r   rU   replica_contextfiltered_grads_and_varspairr$   reducedreduced_with_nonesreduced_posgrT   s              r   rR   z%TFOptimizer._all_reduce_sum_gradients   s    --;;=!!n-"8"H")@AT!WAEAmm779DD&&**EG G" 	!DAqy"))4)4"))7;+?*CDq 	! c'l*I,II*!!! Bs   C6c                    |D cg c]*  }t        |t        j                        r|j                  n|, }}t	        || j
                        D ]/  \  }}| j                  j                  j                  |d |f       1 yc c}w )zOverwrite model variables with their moving average values.

        This function overwrites variables on each device.

        Args:
          var_list: list of model variables.
        c                 $    | j                  |      S r	   r,   )abs     r   <lambda>zKTFOptimizer._overwrite_model_variables_with_average_value.<locals>.<lambda>   s    !((1+ r   )r   N)	r   r   r   r   rQ   _model_variables_moving_averager   r   rH   )r   r%   rT   rZ   average_vars        r   -_overwrite_model_variables_with_average_valuez9TFOptimizer._overwrite_model_variables_with_average_value   s     )
 "!W%5%56AGGA=
 

 !$!E!E!
 	C ''0077-[N 8 	
s   /A?c                     d |D cg c]  }|j                    }}fd}t        j                  j                  j                  j                  || j                  ||       y c c}w )Nc                 ,    | j                  | |z          y r	   rp   )rZ   r[   s     r   update_accumulatorzPTFOptimizer._backend_increment_gradient_accumulators.<locals>.update_accumulator   s    JJsTz"r   c                 n    t        ||      D ]%  \  }}| j                  j                  ||fd       ' y )NFr]   )rQ   r   rH   )rI   r$   accumulatorsr[   rZ   ry   s        r   "_distributed_tf_increment_grad_accz`TFOptimizer._backend_increment_gradient_accumulators.<locals>._distributed_tf_increment_grad_acc   sD     !5 	c%%,,+4' - r   )r   r   r:   r   rL   rM   r   )r   r$   	acc_gradsrT   r{   r|   ry   s         @r   (_backend_increment_gradient_accumulatorsz4TFOptimizer._backend_increment_gradient_accumulators   s]    	# *33A33	 	""**;;.''		
 4s   A$c                 D    t        j                  || j                  |      S r	   )r   clip_by_normclipnorm)r   valuesaxess      r   _clip_by_normzTFOptimizer._clip_by_norm   s     vt}}d;;r   )Nzerosr	   )__name__
__module____qualname__r   r   r&   r,   r1   r5   r=   rN   rV   rS   rR   rv   r~   r   __classcell__)r   s   @r   r   r      sQ    C
 :A
#''#
0
!"F,
*<r   r   c                    t        |       } | s| S g }g }| D ],  \  }}||j                  |       |j                  ||f       . t        |      }|s2| D cg c]  \  }}|j                   c}}f}t        d| d|  d      |r.t	        j
                  d|D cg c]  }|j                   c}       |S c c}}w c c}w )zDFilter out `(grad, var)` pairs that have a gradient equal to `None`.z(No gradients provided for any variable: z. Provided `grads_and_vars` is .zGradients do not exist for variables %s when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?)tuplere   r   r"   warningswarn)rU   filteredvars_with_empty_gradsr[   rZ   _rT   r.   s           r   ra   ra      s    >*NH# )	c<!((-OOT3K(	)
 XH(671QVV796xj A,,:+;1>
 	
 ) 44aff4		
 O 8 5s   B6B<
)__doc__r   
tensorflowr   	keras.srcr   &keras.src.backend.tensorflow.trackabler   keras.src.optimizersr   BaseOptimizerr   ra    r   r   <module>r      s8       E /K<$n&B&B K<\r   