
from keras.src import backend
from keras.src import initializers
from keras.src import ops
from keras.src.api_export import keras_export
from keras.src.optimizers import optimizer
from keras.src.saving import serialization_lib
from keras.src.utils import tracking


@keras_export(
    [
        "keras.optimizers.LossScaleOptimizer",
        "keras.mixed_precision.LossScaleOptimizer",
    ]
)
class LossScaleOptimizer(optimizer.Optimizer):
    """An optimizer that dynamically scales the loss to prevent underflow.

    Loss scaling is a technique to prevent numeric underflow in intermediate
    gradients when float16 is used. To prevent underflow, the loss is multiplied
    (or "scaled") by a certain factor called the "loss scale", which causes
    intermediate gradients to be scaled by the loss scale as well. The final
    gradients are divided (or "unscaled") by the loss scale to bring them back
    to their original value.
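    For example, with a loss scale of 1024, a true gradient of `1e-7` is
    carried through backpropagation as `1.024e-4` (comfortably within
    float16 range) and divided by 1024 afterwards to recover `1e-7`.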

    `LossScaleOptimizer` wraps another optimizer and applies dynamic loss
    scaling to it. This loss scale is dynamically updated over time as follows:
    - On any train step, if a nonfinite gradient is encountered, the loss scale
      is halved, and the train step is skipped.
    - If `dynamic_growth_steps` have occurred since the last time the loss scale
      was updated, and no nonfinite gradients have occurred, the loss scale
      is doubled.
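
    Example:

    A minimal usage sketch (the wrapped `keras.optimizers.SGD` and its
    learning rate are illustrative choices only):

    ```python
    inner_optimizer = keras.optimizers.SGD(learning_rate=0.01)
    optimizer = keras.optimizers.LossScaleOptimizer(inner_optimizer)
    ```

    In a custom training step, scale the loss with `optimizer.scale_loss(loss)`
    before computing gradients; `apply()` and `stateless_apply()` unscale the
    gradients again before handing them to the wrapped optimizer.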

    Args:
        inner_optimizer: The `keras.optimizers.Optimizer` instance to wrap.
        initial_scale: Float. The initial loss scale. This scale will be updated
            during training. It is recommended for this to be a very high
            number, because a loss scale that is too high gets lowered far more
            quickly than a loss scale that is too low gets raised.
        dynamic_growth_steps: Int. How often to update the scale upwards. After
            every `dynamic_growth_steps` steps with finite gradients, the
            loss scale is doubled.
        {{base_optimizer_keyword_args}}
    """

    def __init__(
        self,
        inner_optimizer,
        initial_scale=2.0**15,
        dynamic_growth_steps=2000,
        **kwargs,
    ):
        if not kwargs.pop("dynamic", True):
            raise ValueError(
                "LossScaleOptimizer no longer supports `dynamic=False`. "
                "Instead, simply set `loss_scale_factor` directly on the "
                "`inner_optimizer`."
            )
        super().__init__(learning_rate=0.0, **kwargs)
        self.inner_optimizer = inner_optimizer
        self.initial_scale = initial_scale
        self.dynamic_growth_steps = dynamic_growth_steps
        # The wrapper manages the scale itself; make sure the inner optimizer
        # does not apply a second loss scale factor.
        self.inner_optimizer.loss_scale_factor = None

    @tracking.no_automatic_dependency_tracking
    def build(self, var_list):
        # Counts consecutive steps with finite gradients since the last
        # change of the loss scale.
        self.step_counter = self.add_variable(
            shape=(),
            dtype="int",
            initializer=initializers.Zeros(),
            aggregation="none",
            name="step_counter",
        )
        # The current loss scale.
        self.dynamic_scale = self.add_variable(
            shape=(),
            dtype="float32",
            initializer=initializers.Constant(self.initial_scale),
            aggregation="none",
            name="dynamic_scale",
        )
        self.inner_optimizer.build(var_list)
        self.built = True

    @property
    def variables(self):
        return self._variables + self.inner_optimizer.variables

    def stateless_apply(self, optimizer_variables, grads, trainable_variables):
        if not self.built:
            raise ValueError(
                f"To call `stateless_apply`, {self.__class__.__name__} "
                "must be built (i.e. its variables must have been created). "
                "You can build it via `optimizer.build(trainable_variables)`."
            )
        finite = self.check_finite(grads)
        return ops.cond(
            finite,
            lambda: self._stateless_handle_finite_grads(
                optimizer_variables, grads, trainable_variables
            ),
            lambda: self._stateless_handle_non_finite_grads(
                optimizer_variables, trainable_variables
            ),
        )

    def _stateless_handle_finite_grads(
        self, optimizer_variables, grads, trainable_variables
    ):
        def upscale():
            mapping = list(zip(self.variables, optimizer_variables))
            with backend.StatelessScope(state_mapping=mapping) as scope:
                self.step_counter.assign(0)
                self.dynamic_scale.assign(self.dynamic_scale * 2.0)
            return [scope.get_current_value(v) for v in self._variables]

        def increment():
            mapping = list(zip(self.variables, optimizer_variables))
            with backend.StatelessScope(state_mapping=mapping) as scope:
                self.step_counter.assign_add(1)
            return [scope.get_current_value(v) for v in self._variables]

        mapping = list(zip(self.variables, optimizer_variables))
        with backend.StatelessScope(state_mapping=mapping):
            # Potentially double the scale after `dynamic_growth_steps`
            # finite steps, otherwise just count the step.
            own_variables = ops.cond(
                ops.equal(self.step_counter, self.dynamic_growth_steps - 1),
                upscale,
                increment,
            )
            # Unscale gradients before handing them to the inner optimizer.
            scale = self.dynamic_scale
            unscaled_grads = [
                g
                if self._overwrite_variable_with_gradient(v)
                else ops.divide(g, scale)
                for g, v in zip(grads, trainable_variables)
            ]
            (
                new_trainable_variables,
                new_inner_variables,
            ) = self.inner_optimizer.stateless_apply(
                self.inner_optimizer.variables,
                unscaled_grads,
                trainable_variables,
            )

        new_optimizer_variables = own_variables + new_inner_variables
        return new_trainable_variables, new_optimizer_variables

    def _stateless_handle_non_finite_grads(
        self, optimizer_variables, trainable_variables
    ):
        # A nonfinite gradient was found: halve the scale, reset the counter,
        # and skip the update of the trainable variables.
        mapping = list(zip(self.variables, optimizer_variables))
        with backend.StatelessScope(state_mapping=mapping) as scope:
            self.step_counter.assign(0)
            self.dynamic_scale.assign(self.dynamic_scale / 2.0)
        new_optimizer_variables = []
        for v in self.variables:
            new_optimizer_variables.append(scope.get_current_value(v))
        return trainable_variables, new_optimizer_variables

    def apply(self, grads, trainable_variables=None):
        if not self.built:
            with backend.name_scope(self.name, caller=self):
                self.build(trainable_variables)
            self.built = True
        if backend.backend() == "tensorflow":
            self._tf_apply(grads, trainable_variables)
        else:
            self._common_apply(grads, trainable_variables)

    def _stateful_handle_finite_grads(self, grads, trainable_variables):
        scale = self.dynamic_scale
        # Unscale gradients before handing them to the inner optimizer.
        tvs = trainable_variables or self._trainable_variables
        unscaled_grads = [
            g
            if self._overwrite_variable_with_gradient(v)
            else ops.divide(g, scale)
            for g, v in zip(grads, tvs)
        ]
        self.inner_optimizer.apply(
            unscaled_grads, trainable_variables=trainable_variables
        )

        def upscale():
            self.step_counter.assign(0)
            self.dynamic_scale.assign(self.dynamic_scale * 2.0)

        def increment():
            self.step_counter.assign_add(1)

        # Potentially double the scale after `dynamic_growth_steps` finite
        # steps, otherwise just count the step.
        ops.cond(
            ops.equal(self.step_counter, self.dynamic_growth_steps - 1),
            upscale,
            increment,
        )

    def _stateful_handle_non_finite_grads(self):
        # A nonfinite gradient was found: halve the scale, reset the counter,
        # and skip the update of the trainable variables.
        self.step_counter.assign(0)
        self.dynamic_scale.assign(self.dynamic_scale / 2.0)

    def _common_apply(self, grads, trainable_variables=None):
        finite = self.check_finite(grads)
        ops.cond(
            finite,
            lambda: self._stateful_handle_finite_grads(
                grads, trainable_variables
            ),
            self._stateful_handle_non_finite_grads,
        )

    def _tf_apply(self, grads, trainable_variables=None):
        """Tensorflow specific logic for apply, which handles distribution."""
        from keras.src.utils.module_utils import tensorflow as tf

        if tf.distribute.in_cross_replica_context():
            raise ValueError("apply() must be called in a replica context.")

        if tf.__internal__.distribute.strategy_supports_no_merge_call():
            self._common_apply(grads, trainable_variables)
            return

        def _handle_cross_replica(distribution, grads, trainable_variables):
            # `check_finite` returns the same value on every replica, so
            # arbitrarily take the result from the first one.
            finite_per_replica = distribution.extended.call_for_each_replica(
                self.check_finite, args=(grads,)
            )
            finite = distribution.experimental_local_results(
                finite_per_replica
            )[0]

            def apply_fn():
                distribution.extended.call_for_each_replica(
                    self._stateful_handle_finite_grads,
                    args=(grads, trainable_variables),
                )

            # The cond must run in cross-replica context, because the finite
            # branch ends up issuing a merge_call itself.
            ops.cond(
                finite, apply_fn, self._stateful_handle_non_finite_grads
            )

        tf.distribute.get_replica_context().merge_call(
            _handle_cross_replica, args=(grads, trainable_variables)
        )

    def check_finite(self, grads):
        tensor_grads = [g for g in grads if g is not None]
        finite_grads = [ops.all(ops.isfinite(g)) for g in tensor_grads]
        return ops.all(ops.convert_to_tensor(finite_grads))

    @property
    def learning_rate(self):
        return self.inner_optimizer.learning_rate

    @learning_rate.setter
    def learning_rate(self, learning_rate):
        self.inner_optimizer.learning_rate = learning_rate

    @property
    def iterations(self):
        return self.inner_optimizer.iterations

    def scale_loss(self, loss):
        scale = self.dynamic_scale if self.built else self.initial_scale
        return loss * scale

    def finalize_variable_values(self, var_list):
        self.inner_optimizer.finalize_variable_values(var_list)

    def get_config(self):
        config = super().get_config()
        inner_optimizer_config = serialization_lib.serialize_keras_object(
            self.inner_optimizer
        )
        config.update(
            {
                "inner_optimizer": inner_optimizer_config,
                "initial_scale": self.initial_scale,
                "dynamic_growth_steps": self.dynamic_growth_steps,
            }
        )
        del config["learning_rate"]
        return config

    @classmethod
    def from_config(cls, config, custom_objects=None):
        inner_optimizer = serialization_lib.deserialize_keras_object(
            config.pop("inner_optimizer"), custom_objects=custom_objects
        )
        return cls(inner_optimizer, **config)


LossScaleOptimizer.__doc__ = LossScaleOptimizer.__doc__.replace(
    "{{base_optimizer_keyword_args}}", optimizer.base_optimizer_keyword_args
)