
from collections.abc import Iterable
from typing import Any, no_type_check

import torch


__all__: list[str] = []

# Weak maps keyed by the parameter tensor, so storing hook handles and
# AccumulateGrad nodes here does not extend the parameter's lifetime.
param_to_optim_hook_handle_map = torch.utils.weak.WeakTensorKeyDictionary()
param_to_acc_grad_map = torch.utils.weak.WeakTensorKeyDictionary()


@no_type_check
def _apply_optimizer_in_backward(
    optimizer_class: type[torch.optim.Optimizer],
    params: Iterable[torch.nn.Parameter],
    optimizer_kwargs: dict[str, Any],
    register_hook: bool = True,
) -> None:
    """
    Upon ``backward()``, the optimizer specified for each parameter will fire after
    the gradient has been accumulated into the parameter.

    Note - gradients for these parameters will be set to None after ``backward()``.
    This means that any other optimizer not specified via `_apply_optimizer_in_backward`
    over this parameter will be a no-op.

    Args:
        optimizer_class: (Type[torch.optim.Optimizer]): Optimizer to apply to parameter
        params: (Iterable[torch.nn.Parameter]): parameters to apply optimizer state to
        optimizer_kwargs: (Dict[str, Any]): kwargs to pass to optimizer constructor
        register_hook: (bool): whether to register a hook that runs the optimizer
            after gradient for this parameter is accumulated. This is the default
            way that optimizer in backward is implemented, but specific use cases
            (such as DDP) may wish to override this to implement custom behavior.
            (Default = True)

    Example::
        params_generator = model.parameters()
        param_1 = next(params_generator)
        remainder_params = list(params_generator)

        _apply_optimizer_in_backward(torch.optim.SGD, [param_1], {"lr": 0.02})
        _apply_optimizer_in_backward(torch.optim.Adam, remainder_params, {"lr": 0.04})

        model(...).sum().backward()  # after backward, parameters will already
        # have their registered optimizer(s) applied.
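
        # Illustrative sketch of the resulting state (assuming the default
        # ``register_hook=True`` as in the calls above): each parameter's
        # in-backward optimizer has already stepped during ``backward()``, and
        # the hook then sets ``param.grad`` to None, so any conventional
        # optimizer over the same parameters has no gradient left to consume.
        assert all(p.grad is None for p in model.parameters())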

    """
    torch._C._log_api_usage_once("torch.distributed.optim.apply_optimizer_in_backward")

    @no_type_check
    def _apply_optimizer_in_backward_to_param(param: torch.nn.Parameter) -> None:
        # ``view_as`` creates a node in the autograd graph, which gives us a
        # handle on the parameter's AccumulateGrad function. A hook registered
        # on that node fires once the gradient has been accumulated into
        # ``param.grad``. Reuse an existing AccumulateGrad node for shared
        # parameters or when attaching multiple optimizers to one parameter.
        if param not in param_to_acc_grad_map:
            param_to_acc_grad_map[param] = param.view_as(param).grad_fn.next_functions[
                0
            ][0]

        optimizer = optimizer_class([param], **optimizer_kwargs)

        if not hasattr(param, "_in_backward_optimizers"):
            param._in_backward_optimizers = []
            param._optimizer_classes = []
            param._optimizer_kwargs = []

        param._in_backward_optimizers.append(optimizer)
        param._optimizer_classes.append(optimizer_class)
        param._optimizer_kwargs.append(optimizer_kwargs)

        if not register_hook:
            return

        def optimizer_hook(*_unused) -> None:
            # Step every optimizer registered for this parameter, then clear
            # the gradient so that other optimizers see nothing to apply.
            for opt in param._in_backward_optimizers:
                opt.step()

            param.grad = None

        handle = param_to_acc_grad_map[param].register_hook(optimizer_hook)
        if param not in param_to_optim_hook_handle_map:
            param_to_optim_hook_handle_map[param] = []
        param_to_optim_hook_handle_map[param].append(handle)

    for param in params:
        _apply_optimizer_in_backward_to_param(param)


def _get_in_backward_optimizers(
    module: torch.nn.Module,
) -> list[torch.optim.Optimizer]:
    """
    Return a list of in-backward optimizers applied to ``module``'s parameters. Note that the
    user is not expected to call ``step`` or ``zero_grad`` on these optimizers directly; they
    are exposed for purposes such as checkpointing.

    Args:
        module: (torch.nn.Module): model to retrieve in-backward optimizers for

    Returns:
        List[torch.optim.Optimizer]: the in-backward optimizers.

    Example::
        _apply_optimizer_in_backward(torch.optim.SGD, model.parameters(), {"lr": 0.01})
        optims = _get_in_backward_optimizers(model)
    """
    optims: list[torch.optim.Optimizer] = []
    for param in module.parameters():
        optims.extend(getattr(param, "_in_backward_optimizers", []))

    return optims