
import inspect
import itertools
import logging
from typing import Optional

from torch._logging import warning_once
from torch._ops import HigherOrderOperator
from torch.types import _dtype

log = logging.getLogger(__name__)

# Each checkpointed region tagged by TagActivationCheckpoint gets its own id.
uid = itertools.count(1)


class Wrap(HigherOrderOperator):
    def __init__(self) -> None:
        super().__init__("wrap")

    def __call__(self, func, *args, **kwargs):
        # Dynamo has already traced the body of this higher-order op, so
        # disable Dynamo here to avoid tracing into the actual call.
        # Note: `import torch._dynamo` also binds `torch` locally for the
        # closures below.
        import torch._dynamo  # noqa: F401
        from torch._dynamo import disable

        @disable
        def wrapper():
            result = func(*args, **kwargs)
            return result

        return wrapper()


wrap = Wrap()


class WrapWithSetGradEnabled(HigherOrderOperator):
    def __init__(self) -> None:
        super().__init__("wrap_with_set_grad_enabled")

    def __call__(self, enable_grad, wrapped_func, *args, **kwargs):
        import torch._dynamo  # noqa: F401
        from torch._dynamo import disable

        @disable
        def wrapper():
            with torch.set_grad_enabled(enable_grad):
                return wrapped_func(*args, **kwargs)

        return wrapper()


wrap_with_set_grad_enabled = WrapWithSetGradEnabled()


class WrapWithAutocast(HigherOrderOperator):
    def __init__(self):
        super().__init__("wrap_with_autocast")

    def __call__(
        self,
        device_type: str,
        dtype: Optional[_dtype],
        enabled: bool,
        cache_enabled: Optional[bool],
        wrapped_func,
        *args,
        **kwargs,
    ):
        import torch._dynamo  # noqa: F401
        from torch._dynamo import disable

        @disable
        def wrapper():
            with torch.autocast(device_type, dtype, enabled, cache_enabled):
                return wrapped_func(*args, **kwargs)

        return wrapper()


wrap_with_autocast = WrapWithAutocast()


class WrapActivationCheckpoint(HigherOrderOperator):
    """
    This operator is used to wrap torch.utils.checkpoint. This avoids
    TorchDynamo to look into saved tensor hooks and directly passes the control
    to AOT Autograd, which is ok with tracing saved tensor hooks. As a result of
    AOT tracing torch.utils.checkpoint code, we have a backward graph with
    recomputed forward nodes.

    However, we might deprecate this operator soon. The difficulty arises in the
    functionalization of rng ops. Today, there are two different
    functionalization of rng ops - one at AOT autograd and other at Inductor.
    And they are difficult to map to each other. The rng states also complicate
    pattern matching in Inductor. Due to the ease of implementation, we are
    currently inclined towards functionalization at Inductor level, which means
    that duplication/recomputation is done as a compiler pass in the
    partitioners. See TagActivationCheckpoint for more information.
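
    A rough usage sketch, assuming `gm` is an fx.GraphModule for the
    checkpointed region produced by Dynamo (the name is illustrative only):
    >> out = wrap_activation_checkpoint(gm, x, y)
    which behaves roughly like
    >> checkpoint(Interpreter(gm).run, x, y, use_reentrant=False, preserve_rng_state=False)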
    """

    def __init__(self) -> None:
        super().__init__("wrap_activation_checkpoint", cacheable=False)

    def __call__(self, function, *args, **kwargs):
        # use_reentrant is set to False because this op is going to be traced,
        # and we want AOT Autograd to trace through the non-reentrant version
        # of checkpointing.
        import torch.fx.traceback as fx_traceback
        from torch.fx import Interpreter

        kwargs["use_reentrant"] = False
        kwargs["preserve_rng_state"] = False
        # Using an Interpreter allows preservation of node metadata through the
        # torch.compile stack.
        with fx_traceback.preserve_node_meta():
            from torch.utils.checkpoint import checkpoint

            return checkpoint(Interpreter(function).run, *args, **kwargs)


wrap_activation_checkpoint = WrapActivationCheckpoint()


class TagActivationCheckpoint(HigherOrderOperator):
    """
    This operator is supposed to be used only with torch.compile stack. This
    accepts a Fx graph module which needs to be checkpointed. This operator adds
    "recomputable" tag to the nodes of the Fx graph that should be recomputed.

    The goal is to:
    1. Avoid using Dynamo to trace through saved tensor hooks.
    2. For selective checkpointing case, let AOTAutograd trace through
       saved tensor hooks but has special logic with TorchDispatchMode to override
       the usual saved_tensor_hooks fn logic in order to tag the nodes.
    3. Rely on the partitioners to actually duplicate the nodes.
    This sits well in the torch.compile stack, because by the time graph
    reaches partitioner, inductor has already run its functionalization of rng
    ops (by setting fixed seed for each random op, see `replace_random_passes`).
    Therefore, the duplication of nodes, by design, respects the rng states in
    the forward and recomputed forward in backward.
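
    A rough sketch of the tagging, for illustration only: every call_function /
    call_method / call_module node in the checkpointed graph gets
    >> node.meta["ac_graph_id"] = <id unique to this checkpointed region>
    >> node.meta["recompute"] = CheckpointPolicy.PREFER_RECOMPUTE
    in the plain activation checkpointing case; under selective checkpointing
    the "recompute" tag is instead filled in later by the saved-tensor-hooks
    logic mentioned above.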
    """

    def __init__(self) -> None:
        super().__init__("tag_activation_checkpoint", cacheable=False)

    @staticmethod
    def divide_kwargs(kwargs):
        """
        checkpoint fn can have mixed kwargs between checkpointed fn and
        checkpoint fn itself. For example
        >> def gn(x, y, z=None):
        >>     a = torch.matmul(x, y)
        >>     if z is not None:
        >>         return torch.matmul(a, z)
        >>     return a
        >> def fn(x, y, z):
        >>     return torch.cos(checkpoint(gn, x, y, use_reentrant=False, z=z))
        In the above case, z belongs to checkpointed function gn, but
        use_reentrant belongs to the checkpoint function. This function splits
        the kwargs into checkpoint_kwargs and gmod_kwargs (or
        checkpointed_fn_kwargs).
        We do sorting to ensure same graph from run to run for better
        debuggability. It is not required for correctness.
        """
        from torch.utils.checkpoint import checkpoint

        ckpt_signature = inspect.signature(checkpoint)
        checkpoint_keys = set()
        for name in ckpt_signature.parameters:
            if name in ("function", "args", "kwargs"):
                continue
            checkpoint_keys.add(name)

        # `preserve_rng_state` is absorbed by **kwargs in checkpoint's
        # signature, so add it explicitly.
        checkpoint_keys.add("preserve_rng_state")

        checkpoint_kwargs = {
            name: kwargs[name] for name in kwargs.keys() if name in checkpoint_keys
        }
        gmod_kwargs = {
            name: kwargs[name] for name in kwargs.keys() if name not in checkpoint_keys
        }
        return checkpoint_kwargs, gmod_kwargs

    def tag_nodes(self, gmod, is_sac):
        from torch.utils.checkpoint import CheckpointPolicy

        unique_graph_id = next(uid)
        for node in gmod.graph.nodes:
            if node.op in ("call_function", "call_method", "call_module"):
                node.meta["ac_graph_id"] = unique_graph_id
                if is_sac:
                    # For selective checkpointing, the "recompute" tag is
                    # filled in later by the saved-tensor-hooks dispatch mode.
                    node.meta["recompute"] = None
                else:
                    node.meta["recompute"] = CheckpointPolicy.PREFER_RECOMPUTE
        return gmod

    def __call__(self, gmod, *args, **kwargs):
        import torch.fx.traceback as fx_traceback
        from torch.fx import Interpreter

        if "_checkpoint_context_fn" in gmod.meta:
            warning_once(
                log,
                """
Detected that context_fn is passed to torch.utils.checkpoint under torch.compile.
Please make sure the checkpointed region does not contain in-place ops (e.g. torch.relu_).
""",
            )
            # use_reentrant is set to False because this op is going to be
            # traced, and we want AOT Autograd to trace through the
            # non-reentrant version of checkpointing.
            kwargs["use_reentrant"] = False
            # preserve_rng_state is set to False so that AOT Autograd does not
            # trace through the RNG-forking logic; RNG is instead handled by
            # Inductor's functionalization pass (see `replace_random_passes`).
            kwargs["preserve_rng_state"] = False
            kwargs["context_fn"] = gmod.meta["_checkpoint_context_fn"]
            # First tag every node as belonging to this checkpointed region;
            # the selective-checkpointing dispatch mode then decides per node
            # whether it is actually recomputed.
            gmod = self.tag_nodes(gmod, is_sac=True)
            # Using an Interpreter allows preservation of node metadata through
            # the torch.compile stack.
            with fx_traceback.preserve_node_meta():
                from torch.utils.checkpoint import checkpoint

                return checkpoint(Interpreter(gmod).run, *args, **kwargs)
        else:
            gmod = self.tag_nodes(gmod, is_sac=False)
            # Using an Interpreter allows preservation of node metadata through
            # the torch.compile stack.
            with fx_traceback.preserve_node_meta():
                return Interpreter(gmod).run(*args)


tag_activation_checkpoint = TagActivationCheckpoint()
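

# A minimal, hypothetical usage sketch (not part of the original module): it
# only exercises the pure-Python `TagActivationCheckpoint.divide_kwargs` helper
# to show how mixed kwargs are split between torch.utils.checkpoint and the
# checkpointed function itself.
if __name__ == "__main__":
    mixed = {"use_reentrant": False, "preserve_rng_state": False, "z": 3}
    ckpt_kwargs, fn_kwargs = TagActivationCheckpoint.divide_kwargs(mixed)
    # `use_reentrant` and `preserve_rng_state` stay with checkpoint(); `z` is
    # forwarded to the checkpointed function.
    print(ckpt_kwargs, fn_kwargs)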