
    VhN                     
   d dl mZ d dlmZ d dlmZ d dlmZmZm	Z	 d dl
Z
d dlmZ d dlmZ d dlmZ d d	lmZ 	 d d
lmZmZmZ eedf   Zeeef   Zeee      Z ee	eeee      f      Z!defdZ"defdZ#defdZ$dedefdZ%e G d d             Z& G d d      Z' G d de'      Z( G d de'      Z)e G d d             Z*e G d d             Z+e G d d              Z,e G d! d"             Z-y# e$ r d d
lmZmZmZ Y w xY w)#    )Sequence)	dataclass)cached_property)AnyOptionalUnionN)
OpOverload)
DeviceMesh)DTensorSpec)	Placement)tree_leavestree_map_onlyTreeSpec.returnc                     | j                   J d       t        j                  | j                   j                  | j                   j                  | j                   j
                        S )zL
    This is used to propagate tensor metadata, must be under fake mode
    z)DTensorSpec does not contain tensor_meta.)dtype)tensor_metatorchempty_stridedshapestrider   )args    S/home/dcms/DCMS/lib/python3.12/site-packages/torch/distributed/tensor/_op_schema.py!_rebuild_tensor_from_dtensor_metar   #   sT     ??&S(SS&oo##     opc                 :    | j                   j                  d   dk(  S )N_)_schemanamer   s    r   _is_inplace_opr#   /   s     ::??2#%%r   c                 2    d| j                   j                  v S )Nout)r    overload_namer"   s    r   _is_out_variant_opr'   6   s     BJJ,,,,r   specc           	      8   | yt        | t              r2dj                  | j                  D cg c]  }t	        |       c}      S t        | t
              r.ddj                  | D cg c]  }t        |       c}      z   dz   S t        d|        c c}w c c}w )NNone (, )z!Unknown spec type to print: spec=)
isinstancer   join
placementsstrr   _pretty_print_specRuntimeError)r(   pss      r   r3   r3   =   s    |	D+	&ww81A899	D(	#TYYtD! 21 5DEEKK>tfEFF	 9Ds   B&B
c                       e Zd ZU dZeeeee   df   f   ed<   dZ	ee
e      ed<   dZeeee         ed<   edefd       Zed	        Zdd
edefdZdefdZy)PlacementStrategyaQ  
    A placement strategy describes acceptable sharding placements of the output
    and the tensor arguments of an operation.

    note: when the op return value is a single DTensor object, output_specs is
    DTensorSpec; when the return value is a tuple of Optional[DTensor],
    output_specs is a tuple of Optional[DTensorSpec].
    .output_specsNinput_specsredistribute_costr   c                 ~    t        | j                  t              r| j                  S t        d| j                         )z
        This function requires that the strategy have exactly one DTensorSpec as the
        output spec. If the output_specs is a tuple, we throw an exception.
        z;function output_spec expects a single DTensorSpec but got: )r/   r9   r   
ValueErrorselfs    r   output_speczPlacementStrategy.output_spec\   s>     d''5$$$MdN_N_M`a r   c                     t        | j                  t              r| j                  j                  S t        | j                  t              r-| j                  d   }t        |t              sJ |j                  S t        d| j                         )Nr   zUfunction output_spec expects a single DTensorSpec or a tuple of DTensorSpec but got: )r/   r9   r   meshtupler=   r?   out_specs     r   rB   zPlacementStrategy.meshi   s}    d''5$$)))))51((+Hh444== ghlhyhygz{ r   indexc                     | j                   J d       t        | j                         |kD  s-J d| dt        | j                          d| j                           | j                   |   S )Nz)input_specs of PlacementStrategy is None!zInvalid index z for input_specs of length z: )r:   len)r?   rF   s     r   
input_speczPlacementStrategy.input_specv   sz    +X-XX+4##$u, 	
UG#>4##$%R(8(8'9;	
, &&r   c                     | j                   t        | j                          d}nd}t        | j                        }| | S )Nz -> r+   )r:   r3   r9   )r?   input_specs_stroutput_spec_strs      r   __str__zPlacementStrategy.__str__~   sL    '!3D4D4D!E FdKO O,T->->?!"?"344r   )r   )__name__
__module____qualname____doc__r   r   rC   r   __annotations__r:   r   r;   listfloatr   r@   rB   intrI   r2   rM    r   r   r8   r8   H   s     U8K+@#+E%FFGG37K(;/07 6:xT%[ 129
[ 
 
 
 
' 'K '5 5r   r8   c                       e Zd ZdZy)StrategyTypezi
    Base class type for op strategy, We have two StrategyType:
        OpStrategy and TupleStrategy
    N)rN   rO   rP   rQ   rV   r   r   rX   rX      s    r   rX   c                        e Zd ZdZdee   ddf fdZdefdZde	fdZ
ed        Zed	        Zed
        Zed        Z xZS )
OpStrategyz[
    OpStrategy that consists of a list of placement strategies associated with the op
    
strategiesr   Nc                 0    t         |           || _        y N)super__init__r[   )r?   r[   	__class__s     r   r_   zOpStrategy.__init__   s    3=r   c                     dj                  | j                  D cg c]  }t        |       c}      }| j                  }d| d| S c c}w )Nr-   [z
] @ mesh: )r0   r[   r2   
mesh_shape)r?   strategystrategy_list_strrc   s       r   rM   zOpStrategy.__str__   sH     IIT__&Us8}&UV__
$%Z
|<< 'Vs   Ac                 :    t        d | j                  D              S )zR
        Returns the max number of shards across all placement strategies
        c              3   H   K   | ]  }|j                   j                    y wr]   )r@   
num_shards).0rd   s     r   	<genexpr>z,OpStrategy.max_num_shards.<locals>.<genexpr>   s     Sx8''22S    ")maxr[   r>   s    r   max_num_shardszOpStrategy.max_num_shards   s     S4??SSSr   c                 4    | j                   d   j                  S Nr   )r[   rB   r>   s    r   rB   zOpStrategy.mesh   s    q!&&&r   c                 H    | j                   d   j                  j                  S ro   )r[   rB   r   r>   s    r   rc   zOpStrategy.mesh_shape   s    q!&&,,,r   c                 H    | j                   d   j                  j                  S ro   )r[   r@   ndimr>   s    r   rr   zOpStrategy.ndim   s    q!--222r   c                 H    | j                   d   j                  j                  S ro   )r[   r@   r   r>   s    r   r   zOpStrategy.shape   s    q!--333r   )rN   rO   rP   rQ   rS   r8   r_   r2   rM   rU   rm   propertyrB   rc   rr   r   __classcell__r`   s   @r   rZ   rZ      s    >4(9#: >t >= =
T T ' ' - - 3 3 4 4r   rZ   c                   N     e Zd ZdZdee   ddf fdZdedefdZ	de
fdZ xZS )	TupleStrategya?  
    TupleStrategy represents the output strategy of this op is a tuple
    of strategy, i.e. If the output of this op is a tuple of tensors or list of tensors
    with possibly different placement strategies, we should return a TupleStrategy that
    contains a tuple of OpStrategy, where each child represents the sharding strategy
    of "each element" of the tuple/list of tensors the op returns.

    NOTE: if the output of the op is a List[Tensor] and they share the same placement
    strategy, then we should return a single OpStrategy instead of a TupleStrategy
    childsr   Nc                 0    t         |           || _        y r]   )r^   r_   ry   )r?   ry   r`   s     r   r_   zTupleStrategy.__init__   s    .4r   rF   c                 \    | j                   |   }t        |t              sJ |j                  S r]   )ry   r/   rZ   rB   )r?   rF   op_strategys      r   
child_meshzTupleStrategy.child_mesh   s,    kk%(+z222r   c           	          dj                  t        | j                        D cg c]  \  }}t        |        c}}      }d| dS c c}}w )Nr-   zTupleStrategy(r.   )r0   	enumeratery   r2   )r?   idxstratchild_strategies_strs       r   rM   zTupleStrategy.__str__   sM    #yy/8/EFeE
|_F 
   45Q77 Gs   A
)rN   rO   rP   rQ   r   rX   r_   rU   r
   r}   r2   rM   ru   rv   s   @r   rx   rx      s?    	5x5 5$ 5   
  
8 8r   rx   c                   H    e Zd ZU dZdZeed<   dZee	e
      ed<   dZeed<   y)RuntimeSchemaInfoa  
    RuntimeSchemaInfo stores the operator schema related information for runtime (eager)
    execution. This is mainly used for two ways: 1. to generate hash for args to determine
    whether to re-run sharding prop or not 2. to determine if we need pytree
    d   static_argnumNstatic_kwargkeyFneeds_pytree)rN   rO   rP   rQ   r   rU   rR   r   r   rS   r2   r   boolrV   r   r   r   r      s2     M3+/OXd3i(/ L$r   r   c                      e Zd ZU dZeed<   eed<   eed<   dZe	e
   ed<   edeedf   fd	       Zedeedf   fd
       ZdefdZdefdZddZdedefdZdefdZdefdZddedefdZdefdZdedefdZdefdZdefdZ ddZ!y)OpSchemaa  
    OpSchema is a data class that describes an operator input schemas, it includes
    DTensorSpecs (instead of DTensor) and non-tensor args/kwargs (positional order
    preserved). It is mainly used by the DTensor's dispatching logic to perform various
    actions (i.e. sharding propagation, caching sharding decisions, redistribute, etc.)

    NOTE: this should be used as a read only data class
    TODO: make this a frozen dataclass

    Args:
        op: the operator overload we are intercepting
        args_schema: contains args except that the DTensor args have been replaced
            with its DTensorSpec or OpStrategy
        kwargs_schema: contains kwargs except that the DTensor kwargs have been replaced
            with its DTensorSpec or OpStrategy
    r   args_schemakwargs_schemaNschema_infor   .c                     | j                   +| j                   j                  rt        | j                        n| j                  }t	        d |D              S )z
        args_spec: Tuple[DTensorSpec, ...]: contains a clean list of args spec list
            with NO non-DTensor positional arguments (i.e. int/float/tuple, etc)
            mainly used by sharding propagation to propagate the output spec
        c              3   B   K   | ]  }t        |t              s|  y wr]   r/   r   ri   items     r   rj   z%OpSchema.args_spec.<locals>.<genexpr>	  s     Ldj{.KTL   r   r   r   r   rC   r?   argss     r   	args_speczOpSchema.args_spec   sO     +0@0@0M0M (()!! 	
 LdLLLr   c                     | j                   +| j                   j                  rt        | j                        n| j                  }t	        d |D              S )Nc              3   B   K   | ]  }t        |t              s|  y wr]   )r/   rZ   r   s     r   rj   z)OpSchema.args_strategy.<locals>.<genexpr>  s     Kdjz.JTKr   r   r   s     r   args_strategyzOpSchema.args_strategy  sO     +0@0@0M0M (()!! 	
 KdKKKr   c                     dj                  | j                  D cg c]  }t        |       c}      }d| j                   d| d| j                   dS c c}w )Nr-   zOpSchema(op=z, args_schema=(z), kwargs_schema=r.   )r0   r   r2   r   r   )r?   
arg_schemar   s      r   __repr__zOpSchema.__repr__  s]    ii4CSCS TZZ TU477) $(M *"0014	
 !Us   Ac                    g }d }| j                   D ]  }t        |t              r1|j                  t	        |             |j
                  j                  }Et        |t              rXt        |j                        dk(  sJ |j                  t        |j                  d   j                               |j                  }t        |t              rI|j                  d   }t        |t              sJ |j                  }|j                  t	        |             |j                  t	        |             " d| j                   ddj!                  |       d| dS )N   r   zOp(op=z, args_schema=r-   z	 @ mesh: r.   )r   r/   r   appendr2   rB   r   rZ   rH   r[   r3   r9   rc   rx   ry   r   r0   )r?   r   rc   r   first_op_strtgys        r   rM   zOpSchema.__str__  s   !#
## 	-C#{+""3s8, XX^^
C,3>>*a///""#5cnnQ6G6T6T#UV ^^
C/"%**Q-!/:>>>,77
""3s8,""3s8,	- ytyy/E.FiPZ|[\]]r   c                     d}| j                   D ]Q  }t        |t              s|j                  !t	        d |j                  j
                  D              sHd} || _        y  || _        y )NFc              3   P   K   | ]  }t        |t        j                           y wr]   )r/   r   SymInt)ri   r6   s     r   rj   z)OpSchema.__post_init__.<locals>.<genexpr>7  s     Pqz!U\\2Ps   $&T)r   r/   r   r   anyr   has_symints)r?   r   as      r   __post_init__zOpSchema.__post_init__3  sa    !! 	A![)amm.GPAMM<O<OPP"&K&	
 'r   arg_idxc                     | j                   |   }t        |t              }|ryt        |t              syt	        d |D              S )NTFc              3   H   K   | ]  }t        |t              xs |d u   y wr]   r   )ri   es     r   rj   z?OpSchema.arg_type_tensor_or_tensor_list_like.<locals>.<genexpr>E  s#     Hq:a-:d:Hrk   )r   r/   r   rS   all)r?   r   r   	is_tensors       r   #arg_type_tensor_or_tensor_list_likez,OpSchema.arg_type_tensor_or_tensor_list_like<  sB    w'sK0	#t$HCHHHr   c                     | j                   j                  j                  }t        |      dkD  xr' t	        |d   j
                  t        j                        S )Nr   r   )r   r    returnsrH   r/   typer   
TensorTyper?   return_typess     r   return_type_tuple_tensor_likez&OpSchema.return_type_tuple_tensor_likeG  sJ     ww..< 1$ 
O  %"2"2*
 	
r   c                     | j                   j                  j                  }t        |d   j                  t
        j                        S ro   )r   r    r   r/   r   r   r   r   s     r   return_type_tensorzOpSchema.return_type_tensorO  s4    ww.. ,q/..0@0@AAr   validatec           
      .   | j                   d   }t        |t        t        f      r|j                  }n}t        |t
        t        t        f      rIt        |t              r|j                  d   n|d   }t        |t        t        f      sJ |j                  }nt        d| j                   d      |rc| j                   dd D ]Q  }t        |t        t        f      s|j                  |k7  s*t        d| j                   d| d|j                   d	       |S )
a  
        This util can be used to get a mesh from the OpSchema that contains multiple
        DTensors as arguments. When `validate` is True, it will try to validate that all the
        arguments have the same mesh to avoid unexpected cross mesh errors.

        NOTE: this util currently does not handle TupleStrategy when `validate=True`,
        this is because for TupleStrategy there could be different types of checks, i.e.:
            - for stack and cat like op, we need to check within a TupleStrategy is every
              input is on the same mesh
            - for foreach like ops we need to check "zipped" inputs are on the same mesh
              for each index.
        r   z+Cannot find device mesh from args for op : .r   Nz1DTensor does not support cross-mesh operation on z! Got meshes:  z>. Please make sure all the arguments have the same DeviceMesh.)r   r/   r   rZ   rB   rS   rC   rx   ry   r=   r   r4   )r?   r   	first_argrB   
first_elemr   s         r   get_mesh_from_argszOpSchema.get_mesh_from_argsU  s    $$Q'	i+z!:;>>D	D%#?@ i7   #q\ 
 j;
*CDDD??DJ477)STUVV''+ cK#<=#((dBR&KDGG9 U''+fAchhZ 8WX  r   c                      j                   st         j                        d }n, j                   j                   j                   j                  }t         fdt         j                        D              }|,t         fd|D              }t         j                  ||f      S t         j                  |f      S )Nc              3      K   | ]:  \  }}j                  |      s|k\  rt        |t              rt        |      n| < y wr]   )r   r/   rS   rC   )ri   ir   r?   r   s      r   rj   z$OpSchema.__hash__.<locals>.<genexpr>  sD      
177:a=>P #1d+E!H2
s   A Ac              3   V   K   | ]   }j                   j                  |d        " y wr]   )r   get)ri   kr?   s     r   rj   z$OpSchema.__hash__.<locals>.<genexpr>  s)      #45""&&q$/#s   &))	r   rH   r   r   r   rC   r   hashr   )r?   r   args_to_hashkwargs_to_hashr   s   `   @r   __hash__zOpSchema.__hash__{  s     0 01M"O ,,::M"..>>O 
!$"2"23
 

 &" #9H# N ,?@@,/00r   otherc                    t        |t              sy| j                  |j                  k7  ryt        | j                        t        |j                        k7  ry| j
                  st        | j                        }d }n,| j
                  j                  }| j
                  j                  }t        t        | j                  |j                              D ],  \  }\  }}t        |t              r||k7  r y||k\  s&||k7  s, y |rB|D ]=  }| j                  j                  |d       |j                  j                  |d       k7  s= y y)NFT)r/   r   r   rH   r   r   r   r   r   zipr   r   r   )r?   r   r   r   r   self_arg	other_argkeys           r   __eq__zOpSchema.__eq__  s2   %*77ehht C(9(9$::  0 01M"O ,,::M"..>>O(1  %"3"34)
 	$A$) (K0X5Jm#I(=	 & !%%))#t48K8K8O8O9  !	! r   c                 @    t        t        t        | j                        S )z
        gen_fake_args: generate fake args for the operator, this is mainly used
            by sharding propagation rules to generate fake args for the operator
            to run the local tensor operator and get the output spec.
        )r   r   r   r   r>   s    r   gen_fake_argszOpSchema.gen_fake_args  s     :D<L<L
 	
r   c                 @    t        t        t        | j                        S )z
        gen_fake_kwargs: generate fake kwargs for the operator, this is mainly used
            by sharding propagation rules to generate fake kwargs for the operator
            to run the local tensor operator and get the output spec.
        )r   r   r   r   r>   s    r   gen_fake_kwargszOpSchema.gen_fake_kwargs  s     :D<N<N
 	
r   c                 r   | j                   }g }d}|j                  ,|j                  j                  rt        |j                        }n|j                  }|D ]=  }t        |t              r|j                  ||          |dz  }-|j                  |       ? t        |      | _        |j                  | _	        y )Nr   r   )
r   r   r   r   r   r/   r   r   rC   r   )r?   origin_schemasuggestion_args_specnew_arg_schemaidx_of_args_specr   r   s          r   !_inplace_rewrap_schema_suggestionz*OpSchema._inplace_rewrap_schema_suggestion  s    #~~')%%1))66)4]5N5N)OK'33K 	+C#{+%%&:;K&LM A% %%c*	+ !0*88r   )r   N)T)r   r   r   N)"rN   rO   rP   rQ   r	   rR   ArgsType
KwargsTyper   r   r   rt   rC   r   r   rZ   r   r2   r   rM   r   rU   r   r   r   r   r
   r   r   objectr   r   r   r   rV   r   r   r   r      s   " 	N/3K+,3M5c!12 M M 	LuZ_5 	L 	L
# 
^ ^('	I3 	I4 	I
t 
BD B$4 $: $L1# 1,#F #t #J
x 

 
9r   r   c                   N    e Zd ZU dZeed<   dZee   ed<   dZ	e
ed<   ed        Zy)OutputShardinga  
    OutputSharding is a data class that is used by the sharding propagation,
    it could set the output_spec upon successful propagation. If needs_redistribute
    is set to True, a redistribute_schema would be returned together to indicate
    the input arguments needs to be redistributed before the op execution.

    NOTE: the redistribute_schema generated by sharding propagation should be
    exactly the same as the operator OpSchema, except the DTensorSpecs
    r@   Nredistribute_schemaFneeds_redistributec                 \   t        | j                  t              r| j                  j                  S t        | j                  t              rB| j                  d   }t        |t              r|j                  S t        dt        |             t        dt        | j                               )Nr   zUnknown output spec type: )r/   r@   r   rB   rC   r=   r   rD   s     r   rB   zOutputSharding.mesh  s    d&&4##(((((%0''*H(K0}}$ #=d8n=M!NOO9$t?O?O:P9QRSSr   )rN   rO   rP   rQ   OutputSpecTyperR   r   r   r   r   r   r   rB   rV   r   r   r   r     s>      .2(+2$$
T 
Tr   r   c                       e Zd ZU dZeed<   eed<   ee   ed<   e	e   ed<   e
eef   ed<   dZee   ed<   dZee   ed	<   y)
OpInfoz7
    All Runtime Op execution info are packed here
    compute_meshschemaflat_args_schema
local_argslocal_kwargsNargs_tree_specoutput_sharding)rN   rO   rP   rQ   r
   rR   r   rS   r   r   dictr2   r   r   r   r   r   rV   r   r   r   r     s[      6l"  sF{##)-NHX&- 15OXn-4r   r   ).collections.abcr   dataclassesr   	functoolsr   typingr   r   r   r   
torch._opsr	   torch.distributed.device_meshr
   &torch.distributed.tensor._dtensor_specr   (torch.distributed.tensor.placement_typesr   torch.utils._cxx_pytreer   r   r   ImportErrortorch.utils._pytreerC   r   r   r   r2   r   rS   PlacementListr   r   r#   r'   r3   r8   rX   rZ   rx   r   r   r   r   rV   r   r   <module>r      sy   $ ! % ' '  ! 4 > >LL #v+
Xi() %Xh{6K-L LMN	f 	&z &-: -GV G G ;5 ;5 ;5| "4 "4J8L 88   ( w9 w9 w9t T T T: 5 5 5]   s   
C0 0DD