
    Vh'                        d dl mZ d dlZd dlmZ d dlmc mc mZ	 ej                  j                  Zd Zd Zd Zd Zdej                   j"                  deed	f   d
eeef   defdZdej                   j"                  deed	f   d
eeef   defdZdej                   j"                  deed	f   deeef   defdZdej                   j"                  deed	f   deeef   defdZy)    )castNc                     | d   dk7  S )N   r    )paddings    Q/home/dcms/DCMS/lib/python3.12/site-packages/torch/distributed/tensor/_tp_conv.py_requires_data_exchanger	      s    1:?    c                     |d   dk7  rt        d      |d   dk7  r-|d   dk7  rt        d      |d   dz  | d   kD  rt        d      y	| d   |d   z  dk(  r|d   |d   k(  st        d      y	)
Nr   z3Dilation must be 1 for tensor parallel convolution.r   zGStride must be 1 when there is padding for tensor parallel convolution.      zbkernel_size[3] // 2 should be less than or equal to input_size[3] for tensor parallel convolution.zIt requires that input_size[3] is divisible by stride[1] and stride[1] equals kernel_size[3] when there is padding for tensor parallel convolution.T)RuntimeError)
input_sizekernel_sizestrider   dilations        r   _is_supportedr      s    {aPQQqzQ!9>Y  q>QA.t   1q	)Q.6!9A3NI  r
   c                    | d d d d d d | d f   j                         }| d d d d d d d |f   j                         }t        j                  |      }	t        j                  |      }
t        j                  t        j
                  ||      }t        j                  t        j
                  ||      }t        j                  t        j                  |	|      }t        j                  t        j                  |
|      }t        j                  ||||g      }|D ]  }|j                           |dk(  rt        j                  | |	gd      } | S ||dz
  k(  rt        j                  |
| gd      } | S t        j                  |
| |	gd      } | S )Nr   )dimr   )

contiguoustorch
zeros_likedistP2POpisendirecvbatch_isend_irecvwaitcat)	in_tensord1d2leftrightranksizesend_to_rightsend_to_leftrecv_from_rightrecv_from_leftsend_op_rightsend_op_leftrecv_op_rightrecv_op_leftreqsreqs                    r   _ring_send_recv_constructr2   (   s_   aAstm,779MQ1crc\*557L&&|4O%%m4NJJtzz=%@M::djj,=LJJtzz?EBM::djj.$?L!!	lMBD  
 qyIIy/:C	  
	II~y9rB	  II~y/JPRS	r
   c                    | d d d d d d | d f   j                         }| d d d d d d d |f   j                         }t        j                  |      }	t        j                  |      }
t        j                  t        j
                  ||      }t        j                  t        j
                  ||      }t        j                  t        j                  |	|      }t        j                  t        j                  |
|      }t        j                  ||||g      }|D ]  }|j                           |dk(  rI| d d d d d d d | f   } t        j                  | d d d d d d | d f   |	      | d d d d d d | d f<   y ||dz
  k(  rF| d d d d d d |d f   } t        j                  | d d d d d d d |f   |
      | d d d d d d d |f<   y | d d d d d d || f   } t        j                  | d d d d d d | d f   |	      | d d d d d d | d f<   t        j                  | d d d d d d d |f   |
      | d d d d d d d |f<   y )Nr   r   )
r   r   r   r   r   r   r   r   r   add)grad_in_tensorr"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   s                    r   _ring_send_recv_aggregater6   D   s!   "1aRCD=1<<>M!!Q3B3,/::<L&&|4O%%m4NJJtzz=%@M::djj,=LJJtzz?EBM::djj.$?L!!	lMBD  
 qy'1a2#6(-		1aRCD=)?)
q!Q}% 
	'1a5',yy1aCRC<(.(
q!Q|$ (1aRC8(-		1aRCD=)?)
q!Q}% (-yy1aCRC<(.(
q!Q|$r
   op_calllocal_tensor_args.local_tensor_kwargsreturnc           	         | t         j                  j                  k(  sJ t        |      dk(  sJ t	        j
                         }t	        j                         }t        t        j                  |d         }t        t        j                  |d         }|dd \  }}}	t        |j                  |j                  |||	      sJ t        |t              sJ t        |      s
 | |i |}
|
S |j                  d   dz
  }|dz  }||z
  }||z   |k(  sJ |dz   |z  }|dz
  |z   |z  }t        |||||||      }t        |      }||d<   t        t         t"        df   |      } | |i |}
|d   }|
j%                  d      }|dk(  r|
d d d d d d d ||z
  f   }
|
S ||dz
  k(  r|
d d d d d d |d f   }
|
S |
d d d d d d |||z
  f   }
|
S )N	   r   r   r      r   .)atenconvolutiondefaultlenr   get_rankget_world_sizer   r   Tensorr   shape
isinstancelistr	   r2   tupleobjectr'   )r7   r8   r9   r&   r'   r!   weightr   r   r   local_resultsdr"   r#   r%   r$   local_tensor_args_list	padding_wws                      r   tp_convolutionrP   j   s   
 d&&..... !Q&&&==?D DU\\#4Q#78I%,, 1! 45F 1!A 6FGX&,,RRRgt$$$"7+!2J6IJ LLOa!VVBw!||T!q44' .r2tUD$
	
 "&&7!8$-q! vs{!35KL!2J6IJ AJ	q!19)!Q?Q]?*BCM  TAX)!Q9:*=>M  *!Q9q9}3L*LMMr
   c           	         | t         j                  j                  k(  sJ t        |      dk(  sJ t	        j
                         }t	        j                         }t        t        j                  |d         }t        t        j                  |d         }t        t        j                  |d         }|dd \  }}	}
t        |j                  |j                  ||	|
      sJ t        |	t              sJ t        |	      s
 | |i |}|S |j                  d   dz
  }|dz  }||z
  }||z   |k(  sJ |dz   |z  }|dz
  |z   |z  }t        |||||||      }|	d   }|dk(  r/t        j                   j"                  j%                  |d|fdd      }ne||dz
  k(  r/t        j                   j"                  j%                  ||dfdd      }n.t        j                   j"                  j%                  |||fdd      }t        |      }||d<   ||d<   t        t&        t(        d	f   |      } | |i |}|d   }|!t+        |||||||      }t        |      }||d<   t        t&        t(        d	f   |      }|S )
N   r   r   r         r   constant.)r>   convolution_backwardr@   rA   r   rB   rC   r   r   rD   r   rE   rF   rG   r	   r2   nn
functionalpadrH   rI   r6   )r7   r8   r9   r&   r'   grad_out_tensorr!   rJ   r   r   r   rK   rL   r"   r#   r%   r$   rN   rM   r5   s                       r   tp_convolution_backwardr[      s   
 d//77777 !R'''==?D D5<<):1)=>OU\\#4Q#78I%,, 1! 45F 1!A 6FGX&,,RRRgt$$$"7+!2J6IJ LLOa!VVBw!||T!q44' .r2tUD$
	
 AJ	19#hh1155!YQO TAX#hh1155)QQO $hh1155)Y!7QO
 "&&7!8$3q!$-q! vs{!35KL!2J6IJ 'q)%6BeT4N !/M-M!U63;/?r
   argskwargsc                    t         j                  j                  j                  | ||      }t         j                  j                  j                  j                  |       |j                  }|J d       t        | t        |j                        |j                        }t         j                  j                  j                  ||j                        S )N"output sharding should not be None)dtensorDTensor_op_dispatcherunwrap_to_op_infosharding_propagator	propagateoutput_shardingrP   rH   
local_argslocal_kwargswrapoutput_specr7   r\   r]   op_inforf   rK   s         r   convolution_handlerrm      s     oo,,>>wfUG OO""66@@I--O&L(LL& #w))*G,@,@M ??))..22 r
   c                    t        |      }t        |d   t        j                        rt        |d   t        j                        sJ |d   j	                  |d   j
                  |d   j                        |d<   t        |      }t        j                  j                  j                  | ||      }t        j                  j                  j                  j                  |       |j                  }|J d       t        | t        |j                        |j                        }t        j                  j                  j!                  ||j"                        S )Nr   r   r_   )rG   rF   r`   ra   redistributedevice_mesh
placementsrH   rb   rc   rd   re   rf   r[   rg   rh   ri   rj   rk   s         r   convolution_backward_handlerrr      s    :Dd1gw/JtAw4XXX1g""47#6#6Q8J8JKDG;D oo,,>>wfUG OO""66@@I--O&L(LL& ,w))*G,@,@M ??))..22 r
   )typingr   r   torch.distributeddistributedr   torch.distributed.tensor._apitensor_apir`   opsr>   r	   r   r2   r6   _ops
OpOverloadrH   rI   dictstrrP   r[   rm   rr   r   r
   r   <module>r~      sU       / / yy~~
*8#
L2ZZ""2VS[)2 c6k*2 	2jDZZ""DVS[)D c6k*D 	DNZZ""

 f 	.ZZ""

 f 	r
   