
    Vhs             
       	   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
mZmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZ d dlZd dlZd dlZd dlZd dl m!Z" d dl#m!c m$c m%c m&Z& d dl'm!c m$c m%c m(Z) d dl*m!c m$c m%c m+Z, d dl-m.Z. d dl/m.c m0Z1 d dl2m3Z4 d d	l2m5Z6 d d
l7m8Z8 d dl9m:Z: d dl;m<Z<m=Z= d dl>m?Z@mAZBmCZDmEZF d dlGmHZH d dlImJZJmKZKmLZL d dlMmNZNmOZO d dlPmQZQmRZR d dlSmTZT d dlUmVZVmWZW d dlXmYZYmZZZm[Z[m\Z\m]Z]m^Z^m_Z_m`Z`maZambZbmcZcmdZdmeZemfZfmgZgmhZhmiZimjZjmkZk d dllmmZmmnZnmoZompZpmqZqmrZrmsZsmtZt d dlum!c mvc mwZw d dlxmyZy d dlzZz	 d dl{Z{dZ|ej                  dk(  rd dlZnd dlZ G d de.j                        Z G d d      Z ed      Zde_         e ej                  d d             Zd d!d"did#eed$dd%dd&g d'ggZe"j                  j                  e"j                  j                  e"j                  j                  e"j                  j                  gZe"j                  j                  e"j                  j                  e"j                  j                  e"j                  j                  gZe"j                  j                  e"j                  j                  e"j                  j                  e"j                  j                  gZd(Z e
d)e      Z G d* d+e      Z ete| d,      Zej*                  d-   Z ej.                  d.d/      Zd0Zd1d2iZded3Zd4 Zd5Zd6Zd7Zd8Zd9Z G d: d;e      Z G d< d=e.j                        Z G d> d?e.j                        Z G d@ dAe.j                        Z G dB dCe.j                        Z G dD dEe.j                        Z G dF dGe.j                        Z G dH dIe.j                        Z G dJ dKe.j                        Z G dL dMe.j                        Z G dN dOe.j                        Z e       Z e       Z edP      Z e.j`                  d%dQR      ZdS ZdTZd0dUdUdVZdW ZdX ZedY        ZedZ        Zdejr                  dfd[Zdejr                  fd\Zd] Zd^ Z G d_ d`      Z G da dbeY      Z G dc dd      Z emej                         y# e}$ r dZ|Y w xY w)f    N)
namedtupleOrderedDictdefaultdict)contextmanagernullcontext)	dataclass)	timedelta)reduce)Union
NamedTupleCallableAny)TEST_MASTER_ADDR)TEST_MASTER_PORT)TorchDispatchMode)
DeviceType)
GradScalerautocast)post_localSGD_hookpowerSGD_hookdefault_hooksquantization)_apply_optimizer_in_backward)get_world_size_get_default_group_get_pg_config)$_verify_param_shape_across_processes_sync_module_states)ExecutionTraceObserverProfilerActivity)DistributedDataParallel)_dump_DDP_relevant_env_vars_MixedPrecision)MultiProcessTestCase
TEST_SKIPSinit_multigpu_helperinitialize_temp_directoriescleanup_temp_dirsimple_sparse_reduce_testsskip_if_rocm_multiprocessskip_if_small_worldsizeskip_if_odd_worldsizeskip_if_lt_x_gpunccl_skip_if_lt_x_gpuskip_if_no_gpurequire_n_gpus_for_nccl_backendrequires_nccl_versioncaptured_outputwith_nccl_blocking_waitwith_dist_debug_levelsverify_ddp_error_loggedDistTestCases)instantiate_parametrized_testsIS_MACOS
IS_WINDOWSFILE_SCHEMA	IS_FBCODEIS_SANDCASTLEskip_but_pass_in_sandcastleskip_but_pass_in_sandcastle_if)DistributedSamplerTFwin32c                   &     e Zd Zd fdZd Z xZS )NetWithBuffersc                     t         |           t        j                  ddd      | _        t        j                  ddd      | _        | j                  dt        j                  dd             y N
   Fbias   buffer   	super__init__nnLinearabregister_buffertorchrandnself	__class__s    d/home/dcms/DCMS/lib/python3.12/site-packages/torch/testing/_internal/distributed/distributed_test.pyrM   zNetWithBuffers.__init__m   sQ    2r.2qu-Xu{{1a'89    c                 x    | j                   j                  d       | j                  | j                  |            S NrH   )rI   add_rQ   rP   rV   xs     rX   forwardzNetWithBuffers.forwards   s+    vvdffQi  rY   returnN__name__
__module____qualname__rM   r_   __classcell__rW   s   @rX   rB   rB   l   s    :!rY   rB   c                       e Zd Zd Zd Zy)Fooc                     || _         y N)r^   r]   s     rX   rM   zFoo.__init__y   s	    rY   c                     d }| j                   j                         D ]  \  }}|j                   |   } |||      r y y)Nc                 l    t        | t        j                        rt        j                  | |      S | |k(  S rk   )
isinstancerS   Tensorequal)valueothers     rX   eqzFoo.__eq__.<locals>.eq~   s+    %.{{5%00E>!rY   FT)__dict__items)rV   rr   rs   attrrq   other_values         rX   __eq__z
Foo.__eq__}   sI    	"
  ==..0 	KD%...Ke[)	 rY   N)rc   rd   re   rM   rx    rY   rX   ri   ri   x   s    
rY   ri   rE   rH         nested)key1key2key3foorJ   string)r{      r|   rP   rQ   r   c                   J    e Zd ZU ej                  ed<   ej                  ed<   y)TestNamedTupleInput_1rP   rQ   N)rc   rd   re   rS   tensor__annotations__ry   rY   rX   r   r      s    ||O||OrY   r   zno torchvisionBACKENDINIT_METHODenv://i,  test_DistributedDataParallel  c                 V   t        |t        j                  j                        r|j	                         n|j
                  }|D cg c]Y  }|j                  j                  |       s|j                  j                  |       r!|r|j                  t        j                  k7  r|[ c}S c c}w rk   )rn   rS   profilerprofileeventsfunction_eventsnameendswith
startswithdevice_typer   CUDA)
event_namer   dedup_gpu_user_annotation
event_listevents        rX   get_profiling_eventr      s     h 6 67 	%%  &ZZ  ,

0E0Ej0Q.%2C2Cz2V 	  s   AB&c                    t        j                  ddd      }|j                          |j                  }| j	                  |       t        |      5 }t        j                  |      d   }ddd       t        d|        t        j                  |       D cg c]  }|j                  d      d	k(  s| c}S # 1 sw Y   QxY wc c}w )
zTorch profiler includes nccl metadata in an inserted operator called "record_param_comms"
    We will need to test metadata obtained from profiler herew+tz.jsonF)modesuffixdeletetraceEventsNzTrace saved to r   record_param_comms)tempfileNamedTemporaryFilecloser   export_chrome_traceopenjsonloadprintosremoveget)proftf
trace_filefr   es         rX   get_profiler_nccl_metar      s     
	$	$75
B HHJJZ(	j	 -Q1m,-	OJ<
() IIjG!v2F!FAGG- - Hs   B:C3C:C:Expected to have finished reduction in the prior iterationz:passing the keyword argument `find_unused_parameters=True`z.Since `find_unused_parameters=True` is enabledz:`forward` function outputs participate in calculating losszMset the environment variable TORCH_DISTRIBUTED_DEBUG to either INFO or DETAILc                       e Zd ZU eed<   ej                  ed<   eej                  e
f   ed<   eed<   dZeed<   dZeed<   dZeed	<   y)
DDPUnevenTestInputr   modelinpsync_intervalFthrow_on_early_terminationNhookstate)rc   rd   re   strr   rN   Moduler   rS   r   tupleintr   boolr   r   r   r   ry   rY   rX   r   r      sJ    
I99	u||U"	##',,D(E3rY   r   c                   &     e Zd Zd fdZd Z xZS )_FC2c                     t         |           t        j                  ddd      | _        d| j                  j
                  _        y )NrE   2   TrF   F)rL   rM   rN   rO   fcrG   requires_gradrU   s    rX   rM   z_FC2.__init__  s2    ))B.%*"rY   c                 (    | j                  |      }|S rk   r   r]   s     rX   r_   z_FC2.forward  s    GGAJrY   r`   rb   rg   s   @rX   r   r     s    +
rY   r   c                   &     e Zd Zd fdZd Z xZS )Netc                 d   t         |           t        j                  ddd      | _        t               | _        t        j                  ddd      | _        t        j                         | _	        t        j                  t        j                  ddg      j                         d      | _        y )NrJ   rE   FrF   r   r{   r   )rL   rM   rN   rO   fc1r   fc2fc3ReLUrelu	ParameterrS   r   longno_grad_paramrU   s    rX   rM   zNet.__init__  sx    99Q/699R/GGI	\\LL!Q %%'u
rY   c                     | j                  | j                  |            }| j                  | j                  |            }| j                  |      }t	        j
                  |d      S )NrH   dim)r   r   r   r   Fsoftmaxr]   s     rX   r_   zNet.forward  sL    IIdhhqk"IIdhhqk"HHQKyy""rY   r`   rb   rg   s   @rX   r   r     s    
#rY   r   c                   &     e Zd Zd fdZd Z xZS )LargeNetc                     t         |           t        j                  ddd      | _        t        j                  ddd      | _        y )N  i  FrF   r   rL   rM   rN   rO   r   r   rU   s    rX   rM   zLargeNet.__init__#  s6    99T4e499T3U3rY   c                 J    | j                  |      }| j                  |      }|S rk   r   r   r]   s     rX   r_   zLargeNet.forward(  s!    HHQKHHQKrY   r`   rb   rg   s   @rX   r   r   "  s    4
rY   r   c                   &     e Zd Zd fdZd Z xZS )Taskc                 ~    t         |           t        j                  t	        j
                  dd            | _        y NrJ   )rL   rM   rN   r   rS   onesprU   s    rX   rM   zTask.__init__/  s)    ejjA./rY   c                      | j                   |z   S rk   )r   r]   s     rX   r_   zTask.forward3  s    vvzrY   r`   rb   rg   s   @rX   r   r   .  s    0rY   r   c                   &     e Zd Zd fd	Zd Z xZS )BatchNormNetc                     t         |           t        j                  ddd      | _        t        j
                  d|      | _        t        j                  ddd      | _        y )NrJ   (   FrF   r{   affine)rL   rM   rN   rO   r   BatchNorm1dbnr   )rV   r   rW   s     rX   rM   zBatchNormNet.__init__8  sH    99Q/..6299R/rY   c                     t        j                  | j                  |      d      }| j                  |      }t        j                  |d      }| j	                  |      }t        j                  |d      S )N)r{   rE   )r   r   rH   r   )rS   reshaper   r   r   r   r   r]   s     rX   r_   zBatchNormNet.forward>  sU    MM$((1+{3GGAJMM!X&HHQKyy""rY   Trb   rg   s   @rX   r   r   7  s    0#rY   r   c                   &     e Zd Zd fdZd Z xZS )UnusedParamTwoLinLayerNetc                     t         |           t        j                  ddd      | _        t        j                  ddd      | _        t        j                  ddd      | _        y )NrE   FrF   r   )rL   rM   rN   rO   rP   rQ   crU   s    rX   rM   z"UnusedParamTwoLinLayerNet.__init__G  sJ    2r.2r.1ae,rY   c                 N    | j                  |      }| j                  |      }||fS rk   r   rV   r^   rP   rQ   s       rX   r_   z!UnusedParamTwoLinLayerNet.forwardM  %    FF1IFF1I1vrY   r`   rb   rg   s   @rX   r   r   F  s    -rY   r   c                   &     e Zd Zd fdZd Z xZS )DictOutputModulec                 @    t         |           t               | _        y rk   )rL   rM   r   modulerU   s    rX   rM   zDictOutputModule.__init__T  s    /1rY   c                 `    | j                  |      }|d   |d   z   j                         }||dS )Nr   rH   )predictionsloss)r   sum)rV   r^   r  r  s       rX   r_   zDictOutputModule.forwardX  s:    kk!nAQ/446&
 	
rY   r`   rb   rg   s   @rX   r   r   S  s    2
rY   r   c                   &     e Zd Zd fdZd Z xZS )TwoLinLayerNetc                     t         |           t        j                  ddd      | _        t        j                  ddd      | _        y )NrE   FrF   rH   )rL   rM   rN   rO   rP   rQ   rU   s    rX   rM   zTwoLinLayerNet.__init__b  s6    2r.2qu-rY   c                 N    | j                  |      }| j                  |      }||fS rk   r   r   s       rX   r_   zTwoLinLayerNet.forwardg  r   rY   r`   rb   rg   s   @rX   r  r  a  s    .
rY   r  c                   *     e Zd ZdZd fd	Zd Z xZS )EmbeddingNetDifferentParamsz{
    A module containing an embedding with different dimension or different # of
    parameters depending on the rank.
    c                     t         |           |s|dk(  rdnd}t        j                  d|      | _        t        j
                  |d      | _        |rt        j
                  ddd      | _        y y )	Nr   r   r   rE   )num_embeddingsembedding_dimrH   FrF   )rL   rM   rN   	Embedding	embeddingrO   linlin2)rV   rankdiff_num_paramsr  rW   s       rX   rM   z$EmbeddingNetDifferentParams.__init__s  s]    .$!)R}U99]A.		!QU3DI rY   c                 F    | j                  |      }| j                  |      S rk   )r  r  r]   s     rX   r_   z#EmbeddingNetDifferentParams.forward{  s    NN1xx{rY   F)rc   rd   re   __doc__rM   r_   rf   rg   s   @rX   r	  r	  m  s    
4rY   r	  c                   &     e Zd Zd fdZd Z xZS )ControlFlowToyModelc                     t         |           t        j                  ddd      | _        t        j                  ddd      | _        y NrE   FrF   )rL   rM   rN   rO   lin1r  rU   s    rX   rM   zControlFlowToyModel.__init__  s6    IIb"51	IIb"51	rY   c                     t        j                  |t        j                  dd|j                              }|r3| j	                  t        j                  | j                  |                  S t        j                  | j                  |            S )N   rE   device)rS   rp   r   r  r  r   r   r  )rV   r^   use_second_layers      rX   r_   zControlFlowToyModel.forward  s^     ;;q%**RAHH*MN99QVVDIIaL12266$))A,''rY   r`   rb   rg   s   @rX   r  r    s    2
(rY   r  r   Gz?)momentumc                 X    | j                  d      d   }|t        v r	t        |   S t        S )N.r   )splitCUSTOMIZED_TIMEOUTDEFAULT_TIMEOUT)test_id	test_names     rX   get_timeoutr)    s/    c"2&I&&!),,rY   <   r   )test_ddp_uneven_inputs test_ddp_model_diff_across_rankstest_ddp_has_finalizedc                     d }t         | vrt        dt          d|        S  |t        j                  t                     st        dt          d      S d S )Nc                    | t         j                  j                  k(  rt        j                         S | t         j                  j                  k(  rt        j
                         S | t         j                  j                  k(  rt        j                         S | t         j                  j                  k(  rt        j                         S | t        j                  d   v ryy)NpluginTF)distBackendGLOOis_gloo_availableNCCLis_nccl_availableMPIis_mpi_availableUCCis_ucc_availabler6   backend_featurebackends    rX   checkz+require_backend_is_available.<locals>.check  s    dll'''))++dll'''))++dll&&&((**dll&&&((**m33H==rY   zTest requires backend z to be one of z to be availablec                     | S rk   ry   funcs    rX   <lambda>z.require_backend_is_available.<locals>.<lambda>       rY   )r   r=   r1  r2  )backendsr>  s     rX   require_backend_is_availablerE    sc     h*$WI^H:F
 	
 g&'*$WI-=>
 	
 rY   c                 d    t        t        j                  d         | k  rt        d| d      S d S )N
WORLD_SIZEzTest requires world size of dc                     | S rk   ry   r@  s    rX   rB  z$require_world_size.<locals>.<lambda>  rC  rY   )r   r   environr=   )
world_sizes    rX   require_world_sizerL    s9    
2::l#$z1**:a.9
 	
 rY   c               #     K   t         j                  d   } t         j                  j                  | d      }t	        |d      5 }	 t
        j                  dk(  r8t        j                  |j                         t        j                  d       d  n6t        j                  |j                         t        j                         d  t
        j                  dk(  r4t        j                  |j                         t        j                  d       n2t        j                  |j                         t        j                         |j!                          	 d d d        y # t
        j                  dk(  r4t        j                  |j                         t        j                  d       n2t        j                  |j                         t        j                         |j!                          w xY w# 1 sw Y   y xY ww)NTEMP_DIRlockfilewr@   rH   )r   rJ  pathjoinr   sysplatformmsvcrtlockingfilenoLK_RLCKfcntlflockLOCK_EXLK_UNLCKLOCK_UNr   )rN  rO  lfs      rX   _lockr_    s(    zz*%Hww||Hj1H	h	 	||w&ryy{FNNA>BIIK7||w&ryy{FOOQ?BIIK7HHJ  ||w&ryy{FOOQ?BIIK7HHJ s9   A G2G&BEB	G&	G2BG##G&&G/+G2c               #     K   t        j                         dk(  r-t        j                         \  } }t	        j
                  |        nd }|g}t        j                  |       |d   }	 | t        j                         dk(  rt	        j                  |       y y # t        j                         dk(  rt	        j                  |       w w xY wwNr   )r1  get_rankr   mkstempr   r   broadcast_object_listr   )fdr   object_lists      rX   _rank_temp_filerg    s     }}!##%D
&K{+q>D
==?aIIdO  4==?aIIdO  s   A$C'B +.C/CCc                     || }|(t        j                  | | | |      j                  |      S t        j                  | | | |      j                  |      j                  |      S Ndtype)rS   emptyfill_cuda)sizerq   rk  	device_ids       rX   _build_tensorrq    s]    }{{4t59??FF{{4t59??FKKIVVrY   c                     || }t        j                  t        |       D cg c]  }| c}|      j                  |      S c c}w )N)ro  rk  )rS   rl  rangerm  )r   dim_sizerq   rk  _s        rX   _build_multidim_tensorrv    s:    };;uSz:!X:%HNNuUU:s   	Ac                  V    t         j                  j                  j                  d      S NT)record_shapes)rS   autogradr   r   ry   rY   rX   _create_autograd_profilerr{    s     >>""***>>rY   c                      t         j                  j                  t         j                  j                  j                  gd      S NT)
activitiesry  )rS   r   r   r    CPUry   rY   rX   _create_torch_profilerr    s;    >>!!NN++//
 	 "  rY   c                   2    e Zd ZdZed        Zedd       Zy)Barrierr   c                    d| _         t        j                  j                  t        j                  d   d      }t        j
                  |      D ]5  }t        j                  t        j                  j                  ||             7 y )Nr   rN  barrier)
barrier_idr   rQ  rR  rJ  listdirunlink)clsbarrier_dirf_names      rX   initzBarrier.init  sY    ggll2::j#99Ejj- 	9FIIbggll;78	9rY   Nc                    |t        j                         }| xj                  dz  c_        t        j                  j                  t        j                  d   d      }t        t        j                               }t        j                  j                  ||      }t               5  t        |d      5 }|j                  t        | j                               d d d        d d d        t        j                         }	 d}t               5  t        j                  |      D ]a  }	t        t        j                  j                  ||	            5 }|j                         }
t        |
      | j                  k\  r|dz  }d d d        c 	 d d d        ||k(  ry t        j                         |z
  |kD  rt!        d      t        j"                  d       # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   nxY w)NrH   rN  r  rP  r   zbarrier timeout皙?)r1  r   r  r   rQ  rR  rJ  r   getpidr_  r   writetimer  readr   RuntimeErrorsleep)r  wait_fortimeoutr  pidbarrier_filer   
start_timearrivedr  datas              rX   synczBarrier.sync  s   **,H!ggll2::j#99E"))+ww||K5W 	-lC( -ACNN+,-	- YY[
G ) jj5 )Fbggll;?@ )A vvxt96#qLG) ))) ("yy{Z''1"#455JJsO 	- -	- 	-) )) )sI   $G1%GGAG/	.G#7
G/G	GG #G,(G//G8NrE   )rc   rd   re   r  classmethodr  r  ry   rY   rX   r  r    s-    J9 9  rY   r  c                        e Zd Ze fd       Z fdZ fdZed        Zede	fd       Z
ed        Zed        Z xZS )	TestDistBackendc                 b    t        t              t        j                  d<   t        |           y )NMASTER_ADDR)r   r  r   rJ  rL   
setUpClass)r  rW   s    rX   r  zTestDistBackend.setUpClass6  s!    $'$4

=!rY   c                     t         |           t                t        j	                          | j
                  j                  g| _        y rk   )rL   setUpr'   r  r  r-  __wrapped__skip_return_code_checksrU   s    rX   r  zTestDistBackend.setUp<  s4    #% )-(C(C(O(O'P$rY   c                 6    t                t        | 	          y rk   )r(   rL   tearDownrU   s    rX   r  zTestDistBackend.tearDownF  s    rY   c                 *    t          | j                   S rk   )r:   	file_namerV   s    rX   init_methodzTestDistBackend.init_methodJ  s    t~~.//rY   ra   c                      yNFry   r  s    rX   destroy_pg_upon_exitz$TestDistBackend.destroy_pg_upon_exitN  s     rY   c                    t         dk(  rDt        j                  j                         s&t	        j
                  t        d   j                          | |      }||_        ||_	        t        j                  j                         rgt        j                  j                         t        |j                        k  r3t	        j
                  t        d|j                      j                         	 t        j                  |t              }t!        |      }t#        j$                  |j&                  t         t        |j                        |j                  |       |j-                          |j/                  ||       |j-                          t#        j0                          t	        j
                  d       y # t(        $ r=}	d|	j*                  d   v r&t	        j
                  t        d   j                          d }	~	ww xY w)	Nncclno_cudaz
multi-gpu-secondsr  r=  rK  r  r  	recompiler   backend_unavailable)r   rS   rn  is_availablerS  exitr%   	exit_coder  r  device_countr   rK  CUSTOM_PG_TIMEOUTr   default_pg_timeoutr	   r1  init_process_groupr  r  args_barrierrun_testdestroy_process_group)
r  r  r(  r  pipekwargsrV   pg_timeout_secondsr  r   s
             rX   _runzTestDistBackend._runS  si   fUZZ%<%<%>HHZ	*4459~	"::""$)@)@)BSOOF
 *
 HHZ*T__,= >?IIJ	!2!6!6yBT!U(:;G## ,,t/YY  	i&""$  	affQi'$9:DDE		s   *A+F1 1	G7:8G22G7c                 (    t         j                  d   S )NrG  r   rJ  r  s    rX   rK  zTestDistBackend.world_size{  s    zz,''rY   )rc   rd   re   r  r  r  r  propertyr  r   r  r  rK  rf   rg   s   @rX   r  r  5  sx     
Q 0 0 d   # #N ( (rY   r  c                        e Zd Z G d d      Zy)DistributedTestc                   <T   e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Z eedk7  d       eej+                  d       d      d               Ze eedk7  d      d               Z eedk7  d      d        Z eeej6                  d   vde d       ed       ed      d                      Z eeej6                  d   vde d      ed               Z  eeej6                  d   vde d       ed       ed      d                      Z! eeej6                  d   vde d       ed       ed      d                      Z" eeej6                  d   vde d       ed       ed      d                       Z# eeej6                  d   vde d      ed!               Z$ eeej6                  d   vde d       ed       ed      d"                      Z% eeej6                  d   vde d       ed      d#               Z& eeej6                  d   vde d       ed      d$               Z' ed      d%        Z( eeej6                  d   vde d       ed      d&               Z) eeej6                  d   vde d       ed       ed      d'                      Z*e eed(k7  xs e+xs e,d)      d*               Z-e eed(k7  xs e+xs e,d)      d+               Z.e eed(k7  d,       e/d-d.      d/                      Z0e eed(k7  d,       e/d-d.      d0                      Z1e eed(k7  d,       e/d-d.      d1                      Z2ee eed(k7  d,       e/d-d.      d2                             Z3 eedk7  d3      d4        Z4 eedk7  d3      d5        Z5 eed(k7  d,       e/d-d.      d6               Z6 eed(k7  d,       e/d-d.      d7               Z7 eed(k7  d,       e/d-d.      d8               Z8e eed(k7  d9       e/d-d.      dd;                     Z9e eed(k7  d9       e/d-d.      d<                      Z:e eed(k7  d9       e/d-d.      d=                      Z;e eed(k7  d9       e/d-d.       ee+d>       ee<xs e=d?      d@                                    Z>dA Z? eed(k(  dB      dC        Z@ eed(k(  dD      dE        ZA eed(k(  dD       ee+d>       ee<xs e=d?      dF                      ZBdG ZC eeej                  dH   v e dI      dJ        ZE eeej                  dH   v e dI      dK        ZF eeej                  dH   v e dI       ee+dL       ee<xs e=d?      dM                      ZGdN ZH eed(k(  dD      dO        ZI eed(k(  dD      dP        ZJ eed(k(  dD       ee+dL       ee<xs e=d?      dQ                      ZKdR ZL eed(k(  dS      dT        ZM eed(k(  dS      dU        ZN eed(k(  dS       ee+dL       ee<xs e=d?      dV                      ZO eed(k(  dW      dX        ZP	 	 	 ddZZQ eed(k(  d[      d\        ZR eedk7  xr ed(k7  d]      ed^               ZSe eed(k(  d[      d_               ZT eed(k(  d[      d`        ZU eed(k7  da      edb               ZV	 	 ddcZW eed(k(  d[       eeej                  dd   v e de      df               ZX eed(k7  dg       eeej                  dd   v e de      edh                      ZY eed(k(  d[       eeej                  dd   v e de      di               ZZ eed(k(  d[       eeej                  dd   v e de      dj               Z[ eed(k(  d[       eeej                  dd   v e de      dk               Z\ eed(k(  d[       eeej                  dd   v e de      edl                      Z] eed(k(  d[       eeej                  dd   v e de      edm                      Z^ eed(k(  d[       eeej                  dd   v e de      edn                      Z_ eed(k(  d[       eeej                  dd   v e de      edo                      Z` eed(k(  d[       eeej                  dd   v e de      dp               Za eed(k(  d[       eeej                  dd   v e de      dq               Zb eed(k(  d[       eeej                  dd   v e de      dr               Zc eed(k(  d[       eeej                  dd   v e de      ds               Zd	 	 ddtZe eed(k(  d[       eeej                  dd   v e de      du               Zf eed(k7  dg       eeej                  dd   v e de      edv                      Zg eed(k7  dw       eeej                  dd   v e de      edx                      Zh	 ddzZi eed(k7  d{      ed|               Zjdyd:dYd:d}d~ZkdYd:elj                  dYfdZn eed(k(  d[      d        Zo eed(k(  d[      d        Zp eedk7  xr ed(k7  d      ed               Zq eedk7  xr ed(k7  d      ed               Zr eed(k(  d[      d        Zs eed(k(  d[      d        Zt eedk7  xr ed(k7  d      ed               Zu eed(k(  d[      d        Zv eed(k(  d[      d        Zw eed(k(  d[      d        Zxe eed(k(  d[      d               Zye eed(k(  d[      d               Zze eed(k(  d[      d               Z{e eed(k(  d[      d               Z| eed(k(  d[      d        Z} eed(k(  d[      d        Z~ eed(k(  d[      d        Z eed(k(  d[      d        Zd Z eedk7  d      d        Z eedk7  d      ed               Zed        Zed        Zed        Zed        Z eed(k(  d[      d        Z	 	 ddZ edh      d        Z edh      d        Z edh      d        Z edh      d        Ze edh      d               Ze edh      d               Ze edh      d               Ze edh      d               Z edh      d        Z edh      d        Z edh      d        Z edh      d        ZdYd:elj                  fdZ eed(k(  d[       eedk(  d      d               Z eed(k(  d[       eedk(  d      d               Z eed(k7  d      ed               Z eed(k(  d[       eedk(  d      d               Z eed(k7  d      ed               Z eed(k(  d[       eedk(  d      ed                      Z eed(k(  d[       eedk(  d      d               Z	 ddZ eed(k(  d[       eedk(  d      d               Z eed(k(  d[       eedk(  d      d               Z eed(k7  d      ed               Z eed(k(  d[       eedk(  d      ed                      Z eed(k(  d[       eedk(  d      d               ZdYd:elj                  fdZ eed(k(  d[      d        Z eed(k7  d      ed               Z eed(k(  d[      d        Z eed(k7  d      ed               Ze eed(k(  d[      d               Z eed(k(  d[      d        Z eed(k7  d«      edÄ               Z	 ddĄZ eed(k7  dū      edƄ               Z eed(k7  dū      edǄ               ZdȄ Zelj                  fdɄZ eeej                  d   v e d˝      d̄        Z eeej                  d   v e d˝      d̈́        Ze eeej                  d   v e d˝      d΄               Z eeej                  d   v e d˝      dτ        Z eeej                  d   v e d˝      dЄ        ZdYd:elj                  fdфZdYd:elj                  fd҄ZdYd:elj                  fdӄZ eedk7  dի      dք        Z eed(k7  d׫      ed؄               Z eedk7  dի      dل        Z eed(k7  d׫      edڄ               Z eedk7  dի      dۄ        Z eed(k7  d׫      ed܄               Z eedk7  dի      d݄        Z eed(k7  d׫      edބ               Z eedk7  d߫      d        Z eed(k7  d      ed               Z eedk7  d߫      d        Z eed(k7  d      ed               Z eedk7  dի      ed               Z eed(k7  d׫      eed                      Z eedk7  dի      ed               Z eed(k7  d׫      eed                      Z eedk7  d߫      ed               Z eed(k7  d׫      eed                      Z eedk7  dի      d        Z eed(k7  d׫      ed               Z eedk7  dի      d        Z eed(k7  d׫      ed               Z eedk7  d߫      d        Z eed(k7  d      ed               Z	 ddZe eedk(  d       eedk(  xr e,d      d                      Zee eedk(  d      d                      Zee eedk(  d      d                      Z eeej                  d   v e d      d        Ze eeej                  d   v e d      d               Z eeej                  d   v e d      d        Zd Zd Zd Z	 ddZސd  Z	 	 	 	 	 ddZ	 	 	 	 ddZddZ eed(k(  d      d        Z eed(k(  d      d        Z eeej6                  d   vde d      d	        Z eeej6                  d   vde d       e eej                  d
               d               Z eed(k(  d      d        Z eeej6                  d   vde d       e eej                  d
               d               Z eeej6                  d   vde d       e eej                  d
               d               Zd Z	  ed      d        Z	 ddZ ed      d        Z ed      d        Z ed      d        ZdefdZ ed      d        Zd Z ed      d        Z ed      d        Z ed      d        Z ed      d        Zdd Z eeej6                  d   vde d       e eej                  d
               d!               Z eeej6                  d   vde d       e eej                  d
               d"               Z eeej6                  d   vde d       e eej                  d
               d#               Z eeej6                  d   vde d       e eej                  d
               d$               Z	 dd%Z 	 dd&Z	 dÐd'Z eedk7  xr ed(k7  xr edk7  d(       eed      d)               Z eedk7  xr ed(k7  xr edk7  d(       eed      d*               Z eedk7  xr ed(k7  xr edk7  d(       eed      d+               Z eedk7  xr ed(k7  xr edk7  d(       eed      d,               Z eedk7  xr ed(k7  xr edk7  d(       eed      d-               Z eeej6                  d   vde d      ed.               Z	dd/Z
 eeej6                  d   vde d      ed0               Z	 	 dĐd1Zd2 Zd3 Zd4 Zd5 Zd6 Z ed       eeej6                  d   vde d      d7               Z ed       eeej6                  d   vde d      d8               Zd9 Z ed      e eeej6                  d   vde d      d:                      Z ed      e eeej6                  d   vde d      d;                      Z ed       eeej6                  d   vde d      d<               Z eeej6                  d   vde d      ed=               Zd> Z eeej6                  d   vde d      ed?               Z eeej6                  d   vde d      ed@               Z eeej6                  d   vde d      edA               Z eeej6                  d   vde d      e ed      dB                      Z eeej6                  d   vde d      edC               Z eeej6                  d   vde d      edD               Z  eeej6                  d   vde d      edE               Z!dF Z" eed(k(  d      dG        Z# eeej6                  d   vde d      edH               Z$ eed(k(  d      dI        Z%e&dJ        Z'e(jR                  d:fdKZ* ed(h       ed      dL               Z+ ed(h       ed      dM               Z, ed(h       e eej                  d
               dN               Z- ed(h       ed      dO               Z. eeej6                  d   vde d       e eej                  d
               dP               Z/ddQZ0 eej6                  dR          e1 eej                  d
         ej                  dS          e2g dTU      dV                      Z3 eej6                  dR          e1 eej                  d
         ej                  dS          e2g dWU      dX                      Z4ddYZ5 eedk(  d       eej6                  dR          e2g dWU      dZ                      Z6 eedk(  d       eej6                  dR          e2g dWU      d[                      Z7d\ Z8 ed       eeej6                  d   vde d      d]               Z9 ed       eeej6                  d   vde d      d^               Z:dd_Z; eej6                  dR          ed       e<d`      da                      Z= eej6                  dR          ed       ee+dL       ee<xs e=d?      db                             Z>dce?dd:fddZ@ eej6                  dR          ed       ee+dL       ee<xs e=d?       eAj                  ed(k7  de      df                                    ZC ed       eeej6                  d   vde d      dg               ZDdh ZE ed       eeej6                  d   vde d      di               ZF ed       eeej6                  d   vde d      dj               ZG ed       eeej6                  d   vde d      dk               ZH ed       eeej6                  d   vde d      dl               ZIddmZJ eej6                  dR          e1 eej                  d
         ej                  dS          e2dngU       eAj                  do      dp                             ZL eej6                  dR          e1 eej                  d
         ej                  dS          e2dngU      dq                      ZMddrZN eej6                  dR          ed      ds               ZO e2g dTU       eej6                  dR          ed      dt                      ZP eej6                  dR          ed      du               ZQ eej6                  dR          ed      dv               ZR eej6                  dR          ed      dw               ZS edh      dx        ZT e2g dTU       eej6                  dR          ed      dy                      ZU eej6                  dR          ed      dz               ZV e2g dTU       eej6                  dR          ed      d{                      ZW edh      d|        ZXd} ZYd~ ZZ eej6                  dR          ed      d               Z[ eej6                  dR          ed      d               Z\	 ddZ]d Z^ eej6                  dR          eedk(  xr e,d       ed      d                      Z_ eej6                  dR          eedk(  xr e,d       ed      d                      Z`d Za eej6                  dR          eedk(  xr e,d       ed      d                      Zb eej6                  dR          eedk(  xr e,d       ed      d                      Zcd Zd eeej6                  d   vde d       ed      d               Ze eeej6                  d   vde d       ed      d               Zf eeej6                  d   vde d       ed      d               Zg	 dŐdZh eej6                  dR          ed      d               Zi edh       ee<xs e=d      d               Zj edh      d        Zkd Zlem eej6                  dR          e eej                  d
               d                      Znem eej6                  dR          e eej                  d
               d                      Zo edh      d        Zp edh      e ee<xs e=d      d                      Zq edh      ed               Zr eej6                  dR          e2dgU       ed      d                      Zs eeej6                  d   vde d       e2dgU       ed      d                      Ztd Zu e2g dTU       eej6                  dR          ed      d                      Zv e2g dTU       eej6                  dR          ed      d                      Zw eeej6                  d   vde d       ed      d               Zx eeej6                  d   vde d       ed       eAj                  do      d                      Zy ed       eeej6                  d   vde d      d               Zz ed       eeej6                  d   vde d      d               Z{ ed       eeej6                  d   vde d      d               Z| ed       eeej6                  d   vde d      d               Z}d Z~ ed       eeej6                  d   vde d      d               Z ed       eeej6                  d   vde d      d               Zd Z ed       eeej6                  d   vde d      d               Z ed       eeej6                  d   vde d      d               Z ed       eeej6                  d   vde d      d               Z ed       eeej6                  d   vde d      d               Z ed       eeej6                  d   vde d       eAj                  d      d                      Z	  ed       eeej6                  d   vde d      d               Z ed       eed(k7  xr edk7  d      d               Z ed       eed(k7  xr edk7  d      d               Z ed       eeej6                  d   vde d      d               Z eej6                  dR          ed      d               Zd Z eeej6                  d   vde d       e eej                  d
                edyd      d                      Z eej6                  dR          ed      d               Z ed       ed       eeej6                  d   vde d      d                      Z ed       ed       eeej6                  d   vde d      d                      Zy:(  DistributedTest._DistTestBasec                 .    t        j                  |i | y rk   )r  r  )rV   r  r  s      rX   r  z&DistributedTest._DistTestBase._barrier  s    LL$)&)rY   c                 z    ddg}t        j                  |fi |}t        j                         }||vrg d |fS |||fS )NrH   rJ   )r1  	new_grouprb  rV   r  groupgroup_idr  s        rX   _init_group_testz.DistributedTest._DistTestBase._init_group_test  sK    FE~~e6v6H==?D5 D$''8T**rY   c                     t        t        dt        j                                     }t        j                  di |}t        j
                         }|||fS )Nr   ry   )listrs  r1  r   r  rb  r  s        rX   _init_full_group_testz3DistributedTest._DistTestBase._init_full_group_test  sE    q$"5"5"789E~~//H==?D8T**rY   c                     t        t        dt        j                                     }t        j                  j
                  }t        j                         }|||fS ra  )r  rs  r1  r   r  WORLDrb  rV   r  r  r  s       rX   _init_global_testz/DistributedTest._DistTestBase._init_global_test  sC    q$"5"5"789Ezz''H==?D8T**rY   c                    t        |j                  j                               }|j                  j                         D ]  \  }}| j                  |||           t	        |j                               }t	        |j                               }t        ||      D ]  \  }}	t        t        j                               D 
cg c]  }
t        j                  |       }}
t        j                  ||       t        t        j                               D 
cg c]  }
t        j                  |	       }}
|D ]  }| j                  ||        t        j                  ||	       |D ]  }| j                  ||	         y c c}
w c c}
w rk   )dictr   named_buffersassertEqualr  buffersziprs  r1  r   rS   
empty_like
all_gather)rV   m1m2m1_buf_dictr   buf
m1_buffers
m2_buffersbuf1buf2ru  gathered_bufsgathered_bufs_m2rQ   s                 rX   _verify_buffers_equalz3DistributedTest._DistTestBase._verify_buffers_equal  sZ   ryy6689KYY446 9	c  k$&789 bjjl+Jbjjl+J #J
 ; .t49$:M:M:O4P!/0E$$T*! ! t449$:M:M:O4P$/0E$$T*$  $ ' .A$$Q-. 0$7) .A$$Q-..!$s   >E:E?c                    t        t              }|D ]H  }|j                  di       }|j                  dd      }| j                  |d       | j                  |j                  dd      d       ||   j	                  |       |dv ru| j                  |d   d       | j                  |d   d       | j                  |j                  d	d
      d       | j                  |j                  dd
      d       | j                  |j                  dd
      d       | j                  |j                  dd
      d       | j                  |j                  dd
      d       K |S )zTorch profiler includes nccl metadata in an inserted operator called "record_param_comms"
            We test for basic fields in this profiler event that correspond to the nccl communication
            collectivesr  zCollective name rk  >   waitzProcess Group Description
default_pgzProcess Group RankszIn msg nelemsr   r   Out msg nelemsz
Group sizezGlobal rank startzGlobal rank stride)r   r  r   assertNotEqualappendr  assertGreaterEqual)rV   nccl_meta_eventsper_coll_metar   r  collnames         rX    _sanity_check_profiler_nccl_metaz>DistributedTest._DistTestBase._sanity_check_profiler_nccl_meta  sG    (-M% OuuVR(88$5r:##Hb1##DHHWb$92>h'..t4x'  &A!BLQ##D)>$?D''"(EqI''1A2(FJ''r(BAF''1Db(I1M''1Er(JAN#O( ! rY   c                 2   t               5 \  }}t                |j                         j                         }d d d        d }g d}|D ]  } ||      }| j	                  |        g d}|D ]  } ||      }| j                  |        y # 1 sw Y   WxY w)Nc                 `    d|  d| t         j                  v rt         j                  |     S d S )Nzenv:=N/Ar  vars    rX   format_linezRDistributedTest._DistTestBase.test_dump_DDP_relevant_env_vars.<locals>.format_line  s4    cU!sbjj7HBJJsO#TUUe#TUUrY   )r  MASTER_PORTrG  NCCL_TOPO_DUMP_FILETORCH_NCCL_ASYNC_ERROR_HANDLING)xxxyyyzzz)r2   r"   getvalue
splitlinesassertInassertNotIn)rV   outru  linesr  varsr   lines           rX   test_dump_DDP_relevant_env_varsz=DistributedTest._DistTestBase.test_dump_DDP_relevant_env_vars  s     " 4hsA+-1134VD  +"3'dE*+D
  ."3'  u-.14 4s   ,BBc                    t         j                  j                  t         j                  d   d      }t	        t        j
                               }t        j                         }t        t         j                  j                  ||      d      5 }|j                  t	        t        j                                      d d d        | j                          t               }t        j                  |      D ]\  }t        t         j                  j                  ||            5 }|j                  t        |j!                                      d d d        ^ | j#                  t%        |      |       | j                          t        j                         dk(  rMt        j                  |      D ]5  }t        j&                  t         j                  j                  ||             7 | j                          y # 1 sw Y   8xY w# 1 sw Y   xY w)NrN  test_dirrP  r   )r   rQ  rR  rJ  r   r  r1  r   r   r  rb  r  setr  addr   r  r  lenr  )rV   r  r  num_processesr   	all_ranksr  s          rX   test_get_rankz+DistributedTest._DistTestBase.test_get_rank  sb   ww||BJJz$:JGHbiik"C //1Mbggll8S137 .1DMMO,-. MMOI**X. 1"'',,x89 1QMM#affh-01 11 S^];MMO}}!# jj2 >FIIbggll8V<=> MMO#. .1 1s   -G0)G=0G:=H	c                    t        j                         dkD  rddg}nddg}t        j                  |      }t        j	                         }| j                  t        j                         |       t        j                         |v r&| j                  t        j                  |      |       y | j                  t        d      5  t        j                  |       d d d        y # 1 sw Y   y xY w)NrJ   rH   r   zInvalid process group specified)
r1  r   r  r   lowerr  get_backendrb  assertRaisesRegex
ValueError)rV   r  r  backend_strs       rX   test_get_backendz.DistributedTest._DistTestBase.test_get_backend  s    ""$q(AA~~e,H!--/KT--/=}}%'  !1!1(!;[I++ A / $$X./ / /s    CC(c                 h   t         j                         }| j                  t        j                  t         j                               |       | j                  t        j                  t               |       | j                  t              5  t        j                  d        d d d        | j                  t              5  t        j                  d       d d d        | j                  t              5  t        j                  dg       d d d        y # 1 sw Y   qxY w# 1 sw Y   JxY w# 1 sw Y   y xY w)Nrz   gloo)r   r  r  r1  r2  upperassertRaisesr  )rV   r=  s     rX   test_Backend_enum_classz5DistributedTest._DistTestBase.test_Backend_enum_class  s    mmoGT\\'--/:GDT\\'2G<"":. #T"#"":.  Q "":. 'fX&' '	# #   ' 's$   
D=D0D(DD%(D1c                     t        j                         dkD  rddg}nddg}t        j                  |      }| j                          t        j                  |       y )NrJ   rH   r   )r1  r   r  r  r  rV   r  r  s      rX   test_destroy_groupz0DistributedTest._DistTestBase.test_destroy_group  sJ    ""$q(AA~~e,HMMO&&x0rY   c                    t        j                         dkD  rddg}nddg}t        j                  |      }t        j                         |v r^| j	                  t        j                  |      d       | j                  t        j                  |      t        t        d            v        y | j	                  t        j                  |      d       | j	                  t        j                  |      d       y )NrJ   rH   r   r   )r1  r   r  rb  r  
assertTruer  rs  r&  s      rX   test_get_rank_size_groupz6DistributedTest._DistTestBase.test_get_rank_size_group(  s    ""$q(AA~~e,H}}%'  !4!4X!>Bh 74a> IJ  !4!4X!>C  x!8"=rY   c                 v    | j                         \  }}}| j                          t        j                  |       y rk   )r  r  r1  r  rV   ru  r  s      rX   test_destroy_full_groupz5DistributedTest._DistTestBase.test_destroy_full_group6  s-    !779NAxMMO&&x0rY   c                    | j                         \  }}}| j                  t        j                  |      t        j                                | j                  t        j                  |      t        j                                y rk   )r  r  r1  r   rb  r,  s      rX   test_get_rank_size_full_groupz;DistributedTest._DistTestBase.test_get_rank_size_full_group<  sY    !779NAxT00:D<O<O<QRT]]84dmmoFrY   c                    t        j                  |      }|dk(  rt        j                         |j                         z   }t        j                         t         j
                  j                  k(  r| j                  t        d      }n| j                  t        d      }|5  t        j                  |       d d d        | j                  t        j                         |d       y y # 1 sw Y   1xY w)Nr   zfailed to pass monitoredBarrierz (Timed out|closed|timeout) r  )delta)r1  rb  r  total_secondsget_debug_level
DebugLevelDETAILr  	Exceptionr  assertGreaterAlmostEqual)rV   r  r  
local_rankexpected_timeexception_ctxs         rX   _test_barrier_timeoutz3DistributedTest._DistTestBase._test_barrier_timeoutA  s    x0J Q $		g.C.C.E E '')T__-C-CC$($:$:!#D%M %)$:$:!#A%M # +LL*+--diik=PS-T	+ +s   C$$C-r!  z#Only gloo backend supports timeoutsfile://zRequires file:// initialization method. Both tcp:// and env:// rely on the TCP store for which reinitialization has proven racy.c                 |   t        j                          | j                  t        t        j
                  d                t        d      }t        j                  t        t        t        t        j
                  d         | j                  |       | j                  t         j                  j                  |       y )NrG  )r  rH   r  r  )r1  r  r  r   r   rJ  r	   r  r   r   r  r;  r  r  )rV   r  s     rX   test_barrier_timeout_globalz9DistributedTest._DistTestBase.test_barrier_timeout_globalW  s     &&( MM3rzz,'?#@MA  *G##'rzz,78YY &&tzz'7'7ArY   c                 r    t        d      }| j                  |      \  }}}|| j                  ||       y y )Nr   r  r  )r	   r  r;  rV   r  ru  r  s       rX   test_barrier_timeout_groupz8DistributedTest._DistTestBase.test_barrier_timeout_groupr  sB    
  *G!2272CNAx#**8W= $rY   c                 r    t        d      }| j                  |      \  }}}|| j                  ||       y y )NrH   r  r@  )r	   r  r;  rA  s       rX   test_barrier_timeout_full_groupz=DistributedTest._DistTestBase.test_barrier_timeout_full_group|  sB      *G!777HNAx#**8W= $rY   subgroupzThe z< backend does not support creating subgroups on CUDA devicesr{   rJ   c                 \   d}t        j                  |      \  }}t        j                         }| j                  |j	                         |       | j                  t        |      ||z         | j                  t        j                  |             |D ]  }t        j                  |        y r   )	r1  new_subgroupsr   r  ro  r  assertFalse_rank_not_in_groupr  )rV   subgroup_sizecur_subgroup	subgroupsrK  rE  s         rX   test_new_subgroupsz0DistributedTest._DistTestBase.test_new_subgroups  s     M&*&8&8&G#L),,.J\..0-@S^Z--GHT44\BC% 5**845rY   c                     | j                  t        d      5  t        j                  d       d d d        y # 1 sw Y   y xY w)Nzmust not exceedd   r  r  r1  rG  r  s    rX   0test_new_subgroups_group_size_exceeds_world_sizezNDistributedTest._DistTestBase.test_new_subgroups_group_size_exceeds_world_size  s8     ''
4EF (""3'( ( (   6?c                     | j                  t        d      5  t        j                  d       d d d        y # 1 sw Y   y xY w)Nz0The world size must be divisible by 'group_size'rz   rP  r  s    rX   9test_new_subgroups_world_size_not_divisible_by_group_sizezWDistributedTest._DistTestBase.test_new_subgroups_world_size_not_divisible_by_group_size  s=     ''N & ""1%& & &rR  c                    | j                         \  }}}t        t        j                         t              }||   d   }t        j
                  ddgddgg      \  }}|dk\  r| j                  |       np| j                  |j                         d       | j                  t        |      d       |dk(  s|dk(  r| j                  ||d          n| j                  ||d          |D ]  }t        j                  |        y )Nr   rJ   rH   rz   ranks_per_subgroup_listr{   )r  r&   r1  r   r   new_subgroups_by_enumerationassertIsNoner  ro  r  r  )	rV   _group	_group_idr  rank_to_GPUrp  rK  rL  rE  s	            rX   !test_new_subgroups_by_enumerationz?DistributedTest._DistTestBase.test_new_subgroups_by_enumeration  s     '+&<&<&>#FIt.t/B/B/DgNK#D)!,I&*&G&G*+Q!Q(8'#L) A~!!,/  !2!2!4a8  Y3>Y!^$$\9Q<@$$\9Q<@% 5**845rY   c                    | j                         \  }}}t        t        j                         t               t        |      }| j                  t        d      5  t        j                  ddg|dgg       d d d        y # 1 sw Y   y xY w)NNThe new group's rank should be within the world_size set by init_process_groupr   rH   rJ   rV  )r  r&   r1  r   r   r  r  rX  )rV   rZ  r  _rankrK  s        rX   ?test_new_subgroups_by_enumeration_input_rank_exceeds_world_sizez]DistributedTest._DistTestBase.test_new_subgroups_by_enumeration_input_rank_exceeds_world_size  s     '+&<&<&>#FHe !4!4!6@'1J''`  11./Vj!_,E	  s   A>>Bc                     | j                          | j                  t        d      5  t        j                  ddgddgg       d d d        y # 1 sw Y   y xY w)Nr_  r   rV  )r  r  r  r1  rX  r  s    rX   5test_new_subgroups_by_enumeration_negative_input_rankzSDistributedTest._DistTestBase.test_new_subgroups_by_enumeration_negative_input_rank  sY     ""$''`  11.0"XBx,@	  s   AAc                     | j                  t        d      5  t        j                  dgddgddgg       d d d        y # 1 sw Y   y xY w)Nz$Rank 1 has appeared in both subgroupr   rH   rJ   rz   rV  )r  r  r1  rX  r  s    rX   &test_new_subgroups_overlap_not_allowedzDDistributedTest._DistTestBase.test_new_subgroups_overlap_not_allowed  sQ     ''B  11./S1a&1a&,A  s	   ?Ac           	         t        j                         }t        t        j                         t              }||   d   }t        j                  t        j                  dddd      t        j                         t        j                  ddd            j                  |      }|j                         D ]&  }t        j                  |j                        |_        ( t        j                   |j                         d        |j                         D ];  }| j#                  |j                  t        j                  |j                               = |j                         D ])  }t        j                  |j                        |z  |_        + t        j$                  ddgd	
      }t        j                   |j                         |       t        j&                  |      sR|j                         D ]>  }| j#                  |j                  t        j                  |j                        dz         @ y |j                         D ]>  }| j#                  |j                  t        j                  |j                        |z         @ y )Nr   rz   rH   )kernel_sizepaddingr   FrF   )paramsprocess_groupr  ranksr=        ?)r1  rb  r&   r   r   rN   
SequentialConv2dr   rO   rn  
parametersrS   	ones_liker  model_averaging_utilsaverage_parametersr  r  rI  )rV   r  r\  rp  r   r   
group_nccls          rX   test_average_parametersz5DistributedTest._DistTestBase.test_average_parameters  s    ==?D.t/B/B/DgNK#D)!,IMM		!QAq9			!QU+ d9o	  %%' 101!44'') %%' B  )@AB %%' 80478q!ffEJ!44'') **:6))+ LA$$QVVU__QVV-Ds-JKL ))+ MA$$QVVU__QVV-Dt-KLMrY   c                    t        j                         }t        j                         }t        |t              }||   d   }t        j                  ddd      j                  |      }t        |j                               }t        j                  |j                        |z  }t        j                  |j                        t        t        |            z  |z  }d}	dD ]  }
t        j                   |	|
      }t        dd	      D ]  }t#        j$                  |      |_        |j                         D ]&  }t        j                  |j                        |_        ( |j)                  |j                                ||
k\  r(||
z
  |	z  dk(  r| j+                  |j                  |       | j+                  |j                  |         y )
Nr   rH   r   FrF   r{               periodwarmup_stepsr  )r1  rb  r   r&   r   rN   rO   rn  nextrs  rS   rt  r  r  rs  	averagersPeriodicModelAveragercopydeepcopygradrv  r  rV   r  rK  r\  rp  r   paramr   expected_avg_tensorr  r  averagersteprl  s                 rX   test_periodic_model_averagerz:DistributedTest._DistTestBase.test_periodic_model_averager   s    ==?D,,.J.z7CK#D)!,IIIa/44Y?E))+,E__UZZ047F

+c%
2C.DDzQ   F 0 =$::! "!RL =D!%v!6EJ"'"2"2"4 B&+ooejj&AB //0@0@0BC|+1D0NRS0S((5HI ((V<=	=rY   c                    t        j                         }t        j                         }t        |t              }||   d   }t        j                  ddd      j                  |      }t        |j                               }t        j                  j                  |j                         d      }d}d	D ]  }	t        j                  ||	
      }
t        dd      D ]v  }|j                   D ]Z  }|d   D ]P  }t        j"                  |j$                        |z  |_        t        j"                  |j$                        |z  |_        R \ |
j)                  |j                          ||	k\  r||	z
  |z  dk(  r{|j                   D ]j  }|d   D ]`  }|j&                  | j+                  |j$                  t        j"                  |j$                        t-        t        |            z  |z         b l |j                   D ]U  }|d   D ]K  }|j&                  | j+                  |j$                  t        j"                  |j$                        |z         M W y  y )Nr   rH   r   FrF   r  lrr{   rz  r  r  rl  )r1  rb  r   r&   r   rN   rO   rn  r  rs  rS   optimSGDr  r  rs  param_groupsrt  r  r  rv  r  r  )rV   r  rK  r\  rp  r   r  optr  r  r  r  param_grouprl  s                 rX   (test_periodic_model_averager_param_groupzFDistributedTest._DistTestBase.test_periodic_model_averager_param_groupC  s*   ==?D,,.J.z7CK#D)!,IIIa/44Y?E))+,E++//%"2"2"4/=CF 0 "$::! "!RL "D'*'7'7 M&1(&; MF*///%***E*LFK*///%***E*LFKMM
 //0@0@A|+1D0NRS0S+.+;+; 	"K*5h*? "#);;#6$, $ 0 0$)JJ$)OOEJJ$?&)%
*;&<%=&0%1!""	" ,/+;+; "K*5h*? "#);;#6$, $ 0 0$)JJ

0Kd0R!"""+"	"rY   c                    t        j                         }t        j                         }t        |t              }||   d   }t        j                  ddd      j                  |      }t        |j                               }t        j                  |j                        |z  }t        j                  |j                        t        t        |            z  |z  }d}	dD ]	  }
t        j                   t#        |	|fg      |
      }t%        j&                  |	|
	      }t        dd
      D ]  }t)        j*                  |      |_        |j                         D ]&  }t        j                  |j                        |_        ( |j/                  |j                                ||
k\  r(||
z
  |	z  dk(  r| j1                  |j                  |       | j1                  |j                  |         y )Nr   rH   r   FrF   r{   rz  period_group_size_dictr  r  r  )r1  rb  r   r&   r   rN   rO   rn  r  rs  rS   rt  r  r  rs  hierarchicalSGDHierarchicalModelAveragerr   r  r  r  r  r  rv  r  r  s                 rX   Ntest_1_level_hierarchical_model_averager_equivalent_to_periodic_model_averagerzlDistributedTest._DistTestBase.test_1_level_hierarchical_model_averager_equivalent_to_periodic_model_averagerp  s    ==?D,,.J.z7CK#D)!,IIIa/44Y?E))+,E__UZZ047F

+c%
2C.DDzQ   F 0 =*DD ,78L7M+N!-	 %::! "!RL =D!%v!6EJ"'"2"2"4 B&+ooejj&AB //0@0@0BC|+1D0NRS0S((5HI ((V<==rY   c                 *   t        j                         }t        j                         }t        |t              }||   d   }t        j                  ddd      j                  |      }t        |j                               }t        j                  |j                        |z  }d}d}	d}
d}d}d	}t        |
|	f||f||fg      }t        j                  ||
      }| j!                  t        j"                         t%        |             |j&                  |
   }|j&                  |   }t)        |      d   }t)        |      d   }||	z  |	z  t+        j,                  t/        t1        |	                  z   j3                         }||z  |z  t+        j,                  t/        t1        |                  z   j3                         }| j!                  ||       | j!                  ||       t        j                  |j                        t5        |      z  |	z  }t        j                  |j                        t5        |      z  |z  }t        j                  |j                        t5        t1        |            z  |z  }t1        dd      D ]  }t7        j8                  |      |_        |j                         D ]&  }t        j                  |j                        |_        ( |j=                  |j                                |dk(  s|dk(  r| j!                  |j                  |       |dk(  s|dk(  r| j!                  |j                  |       |dk(  s|dk(  s
|dk(  s|dk(  r| j!                  |j                  |       | j!                  |j                  |        y )Nr   rH   r   FrF   rE   rJ   r{      r  ro           r{  r  r}        )r1  rb  r   r&   r   rN   rO   rn  r  rs  rS   rt  r  r   r  r  r  get_pg_countr  period_process_group_dictr   nparrayr  rs  tolistr  r  r  r  rv  )rV   r  rK  r\  rp  r   r  r   r  subgroup_size1subgroup_avg_period1subgroup_size2subgroup_avg_period2global_avg_periodr  r  	subgroup1	subgroup2real_group_ranks_res1real_group_ranks_res2expect_group_ranks_res1expect_group_ranks_res2$expected_avg_tensor_within_subgroup1$expected_avg_tensor_within_subgroup2expected_global_avg_tensorr  rl  s                              rX   (test_3_level_hierarchical_model_averagerzFDistributedTest._DistTestBase.test_3_level_hierarchical_model_averager  sO    ==?D,,.J.z7CK#D)!,IIIa/44Y?E))+,E__UZZ047F LN#$ N#$  !%0)>:)>:&
3&" '@@'=LH T..0#6L2MN ::;OPI ::;OPI$29$=g$F!$29$=g$F! &7((4n 5678fh $
 &7((4n 5678fh $ 24KL24KL 

++,- ! 1 

++,- ! 1 

+c%
2C.DDzQ ' a 9!]]62
#..0 >F"'//%**"=FK> ++E,<,<,>?2:$$UZZ1KLRZ42:$$UZZ1UVRZ42:trz$$UZZ1UV $$UZZ8%9rY   r  zFCoalescing manager currently tests with NCCL only; internal test flakyc                    | j                          t        j                         }t        j                         }t	        |t
              }||   d   }t        j                  j                  |       d}d}t        |      D cg c]  }t        j                  ||       }}t        j                         5  t        |      D ]  }	t        j                  ||	           	 d d d        t        j                  ||z  |      }
t        j                  |
       t        |      D ]#  }	| j                  ||	   |
|	|z  |	dz   |z          % | j                          y c c}w # 1 sw Y   xY w)Nr   rJ   r  r  rH   )r  r1  rb  r   r&   r   rS   rn  
set_devicers  r   _coalescing_manager
all_reducer  )rV   r  rK  r\  rp  	num_collssize_per_collru  small_tensorsi
big_tensors              rX   test_coalescing_managerz5DistributedTest._DistTestBase.test_coalescing_manager  sD    MMO==?D,,.J.z7CK#D)!,IJJ!!),IMEJ9EU@A

=;M  ))+ 6y) 6AOOM!$4566 I$=iPJOOJ'9%   !!$q=0AE]3JK MMO#6 6s   E5)EE&c                    | j                          t        j                         }t        j                         }t	        |t
              }||   d   }t        j                  j                  |       d}d}t        |      D cg c]  }t        j                  ||       }}t        j                  d      5 }	t        |      D ]  }
t        j                  ||
           	 d d d        	j                          t        j                  ||z  |      }t        j                  |       t        |      D ]#  }
| j                  ||
   ||
|z  |
dz   |z          % | j                          y c c}w # 1 sw Y   xY w)Nr   rJ   r  r  T)	async_opsrH   )r  r1  rb  r   r&   r   rS   rn  r  rs  r   r  r  r  r  )rV   r  rK  r\  rp  r  r  ru  r  cmr  r  s               rX   test_coalescing_manager_asyncz;DistributedTest._DistTestBase.test_coalescing_manager_async  sQ    MMO==?D,,.J.z7CK#D)!,IJJ!!),IMEJ9EU@A

=;M  ))D9 6Ry) 6AOOM!$4566 GGII$=iPJOOJ'9%   !!$q=0AE]3JK MMO%6 6s   E*7)E//E8zNCCL Batch Send Recv Only)rJ      r   zNeed NCCL 2.7+ for send/recvc                 &   | j                          t        j                         }t        j                         }t	        |t
              }||   d   }t        j                  j                  |       g }t        |      D cg c]  }d  }}t        |      D cg c]  }d  }}dD ]P  }	|	t        j                  d<   t        d|      D ]  }
t        |dz   |      j                  |
      }t        |
dz   d|      j                  d      ||
<   t        |
dz   d|      j                  |      ||
<   t        j                  t        j                  ||
   |
      }|j!                  |       t        j                  t        j"                  ||
      }|j!                  |        t        j$                  |      }|D ]  }|j'                           t        d|      D ]  }
| j)                  ||
   ||
           S | j                          y c c}w c c}w )Nr   )10TORCH_NCCL_BLOCKING_WAITrH   rp  r   rq   rp  )r  r1  rb  r   r&   r   rS   rn  r  rs  r   rJ  rq  rm  P2POpirecvr  isendbatch_isend_irecvr  r  )rV   r  rK  r\  rp  p2p_op_listru  recv_tensorsexpected_tensorsvalsrcsend_tensorrecv_opsend_opreqsreqs                   rX   test_batch_isend_irecv_ncclz9DistributedTest._DistTestBase.test_batch_isend_irecv_nccl:  s    MMO==?D,,.J.z7CK#D)!,IJJ!!),K*/
*;<QD<L<.3J.?@@@! O9<

56 J/ 0C"/qI"N"T"T#K )6arY)eBi !% -:arY-eDk %S) #jj\#5FLG&&w/"jj[#FG&&w/0 --k: CHHJ !J/ OC$$\#%68H8MNO+O0 MMO7 =@s   ?	H		Hc                 F   | j                          t        j                         }t        j                         }t	        |t
              }||   d   }t        j                  j                  |       t        ||      }t        |d|      }t        j                  t        j                  ||dz   |z        }t        j                  t        j                  ||dz
  |z   |z        }t        j                  ||g      }	|	D ]  }
|
j                           | j                          y )Nr   r  r   r  rH   )r  r1  rb  r   r&   r   rS   rn  r  rq  r  r  r  r  r  )rV   r  rK  r\  rp  r  recv_tensorr  r  r  r  s              rX   )test_batch_isend_irecv_ring_exchange_ncclzGDistributedTest._DistTestBase.test_batch_isend_irecv_ring_exchange_ncclb  s     MMO==?D,,.J.z7CK#D)!,IJJ!!),'
iHK'
"	RKjj[4!8z:QRGjj

K$(Z*?:)MG ))7G*<=D 
 MMOrY   c                 h   | j                          t        j                          t        j                         }t	        t        j
                         t              }||   d   }g }|dk(  rt        |dz   |      }t        |dz   d|      }t        j                  t        j                  |d      }|j                  |       t        j                  t        j                  |d      }|j                  |       t        j                  |      }	|	D ]  }
|
j                           | j                          y )Nr   rH   r  r   r  )r  r1  r  rb  r&   r   r   rq  r  r  r  r  r  r  )rV   r  r\  rp  r  r  r  r  r  r  r  s              rX    test_batch_isend_irecv_self_ncclz>DistributedTest._DistTestBase.test_batch_isend_irecv_self_nccly  s     MMO LLN==?D.t/B/B/DgNK#D)!,IKqy+D1H	J+D1HB)T**TZZa@""7+**TZZa@""7+--k: CHHJ MMOrY   c                    | j                          t        j                          t        j                         }t	        t        j
                         t              }||   d   }t        j                  j                  |       g }|dk(  rd}n|dk(  rd}|dv rt        |dz   |      }t        dz   d|      }t        j                  t        j                  ||      }|j                  |       t        j                  t        j                  ||      }	|j                  |	       t        j                  |      }
|
D ]  }|j!                           | j                          y )Nr   rH   rJ   )rH   rJ   r  r   r  )r  r1  r  rb  r&   r   r   rS   rn  r  rq  r  r  r  r  r  r  )rV   r  r\  rp  r  peerr  r  r  r  r  r  s               rX   (test_batch_isend_irecv_no_rank_zero_ncclzFDistributedTest._DistTestBase.test_batch_isend_irecv_no_rank_zero_nccl  s   
 MMO LLN==?D.t/B/B/DgNK#D)!,IJJ!!),Kqyv~+D1H	J+D1HB)T**TZZdC""7+**TZZdC""7+--k: CHHJ MMOrY   zGLOO Batch Send Recv CPUc                 .   | j                          t        j                         }g }t        dt        j                               D ]  }||k(  r	t        |dz         }t        |dz   d      }t        j                  t        j                  ||      }|j                  |       t        j                  t        j                  ||      }|j                  |        t        j                  |      }|D ]  }	|	j                           | j                          y )Nr   rH   r   rq   r  r1  rb  rs  r   rq  r  r  r  r  r  r  
rV   r  r  r  r  r  r  r  r  r  s
             rX   test_batch_isend_irecv_glooz9DistributedTest._DistTestBase.test_batch_isend_irecv_gloo  s    MMO==?DKQ 3 3 56 ,$;+D1H5+C!G2>**TZZcB""7+**TZZcB""7+, ))+6D 
 MMOrY   c                 6   | j                          t        j                         }g }t        dt        j                               D ]  }||k(  r	t        |dz         }t        |dz   d      }t        j                  t        j                  |||      }|j                  |       t        j                  t        j                  |||      }|j                  |        t        j                  |      }|D ]  }	|	j                           | j                          y )Nr   rH   r   r  tagr  r  s
             rX    test_batch_isend_irecv_gloo_tagsz>DistributedTest._DistTestBase.test_batch_isend_irecv_gloo_tags  s    MMO==?DKQ 3 3 56 ,$;+D1H5+C!G2>**TZZcsK""7+**TZZctL""7+, ))+6D 
 MMOrY   c                    | j                          t        j                         }|dk(  rt        t        j                         t
              }||   d   }| j                  t        d      5  t        |dz   |      }t        j                  t        j                  |d      }t        j                  |g       d d d        y y # 1 sw Y   y xY w)Nr   z^Invalid ``op``rH   r  )r  r1  rb  r&   r   r   r  r  rq  r  	broadcastr  )rV   r  r\  rp  r  r  s         rX   test_batch_isend_irecv_op_errz;DistributedTest._DistTestBase.test_batch_isend_irecv_op_err  s     MMO==?Dqy243F3F3H'R'-a0	++J8IJ 6"/qI"NK"jjaHG**G956 6 6 6s   *AC  C	c                     | j                          t        j                         }|dk(  r7| j                  t        d      5  t        j
                  ddg       d d d        y y # 1 sw Y   y xY w)Nr   z^Invalid ``p2p_op_list``rH   rJ   )r  r1  rb  r  r  r  )rV   r  s     rX   "test_batch_isend_irecv_op_list_errz@DistributedTest._DistTestBase.test_batch_isend_irecv_op_list_err  sa     MMO==?Dqy++J8RS 3**Aq623 3 3 3s    A""A+c                 8   | j                          t        j                         }t        t        j                         t
               t        j                  ddgd      }t        j                  ddgd      }|dk(  r| j                  t        d      5  t        |dz         }t        j                  t        j                  |d|      }t        j                  t        j                  |d|      }t        j                  ||g       d d d        y y # 1 sw Y   y xY w)Nr   rH   r!  rn  r  z"All ops need to use the same group)r  r1  rb  r&   r   r   r  r  r  rq  r  r  r  )rV   r  
group_gloorw  r  send_op_gloosend_op_nccls          rX   (test_batch_isend_irecv_mixed_backend_errzFDistributedTest._DistTestBase.test_batch_isend_irecv_mixed_backend_err  s     MMO==?D !4!4!6@q!ffEJq!ffEJqy++ D I #0q"9K#'::djj+q*#UL#'::djj+q*#UL**L,+GHI I I Is   A2DDzNCCL Send Recv OnlyNc                    t        j                         }t        j                         }t        |t              }||   d   }t
        j                  j                  |       t        |dz   |      }||n	t               }|5 }t        d|      D ]|  }	|	|k(  r.t        d|      D ]  }
|
|k(  r	t        j                  ||
         6t        |	dz         }t        |	dz   d|      }t        j                  ||	       | j                  ||       ~ | j                          d d d        |t        j                         }|t         v r| d| dfD ]w  }t#        |d	      }| j%                  |       t        t        j                               D cg c]  }|dz   gd
z  g }}|D ]  }| j%                  |j&                  |v        ! y y y y # 1 sw Y   xY wc c}w )Nr   rH   r  r   r  :send:recvTr   rz   )r1  rb  r   r&   r   rS   rn  r  rq  r   rs  sendrecvr  r  r  &SEND_RECV_PROFILING_SUPPORTED_BACKENDSr   r)  input_shapes)rV   profiler_ctxr  rK  r\  rp  r   profiler_clsr   r  dstexpected_tensoroutput_tensorr=  r   r   expected_shapesr   s                     rX   _test_send_recv_ncclz2DistributedTest._DistTestBase._test_send_recv_nccl  s    ==?D,,.J.z7CK#D)!,IJJ!!),"4!8yAF+7+C<L   J/ ICd{#(J#7 3C"d{ ( IIfc23 +8a*@(5!G2) 		-5((HI  # & '**,DD*1%'8WIU:K&L 	S
!4Zae!f/ :?t?R?R?T9U+15dQhZ!^,+ + &, SE OOE,>,>/,QRS	S E ('   6+s    BG	G	Gc                 $    | j                          y rk   )r  r  s    rX   test_send_recv_ncclz1DistributedTest._DistTestBase.test_send_recv_nccl?  s     %%'rY   c                 z    t         j                  j                  j                  d      }| j	                  |       y rx  )rS   rz  r   r   r  rV   r  s     rX   %test_send_recv_nccl_autograd_profilerzCDistributedTest._DistTestBase.test_send_recv_nccl_autograd_profilerE  s/     !>>22:::NL%%l3rY   zKineto in fbcode causes hangzYtorch.profiler not enabled for mac/windows: https://github.com/pytorch/pytorch/pull/56124c                     t         j                  j                  t         j                  j                  j                  t         j                  j                  j
                  gd      }| j                  |       y r}  )rS   r   r   r    r  r   r  r	  s     rX   "test_send_recv_nccl_torch_profilerz@DistributedTest._DistTestBase.test_send_recv_nccl_torch_profilerL  s]     !>>11NN3377NN3388 # 2 L %%l3rY   c                    t        j                         }|dz   }t        |      }||n	t               }|5 }t	        dt        j
                               D ]  }||k(  r@t	        dt        j
                               D ]  }||k(  r	t        j                  ||         H|dz   }	t        |	      }
t        |	d      }t        j                  ||       | j                  ||
        	 d d d        |t        j                         }|t        v r| d| dfD ]  }t        |      }t        d |D              }t        j
                         dz
  }| j                  ||       t	        t        j
                               D cg c]  }|dz   gdz  g }}|D ]:  }| j                  |j                         | j                  |j                  |v        <  y y y # 1 sw Y   xY wc c}w )	NrH   r   r   r  r  r  c              3   4   K   | ]  }|j                     y wrk   count.0r   s     rX   	<genexpr>z@DistributedTest._DistTestBase._test_send_recv.<locals>.<genexpr>z       )Ba!'')B   rz   )r1  rb  rq  r   rs  r   r  r  r  r  r  r   r  r)  is_asyncr  )rV   r  r  	send_sizer   ctxr   r  r  	recv_sizer  r  r=  r   r   event_countexpected_event_countr  r   s                      rX   _test_send_recvz-DistributedTest._DistTestBase._test_send_recv_  s   ==?DqI"9-F".":,C I D$7$7$9: ICd{#(D,?,?,A#B 3C"d{ ( IIfc23 %(!G	*7	*B(5ir(J		-5((HII  '**,DD*1%'8WIU:K&L S
!4Z!F&))B6)B&B/3/B/B/Dq/H,((6JK :?t?R?R?T9U+15dQhZ!^,+ + &, SE OOENN; OOE,>,>/,QRSS E (!I I6+s   B.G=G Gz,Nccl send/recv tested by test_send_recv_ncclc                 (    | j                  d        y Nr  )r  r  s    rX   test_send_recvz,DistributedTest._DistTestBase.test_send_recv  s       d 3rY   z,NCCL send/recv tested by test_send_recv_ncclc                 <    t               }| j                  |       y r  )r{  r  rV   autograd_profiler_ctxs     rX    test_send_recv_autograd_profilerz>DistributedTest._DistTestBase.test_send_recv_autograd_profiler  s     %>$?!  .C DrY   c                 :    t               }| j                  |      S r  )r  r  rV   torch_profiler_ctxs     rX   test_send_recv_torch_profilerz;DistributedTest._DistTestBase.test_send_recv_torch_profiler  s"     "8!9''5G'HHrY   c           	      p   t        j                         }d}t        ||      }g }g }||n	t               }|5 }t	        dt        j
                               D ]  }	|	|k(  rt	        dt        j
                               D ]  }	|	|k(  r	dD ]  }
t        |d      }|
dk(  r't        j                  |      }|j                  |       nK|
dk(  rFt        j                  |      }|j                          |j                         }|j                  |       | j                  |j                        j                                  t        j                  ||	       t        j                  ||	        	 d d d        |!t        j                         }|t         v r| d| d	fD ]  }t#        |      }| j%                  t'        d
 |D              dt        j
                         dz
  z         |D ]>  }| j                  |j(                         | j%                  |j*                  |gdz  g       @  t-        j.                  t-        j0                  |      t-        j0                  |      fd      }t	        t        j
                               D cg c]  }t-        j2                  |       }}t        j4                  ||       g }|D ]  }||j7                         z  } ddlm} |j=                           ||      D cg c]  \  }}t?        tA        |             }}}| j%                  t        j
                         t?        |             | j%                  dt        j
                         dz
  z  gt        j
                         z  |       | jC                          y y # 1 sw Y   /xY wc c}w c c}}w )NrE   r  r   )r  r  r   r  r  r  z:recvAnySourcec              3   4   K   | ]  }|j                     y wrk   r  )r  r   s     rX   r  zKDistributedTest._DistTestBase._test_send_recv_any_source.<locals>.<genexpr>  s     @@r  rJ   rH   rz   )groupby)"r1  rb  rq  r   rs  r   r  r  r  r  _source_rankr)  rs   allr  r  r  r   r  r  r  r  rS   catr   r  r  r  	itertoolsr+  sortr  r  r  )rV   r  r  send_recv_sizer   
recv_ranksirecv_ranksr  r   r  r  r  senderworkr=  r   r   r   recv_ranks_tensorru  global_recv_ranksglobal_recv_ranks_listr+  keyr  	frequencys                             rX   _test_send_recv_any_sourcez8DistributedTest._DistTestBase._test_send_recv_any_source  sp   ==?DN">>FJK".":,C / D$7$7$9: /Cd{#(D,?,?,A#B PC"d{ ((9 P0=nTV0W#'6>-1YY}-EF$.$5$5f$=%)W_+/::m+DD$(IIK-1->->-@F$/$6$6v$>
 !%0@0@0H0L0L0N OP	P, 		&#.		&#.5//: '**,DD*1%'8WI^:T&U 	Y
!4Z!F((@@@!4!4!6!:; &, YE OOENN; ,,U-?-?>BRUVBVAWXY	Y %*II\\*-u||K/HI1%!
 #4#6#6#89% $$%67%! %  13DE)+&/ >F*fmmo=*> .&++-7>?U7V)3eCU$	    !4!4!6IG  $--/!3458K8K8MMy M (;/ /`%s   D:N N- N2 N*zsendrecv anysourcez+ does not support send/recv from any sourcec                 (    | j                  d        y r  )r;  r  s    rX   test_send_recv_any_sourcez7DistributedTest._DistTestBase.test_send_recv_any_source  s    
 +++>rY   c                 <    t               }| j                  |       y r  )r{  r;  r"  s     rX   +test_send_recv_any_source_autograd_profilerzIDistributedTest._DistTestBase.test_send_recv_any_source_autograd_profiler  s    
 %>$?!++9N+OrY   z!Kineto in fbcode code causes hangc                 :    t               }| j                  |      S r  )r  r;  r&  s     rX   (test_send_recv_any_source_torch_profilerzFDistributedTest._DistTestBase.test_send_recv_any_source_torch_profiler  s"     "8!922@R2SSrY   c                    t        j                         }t        j                         }d}t        ||      }||n	t	               }|5 }t        d|      D ]  }||k(  rkt        d|      D ][  }	|	|k(  r	t        |d      }
t        j                  |
|	|	       | j                  |
j                  |	      j                                ] st        j                  |||        	 d d d        |t        j                         }|t        v r| d| dfD ]  }t        |      }t        d |D              }t        j                         d	z
  }| j                  ||       |D ]Z  }| j                  |j                          | j                  |j"                  |       | j                  |j$                  |gd
z  g       \  y y y # 1 sw Y   xY w)NrE   r  r   r   r  r  r  c              3   4   K   | ]  }|j                     y wrk   r  r  s     rX   r  zIDistributedTest._DistTestBase._test_send_recv_with_tag.<locals>.<genexpr>$  r  r  rH   rz   )r1  rb  r   rq  r   rs  r  r)  rs   r-  r  r  r  r   r  r  r  r   r  )rV   r  r  rK  r1  r   r  r   r  r  r  r=  r   r   r  r  r   s                    rX   _test_send_recv_with_tagz6DistributedTest._DistTestBase._test_send_recv_with_tag
  s   ==?D,,.JN">>F".":,C 9 J/ 9Cd{#(J#7 IC"d{ (,9.PR,SM IImScB OOM,<,<S,A,E,E,GHI 		&#4899 '**,DD*1%'8WIU:K&L 	Y
!4Z!F&))B6)B&B/3/B/B/Dq/H,((6JK%+ YE OOENN; ,,UZZD ,,U-?-?>BRUVBVAWXY	Y E (9 9s   BGG
c                 (    | j                  d        y r  )rD  r  s    rX   test_send_recv_with_tagz5DistributedTest._DistTestBase.test_send_recv_with_tag,  s     ))t)<rY   c                 :    t               }| j                  |      S r  )r{  rD  r"  s     rX   )test_send_recv_with_tag_autograd_profilerzGDistributedTest._DistTestBase.test_send_recv_with_tag_autograd_profiler2  s"     %>$?!00>S0TTrY   c                 :    t               }| j                  |      S r  )r  rD  r&  s     rX   &test_send_recv_with_tag_torch_profilerzDDistributedTest._DistTestBase.test_send_recv_with_tag_torch_profiler9  s"     "8!900>P0QQrY   c           
         t        j                         }t        j                         }||n	t               }|5 }|dk(  rnt	        d|      D cg c]"  }t        j
                  t        |d      |      $ }}|D ]1  }|j                          | j                  |j                                3 n>t        |d      }	t        j                  |	d       | j                  |	t        |d             | j                          d d d        |;t        j                         }
|
t        v r|dk(  r|
 dn|
 d}t        |      }t!        d |D              }|dk(  rt        j                         dz
  nd}| j                  ||       t	        dt        j                               D ci c]
  }||gdz  g }}|D ]  }| j                  |j"                         | j                  |j$                  |       |dk(  r,| j                  |j&                  |j)                         v        k| j                  |j&                  ||           y y y c c}w # 1 sw Y   NxY wc c}w )	Nr   rH   rE   r   r  r  c              3   4   K   | ]  }|j                     y wrk   r  r  s     rX   r  z<DistributedTest._DistTestBase._test_isend.<locals>.<genexpr>a  s     %>!agg%>r  rz   )r1  rb  r   r   rs  r  rq  r  r)  is_completedr  r  r  r  r  r   r  r  r   r  values)rV   r  r  rK  r  r   destrequestsrequestr   r=  expected_event_namer   r  expected_countrr  r   s                     rX   _test_isendz)DistributedTest._DistTestBase._test_isendF  sC   ==?D,,.J".":,C  19 %*!Z$8   

=r#:DA H   $, @(<(<(>?@ +44FIIfa($$V]4-DE   '**,DD-1QY7)5)wiu<M ( 11DdKF"%%>v%>">KBF!)T%8%8%:Q%>QRN$$^[A /4At7J7J7L.M')*QC!G9'O ' "( X7((5HI19 OO % 2 2o6L6L6N N !,,U-?-?QUAVWX E (    8's$   I'I4BI IIIzNccl does not support isendc                 (    | j                  d        y r  )rU  r  s    rX   
test_isendz(DistributedTest._DistTestBase.test_isends  s     $/rY   c                 <    t               }| j                  |       y r  )r{  rU  r"  s     rX   test_isend_autograd_profilerz:DistributedTest._DistTestBase.test_isend_autograd_profilery  s     %>$?!*?@rY   c                 <    t               }| j                  |       y r  )r  rU  r&  s     rX   test_isend_torch_profilerz7DistributedTest._DistTestBase.test_isend_torch_profiler  s     "8!9*<=rY   zNccl does not support irecvc                 `   t        j                         }t        j                         }|dk(  rt        d|      D cg c]  }t	        |d       }}t        d|      D cg c]  }t        j
                  ||dz
     |        }}t        d|      D ]_  }||dz
     j                          | j                  ||dz
     j                                | j                  ||dz
     t	        |d             a n"t	        |d      }t        j                  |d       | j                          y c c}w c c}w )Nr   rH   r   rE   )r1  rb  r   rs  rq  r  r  r)  rM  r  r  r  )rV   r  rK  r  r  rP  r   s          rX   
test_irecvz(DistributedTest._DistTestBase.test_irecv  s&    ==?D,,.Jqy6;Az6J$/2M#r*$  $
  %Q
3 JJ/a8#> 
 !J/ XCS1W%**,OOHS1W$5$B$B$DE$$%5cAg%>cSU@VWX
 'tR0		&!$MMO!$s   D&#D+Fc           
         t         j                  ddft         j                  ddft         j                  ddft         j                  ddft         j
                  ddft         j                  ddft         j                  d	dffD ]  \  }}}	|	r|s|D ]  }
t        |
d
z   ||      }|r|j                  ||   d         }||
k(  rj|rCt        j                         }d|_        |
|_        | j                  dd|j                  |g|       w| j                  ddt        j                  ||
|       t        |
d
z   d|      }|r|j                  ||   d         }|rCt        j                         }d|_        |
|_        | j                  dd|j                  |g|       n$| j                  ddt        j                  ||
|       | j!                  |j#                         |j#                                | j!                  |j%                  |      j'                         t        j(                  d               | j+                          y )Ng|۽Fg0.+gTrc     g     jg  4&krH   r   
:broadcastr   )rS   floatdoublehalfint8uint8r   r   rq  rn  r1  BroadcastOptions
rootTensorrootRankcall_dist_opr  r  ro  nemaxr   r  )rV   r  r  r  rn  r\  with_optionsrk  rq   requires_cudar  r  optsr   s                 rX   _test_broadcast_helperz4DistributedTest._DistTestBase._test_broadcast_helper  s+    fe,w.T4(R'c5)D%(UE*0 ;+um !  0C&3C!GUE&JO*9*>*>{4?PQR?S*Ts{'#'#8#8#:D./DO,/DM -- , $ ( 2 2!0 1 $ !-- , % $ / # ( "/sQwE!B%+[[T1B11E%FF'#'#8#8#:D./DO,/DM -- ,dH4F4FRV !-- , % $ & # ( ((8L8L8NO(("IIo6::<ell5>Q]0;z MMOrY   z!Nccl does not support CPU tensorsc                 R    | j                         \  }}}| j                  |||       y rk   )r  ro  r  s       rX   test_broadcastz,DistributedTest._DistTestBase.test_broadcast  s+     %)$:$:$<!E8T''x>rY   z2Only Gloo and Nccl backend supports CUDA allReducec                     | j                         \  }}}t        t        j                         t              }||   d   }t
        j                  j                  |       | j                  |||d|       y )Nr   T)	r  r&   r1  r   r   rS   rn  r  ro  rV   r  r  r  r\  rp  s         rX   test_broadcast_cudaz1DistributedTest._DistTestBase.test_broadcast_cuda  se     %)$:$:$<!E8T.t/B/B/DgNK#D)!,IJJ!!),''xt[QrY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  ro  r  s       rX   test_broadcast_groupz2DistributedTest._DistTestBase.test_broadcast_group  s+    
 %)$9$9$;!E8T''x>rY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  ro  r  s       rX   test_broadcast_full_groupz7DistributedTest._DistTestBase.test_broadcast_full_group  s+     %)$>$>$@!E8T''x>rY   z/Only NCCL backend supports high priority streamc                 J   | j                         \  }}}t        t        j                         t              }||   d   }t
        j                  j                  |       t        t        dz         }|t        j                  d<   t        j                  d|t        j                               }t        |      \  }}}	t        j                  ||      }t        j                  j!                         }
d|
_        t        j                  |||	|
      }| j%                  |||d|d       y )Nr   rH   r  r   FT)r  r&   r1  r   r   rS   rn  r  r   r  r   rJ  
rendezvousr  PrefixStoreProcessGroupNCCLOptionsis_high_priority_streamro  )rV   r  ru  r  r\  rp  new_portgen_iteratorstorero  rn  r  s               rX   test_nccl_high_priority_streamz<DistributedTest._DistTestBase.test_nccl_high_priority_stream  s     "335NE1d.t/B/B/DgNK#D)!,IJJ!!),;?+H(0BJJ}%??8T4;N;N;PQL $\ 2E4$$Xu5E((002D+0D(,,UD$EH''xt[RVWrY   c
                 Z   |D ]  }
t        |
dz         j                  ||
k(  r|n|      }|r|j                  |	|   d         }| j                  ddt        j
                  ||
|||j                  g       ||
k(  sx| j                  |t        |
dz   |              | j                          y )NrH   r   :reduceFtensor_shapes)	rq  rm  rn  ri  r1  r
   shaper  r  )rV   r  r  r  opmaster_valueworker_valueexpected_valuern  r\  r  r   s               rX   _test_reduce_helperz1DistributedTest._DistTestBase._test_reduce_helper*  s      U&sQw/55$(CKL\ #[[T):1)=>F!!KK#)<<. " 	 3;$$V]37N-ST#U& MMOrY   r
   z does not support reducec                     | j                         \  }}}| j                  |||t        j                  j                  ddddt        |      dz
  z  z          y NrJ   rE   rH   )r  r  r1  ReduceOpSUMr  r  s       rX   test_reduce_sumz-DistributedTest._DistTestBase.test_reduce_sumK  sX     %)$:$:$<!E8T$$!!R3u:>*+rY   zOnly Nccl supports CUDA reducec                 D   | j                         \  }}}t        t        j                         t              }||   d   }t
        j                  j                  |       | j                  |||t        j                  j                  ddddt        |      dz
  z  z   d|	       y Nr   rJ   rE   rH   T)r  r&   r1  r   r   rS   rn  r  r  r  r  r  rs  s         rX   test_reduce_sum_cudaz2DistributedTest._DistTestBase.test_reduce_sum_cuda^  s     %)$:$:$<!E8T.t/B/B/DgNK#D)!,IJJ!!),$$!!B#e*q.))
rY   c                     | j                         \  }}}| j                  |||t        j                  j                  ddt        t        j                  dgt        |      dz
  z  d             y r  )	r  r  r1  r  PRODUCTr
   operatormulr  r  s       rX   test_reduce_productz1DistributedTest._DistTestBase.test_reduce_productw  sb     %)$:$:$<!E8T$$%%x||bTSZ!^%<a@rY   c           	          | j                         \  }}}| j                  |||t        j                  j                  ddd       y Ni  rH   )r  r  r1  r  MINr  s       rX   test_reduce_minz-DistributedTest._DistTestBase.test_reduce_min  s@     %)$:$:$<!E8T$$xt}}'8'8$1rY   c           	          | j                         \  }}}| j                  |||t        j                  j                  ddd       y Nr   rE   )r  r  r1  r  MAXr  s       rX   test_reduce_maxz-DistributedTest._DistTestBase.test_reduce_max  s@     %)$:$:$<!E8T$$xt}}'8'8"b"rY   c                     | j                         \  }}}| j                  |||t        j                  j                  ddddt        |      dz
  z  z          y r  )r  r  r1  r  r  r  r  s       rX   test_reduce_group_sumz3DistributedTest._DistTestBase.test_reduce_group_sum  sX     %)$9$9$;!E8T$$!!R3u:>*+rY   c                     | j                         \  }}}| j                  |||t        j                  j                  ddt        t        j                  dgt        |      dz
  z  d             y r  )	r  r  r1  r  r  r
   r  r  r  r  s       rX   test_reduce_group_productz7DistributedTest._DistTestBase.test_reduce_group_product  sb     %)$9$9$;!E8T$$%%x||bTSZ!^%<a@rY   c           	          | j                         \  }}}| j                  |||t        j                  j                  ddd       y r  )r  r  r1  r  r  r  s       rX   test_reduce_group_minz3DistributedTest._DistTestBase.test_reduce_group_min  s@     %)$9$9$;!E8T$$xt}}'8'8$1rY   c           	          | j                         \  }}}| j                  |||t        j                  j                  ddd       y r  )r  r  r1  r  r  r  s       rX   test_reduce_group_maxz3DistributedTest._DistTestBase.test_reduce_group_max  s@     %)$9$9$;!E8T$$xt}}'8'8"b"rY   c                     | j                         \  }}}| j                  |||t        j                  j                  ddddt        |      dz
  z  z          y r  )r  r  r1  r  r  r  r  s       rX   test_reduce_full_group_sumz8DistributedTest._DistTestBase.test_reduce_full_group_sum  sX     %)$>$>$@!E8T$$!!R3u:>*+rY   c                     | j                         \  }}}| j                  |||t        j                  j                  ddt        t        j                  dgt        |      dz
  z  d             y r  )	r  r  r1  r  r  r
   r  r  r  r  s       rX   test_reduce_full_group_productz<DistributedTest._DistTestBase.test_reduce_full_group_product  sb     %)$>$>$@!E8T$$%%x||bTSZ!^%<a@rY   c           	          | j                         \  }}}| j                  |||t        j                  j                  ddd       y r  )r  r  r1  r  r  r  s       rX   test_reduce_full_group_minz8DistributedTest._DistTestBase.test_reduce_full_group_min	  s@     %)$>$>$@!E8T$$xt}}'8'8$1rY   c           	          | j                         \  }}}| j                  |||t        j                  j                  ddd       y r  )r  r  r1  r  r  r  s       rX   test_reduce_full_group_maxz8DistributedTest._DistTestBase.test_reduce_full_group_max	  s@     %)$>$>$@!E8T$$xt}}'8'8"b"rY   c
                    |D ]  t        d      D 
cg c]&  }
t        dz         j                  |k(  r|n|      ( c}
|r-t        d      D ]  }
|
   j                  |	|   d         |
<   ! | j	                  ddt
        j                  d   fdd   j                  g	       |k(  sD ]!  }| j                  |t        dz   |             #  | j                          y c c}
w )NrJ   rH   r   r  Fc                  :    t        j                  d          S r[   )r1  r
   )r  r  r  tensorss   rX   rB  zIDistributedTest._DistTestBase._test_reduce_twice_helper.<locals>.<lambda>G	  s    dkk
CX/ rY   )secondary_op_callr  )
rs  rq  rm  rn  ri  r1  r
   r  r  r  )rV   r  r  r  r  r  r  r  rn  r\  r  r   r  r  s     ` `       @@rX   _test_reduce_twice_helperz7DistributedTest._DistTestBase._test_reduce_twice_helper)	  s     Y
 #1X	  "#'*00(, "1X K%,QZ__[5Fq5I%J
K!!KKAJ' $+1:#3#3"4 "  3;") Y((sQw1WXY1Y6 MMO5s   +C2c                     | j                         \  }}}| j                  |||t        j                  j                  ddddt        |      dz
  z  z          y r  )r  r  r1  r  r  r  r  s       rX   test_reduce_sum_twicez3DistributedTest._DistTestBase.test_reduce_sum_twiceR	  sX     %)$:$:$<!E8T**!!R3u:>*+rY   c                 D   | j                         \  }}}t        t        j                         t              }||   d   }t
        j                  j                  |       | j                  |||t        j                  j                  ddddt        |      dz
  z  z   d|	       y r  )r  r&   r1  r   r   rS   rn  r  r  r  r  r  rs  s         rX   test_reduce_sum_cuda_twicez8DistributedTest._DistTestBase.test_reduce_sum_cuda_twicee	  s     %)$:$:$<!E8T.t/B/B/DgNK#D)!,IJJ!!),**!!B#e*q.))
rY   z#Only Nccl supports reduce_scatter_vc           
         | j                          | j                         \  }}}t        t        j                         t
              }||   d   }|D cg c]  }|dz   	 }}t        |d |       }|||   z   }	t        |      }
d}d}dD ]=  }t        |
||      }|||	 j                  |       t        j                  ||   |
|
t        j                        j                  d      j                  |      }t        j                  |t        t        j                  ||            t        j                   j"                  ||      }|r|j%                          ddt'        |      dz
  z  z   }t        j                  ||   |
|
t        j                        }|j                  |      j                  |      }| j)                  ||       @ | j                          y c c}w )	Nr   rH   rJ   rE   TFr  rj  r   )r  r  r&   r1  r   r   r  rq  rm  rS   rl  ra  rn  reduce_scatterr  r$  r  r  r  r  r  )rV   r  r  r  r\  rp  r  input_split_sizes	start_lenend_lensum_lenr  r  	async_valr   
out_tensorr  r  r  s                      rX   test_reduce_scatter_v_cudaz8DistributedTest._DistTestBase.test_reduce_scatter_v_cuda~	  s    MMO$($:$:$<!E8T.t/B/B/DgNK#D)!,I49 :Sq : :-et45I"3D"99G+,GLL* >	&w	Ry)//=KK)$/' U2YT)_  ))V->?@MM%% HHJ!"bCJN&;!<"'++%d+WgU[[# #2"7"7"G"L"LY"W  _=7>8 MMOG !;s   GTc                    |r.|j                  ||   d         }|j                  ||   d         }|j                  g}| j                  ddt        j                  ||t        j
                  j                  |dd|
       |S )Nr   z:reduce_scatter_tensorFexpect_eventr  )rn  r  ri  r1  reduce_scatter_tensorr  r  rV   
tensor_out	tensor_inr  r  rn  r\  r  s           rX   _reduce_scatter_tensor_helperz;DistributedTest._DistTestBase._reduce_scatter_tensor_helper	  s     %NN;t+<Q+?@	'__[->q-AB
'--.M(**!!"+   rY   z-Only Nccl supports CUDA reduce_scatter_tensorc                 x   | j                         \  }}}t        t        j                         t              }d}t        j                  |t
        j                        }t        j                  t        |      |z        }| j                  ||||d|      }t        j                  ||z  |dz   |z        t        |      z  }| j                  ||       | j                          t        j                  |t        |      |f      }| j                  ||||d|      }| j                  ||       | j                          y )NrJ   rj  TrH   )r  r&   r1  r   r   rS   zerosint64aranger  r  r  r  r   )	rV   r  r  r  r\  ro  r  r  r  s	            rX   test_reduce_scatter_tensor_cudaz=DistributedTest._DistTestBase.test_reduce_scatter_tensor_cuda	  s   
 %)$:$:$<!E8T.t/B/B/DgNKDT=J SZ$%67I;;Ixt[J $ll4$;T8IJSQVZWOZ9MMO i#e*d1CDI;;Ixt[J
 Z9MMOrY   )r  r  profile_cudar  c          	        	 	fdg}
||
j                  |       t        j                  j                  j	                  |d      }|5  |
D cg c]	  } |        }}|r|D ]  }|j                           d d d        |rKt        j                         t        v r/t        t        j                         |z   |      }t        j                         t        j                  j                  k7  r$| j                  t        |      t        |
             |D ]  }| j                  |j                          | j                  |j"                  d       | j%                  |j&                  d       |Yt        j                         t        j                  j                  k7  s| j                  |j(                  |d|j(                   d|         y y y c c}w # 1 sw Y   ^xY w)Nc                        i S rk   ry   )r  r  r  s   rX   rB  z<DistributedTest._DistTestBase.call_dist_op.<locals>.<lambda>	  s    D 3F 3 rY   T)use_cudary  rH   r   zevent shape: z vs tensor )r  rS   rz  r   r   r  r1  r  PROFILING_SUPPORTED_BACKENDSr   r3  r4  r5  r  r  r)  r  r  r  cpu_timer  )rV   profiling_title_postfixr  r  r  r  r  r  r  r  op_callsr#  op_callworksr5  r   r   s      `    ``       rX   ri  z*DistributedTest._DistTestBase.call_dist_op	  s    44H , 12$)NN$;$;$C$C%T %D %! ' $2:;w;; % $		$$  0 0 26R R,$$&)@@BW
 '')T__-C-CC$$S[#h-@ AOOAJJ/$$QWWa0++AJJ:
 &1 002doo6L6LL((NN)+ANN+;;}oV !S| <$ $s   GGGGG c                 4   |D ]  }||k(  r|n|}t        |dz   |
      j                  |      }|r|j                  |	|   d         }|j                  t        j
                  k(  r!t	        j                  |      j                  g}n|j                  g}| j                  d|t        j                  |||||       |dk(  s|st        j                         t        v s| j                  d|t        j                  ||||d|	        | j                          y )NrH   rj  r   :all_reduce)async_opr  T)r  r  r  )rq  rm  rn  rk  rS   	complex64view_as_realr  ri  r1  r  r  !CUDA_PROFILING_SUPPORTED_BACKENDSr  )rV   r  r  r  r  r  r  r  rn  r\  rk  r  r  
curr_valuer   r  s                   rX   _test_all_reduce_helperz5DistributedTest._DistTestBase._test_all_reduce_helper
  s     &-1S[\l
&sQwe<BB:N#[[T):1)=>F<<5??2%*%7%7%?%E%E$FM%+\\NM!!!OO%"/ " 	 1H((*.OO%%%  !)%)&3 & 
9&P MMOrY   c                     | j                         \  }}}| j                  |||t        j                  j                  ddddt        |      dz
  z  z          y r  r  r  r1  r  r  r  r  s       rX   test_all_reduce_sumz1DistributedTest._DistTestBase.test_all_reduce_sumT
  sX     %)$:$:$<!E8T((!!R3u:>*+rY   c                     | j                         \  }}}| j                  |||t        j                  j                  ddddt        |      dz
  z  z   d       y NrJ   rE   rH   T)r  r  r  s       rX   test_all_reduce_sum_asyncz7DistributedTest._DistTestBase.test_all_reduce_sum_asyncc
  s`     %)$:$:$<!E8T((!!R3u:>*+ ) 	rY   z;Only Gloo and NCCL backends will have CUDA allReduce testedc                 H   t         j                  j                  | j                         | j	                         \  }}}t        t        j                         t              }| j                  |||t        j                  j                  ddddt        |      dz
  z  z   d|	       y )NrJ   rE   rH   TrS   rn  r  r  r  r&   r1  r   r   r  r  r  r  rV   r  r  r  r\  s        rX   test_all_reduce_sum_cudaz6DistributedTest._DistTestBase.test_all_reduce_sum_cudas
  s     JJ!!$)),$($:$:$<!E8T.t/B/B/DgNK((!!R3u:>*+
rY   c                 L   t         j                  j                  | j                         | j	                         \  }}}t        t        j                         t              }| j                  |||t        j                  j                  ddddt        |      dz
  z  z   d|d
       y r  r  r  s        rX   test_all_reduce_sum_cuda_asyncz<DistributedTest._DistTestBase.test_all_reduce_sum_cuda_async
  s     JJ!!$)),$($:$:$<!E8T.t/B/B/DgNK((!!R3u:>*+ ) rY   c                    | j                         \  }}}| j                  |||t        j                  j                  t        dd      t        dd      t        dd      t        dd      t        |      dz
  z  z   t        j                         y )NrJ   rz   rE      rH   rj  )	r  r  r1  r  r  complexr  rS   cfloatr  s       rX   test_all_reduce_sum_complexz9DistributedTest._DistTestBase.test_all_reduce_sum_complex
  s|     %)$:$:$<!E8T((!!1B1RCJN!CDll ) 	rY   c                     t         j                  j                  t         j                  j                  t         j                  j                  t         j                  j
                  t         j                  j                  t         j                  j                  g}| j                         \  }}}|D ]Q  }| j                  t        d      5  t        j                  t        dt        j                        ||       d d d        S y # 1 sw Y   ^xY wNzall_reduce does not supportrH   rj  )r1  r  r  r  r  BANDBORBXORr  r  r  r  rq  rS   r  )rV   unsupported_opsrZ  r  r`  unsupported_ops         rX   'test_all_reduce_complex_unsupported_opszEDistributedTest._DistTestBase.test_all_reduce_complex_unsupported_ops
  s    
 !!!!%%""!!""O '+&<&<&>#FHe"1 ++ =  OO%au||<nh  s   1DD	c                    t         j                  j                  | j                         | j	                         \  }}}t        t        j                         t              }| j                  |||t        j                  j                  t        dd      t        dd      t        dd      t        dd      t        |      dz
  z  z   d|t         j                  
       y )NrJ   rz   rE   r  rH   Trj  )rS   rn  r  r  r  r&   r1  r   r   r  r  r  r  r  r  r  s        rX    test_all_reduce_sum_cuda_complexz>DistributedTest._DistTestBase.test_all_reduce_sum_cuda_complex
  s     JJ!!$)),$($:$:$<!E8T.t/B/B/DgNK((!!1B1RCJN!CDll ) rY   c                     | j                         \  }}}| j                  |||t        j                  j                  ddt        t        j                  dgt        |      dz
  z  d             y r  )	r  r  r1  r  r  r
   r  r  r  r  s       rX   test_all_reduce_productz5DistributedTest._DistTestBase.test_all_reduce_product
  sb     %)$:$:$<!E8T((%%x||bTSZ!^%<a@rY   c           	          | j                         \  }}}| j                  |||t        j                  j                  ddd       y r  )r  r  r1  r  r  r  s       rX   test_all_reduce_minz1DistributedTest._DistTestBase.test_all_reduce_min
  s@     %)$:$:$<!E8T((xt}}'8'8$1rY   c           	          | j                         \  }}}| j                  |||t        j                  j                  ddd       y r  )r  r  r1  r  r  r  s       rX   test_all_reduce_maxz1DistributedTest._DistTestBase.test_all_reduce_max
  s@     %)$:$:$<!E8T((xt}}'8'8"b"rY   c                     | j                         \  }}}| j                  |||t        j                  j                  ddddt        |      dz
  z  z          y r  )r  r  r1  r  r  r  r  s       rX   test_all_reduce_group_sumz7DistributedTest._DistTestBase.test_all_reduce_group_sum
  sX    
 %)$9$9$;!E8T((!!R3u:>*+rY   c                     | j                         \  }}}| j                  |||t        j                  j                  ddt        t        j                  dgt        |      dz
  z  d             y r  )	r  r  r1  r  r  r
   r  r  r  r  s       rX   test_all_reduce_group_productz;DistributedTest._DistTestBase.test_all_reduce_group_product
  sb    
 %)$9$9$;!E8T((%%x||bTSZ!^%<a@rY   c           	          | j                         \  }}}| j                  |||t        j                  j                  ddd       y r  )r  r  r1  r  r  r  s       rX   test_all_reduce_group_minz7DistributedTest._DistTestBase.test_all_reduce_group_min  s@    
 %)$9$9$;!E8T((xt}}'8'8$1rY   c           	          | j                         \  }}}| j                  |||t        j                  j                  ddd       y r  )r  r  r1  r  r  r  s       rX   test_all_reduce_group_maxz7DistributedTest._DistTestBase.test_all_reduce_group_max$  s@    
 %)$9$9$;!E8T((xt}}'8'8"b"rY   c                     | j                         \  }}}| j                  |||t        j                  j                  ddddt        |      dz
  z  z          y r  )r  r  r1  r  r  r  r  s       rX   test_all_reduce_full_group_sumz<DistributedTest._DistTestBase.test_all_reduce_full_group_sum.  sX     %)$>$>$@!E8T((!!R3u:>*+rY   c                     | j                         \  }}}| j                  |||t        j                  j                  ddt        t        j                  dgt        |      dz
  z  d             y r  )	r  r  r1  r  r  r
   r  r  r  r  s       rX   "test_all_reduce_full_group_productz@DistributedTest._DistTestBase.test_all_reduce_full_group_product=  sb     %)$>$>$@!E8T((%%x||bTSZ!^%<a@rY   c           	          | j                         \  }}}| j                  |||t        j                  j                  ddd       y r  )r  r  r1  r  r  r  s       rX   test_all_reduce_full_group_minz<DistributedTest._DistTestBase.test_all_reduce_full_group_minL  s@     %)$>$>$@!E8T((xt}}'8'8$1rY   c           	          | j                         \  }}}| j                  |||t        j                  j                  ddd       y r  )r  r  r1  r  r  r  s       rX   test_all_reduce_full_group_maxz<DistributedTest._DistTestBase.test_all_reduce_full_group_maxU  s@     %)$>$>$@!E8T((xt}}'8'8"b"rY   c                 H   | j                         \  }}}t        |t        j                         d      }|D ]d  \  }}|D cg c]
  } ||       }	}t        j                  |	d   t        j
                  j                  |       | j                  |	d   |d          f y c c}w )NrH   )
num_inputsr   )r  r)   r1  r   r  r  r  r  )
rV   fnrZ  r  r  testsinputsoutputsinputr  s
             rX   _test_sparse_all_reduce_sumz9DistributedTest._DistTestBase._test_sparse_all_reduce_sum_  s    %)%;%;%="FHd.d))+E $) 92892e999
DMM,=,=xH  WQZ899s   Bz+Only Gloo backend support sparse all reducec                 (    | j                  d        y )Nc                     | S rk   ry   ts    rX   rB  zJDistributedTest._DistTestBase.test_sparse_all_reduce_sum.<locals>.<lambda>n  s    q rY   r  r  s    rX   test_sparse_all_reduce_sumz8DistributedTest._DistTestBase.test_sparse_all_reduce_sumj  s     ,,[9rY   c                 (    | j                  d        y )Nc                 >    | j                         j                         S rk   )clonern  r  s    rX   rB  zODistributedTest._DistTestBase.test_sparse_all_reduce_sum_cuda.<locals>.<lambda>u  s    qwwy~~7G rY   r!  r  s    rX   test_sparse_all_reduce_sum_cudaz=DistributedTest._DistTestBase.test_sparse_all_reduce_sum_cudap  s    
 ,,-GHrY   c           	          ddt        dd      gddt        dd      gdd| dz
  z  z   dd| dz
  z  z   t        dd      t        dd      | dz
  z  z   gt        j                  t        j                  t        j                  gfS )NrJ   rz   rE   r  rH   )r  rS   ra  r  
group_sizes    rX   $_all_reduce_coalesced_sum_test_caseszBDistributedTest._DistTestBase._all_reduce_coalesced_sum_test_casesx  s     Awq!}%RR)j1n--j1n--AqMGBOzA~$FF
 ekk5<<8	 	rY   c                 z    ddgddgdd| dz
  z  z  dd| dz
  z  z  gt         j                  t         j                  gfS )NrH   rJ   rz   r{   rS   ra  r(  s    rX   (_all_reduce_coalesced_product_test_caseszFDistributedTest._DistTestBase._all_reduce_coalesced_product_test_cases  sP     AAQ:>**Aj1n0E,EFekk*	 rY   c                 V    ddgddgddgt         j                  t         j                  gfS NrH   r{   rJ   rz   r,  r(  s    rX   $_all_reduce_coalesced_min_test_caseszBDistributedTest._DistTestBase._all_reduce_coalesced_min_test_cases  3     AAAekk*	 rY   c                 V    ddgddgddgt         j                  t         j                  gfS r/  r,  r(  s    rX   $_all_reduce_coalesced_max_test_caseszBDistributedTest._DistTestBase._all_reduce_coalesced_max_test_cases  r1  rY   c                    | j                         \  }}}| j                  t        d      5  t        j                  t        dt        j                        gt        j                  j                  |       d d d        y # 1 sw Y   y xY wr  )
r  r  r  r1  all_reduce_coalescedrq  rS   r  r  r  )rV   rZ  r  r`  s       rX   1test_all_reduce_coalesced_max_complex_unsupportedzODistributedTest._DistTestBase.test_all_reduce_coalesced_max_complex_unsupported  sl     '+&<&<&>#FHe''
4QR ))"1ELL9:DMM<M<Mx  s   A
A>>Bc           
         t         j                  j                  | j                  t         j                  j                  | j
                  t         j                  j                  | j                  t         j                  j                  | j                  i|   } |t        |            \  }}	}
}|D ]4  }||k(  r|n|	}t        ||      D cg c]  \  }}t        |dz   ||       }}}|r$|D cg c]  }|j                  ||   d          }}g }|D ]i  }|j                  t        j                   k(  r/|j#                  t        j$                  |      j&                         O|j#                  |j&                         k | j)                  ddt         j*                  ||||       t        ||
      D cg c]  \  }}t        |dz   ||       }}}| j-                  ||       7 | j/                          y c c}}w c c}w c c}}w )NrH   rj  r   r  Fr  )r1  r  r  r*  r  r-  r  r0  r  r3  r  r  rq  rn  rk  rS   r  r  r  r  ri  r5  r  r  )rV   r  r  r  r  rn  r\  test_case_funcmaster_valuesworker_valuesexpected_valuesdtypesr  curr_valuesrk  r  r  r   r  r   r  r  s                         rX   !_test_all_reduce_coalesced_helperz?DistributedTest._DistTestBase._test_all_reduce_coalesced_helper  s    !!4#L#L%%t'T'T!!4#L#L!!4#L#L	
 N ESE
EAM=/6  </3s{m '*&+&>"s "#'3e<  ELMqvvk$&7&:;MGM "% ;F||u6%,,U-?-?-G-M-MN%,,V\\:	;
 !!!--"/ "  25V_1M$-~ "#'>G$  $   *:;7<: MMO7
 N $s   
G7.G=3Hc                     | j                         \  }}}| j                  |||t        j                  j                  dd        y NF)rn  r\  )r  r>  r1  r  r  r  s       rX   test_all_reduce_coalesced_sumz;DistributedTest._DistTestBase.test_all_reduce_coalesced_sum  F    $($:$:$<!E8T22!!  3 rY   c                     | j                         \  }}}| j                  |||t        j                  j                  dd        y r@  )r  r>  r1  r  r  r  s       rX   !test_all_reduce_coalesced_productz?DistributedTest._DistTestBase.test_all_reduce_coalesced_product  sF    $($:$:$<!E8T22%%  3 rY   c                     | j                         \  }}}| j                  |||t        j                  j                  dd        y r@  )r  r>  r1  r  r  r  s       rX   test_all_reduce_coalesced_minz;DistributedTest._DistTestBase.test_all_reduce_coalesced_min  rB  rY   c                     | j                         \  }}}| j                  |||t        j                  j                  dd        y r@  )r  r>  r1  r  r  r  s       rX   test_all_reduce_coalesced_maxz;DistributedTest._DistTestBase.test_all_reduce_coalesced_max  sB    $($:$:$<!E8T22xt}}'8'8uRV 3 rY   c                     | j                         \  }}}| j                  |||t        j                  j                  dd        y r@  )r  r>  r1  r  r  r  s       rX   #test_all_reduce_coalesced_group_sumzADistributedTest._DistTestBase.test_all_reduce_coalesced_group_sum  D     %)$9$9$;!E8T22xt}}'8'8uRV 3 rY   c                     | j                         \  }}}| j                  |||t        j                  j                  dd        y r@  )r  r>  r1  r  r  r  s       rX   'test_all_reduce_coalesced_group_productzEDistributedTest._DistTestBase.test_all_reduce_coalesced_group_product  sH     %)$9$9$;!E8T22%%  3 rY   c                     | j                         \  }}}| j                  |||t        j                  j                  dd        y r@  )r  r>  r1  r  r  r  s       rX   #test_all_reduce_coalesced_group_minzADistributedTest._DistTestBase.test_all_reduce_coalesced_group_min  rK  rY   c                     | j                         \  }}}| j                  |||t        j                  j                  dd        y r@  )r  r>  r1  r  r  r  s       rX   #test_all_reduce_coalesced_group_maxzADistributedTest._DistTestBase.test_all_reduce_coalesced_group_max%  rK  rY   c                     | j                         \  }}}| j                  |||t        j                  j                  dd        y r@  )r  r>  r1  r  r  r  s       rX   (test_all_reduce_coalesced_full_group_sumzFDistributedTest._DistTestBase.test_all_reduce_coalesced_full_group_sum-  B    $($>$>$@!E8T22xt}}'8'8uRV 3 rY   c                     | j                         \  }}}| j                  |||t        j                  j                  dd        y r@  )r  r>  r1  r  r  r  s       rX   ,test_all_reduce_coalesced_full_group_productzJDistributedTest._DistTestBase.test_all_reduce_coalesced_full_group_product4  sF    $($>$>$@!E8T22%%  3 rY   c                     | j                         \  }}}| j                  |||t        j                  j                  dd        y r@  )r  r>  r1  r  r  r  s       rX   (test_all_reduce_coalesced_full_group_minzFDistributedTest._DistTestBase.test_all_reduce_coalesced_full_group_min@  sF    $($>$>$@!E8T22!!  3 rY   c                     | j                         \  }}}| j                  |||t        j                  j                  dd        y r@  )r  r>  r1  r  r  r  s       rX   (test_all_reduce_coalesced_full_group_maxzFDistributedTest._DistTestBase.test_all_reduce_coalesced_full_group_maxL  rT  rY   c                    |D ]  }t        |dz   d|      }t        |dz   ||      }	||k(  r|D 
cg c]  }
t        |dz   |
|       c}
ng }|r;|j                  ||   d         }|D cg c]  }|j                  ||   d          }}|t        j                  k(  r-|D cg c]!  }t        j                  |      j
                  # }}n|D cg c]  }|j
                   }}| j                  ddt        j                  ||||d|	       | j                  ||	        | j                          y c c}
w c c}w c c}w c c}w )NrH   r   rj  r   z:scatterF)r  scatter_listr  r  r  )rq  rn  rS   r  r  r  ri  r1  scatterr  r  )rV   r  r  r  rn  r\  rk  rO  r   r  r  r  r   r  s                 rX   _test_scatter_helperz2DistributedTest._DistTestBase._test_scatter_helperT  sR     :&tax5A"/q$e"L t| GLL]4!8Qe<L 
 #[[T):1)=>FELMqvvk$&7&:;MGMEOO+JQ$RQU%7%7%:%@%@$RM$R6=$>QWW$>M$>!!LL!("!&"/ " 
   95:8 MMO1 M N$R$>s   D3,D8#&D=Euccz&CPU tensor ops not supported by UCP TLc                 Z   | j                         \  }}}t        j                  dg      }|j                         dz  }|dk(  r9|D cg c]  }|j                         |z   }}t	        j
                  |d|       nt	        j
                  |d       | j                  |||z         |j                         dz  }|dk(  r8|D cg c]  }|j                         |z   }}t	        j
                  ||       nt	        j
                  |       | j                  |||z         y c c}w c c}w )NrH   r   r   )r  r\  r  )r\  )r  rS   r   r%  r1  r]  r  )rV   r  r[  r  oneoutputr  r\  s           rX   test_scatter_checksz1DistributedTest._DistTestBase.test_scatter_checksu  s     &*%;%;%="E9d**aS/C YY[2%Fqy9>?A		a??VFV+VS4Z0 YY[2%Fqy9>?A		a??V,?V$VS4Z0  @  @s   D#D(c                 R    | j                         \  }}}| j                  |||       y rk   )r  r^  r  s       rX   test_scatterz*DistributedTest._DistTestBase.test_scatter  s+     %)$:$:$<!E8T%%eXt<rY   zOnly Nccl supports CUDA gatherc                     | j                         \  }}}t        t        j                         t              }| j                  |||d|       y NT)r  r&   r1  r   r   r^  r  s        rX   test_scatter_cudaz/DistributedTest._DistTestBase.test_scatter_cuda  sD    
 %)$:$:$<!E8T.t/B/B/DgNK%%eXtT;OrY   c                 r    | j                         \  }}}| j                  |||t        j                         y ri  )r  r^  rS   r  r  s       rX   test_scatter_complexz2DistributedTest._DistTestBase.test_scatter_complex  s4     %)$:$:$<!E8T%%eXt5<<%PrY   c                     | j                         \  }}}t        t        j                         t              }| j                  |||d|t        j                         y NTrj  )r  r&   r1  r   r   r^  rS   r  r  s        rX   test_scatter_cuda_complexz7DistributedTest._DistTestBase.test_scatter_cuda_complex  sR    
 %)$:$:$<!E8T.t/B/B/DgNK%%xt[ & rY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  r^  r  s       rX   test_scatter_groupz0DistributedTest._DistTestBase.test_scatter_group  s+     %)$9$9$;!E8T%%eXt<rY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  r^  r  s       rX   test_scatter_full_groupz5DistributedTest._DistTestBase.test_scatter_full_group  s+     %)$>$>$@!E8T%%eXt<rY   c                 P   |D ]  }t        |dz   |      }||k(  r|D cg c]  }t        |dz   d       c}ng }	|r;|j                  ||   d         }|	D 
cg c]  }
|
j                  ||   d          }	}
| j                  ddt        j                  |||	|dt        |	      dkD  r|	d   j                  gnd 	       ||k(  s|D cg c]  }t        |dz   |       }}t        |	|      D ]  \  }}| j                  ||         | j                          y c c}w c c}
w c c}w )NrH   r   r   z:gatherF)r  gather_listr  r  r  )
rq  rn  ri  r1  gatherr  r  r  r  r  )rV   r  r  r  rn  r\  rO  r   r  r  r   r  t1t2s                 rX   _test_gather_helperz1DistributedTest._DistTestBase._test_gather_helper  sI     1&tax6DHDL%@Q]4!8R0@VX  #[[T):1)=>FELMqvvk$&7&:;MGM!!KK '"!&8;Gq8H71:#3#3"4d " 
 4<LQ'RqdQh(B'R$'R"%g/?"@ 1B((R01+10 MMO+ A N (Ss   DDD#c                 B   | j                         \  }}}t        j                  dg      }|dk(  rX|D cg c]  }|j                          }}t	        j
                  ||z  d|       |D ]  }| j                  ||   ||z          nt	        j
                  ||z  d       |dk(  rW|D cg c]  }|j                          }}t	        j
                  ||z  |       |D ]  }| j                  ||   ||z          y t	        j
                  ||z         y c c}w c c}w )NrH   r   )r  rt  r  )rt  )r  rS   r   r%  r1  ru  r  )rV   r  r[  r  rb  ru  rt  r  s           rX   test_gather_checksz0DistributedTest._DistTestBase.test_gather_checks  s    &*%;%;%="E9d**aS/C qy49:qsyy{::C$JA;G >A$$[^S1W=> C$JA. qy49:qsyy{::C$JK@ >A$$[^S1W=> C$J' ; ;s   D+Dc                 R    | j                         \  }}}| j                  |||       y rk   )r  rx  r  s       rX   test_gatherz)DistributedTest._DistTestBase.test_gather  s+     %)$:$:$<!E8T$$UHd;rY   c                     | j                         \  }}}t        t        j                         t              }| j                  |||d|       y rh  )r  r&   r1  r   r   rx  r  s        rX   test_gather_cudaz.DistributedTest._DistTestBase.test_gather_cuda  sD    
 %)$:$:$<!E8T.t/B/B/DgNK$$UHdD+NrY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  rx  r  s       rX   test_gather_groupz/DistributedTest._DistTestBase.test_gather_group  s+     %)$9$9$;!E8T$$UHd;rY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  rx  r  s       rX   test_gather_full_groupz4DistributedTest._DistTestBase.test_gather_full_group&  s+     %)$>$>$@!E8T$$UHd;rY   c                    |D ]0  }t        |dz   ||      }|D 	cg c]  }	t        |dz   d|       }
}	t        j                  }|r;|j                  ||   d         }|
D cg c]  }|j                  ||   d          }
}|
d   j                  t
        j                  k(  r$t        j                  |
d         j                  g}n|
d   j                  g}| j                  dd||
||d|       |D 	cg c]  }	t        |dz   |	|       }}	t        |
|      D ]  \  }}| j                  ||        3 | j                          y c c}	w c c}w c c}	w )NrH   rj  r   r   :all_gatherFr  )rq  r1  r  rn  rk  rS   r  r  r  ri  r  r  r  )rV   r  r  r  rn  r\  rk  rO  r   r  r  	allgatherr   r  r  rv  rw  s                    rX   _test_all_gather_helperz5DistributedTest._DistTestBase._test_all_gather_helper1  st     -&taxUCMRS=2UCSS OO	#[[T):1)=>FELMqvvk$&7&:;MGM1:##u6%*%7%7
%C%I%I$JM%,QZ%5%5$6M!!!"/ " 	 FK$@AM$(AU;$  $ "'+;< -FB$$R,-3-8 MMO5 T N $s   E#E4Ec                 R    | j                         \  }}}| j                  |||       y rk   )r  r  r  s       rX   test_all_gatherz-DistributedTest._DistTestBase.test_all_gatherR  +     %)$:$:$<!E8T(($?rY   z"Only Nccl supports CUDA all gatherc                     | j                         \  }}}t        t        j                         t              }| j                  |||d|       y rh  )r  r&   r1  r   r   r  r  s        rX   test_all_gather_cudaz2DistributedTest._DistTestBase.test_all_gather_cudaY  D    
 %)$:$:$<!E8T.t/B/B/DgNK(($kRrY   c                 r    | j                         \  }}}| j                  |||t        j                         y ri  )r  r  rS   r  r  s       rX   test_all_gather_complexz5DistributedTest._DistTestBase.test_all_gather_complexb  4     %)$:$:$<!E8T(($ell(SrY   c                     | j                         \  }}}t        t        j                         t              }| j                  |||d|t        j                         y rm  )r  r&   r1  r   r   r  rS   r  r  s        rX   test_all_gather_cuda_complexz:DistributedTest._DistTestBase.test_all_gather_cuda_complexi  R    
 %)$:$:$<!E8T.t/B/B/DgNK((xt[ ) rY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  r  r  s       rX   test_all_gather_groupz3DistributedTest._DistTestBase.test_all_gather_groupt  +    
 %)$9$9$;!E8T(($?rY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  r  r  s       rX   test_all_gather_full_groupz8DistributedTest._DistTestBase.test_all_gather_full_group|  +     %)$>$>$@!E8T(($?rY   zOnly Nccl supports all_gather_vc           	         | j                          | j                         \  }}}t        t        j                         t
              }||   d   }|D cg c]  }|dz   	 }}t        |      }d}	dD ]  }
t        j                  ||   ||t        j                        j                  |	      j                  |      }t        |d|      }t        j                  t        t        j                  ||            |||
      }|
r|j!                          |	}t        |||      }| j#                  ||        | j                          y c c}w )Nr   rH   rJ   r  rj  r   r  )r  r  r&   r1  r   r   r  rS   rl  ra  rm  rn  rq  r  r  r$  r  r  )rV   r  r  r  r\  rp  r  output_split_sizesr  rq   r  r   r  r  r  r  s                   rX   test_all_gather_v_cudaz4DistributedTest._DistTestBase.test_all_gather_v_cuda  s5   
 MMO$($:$:$<!E8T.t/B/B/DgNK#D)!,I5:!;c#'!;!;,-GE* >	KK*40'7%++ U5\T)_  +7B)L
ooZ1CDE	 HHJ!&"/^y#   _=1>2 MMO; "<s   Ec                 L   |r.|j                  ||   d         }|j                  ||   d         }|j                  t        j                  k(  r!t        j                  |      j
                  g}n|j
                  g}| j                  ddt        j                  |||dd|	       |S )Nr   z:all_gather_into_tensorFr  )	rn  rk  rS   r  r  r  ri  r1  all_gather_into_tensorr  s           rX   _all_gather_into_tensor_helperz<DistributedTest._DistTestBase._all_gather_into_tensor_helper  s     %NN;t+<Q+?@	'__[->q-AB
5??2!&!3!3I!>!D!D E!* 1)++"+  
 rY   z.Only Nccl supports CUDA all_gather_into_tensorc           	         | j                         \  }}}t        t        j                         t              }d}t        j                  ||g      |z  }t        j                  t        |      |z  |g      dz  }| j                  ||||d|      }t        j                  |D cg c]  }t        j                  ||g      |z   c}      }	| j                  ||	       | j                          y c c}w NrJ   r   T)r  r&   r1  r   r   rS   r   r  r  r.  r  r  
rV   r  r  r  r\  ro  r  r  r  r  s
             rX   $test_all_gather_into_cat_tensor_cudazBDistributedTest._DistTestBase.test_all_gather_into_cat_tensor_cuda  s    
 %)$:$:$<!E8T.t/B/B/DgNKD

D$<047ISZ$%6$=>"EJ<<Ixt[J $iiu(U!T4L)AA)E(UVOZ9MMO )Vs   "!C-c           	         | j                         \  }}}t        t        j                         t              }d}t        j                  ||g      |z  }t        j                  t        |      ||g      dz  }| j                  ||||d|      }t        j                  |D cg c]  }t        j                  ||g      |z   c}      }	| j                  ||	       | j                          y c c}w r  )r  r&   r1  r   r   rS   r   r  r  stackr  r  r  s
             rX   &test_all_gather_into_stack_tensor_cudazDDistributedTest._DistTestBase.test_all_gather_into_stack_tensor_cuda  s    
 %)$:$:$<!E8T.t/B/B/DgNKD

D$<047ISZt$<=DJ<<Ixt[J $kkQV*WA5::tTl+Ca+G*WXOZ9MMO +Xs    !C+c           	         g }|D ]i  }|j                   t        j                  k(  r/|j                  t        j                  |      j
                         O|j                  |j
                         k | j                  ddt        j                  ||||       t        ||      D ]2  \  }}t        ||      D ]  \  }	}
t        j                  |	|
      r  y 4 y)z|
            Helper that runs all_gather_coalesced and returns true if output
            matches expectations.
            r  Fr  T)rk  rS   r  r  r  r  ri  r1  all_gather_coalescedr  rp   )rV   output_tensor_listsinput_tensorsr  r  r  input_tensorl1l2rv  rw  s              rX   $_run_all_gather_coalesced_and_verifyzBDistributedTest._DistTestBase._run_all_gather_coalesced_and_verify  s     M - =%%8!((););L)I)O)OP!((););<	=
 ))#+   13CD %B!"bk %FB ;;r2.$%% rY   c                    |t        dd      D ]  }t        d|      D cg c]  }t        ||||z   |       }}|D cg c])  }t        d|      D cg c]  }t        ||d|       c}+ }	}}|D 
cg c],  }
t        d|      D cg c]  }t        |||
|z   |       c}. }}
}| j                  |	|||      rJ d        | j                          y c c}w c c}w c c}}w c c}w c c}}
w )NrJ   r   rH   rj  r   z,output tensors do not match expected outputs)rs  rv  r  r  )rV   r  r  r  rk  test_case_id	tensor_idr  ru  r  	rank_iterr  s               rX   !_test_all_gather_coalesced_helperz?DistributedTest._DistTestBase._test_all_gather_coalesced_helper  sL   
 #$)!QK FL */q,)?	% & /%y$2B%%M % "'+  .31l-C	 !* 3 )9b+' +  */( & .31l-C	 !* 3 )9i)6KSX($ (  DD+]<Lh FEF 7F> MMO9%+(s5   C C"C)C"6C-	C("C-C"(C-allgather_coalescedz& does not support all_gather_coalescedc                 R    | j                         \  }}}| j                  |||       y rk   )r  r  r  s       rX    test_all_gather_coalesced_simplez>DistributedTest._DistTestBase.test_all_gather_coalesced_simple2  s+    
 %)$:$:$<!E8T225(DIrY   c                 r    | j                         \  }}}| j                  |||t        j                         y ri  )r  r  rS   r  r  s       rX   !test_all_gather_coalesced_complexz?DistributedTest._DistTestBase.test_all_gather_coalesced_complex:  s9    
 %)$:$:$<!E8T22xU\\ 3 rY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  r  r  s       rX   test_all_gather_coalesced_groupz=DistributedTest._DistTestBase.test_all_gather_coalesced_groupD  s+     %)$9$9$;!E8T225(DIrY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  r  r  s       rX   $test_all_gather_coalesced_full_groupzBDistributedTest._DistTestBase.test_all_gather_coalesced_full_groupM  s+    
 %)$>$>$@!E8T225(DIrY   c                    | j                         \  }}}|t        j                  ddg      z  t        j                  dg      |dz   t        j                  ddg      z  t        j                  dg      t        j                  dg      g}|D cg c]~  }dt        j                  ddg      z  dt        j                  dg      z  dt        j                  ddg      z  dt        j                  dg      z  dt        j                  dg      z  g }}|D cg c]x  }|t        j                  ddg      z  t        j                  dg      |dz   t        j                  ddg      z  t        j                  dg      t        j                  dg      gz }}| j                  ||||      sJ | j	                          y c c}w c c}w )NrJ   r   rH   rz   r   )r  rS   r   r  r  )	rV   r  r  r  r  ru  output_tensors_listsrT  r  s	            rX   $test_all_gather_coalesced_with_emptyzBDistributedTest._DistTestBase.test_all_gather_coalesced_with_emptyU  s   
 %)$:$:$<!E8Tuzz1a&))

A3UZZA//

A3

A3M 	$  QF++QC(QF++QC(QC(	$  	$$ 	   

Aq6**JJsOUejj!Q00JJsOJJsO	  	  <<$m5Ex   MMO/	$	 s   BF>A=Gc           	         |@t        |      }t        j                  ||g|      |z  }t        j                  |D 	cg c]  }	t        j                  d|g|      |	z    c}	      }
t        j                  ||g|      dz  }|rE|j	                  ||   d         }|
j	                  ||   d         }
|j	                  ||   d         }|t        j
                  k(  r!t        j                  |      j                  g}n|j                  g}| j                  ddt        j                  ||||       | j                  ||
       | j                          y c c}	w )Nrj  rH   r   r   z:all_to_allF)r  r  )r  rS   r   r.  rn  r  r  r  ri  r1  all_to_all_singler  r  )rV   r  r  r  rn  r\  rk  ro  	in_tensorr  r  r  r  s                rX   *_test_all_to_all_single_equal_split_helperzHDistributedTest._DistTestBase._test_all_to_all_single_equal_split_helper|  sD    #5z!JJd|5ADH	"'))EJKUZZD	7!;K# #ZZtEBRG
 ){4/@/C DI&5&:&:;t;LQ;O&PO!+T1B11E!FJEOO+%*%7%7	%B%H%H$IM%.__$5M!!!**""/ "    _=MMO+ Ls   #Ec           	         |&t        |      }|D cg c]  }|dz   	 }	}|D 
cg c]  }
|dz   	 }}
t        j                  t        |	      |g|      |z  }t        j                  |dz   |z  |g|      }t        j                  |D cg c]!  }t        j                  |dz   |g|      |z  # c}      }|rE|j                  ||   d         }|j                  ||   d         }|j                  ||   d         }t        j                  ||||	|       | j                  ||       | j                          y c c}w c c}
w c c}w NrH   rj  r   r  )
r  rS   r   r  r.  rn  r1  r  r  r  )rV   r  r  r  rn  r\  rk  ro  r  	in_splitsru  
out_splitsr  r  r  s                  rX   ,_test_all_to_all_single_unequal_split_helperzJDistributedTest._DistTestBase._test_all_to_all_single_unequal_split_helper  sC    #5z,12qQU2	20561dQh6
6!JJI'=UKdR	"ZZ$(d):D(AO
"'))LQRqUZZ4 0>BR#  ){4/@/C DI&5&:&:;t;LQ;O&PO!+T1B11E!FJ&&	:y   _=MMO 36 Ss   D;E 
&Ec                 "   |Xt        |      }|D cg c]  }|dz   	 }	}t        |      D 
cg c]$  \  }}
t        j                  |	|   |g|      |z  & }}}
|D 
cg c]  }
t        j                  |dz   |g|        }}
|D cg c]!  }t        j                  |dz   |g|      |z  # }}|rl|D cg c]  }|j	                  ||   d          }}|D cg c]  }|j	                  ||   d          }}|D cg c]  }|j	                  ||   d          }}t        j                  |||       t        ||      D ]  \  }}| j                  ||        | j                          y c c}w c c}
}w c c}
w c c}w c c}w c c}w c c}w r  )
r  	enumeraterS   r   rn  r1  
all_to_allr  r  r  )rV   r  r  r  rn  r\  rk  ro  r  r  ru  
in_tensorsout_tensorsr  r   rv  rw  s                    rX   _test_all_to_all_helperz5DistributedTest._DistTestBase._test_all_to_all_helper  s    #5z,12qQU2	2 !*% 01 JJ	!d35ADH
 
 JODEEJJD1?  LQ$FGEJJq$/u=A$  $ HR!S1!&&T):1)=">!SJ!S>N(9:{4034($ ( JU"UA166+d*;A*>#?"UK"UZxH!+/?@ -FB$$R,-MMO) 3$ "T( #Vs(   E-)E2#E8&E=6FF>Fmpiz'Only MPI supports CPU all_to_all_singlec                 R    | j                         \  }}}| j                  |||       y rk   )r  r  r  s       rX   "test_all_to_all_single_equal_splitz@DistributedTest._DistTestBase.test_all_to_all_single_equal_split  s+     %)$:$:$<!E8T;;E8TRrY   z)Only Nccl supports CUDA all_to_all_singlec                     | j                         \  }}}t        t        j                         t              }| j                  |||d|       y rh  )r  r&   r1  r   r   r  r  s        rX   'test_all_to_all_single_equal_split_cudazEDistributedTest._DistTestBase.test_all_to_all_single_equal_split_cuda  sK    
 %)$:$:$<!E8T.t/B/B/DgNK;;rY   c                 r    | j                         \  }}}| j                  |||t        j                         y ri  )r  r  rS   r  r  s       rX   *test_all_to_all_single_equal_split_complexzHDistributedTest._DistTestBase.test_all_to_all_single_equal_split_complex  s9     %)$:$:$<!E8T;;xU\\ < rY   c                     | j                         \  }}}t        t        j                         t              }| j                  |||d|t        j                         y rm  )r  r&   r1  r   r   r  rS   r  r  s        rX   /test_all_to_all_single_equal_split_cuda_complexzMDistributedTest._DistTestBase.test_all_to_all_single_equal_split_cuda_complex  sR    
 %)$:$:$<!E8T.t/B/B/DgNK;;xt[ < rY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  r  r  s       rX   $test_all_to_all_single_unequal_splitzBDistributedTest._DistTestBase.test_all_to_all_single_unequal_split  s+     %)$:$:$<!E8T==eXtTrY   c                     | j                         \  }}}t        t        j                         t              }| j                  |||d|       y rh  r  r&   r1  r   r   r  r  s        rX   )test_all_to_all_single_unequal_split_cudazGDistributedTest._DistTestBase.test_all_to_all_single_unequal_split_cuda  sK    
 %)$:$:$<!E8T.t/B/B/DgNK==rY   c                 r    | j                         \  }}}| j                  |||t        j                         y ri  )r  r  rS   r  r  s       rX   ,test_all_to_all_single_unequal_split_complexzJDistributedTest._DistTestBase.test_all_to_all_single_unequal_split_complex  s9     %)$:$:$<!E8T==xU\\ > rY   c                     | j                         \  }}}t        t        j                         t              }| j                  |||d|t        j                         y rm  )r  r&   r1  r   r   r  rS   r  r  s        rX   1test_all_to_all_single_unequal_split_cuda_complexzODistributedTest._DistTestBase.test_all_to_all_single_unequal_split_cuda_complex  sW    
 %)$:$:$<!E8T.t/B/B/DgNK==ll > rY   zOnly MPI supports all_to_allc                 R    | j                         \  }}}| j                  |||       y rk   )r  r  r  s       rX   test_all_to_allz-DistributedTest._DistTestBase.test_all_to_all*  r  rY   z"Only NCCL supports CUDA all_to_allc                     | j                         \  }}}t        t        j                         t              }| j                  |||d|       y rh  )r  r&   r1  r   r   r  r  s        rX   test_all_to_all_cudaz2DistributedTest._DistTestBase.test_all_to_all_cuda1  r  rY   c                 r    | j                         \  }}}| j                  |||t        j                         y ri  )r  r  rS   r  r  s       rX   test_all_to_all_complexz5DistributedTest._DistTestBase.test_all_to_all_complex:  r  rY   c                     | j                         \  }}}t        t        j                         t              }| j                  |||d|t        j                         y rm  )r  r&   r1  r   r   r  rS   r  r  s        rX   test_all_to_all_cuda_complexz:DistributedTest._DistTestBase.test_all_to_all_cuda_complexA  r  rY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  r  r  s       rX   (test_all_to_all_single_equal_split_groupzFDistributedTest._DistTestBase.test_all_to_all_single_equal_split_groupL  s+    
 %)$9$9$;!E8T;;E8TRrY   c                     | j                         \  }}}t        t        j                         t              }| j                  |||d|       y rh  )r  r&   r1  r   r   r  r  s        rX   -test_all_to_all_single_equal_split_group_cudazKDistributedTest._DistTestBase.test_all_to_all_single_equal_split_group_cudaT  sK     %)$9$9$;!E8T.t/B/B/DgNK;;rY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  r  r  s       rX   *test_all_to_all_single_unequal_split_groupzHDistributedTest._DistTestBase.test_all_to_all_single_unequal_split_groupd  s+    
 %)$9$9$;!E8T==eXtTrY   c                     | j                         \  }}}t        t        j                         t              }| j                  |||d|       y rh  r  r  s        rX   /test_all_to_all_single_unequal_split_group_cudazMDistributedTest._DistTestBase.test_all_to_all_single_unequal_split_group_cudal  sK     %)$:$:$<!E8T.t/B/B/DgNK==rY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  r  r  s       rX   test_all_to_all_groupz3DistributedTest._DistTestBase.test_all_to_all_group|  r  rY   c                     | j                         \  }}}t        t        j                         t              }| j                  |||d|       y rh  )r  r&   r1  r   r   r  r  s        rX   test_all_to_all_group_cudaz8DistributedTest._DistTestBase.test_all_to_all_group_cuda  sD     %)$9$9$;!E8T.t/B/B/DgNK(($kRrY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  r  r  s       rX   -test_all_to_all_single_equal_split_full_groupzKDistributedTest._DistTestBase.test_all_to_all_single_equal_split_full_group  s+     %)$>$>$@!E8T;;E8TRrY   c                     | j                         \  }}}t        t        j                         t              }| j                  |||d|       y rh  )r  r&   r1  r   r   r  r  s        rX   2test_all_to_all_single_equal_split_full_group_cudazPDistributedTest._DistTestBase.test_all_to_all_single_equal_split_full_group_cuda  sK    
 %)$>$>$@!E8T.t/B/B/DgNK;;rY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  r  r  s       rX   /test_all_to_all_single_unequal_split_full_groupzMDistributedTest._DistTestBase.test_all_to_all_single_unequal_split_full_group  s+     %)$>$>$@!E8T==eXtTrY   c                     | j                         \  }}}t        t        j                         t              }| j                  |||d|       y rh  )r  r&   r1  r   r   r  r  s        rX   4test_all_to_all_single_unequal_split_full_group_cudazRDistributedTest._DistTestBase.test_all_to_all_single_unequal_split_full_group_cuda  sK    
 %)$>$>$@!E8T.t/B/B/DgNK==rY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  r  r  s       rX   test_all_to_all_full_groupz8DistributedTest._DistTestBase.test_all_to_all_full_group  r  rY   c                     | j                         \  }}}t        t        j                         t              }| j                  |||d|       y rh  )r  r&   r1  r   r   r  r  s        rX   test_all_to_all_full_group_cudaz=DistributedTest._DistTestBase.test_all_to_all_full_group_cuda  sD    
 %)$>$>$@!E8T.t/B/B/DgNK(($kRrY   c           
      |   d}|D ]"  }t        j                  d      j                  d      }|r|j                  ||   d         }||k(  rk|j                  t	        j                         |z          t        j                  |||       t	        j                  |dz          t        j                  |       t        j                  |||       t        j                  |       | j                  t        t	        j                               t        |d         d|dd|dd	z   
       % | j                  d       y )N333333?rH           r   r  zdestination rank: rH  z, my rank: z3 (if you see this failure, please report in #14554)msgr  r@  )rS   DoubleTensorrm  rn  r  r1  r  r  r  r7  ra  r  )	rV   r  r  r  rn  r\  	WAIT_TIMErO  r9  s	            rX   _test_barrier_helperz2DistributedTest._DistTestBase._test_barrier_helper  s    I  % 2 21 5 ; ;C @$1$6$6{47H7K$LM4<!''		i(?@NN=$AJJy3/LL*NN=$ALL*11diik*mA./0aD8LOP 2 0 MM"M%rY   z MPI doesn't supports GPU barrierzSkipped internallyc                     | j                         \  }}}t        t        j                         t              }| j                  |||d|       y rh  )r  r&   r1  r   r   r	  r  s        rX   test_barrier_cudaz/DistributedTest._DistTestBase.test_barrier_cuda  sD     %)$:$:$<!E8T.t/B/B/DgNK%%eXtT;OrY   c                     | j                         \  }}}t        t        j                         t              }| j                  |||d|       y rh  )r  r&   r1  r   r   r	  r  s        rX   test_barrier_group_cudaz5DistributedTest._DistTestBase.test_barrier_group_cuda  sD     %)$9$9$;!E8T.t/B/B/DgNK%%eXtT;OrY   c                     | j                         \  }}}t        t        j                         t              }| j                  |||d|       y rh  )r  r&   r1  r   r   r	  r  s        rX   test_barrier_full_group_cudaz:DistributedTest._DistTestBase.test_barrier_full_group_cuda   sD     %)$>$>$@!E8T.t/B/B/DgNK%%eXtT;OrY   zcpu barrierz does not support CPU barrierc                 R    | j                         \  }}}| j                  |||       y rk   )r  r	  r  s       rX   test_barrierz*DistributedTest._DistTestBase.test_barrier
  s+    
 %)$:$:$<!E8T%%eXt<rY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  r	  r  s       rX   test_barrier_groupz0DistributedTest._DistTestBase.test_barrier_group  s+     %)$9$9$;!E8T%%eXt<rY   c                 R    | j                         \  }}}| j                  |||       y rk   )r  r	  r  s       rX   test_barrier_full_groupz5DistributedTest._DistTestBase.test_barrier_full_group  s+    
 %)$>$>$@!E8T%%eXt<rY   c                     |j                         D ]B  }|j                  t        j                         5  ||j                  z  }d d d        d |_        D y # 1 sw Y   xY wrk   )rs  r  rS   no_gradrV   r   r  s      rX   _model_stepz)DistributedTest._DistTestBase._model_step#  sW    ))+ &::) ,+,!%EJ	&, ,s   AA 	c                 "   |j                         D ]p  }|j                  t        j                         5  ||j                  z  }d d d        |j                  j	                  d       |j                  j                          r y # 1 sw Y   AxY wr  )rs  r  rS   r  requires_grad_zero_r  s      rX   _model_step_with_zero_gradz8DistributedTest._DistTestBase._model_step_with_zero_grad*  sr    ))+ '::) ,+,JJ--e4JJ$$&', ,s   BB	c                     t        t        j                  d         }||z  }t        j                  |d      }t        j                  |d      }t        j                         }||||fS )NrG  rJ   r{   )r   r   rJ  rS   rT   rN   MSELoss)rV   local_bsrK  	global_bs	input_cputargetr  s          rX   _prepare_dummy_dataz1DistributedTest._DistTestBase._prepare_dummy_data2  sY    RZZ56J"X-IIq1I[[A.F::<Di55rY   c                     |j                           ||      } |||      |z  }|j                          |"| j                  |j                  |             y y )Nmemory_format)trainbackwardr)  is_contiguous)	rV   r   	input_varr#  r  scale_factorr'  rc  ls	            rX   _test_DDP_helperz.DistributedTest._DistTestBase._test_DDP_helper<  sV     KKM9%FVV$|3AJJL( 4 4= 4 QR )rY   c                     | j                  t        |      t        |             t        ||      D ]  \  }}| j                  ||        y rk   )r  r  r  )rV   	param_gpu	param_DDPp_gpup_DDPs        rX   _assert_equal_paramz1DistributedTest._DistTestBase._assert_equal_paramF  sD    S^S^< #Iy 9 /u  ./rY   c           	      0   t        |      D ]  }| j                  |||||       |
||z  }
| j                  |||
|
|z    ||
|
|z    ||dk7  r||z  |z  nd|       |r#| j                  |       | j                  |       n"| j                  |       | j                  |       | j	                  t        |j                               t        |j                  j                                      |t        j                  |         }|	s|dk(  st        j                  d      st        j                         5 }t        j                  dk(  r?t        j                   ||       |j#                  d       t        j$                  |d      }nAt        j                   ||j&                         t        j$                  |j&                  d      }d d d         t        j(                         5 }t        j                   ||       |j#                  d       t        j$                  |d      }d d d        |j+                         D ]6  }| j-                  |j+                         |   j+                         |          8 y # 1 sw Y   }xY w# 1 sw Y   `xY w)	Nr&  r   rH   rJ   r<  r@   Fweights_only)rs  r.  r  r  r4  r  rs  r   rS   randpermr   r   r   r   rS  rT  saveseekr   r   TemporaryFile
state_dictr  )rV   
model_base	model_DDPr  r#  r  r   r  
batch_size	test_saveoffsetrK  	zero_gradr'  n_iteridxtmptmp_filesaved_modelks                       rX   _test_DDP_niterz-DistributedTest._DistTestBase._test_DDP_niterK  s]   " V} ,Q%%vt= &  >!H_F %%&6H#456FX$56:D/J)J6q"/ &  33J?33I>$$Z0$$Y/((..014	8H8H8S8S8U3V
 ennZ89 k.D.DY.O!446 	Q#<<72!JJy#6HHQK(-

3U(KI!JJy#((;(-

388%(PI	Q 	QG,Q\ '') GX

9h/a #jjF	G
 ))+ Y  !5!5!7!:K<R<R<TUV<WXY!	Q 	QG Gs   ;BI?.?J?J		Jc                 r   t         }t        j                  |      }|j                  |d          t        j                  |      }	|	j                  |d          t        j
                  j                  |	|||      }	|r|	j                          t        j                         5 }
t        j                  dk(  r?t        j                  |	|
       |
j                  d       t        j                  |
d      }	nAt        j                  |	|
j                          t        j                  |
j                   d      }	d d d        t#        |      }| j%                  |      \  }}}}| j'                  ||	|j                  |d         |j                  |d         ||||d	       | j)                          y # 1 sw Y   xxY w)Nr   
device_idsgradient_as_bucket_viewstatic_graphr@   Fr6  T)DDP_NETr  r  rn  rN   parallelr!   _set_static_graphr   r   rS  rT  rS   r9  r:  r   r   r  r$  rI  r  )rV   
gpu_subsetr  output_devicerM  rN  set_static_graph_twicer   	model_gpur>  rE  r   r!  r"  r#  r  s                   rX   _test_DistributedDataParallelz;DistributedTest._DistTestBase._test_DistributedDataParallel  sw    E e,INN:a=) e,INN:a=);;%(?)	 < I &++- ,,. 	I#<<7*JJy#.HHQK %

3U CIJJy#((3 %

388% HI	I :H151I1I(1S.Iy&$   z!}-JqM*
 MMO7	I 	Is   "BF--F6c                 ,   | j                         \  }}}t        }t        j                  |      }t        j
                  j                  ||      }d}| j                  |      \  }}	}
}| j                  |||	|
||||dd
       | j                          |S )NrM  rJ   FT)rB  )
r  rO  r  r  rN   rP  r!   r$  rI  r  )rV   rM  rZ  r[  r  r=  r>  r   r!  r"  r#  r  s               rX    _test_DistributedDataParallelCPUz>DistributedTest._DistTestBase._test_DistributedDataParallelCPU  s     '+&<&<&>#FIt !J j1I;;3J < I
 H151I1I(1S.Iy&$    !  MMOrY   z'nccl does not support DDP on CPU modelsc                 $    | j                          y rk   rY  r  s    rX   test_DistributedDataParallelCPUz=DistributedTest._DistTestBase.test_DistributedDataParallelCPU  s     113rY   c                 (    | j                  d       y )NTrX  r[  r  s    rX   ,test_DistributedDataParallelCPU_grad_is_viewzJDistributedTest._DistTestBase.test_DistributedDataParallelCPU_grad_is_view  s     11$1OrY   ddpz1 backend does not support DistributedDataParallelc                 R    | j                  t        d        | j                          y )Nc                  d    t         j                  j                  t        j                               S rk   )rN   rP  r!   r   ry   rY   rX   rB  zZDistributedTest._DistTestBase.test_DistributedDataParallel_requires_grad.<locals>.<lambda>  s    bkk&I&I"))+&V rY   )r#  r  r  r  s    rX   *test_DistributedDataParallel_requires_gradzHDistributedTest._DistTestBase.test_DistributedDataParallel_requires_grad  s#     V MMOrY   rG  c                      G d dt         j                        } |       j                  | j                        }t         j                  j                  || j                  g       y )Nc                         e Zd Zd fdZ xZS )MDistributedTest._DistTestBase.test_ddp_zero_output_features.<locals>.ToyModelc                     t         |           t        j                  dd      | _        t        j
                         | _        t        j                  dd      | _        y )NrE   r   )rL   rM   rN   rO   net1r   r   net2rU   s    rX   rM   zVDistributedTest._DistTestBase.test_ddp_zero_output_features.<locals>.ToyModel.__init__  s?    G$& "		"b 1DI "	DI "		"a 0DIrY   r`   )rc   rd   re   rM   rf   rg   s   @rX   ToyModelre    s    1 1rY   ri  rL  )rN   r   tor  rP  r!   )rV   ri  r   s      rX   test_ddp_zero_output_featuresz;DistributedTest._DistTestBase.test_ddp_zero_output_features
  sJ    1299 1 JMM$)),EKK//499+ 0 rY   zGloo-only testc                 B    G d dt         j                        } |       }t        j                   j                  j	                  |      }t        d      D ]H  } |       j                  d       | j                  t        d |j                         D                     J y )Nc                   &     e Zd Zd fdZd Z xZS )BDistributedTest._DistTestBase.test_ddp_create_graph.<locals>.Modelc                 |    t         |           t        j                  t	        j
                  d            | _        y )N      ?)rL   rM   rN   r   rS   r   r   rU   s    rX   rM   zKDistributedTest._DistTestBase.test_ddp_create_graph.<locals>.Model.__init__  s'    G$&\\%,,s*;<DFrY   c                 8    | j                   j                  d      S r   )r   powr  s    rX   r_   zJDistributedTest._DistTestBase.test_ddp_create_graph.<locals>.Model.forward#  s    66::a=(rY   r`   rb   rg   s   @rX   Modelro    s    =)rY   rt     T)create_graphc              3   4   K   | ]  }|j                     y wrk   r   )r  r  s     rX   r  zFDistributedTest._DistTestBase.test_ddp_create_graph.<locals>.<genexpr>/  s     P++Pr  )
rN   r   rS   rP  r!   rs  r)  r)  r-  rs  )rV   rt  r   	ddp_modelru  s        rX   test_ddp_create_graphz3DistributedTest._DistTestBase.test_ddp_create_graph  s    )		 ) GE))AA%HI1X  $$$$7P9M9M9OPPrY   c                 ^   t         j                  j                  | j                        }| j                  }t         j                  j	                  |      5  t         j
                  j                  j                  t         j
                  j                  ddd      j                  |      |g      }t        d      D ]Y  }|j                  j                  j                  }|!|j                  d       |j                          t        j                  |g      j!                         j                  |      } ||      j#                         }|j%                          |j                  j                  j                  }|j'                         }t)        j*                  |       t-        t.        j0                  d         }	|j3                  |	       t#        d t        |	      D              |	z  }
| j5                  |d   |
d	|
 d
| d| j                          \ 	 d d d        y # 1 sw Y   y xY w)NrH   FrF   rj  r   rG  c              3       K   | ]  }|  y wrk   ry   r  r  s     rX   r  z`DistributedTest._DistTestBase.test_DistributedDataParallel_non_default_stream.<locals>.<genexpr>Q       'Ea'E   r   r   zExpected gradient of 	 but got z	 on rank r  )rS   rn  Streamr  streamrN   rP  r!   rO   rs  r   weightr  r  r  r   ra  r  r)  r%  r1  r  r   r   rJ  div_r  )rV   r  r  netr  r  batchr  avgrK  expected_grads              rX   /test_DistributedDataParallel_non_default_streamzMDistributedTest._DistTestBase.test_DistributedDataParallel_non_default_stream2  s    ZZ&&tyy1F99D""6* hh''??HHOOAquO5::4@dV @  t A::,,11D'++E2

!LL$0668==dCEu:>>+DMMO::,,11D**,C OOC(!$RZZ%=!>JHHZ($''E53D'E$E
$RM$$D	%3M?)C5PYZ^ZcZcYde % )	  s   GH##H,rn  z@ backend does not support DDP communication hook on CUDA devicesc                 D   t         j                  t         j                  t        j                  t        j
                  t        j                  t        j                  g}t        j                  j                  t        j                  j                  g}|D ]  }t        j                  j                  j!                  t        j                  j#                  ddd      j%                  | j&                        | j&                  g      }|j)                         }| j+                  |j-                  d      d        |j/                  d |       |j)                         }| j+                  |j-                  d      |j0                          |D ]  }t        j                  j                  j!                  t        j                  j#                  ddd      j%                  | j&                        | j&                  g      }|j)                         }| j+                  |j-                  d      d        |j3                  |       |j)                         }| j+                  |j-                  d      t5        |              t        j                  j                  j!                  t        j                  j#                  ddd      j%                  | j&                        | j&                  g      }|j)                         }| j+                  |j-                  d      d        t7        d      D ]J  }t        j8                  dd| j&                        } ||      j;                         }|j=                          L |j)                         }| j+                  |j-                  dd      d       y )	NrH   FrF   rj  	comm_hookrJ   r  r  )defaultallreduce_hookfp16_compress_hookpowerSGDr   batched_powerSGD_hookquantization_hooksquantization_pertensor_hookquantization_perchannel_hookr1  BuiltinCommHookType	ALLREDUCEFP16_COMPRESSrS   rN   rP  r!   rO   rn  r  _get_ddp_logging_datar  r   register_comm_hookre   _register_builtin_comm_hookr   rs  r   r  r)  )	rV   hookscpp_builtin_hooksr   rx  ddp_logging_dataru  r   r  s	            rX   test_ddp_comm_hook_loggingz8DistributedTest._DistTestBase.test_ddp_comm_hook_loggingX  s    &&**&&..">>"??E ((22((66!
  
W!HH--EEHHOOAquO5::499E $		{ F 	 $-#B#B#D   !1!5!5k!BDI,,T48#,#B#B#D   !1!5!5k!BDDUDUV
W * 
O!HH--EEHHOOAquO5::499E $		{ F 	 $-#B#B#D   !1!5!5k!BDI55d;#,#B#B#D   !1!5!5k!BCIN
O ))AA15166tyyA II; B I  )>>@-11+>E1X  jjAdii8 ~))+ 
  )>>@ -11+rBBGrY   c                 B	   | j                   }t        j                  j                  |       t        j                  |       t        j                  j	                  |       t               t        j                  dd      j                         fg}t        rS|j                  t        j                  j                         t        j                  dddd      j                         f       |D ]  \  }	}
t        j                  j                  j                  ddd      5  t        j                  j                   j#                  t%        j&                  |	      j                         | j                   g||      }t        j                  j                   j#                  t%        j&                  |	      j                         | j                   g||      }|j)                         }|j)                         }|r=t+        |      }t+        |      }| j-                  t/        |      d       |d   g}|d   g}|r |j0                  |g|d	|i| n |j0                  |g|i |  ||g|i |}t3        |j)                         |j)                               D ]  \  }}| j5                  ||        t%        j&                  t+        |j)                                     }t7        d
      D ]:  }|j9                           ||
      }|j;                         }|j=                          < t?        j@                          t7        d
      D ]J  }|j9                           ||
      }|j;                         }|j=                          |jC                          L t?        j@                          t3        |j)                         |j)                               D ]  \  }}| j5                  ||        |re| jE                  |d   tG        tI        |j)                                            | j5                  |dd  t+        |j)                               dd         n)| jE                  |t+        |j)                                      t?        j@                          d d d         y # 1 sw Y   xY w)NrH   r   rz   TFenableddeterministic	benchmarkrK  r   optim_paramsru  )%r  rS   rn  r  manual_seedr   rT   HAS_TORCHVISIONr  torchvisionmodelsresnet50rD  cudnnflagsrN   rP  r!   r  r  rs  r  assertGreaterr  _register_fused_optimr  r  rs  rB  r  r)  r1  r  r  r  r  iter)rV   grad_as_bucket_viewrN  	optim_clsoptimize_subsetfunctional_optim_argsfunctional_optim_kwargsr  models_to_testr   r   ddp_model_with_optimizer_hookddp_model_with_no_hookhook_paramsno_hook_paramsoptimizer_no_hook
hook_paramallreduce_paramopt_hook_init_paramsru  r  r  s                         rX   $_test_ddp_hook_with_optimizer_parityzBDistributedTest._DistTestBase._test_ddp_hook_with_optimizer_parity  s[    99DJJ!!$'d#JJ""4(U[[D16689N %% ''002EKK1a4N4S4S4UV !/ p#^^))//  0  n#
 ))AA MM%0557(,		{4G)5	 B  2 .3XX->->-V-Ve,113$(II;0C%1	 .W .* #@"J"J"LK%;%F%F%HN&&*;&7)-n)=**3{+;Q?'21~&6*8*;)<
 'K5KK%2 *5 6	 L5KK%2 6 )2&).) 2)% 8;5@@B.99;8 F3
O ((_E	F ,0==:EEGH,(
 #1X (5??A;C@"wwy	( LLN #1X 1.88:4S9"wwy)..01 LLN 8;5@@B.99;8 F3
O ((_E	F '++03 &C&N&N&P!QR
 ((04 !>!I!I!KLQRP
 ++0 !>!I!I!KL LLN]n# n#p#n# n#s   M+RR	c                    t         j                  j                  | j                         t	               j                         }dg}t         j
                  j                  j                  j                  ||       t         j
                  j                  j                  || j                  g       t         j
                  j                  j                  j                  |d      }|D ]  \  }}| j                  d|d    |        t        t        |j                                     dz
  }d}t         j
                  j                  j                  j                  |d      }|D ]  }|dz  }	 | j                  ||       y )	Na.weightrj  T)named_paramsmodule.r   rH   F)rS   rn  r  r  r  rN   rP  r!   +_set_params_and_buffers_to_ignore_for_model_get_data_parallel_paramsr  r  r  rs  r  )rV   r   params_to_ignore	dp_paramsr   ru  num_ddp_paramsr  s           rX   test_get_data_parallel_paramsz;DistributedTest._DistTestBase.test_get_data_parallel_paramsk  sT   JJ!!$)),"$))+E *|HH55aa' HH55499+ 6  ))AA[[D \ I % Ka##g.>q.A-B$CTJK
 !e&6&6&8!9:Q>NE))AA[[\apu[vI 
UN3rY   c           
         t        j                  | j                         t         j                  j                  | j                         t         j                  j	                  | j                         t        j                  t        j                  dd      t        j                  dd      t        j                  dd            j                         g}t        r;|j                  t        j                  j                         j                                t        |      D ][  \  }}t        j                  |      }t
        j                   j#                  || j                  g|      } ||j%                         fi |}	|rt'        ||j%                         |       t
        j                   j#                  || j                  g|      }|st'        ||j%                         |       t)        |j%                         |j%                               D ]  \  }
}| j+                  |
|d        t         j,                  j.                  j1                  ddd      5  t3        d      D ]  }|d	k(  rt        j4                  d	dd
d
d      nt        j4                  ddd      } ||      j7                         j9                          |	j;                           ||      j7                         j9                          t)        |j%                         |j%                               D ]<  \  }
}| j+                  |
|d|        | j=                  |j>                  d u d|        > |	jA                  d        	 d d d        ^ y # 1 sw Y   jxY w)Nrz   rL  rM  optimizer_classrl  optimizer_kwargszParameters not initially equal!TFr  r  rH   r   rn  r  rE   zParams not equal at iteration z&Optim in backward grad is not None at set_to_none)!rS   r  r  rn  r  rN   rq  rO   r  r  r  r  r  r  r  r  rP  r!   rs  r   r  r  rD  r  r  rs  rT   r  r)  r  r)  r  rB  )rV   r  optim_kwargsinit_beforerM  r  jr   model_optim_in_bwdr  p1p2r  r   s                 rX   !_test_ddp_apply_optim_in_backwardz?DistributedTest._DistTestBase._test_ddp_apply_optim_in_backward  s    dii(JJ""499-JJ!!$)), bii1oryyA		!QPUUWN %%k&8&8&A&A&C&H&H&JK%n5 8:5%)]]5%9";; $		{,C < 
 "%"2"2"4EE0(11<<>)5
 &([[%H%H& $		{,C &I &"
 #0(11<<>)5 "%"2"2"46H6S6S6UV PFB$$R-NOP ^^))//  0  : #1X :  !Av "KK1dDH!&R6!B 
 c
(113

*#%
&)!,,.0B0M0M0O' 	FB !,, "B*H(L !OO "4"H L	 D91:: :;8::: :s   DN  N
	c                 "   t        j                  t        j                  j                  t        j                  j
                  gddg      D ]6  \  }}| j                  |      5  | j                  |ddi|       d d d        8 y # 1 sw Y   CxY w)NTF)r  r  Q?)r  r  r  )r/  productrS   r  r  AdamsubTestr  )rV   r  r  s      rX    test_ddp_apply_optim_in_backwardz>DistributedTest._DistTestBase.test_ddp_apply_optim_in_backward  s    *3*;*;%++"2"23dE]+ &	; \\I\6 ::"+&*D\$/ ;   s   #BB	c                 p    dD ]1  }| j                  t        j                  j                  ddi|d       3 y )Nr  r  r  F)r  r  r  rM  )r  rS   r  r  )rV   r  s     rX   :test_ddp_apply_optim_in_backward_grad_as_bucket_view_falsezXDistributedTest._DistTestBase.test_ddp_apply_optim_in_backward_grad_as_bucket_view_false  s<    , 66#kkoo"& +,1	 7 rY   c           
      *   t         j                  j                  | j                         dD ]  }| j	                  |      5  t        j
                  | j                         t         j                  j                  | j                         t               }dg}t         j                  j                  j                  j                  ||       |r6t        t         j                  j                  |j                         ddi       t         j                  j                  j                  |j                  | j                        | j                  g      }|s6t        t         j                  j                  |j                         ddi       t        j                  dd	      } ||      \  }}|j!                  d
d      |z  j#                         j%                          t'        t)        j*                               D cg c]  }d  }	}t)        j,                  |	|       |	d
   |	dd  }}
|D ]  }| j/                  |
j0                  j2                  |j0                  j2                         | j5                  t7        |
j8                  j                               t7        |j8                  j                                      | j5                  |
j0                  j:                  |j0                  j:                          	 d d d         y c c}w # 1 sw Y   xY w)Nr  )r  r  r  r  r  rj  rH   rE   r   )rS   rn  r  r  r  r  r  rN   rP  r!   r  r   r  r  rs  rT   	transposer  r)  rs  r1  r   all_gather_objectr  rP   r  r  r  rQ   rG   )rV   r  r   r  r  r   rP   rQ   ru  r  rank0_model	remainderms                rX   /test_ddp_apply_optim_in_backward_ignored_paramszMDistributedTest._DistTestBase.test_ddp_apply_optim_in_backward_ignored_params  sa   JJ!!$)),, (G\\k\: 'G%%dii0JJ**4995*,E(2|$HH%%==ii/ #4,1KKOO#(#3#3#5.2D\
  ((++CC

499-$(II; D C '4,1KKOO#(#3#3#5.2D\
  ++a,Cs8DAq[[A&*//1::< -2$2E2E2G,HIqdIFI**659-3AYqr
K& G++KMM,@,@!##**M(( !9!9!;<d133>>CS>T ((););QSSXXFGE'G 'G(G@ J?'G 'Gs    F:L<	LC1LLL	ra   c                 r    t        t        j                  t        j                  t        j                        S )N)param_dtypereduce_dtypebuffer_dtype)r#   rS   float16r  s    rX   _get_fp16_configz.DistributedTest._DistTestBase._get_fp16_config  s$    "!MM"]]"]] rY   c                 R   | j                   }t        j                  |       t        j                  j                  |       t        j                  j	                  |       t               }|j                  dt        j                  d             ddg}t        j                  j                  j                  j                  ||       | j                         }t        j                  j                  j                  |j                  |      |g|d      }|D cg c]  }d| 	 }}t        |      }d}t        j                   |j#                         |j%                               D ]  \  }	}
|	|v r<|dz  }| j'                  t)        |
d	             | j'                  t)        |
d
             F| j+                  |j,                  |
j.                  j0                         | j+                  t        j2                  |
j4                  j0                          | j+                  ||       y c c}w )NrI   r   r  TrL  mixed_precisionrM  r  r   rH   	_mp_param	_fp_param)r  rS   r  rn  r  r  rR   r   rN   rP  r!   r  r  rk  r  r/  chainnamed_parametersr  rH  hasattrr  r  r  rk  float32r  )rV   r  r   	to_ignore	mp_configr  r   expected_ignored	n_ignorednr   s              rX   .test_ddp_native_mixed_precision_ignored_paramszLDistributedTest._DistTestBase.test_ddp_native_mixed_precision_ignored_params  s   99Dd#JJ""4(JJ!!$'"$E!!(EJJqM:#X.IHH55aay --/I((##;; 6 )(,	 < C 7@@d74&)@I@"9~I#//#*>*>*@#BSBSBUV GA	>NI$$WQ%<=$$WQ%<=$$Y%:%:AKK<M<MN$$U]]AKK4E4EFG -y9 As   H$c                      j                   }t        j                  |       t        j                  j                  |       t        j                  j	                  |       t        j
                  dd      } j                          G  fddt        j                  j                        } |       }t        j                  j                  j                  |j                  |      |g|      } j                  |j                  j                  j                  j                          |j#                         D ]f  } j                  j$                  |j&                  j                          j                  t        j(                  |j*                  j                         h t-        d      D ]  }	 ||      j/                         }
|
j1                          |j3                         D ]a  \  }} j                  |j                  t        j(                         |j4                  |dk(  rCJ  j                  |j4                  j                  t        j(                         t-        t7        j8                  |j:                              D 	cg c]!  }	t        j<                  |j4                        # }}	t7        j>                  ||j4                         |d   |dd  }} j                  |j                  t        j(                         |D ]>  } j                  |j                  t        j(                          j                  ||       @ d |jA                  |	        y c c}	w )
NrE   rH   c                   .     e Zd Zd fdZfdZ xZS )ODistributedTest._DistTestBase._test_ddp_native_mixed_precision.<locals>.MyModelc                 ,   t         |           t        j                  j	                  dd      | _        | j                  dt        j                  dd             t        j                  j                  t        j                  dd      d      | _	        y )NrH   r   rI   rJ   rE   Fr   )
rL   rM   rS   rN   rO   r  rR   rT   r   r   rU   s    rX   rM   zXDistributedTest._DistTestBase._test_ddp_native_mixed_precision.<locals>.MyModel.__init__L  sj    G$&"XX__Q2DF((5;;q!3DE"XX//B*% 0 DFrY   c                 z   | j                   j                         }|D ](  }j                  j                  |j                         * j                  | j
                  j                  j                         j                  j                  |j                         | j                  |      | j                  z   S rk   )r  rs  r  r  rk  rI   r  r   )self_r^   rl  r   r  rV   s       rX   r_   zWDistributedTest._DistTestBase._test_ddp_native_mixed_precision.<locals>.MyModel.forwardT  s    "WW//1F# I(()>)>HI $$U\\%7%79O9OP$$Y%:%:AGGD 771://rY   r`   rb   )rW   r  rV   s   @rX   MyModelr  K  s    0 0rY   r  r  ru  zmodule.pr   r  )!r  rS   r  rn  r  rT   r  rN   r   rP  r!   rk  r  r   rI   rk  r  rs  r  r  r  r  rs  r  r)  r  r  r1  r   rm  
zeros_liker  rB  )rV   rM  set_grad_to_noner  r   r  r  r  r   ru  r  r  r  tensor_listgrestg_r  s   `                @rX    _test_ddp_native_mixed_precisionz>DistributedTest._DistTestBase._test_ddp_native_mixed_precisionA  s    99Dd#JJ""4(JJ!!$'++b!$C--/I0%((// 0& 	A((##;;T
 6 )(?	 < C SZZ..44i6L6LM ^^% C  !6!68I8IJ  0A0ABC 1X <3x||~ # 4 4 6 4HAu$$U[[%--@zz) J.(()9)95==I &+4+>+>s?P?P+Q%R' ! ",,UZZ8' ' UZZ@"-a.+ab/4((%--@"& 4B ,,RXXu}}E ,,Q344  *:;)<'s   &M%c                 *    | j                  dd       y )NFrM  r  r  r  s    rX   Gtest_ddp_native_mixed_precision_no_grad_as_bucket_view_no_set_grad_nonezeDistributedTest._DistTestBase.test_ddp_native_mixed_precision_no_grad_as_bucket_view_no_set_grad_none  s    11(-!& 2 rY   c                 *    | j                  dd       y )NTFr  r	  r  s    rX   Dtest_ddp_native_mixed_precision_grad_as_bucket_view_no_set_grad_nonezbDistributedTest._DistTestBase.test_ddp_native_mixed_precision_grad_as_bucket_view_no_set_grad_none  s    11(,!& 2 rY   c                 *    | j                  dd       y NTr  r	  r  s    rX   Dtest_ddp_native_mixed_precision_grad_as_bucket_view_set_grad_to_nonezbDistributedTest._DistTestBase.test_ddp_native_mixed_precision_grad_as_bucket_view_set_grad_to_none      11(,t 2 rY   c                 *    | j                  dd       y r  r	  r  s    rX   Gtest_ddp_native_mixed_precision_no_grad_as_bucket_view_set_grad_to_nonezeDistributedTest._DistTestBase.test_ddp_native_mixed_precision_no_grad_as_bucket_view_set_grad_to_none  r  rY   c           
         | j                   }t        j                  j                  dd      }	 |j                  }t        j                  j                  j                  t        j                  |      j                  |      |g|      }|j                  ||       t        j                  j                  j                  t        j                  |      j                  |      |g|      }t        d      D ]  }	|j                  j                  j                  |j                  j                  j                  fD ]&  }
|
|
j!                  d       |
j#                          ( t        j$                  |g      j'                         j)                  |      } ||      j+                         }|j-                          |j                  j                  j                  }|j/                         }t+        d t        t1        j2                               D              t1        j2                         z  } ||      j+                         }|j-                          |j                  j                  j                  }|j/                         }|	|k  s| j5                  |d   j7                         |d	| d
|d           | j5                  |d   |d   d|d    d|d            y # t
        $ r |}Y w xY w)NrH   r   rL  rm  r   r   rO  Fc              3       K   | ]  }|  y wrk   ry   r|  s     rX   r  zFDistributedTest._DistTestBase._test_ddp_hook_parity.<locals>.<genexpr>  s     @a@r~  r  zExpected hook grad of r  r  z,Expected hook grad to be close to allreduce z
, but got )r  rS   rN   rO   rm  AttributeErrorrP  r!   r  r  rk  r  rs  r   r  r  r  r  r   ra  rn  r  r)  r%  r1  r   r  item)rV   r   r   num_validated_itersr  r  rm  net_with_hooknet_without_hookr  r  r  r  r  r  r  	loss_hook	grad_hookavg_hooks                      rX   _test_ddp_hook_parityz3DistributedTest._DistTestBase._test_ddp_hook_parity  s   99D1%A& % 3 3 "HH--EEa ##D) 6+ F M
 ,,5t,D$xx00HHa ##D) 6+  I  
 3Z $ %++2277!((//44 "A }((/	" dV,22499$?'.224'..55::jjl@5)<)<)>#?@@4CVCVCXX  *%0446	""$)0077<<	$??,**$$ ++-%4]O9XVZ^L\] %  $$ D	J3t9+U_`him`n_op % A$ " & %&s   K# #K21K2c                 F    | j                  d t        j                         y )Nr  )r  r  r  r  s    rX   test_ddp_hook_parity_allreducez<DistributedTest._DistTestBase.test_ddp_hook_parity_allreduce  s     &&T8N8N&OrY   c                 $   t        j                         }t        |t              }t	        |      D cg c]  }|t        |         d    }}t        j                  j                  |      }| j                  |t        j                         y c c}w )Nr   r  )r1  r   r&   r   rs  r   rS   distributedr  r  r  r  )rV   rK  r\  rT  gpusrm  s         rX   ,test_ddp_hook_parity_allreduce_process_groupzJDistributedTest._DistTestBase.test_ddp_hook_parity_allreduce_process_group  sy     ,,.J.z7CK49*4EFqKA'*FDF!--77=M&&]AWAW&X Gs   Bc                     dD ]<  }t        j                  d dd|      }| j                  |t         j                         > y )Nr  rH   rJ   )rm  matrix_approximation_rankstart_powerSGD_iter
warm_startr  )r  PowerSGDStater  r   )rV   r)  powersgd_states      rX   test_ddp_hook_parity_powerSGDz;DistributedTest._DistTestBase.test_ddp_hook_parity_powerSGD  sO     , 	
!)!7!7"&./())	" **(x/E/E + 	rY   c                 v   t        j                  d t        j                  j                  d      }| j                  |t         j                         d}t        j                  d t        j                  j                  |d      }| j                  |t         j                  |       | j                  t        j                         k(  r9t        j                  d d d      }| j                  |t         j                         t        j                  d d d      }| j                  |t         j                         y )NrE   )rm  rE  start_localSGD_iterr  F)rm  rE  r.  post_local_gradient_allreduce)r   r   r  r   )	post_localSGDPostLocalSGDStater1  r  r  r  r   rK  r   )rV   r   r.  s      rX   "test_ddp_hook_parity_post_localSGDz@DistributedTest._DistTestBase.test_ddp_hook_parity_post_localSGD  s    "33"TZZ-=-=SUE &&-"B"B '  #%!33"))$7.3	E &&"55$7 '  $"5"5"77%77"&2 **m&F&F +  "33"TtE &&-"B"B ' rY   c                 |   t               }|r|d   nt        j                  d|d      }t        t	        j
                  |      j                  |      ||d|      }	|j                  |       t        j                  |d      j                  |      }
t        j                  |d      j                  |      }||	|
|fS )Nr   cuda:rH  MbP?)rL  rm  bucket_cap_mbrM  rJ   r{   )r   rS   r  r!   r  r  rk  rT   )rV   r  rm  devicesrL  global_batch_sizerM  r   r  rx  r  r#  s               rX   _prepare_single_device_modulez;DistributedTest._DistTestBase._prepare_single_device_module0  s     EE#*WQZuT!H=M0NF/e$''/%+#(?I HHVKK 11588@E[[!2A699&AF)UF22rY   c                     t               }t        t        j                  |      |d|      }t	        j
                  |d      }t	        j
                  |d      }||||fS )Nr5  )rm  r6  rM  rJ   r{   )r   r!   r  r  rS   rT   )rV   rm  r8  rM  r   rx  r  r#  s           rX   _prepare_cpu_modulez1DistributedTest._DistTestBase._prepare_cpu_moduleJ  s]     EE/e$+#(?	I KK 115E[[!2A6F)UF22rY   c           	      .   | j                         \  }}}t               }t        dk(  s	t        dk(  r|}d}	| j                  |||      \  }
}}}t        dk(  rt	        t        j                         t              }||   dd }|D cg c]#  }t        j                  dt        |      z         % }}|}t        |      }	| j                  ||||||      \  }
}}}|j                  ||       d }t        j                         5  j                         5  |j                           |       ddd       ddd       t        |      D ]!  } |
       ||	z  |dz   |	z   }|||	z  |dz   |	z   }|dz  d	k(  r$j                         5   ||||       ddd       n
 |||       t!        |
j#                         |j#                               D ]g  \  }}|j$                  s|dz  d	k(  r'| j'                  |j(                  |j(                         B| j+                  |j(                  |j(                         i t        j,                  d
|z          |t        j.                           }$ yc c}w # 1 sw Y   HxY w# 1 sw Y   MxY w# 1 sw Y   xY w)a#  
            This is the recommended way to implement accumulate grads.
            If ``ddp_comm_hook`` input was specified, it will also register that hook
            to the ``ddp_model``. The hook fed into this function should not change
            the resulting gradients.
            r  r!  rH   r  Nr4  c                     | j                           | |      }t        j                  ||j                  |j                              }|j                          y rk   )r(  r   mse_lossrk  r  r)  )r   r  r#  rc  r  s        rX   
step_modelzTDistributedTest._DistTestBase._test_accumulate_gradients_no_sync.<locals>.step_model  s:    uzz&&))FMM*BCrY   rJ   r   9  )r  r   r   r;  r&   r1  rS   r  r   r  r9  r  r  no_syncr(  rs  r  rs  r   r  r  r  r  r8  )rV   	num_itersddp_comm_hookrM  rZ  r  r  rK  r8  local_batch_sizer   rx  r  r#  r\  int_devicesr  r7  r?  	iteration	ddp_input
ddp_targetr  s                          rX   "_test_accumulate_gradients_no_syncz@DistributedTest._DistTestBase._test_accumulate_gradients_no_sync[  s    &*%;%;%="FHd')J %7f#4$.!#$ 262J2J/1H3/y% & 243F3F3H'R)$/3CNOa5<<#a&(89OO$.!#&w< 262T2T%+3/y% (,,X}E   %&&( %OO%e$%% #9- A	5%0!++tax;K.K	 $++tax;K.K
 q=A%"**, E"9iDE E y)Z@ 0 0 2I4H4H4JK 9DAq??  1})++AFFAFF;((89 !!$"23enn->?@9A9 P.% %% %$E Es6   (I,I>I1+I>J1I;	6I>>JJ	z2get_future is only supported on mpi, nccl and glooc                 $    | j                          y)V
            Runs _test_accumulate_gradients_no_sync using default inputs
            NrI  r  s    rX   !test_accumulate_gradients_no_syncz?DistributedTest._DistTestBase.test_accumulate_gradients_no_sync  s     335rY   c                 (    | j                  d       y)rK  TrX  NrL  r  s    rX   .test_accumulate_gradients_no_sync_grad_is_viewzLDistributedTest._DistTestBase.test_accumulate_gradients_no_sync_grad_is_view  s     33D3QrY   c                     t               dt        dt        j                  dt        j
                  j                  t        j                     ffd}| j                  d|       y)z
            Runs multiple iterations on _test_accumulate_gradients_no_sync
            using allreduce hook and validates whether future result was properly
            passed as gradients in reducer.
            r  bucketra   c                     |j                         z  g}| j                  |      j                         j                  d       S )Nc                 (    | j                         d   S ra  r  futs    rX   rB  zxDistributedTest._DistTestBase.test_accumulate_gradients_no_sync_allreduce_hook.<locals>.allreduce_hook.<locals>.<lambda>  s    ciik!n rY   )rI   	allreduce
get_futurethen)r  rQ  r  rK  s      rX   r  zfDistributedTest._DistTestBase.test_accumulate_gradients_no_sync_allreduce_hook.<locals>.allreduce_hook  s=     "==?Z78&&w/Z\T45rY   r{   rB  rC  N	r   objectr1  
GradBucketrS   futuresFuturero   rI  )rV   r  rK  s     @rX   0test_accumulate_gradients_no_sync_allreduce_hookzNDistributedTest._DistTestBase.test_accumulate_gradients_no_sync_allreduce_hook  s[     ()J *.//%%ell3 33> 4 rY   c                     t               dt        dt        j                  dt        j
                  j                  t        j                     ffd}| j                  d|       y)ao  
            Runs multiple iterations on _test_accumulate_gradients_no_sync using allreduce
            hook that also uses then callbacks. In first then callback result is multiplied
            by 2, and the second callback divides the result by 2 * world_size. It validates
            whether final result was properly passed as gradients in reducer.
            r  rQ  ra   c                     | j                  |j                         g      j                         }d }fd}|j                  |      j                  |      S )Nc                 .    d| j                         d   z  S )NrJ   r   r  rT  s    rX   multzDistributedTest._DistTestBase.test_accumulate_gradients_no_sync_allreduce_with_then_hook.<locals>.allreduce_with_then_hook.<locals>.mult  s    sxxz!},,rY   c                 0    | j                         dz  z  S r   rc  )rU  rK  s    rX   divzDistributedTest._DistTestBase.test_accumulate_gradients_no_sync_allreduce_with_then_hook.<locals>.allreduce_with_then_hook.<locals>.div  s    88:Z88rY   )rV  rI   rW  rX  )r  rQ  rU  rd  rf  rK  s        rX   allreduce_with_then_hookzzDistributedTest._DistTestBase.test_accumulate_gradients_no_sync_allreduce_with_then_hook.<locals>.allreduce_with_then_hook  sL     ((&--/):;FFH-9 xx~**3//rY   r{   rY  NrZ  )rV   rg  rK  s     @rX   :test_accumulate_gradients_no_sync_allreduce_with_then_hookzXDistributedTest._DistTestBase.test_accumulate_gradients_no_sync_allreduce_with_then_hook  s\     ()J0 0*.//0%%ell30 33+C 4 rY   c                    d }d }| j                         \  }}}t        dd      }t        dk(  r;t        t	        j
                         t              }||   d   }|j                  |      }|j                  |g      j                         }	|	j                  |      j                  |      j                         }
t        ddt        |      z  dz  dz         }| j                  |
d   |       y )Nc                 N    | j                         D cg c]  }|dz  	 c}S c c}w )Nrz   rc  rU  r   s     rX   rd  z;DistributedTest._DistTestBase.test_get_future.<locals>.mult       '*xxz2!A222   "c                 N    | j                         D cg c]  }|dz   	 c}S c c}w r[   rc  rk  s     rX   r  z:DistributedTest._DistTestBase.test_get_future.<locals>.add	  rl  rm  rz   rJ   r  r   rH   )r  rq  r   r&   r1  r   rk  rV  rW  rX  r  r  r  )rV   rd  r  r  r  r  r  r\  rp  rU  resexpecteds               rX   test_get_futurez-DistributedTest._DistTestBase.test_get_future   s    33 %)$:$:$<!E8T!!Q'E& 243F3F3H'R'-a0	+$$eW-88:C((4.%%c*//1C$QCJ(:Q(>?HSVX.rY   c           
      (   | j                         \  }}}t        t        j                         t              }t        ||         }t        j                  dd      D ]  \  }}| j                  ||||       | j                  ||||d       | j                  ||t        j                  d      ||       |D cg c]#  }t        j                  dt        |      z         % }	}| j                  |	|t        j                  d      ||        y c c}w )N)FT)rR  r  rM  rN  T)rR  r  rM  rN  rT  rn  )rR  r  rS  rM  rN  r4  )r  r&   r1  r   r   r  r/  r  rV  rS   r  r   )
rV   rZ  r[  r  r\  r$  use_bucket_viewrN  r  	gpus_lists
             rX   r   z:DistributedTest._DistTestBase.test_DistributedDataParallel  s-    '+&<&<&>#FIt.t/B/B/DgNKD)*D1:1B1B}2 $- 22#,;!-	 3  22#,;!-+/ 3  22#"',,v"6,;!- 3  FJJU\\'CF*:;J	J22("',,v"6,;!- 3 =$: Ks   9(Dc                    t        j                  d       t        j                  t              j                         }t         j                  j                  |j                         d      }t               }t        j                  j                  || j                  g|      }t        j                  t        j                          dz  d      j                         }t        j                  t        j                          dz  d      j                         }t        j"                         }|j                         D ]2  }	| j%                  |	d u       | j%                  |	j&                  d u        4 t)        d      D ]i  }
|j+                          t-               5   ||      } |||      }d d d        |j/                        j1                          |j                         D ]  }	|	j2                  s| j%                  |	j&                  d u       | j5                  |	j&                  j7                         j9                                | j5                  |	j&                  j;                         j9                                 |j=                  |       |j?                          t        j                  d|
z          |t        j@                  t        j                          dz           }l |S # 1 sw Y   AxY w)	Nz  r  r  r  rJ   r{   r  r@  )!rS   r  r  r  rO  rn  r  r  rs  r   rN   rP  r!   r  rT   r1  r   r  r)  r  rs  rB  r   scaler)  r   rH  isnananyisinfr  updater8  )rV   grad_is_viewr   	optimizerscalerrx  r  r#  loss_fnr   rD  rc  r  s                rX   &_test_DistributedDataParallel_with_ampzDDistributedTest._DistTestBase._test_DistributedDataParallel_with_ampH  sF   e$MM'*//1E(8(8(:tDI  \F;;499+| < I KK 3 3 5 91=BBDE[[!4!4!6!:A>CCEFjjlG ))+ 0.$/0 Ry I##%Z 3&u-F"662D3 T"++- #--/ ?Ad(:;((););)=>((););)=>	? I&  !!$*-ennT-@-@-BQ-FGH;I> 93 3s   K((K2	c                 "   t         j                  j                  | j                         | j	                  d      }| j	                  d      }t        |j                         |j                               D ]  \  }}| j                  ||        y )NFr|  T)rS   rn  r  r  r  r  rs  r  )rV   ddp_model_grad_not_viewddp_model_grad_is_viewr  r  s        rX   6test_DistributedDataParallel_with_amp_and_grad_is_viewzTDistributedTest._DistTestBase.test_DistributedDataParallel_with_amp_and_grad_is_view  s     JJ!!$)),&*&Q&Q" 'R '# &*%P%P! &Q &" '224&113 '1   A&	'rY   c                    |rt         nt        }t        j                  |      }	|	j	                  |d          t
        j                  j                  t        j                  |            }
|
j	                  |d          t
        j                  j                  |
|      }
t        j                         5 }t        j                  dk(  r?t        j                  |
|       |j!                  d       t        j"                  |d      }
nAt        j                  |
|j$                         t        j"                  |j$                  d      }
d d d        t        j&                  |d      }t        j&                  |d      }t        j(                         }| j+                  |	|
|j	                  |d         |j	                  |d         ||||d|t-        j.                         |rd	nd       | j1                          y # 1 sw Y   xY w)
Nr   rj  r@   Fr6  rJ   r{   Tr   )BN_NETBN_NET_NO_AFFINEr  r  rn  rN   SyncBatchNormconvert_sync_batchnormrP  r!   r   r   rS  rT  rS   r9  r:  r   r   rT   r  rI  r1  r   r  )rV   rR  r  r   r!  rA  rS  r   r   rU  r>  rE  r"  r#  r  s                  rX   +_test_DistributedDataParallel_SyncBatchNormzIDistributedTest._DistTestBase._test_DistributedDataParallel_SyncBatchNorm  s    %F*:E e,INN:a=) ((??e@TUINN:a=);;j < I
 ,,. 	I#<<7*JJy#.HHQK %

3U CIJJy#((3 %

388% HI	I Iq1I[[A.F::<D   z!}-JqM*##% MMO?	I 	Is   3BG66G?c                    d}t         j                  j                  j                  t	        j
                  t              j                         | j                  g|      } |       }t         j                  j                  |j                         |      }t         j                  j                  j                  t	        j
                  t              j                         | j                  g|      } |       }| j                  |||      }	t        j                  t        j                         dz  d      j                         }
t        j                  t        j                         dz  d      j                         }t        j                          }t#        d      D ]  }| j%                  ||||
|       |j'                  |j                                | j%                  |	|||
|       t)        |j                         |j                               D ]+  \  }}| j+                  |j,                  |j,                         -  | j+                  |j.                  |j.                         y )Nr  r  r  rJ   r{   r  )rS   rN   rP  r!   r  r  rO  rn  r  r  r  rs  _create_post_localSGD_optimizerrT   r1  r   r  rs  _perform_a_train_steprv  r  r  r  r  )rV   create_averagerr|  learning_rater  r  r  net_using_post_localSGD_opt	averager2post_localSGD_optr  r#  r  ru  r  r  s                   rX   $_test_post_localSGD_optimizer_parityzBDistributedTest._DistTestBase._test_post_localSGD_optimizer_parity  s    M((##;;g&++- II;(4 < C
 '(H++//#.."2}/EC*/((*;*;*S*Sg&++- II;(4 +T +' ()I $ D D+]I! KK 3 3 5 91=BBDE[[!4!4!6!:A>CCEFjjlG2Y 7**3WeVL++CNN,<=**%/ "NN$&A&L&L&N 7FB $$RWWbgg677" X]]INN;rY   c                 0    t        j                  dd      S )Nr{   rE   r  )r  r  r  s    rX   _create_periodic_model_averagerz=DistributedTest._DistTestBase._create_periodic_model_averager  s    22!"MMrY   c                     t        j                  t        j                  j	                  |j                         |      |      S )Nr  )r  r  )post_localSGD_optimizerPostLocalSGDOptimizerrS   r  r  rs  )rV   r  r  r  s       rX   r  z=DistributedTest._DistTestBase._create_post_localSGD_optimizer  s3    *@@kkoocnn&6=oI! rY   c                     |j                           ||      } |||      }|j                          |j                          y rk   )rB  r)  r  )rV   r}  r  r  r  r#  rc  r  s           rX   r  z3DistributedTest._DistTestBase._perform_a_train_step  s6    !ZF66*DMMONNrY   c                    d}t         j                  j                  j                  t	        j
                  t              j                         | j                  g      } |       }| j                  |||      } |       }| j                  |||      }t        j                  t        j                         dz  d      j                         }	t        j                  t        j                         dz  d      j                         }
t        j                         }t        d      D ]  }| j                  ||||	|
        | j                  dk(  r&t        j                   d|j#                         i|       t        j$                          dd	| j                  d
i}t        j&                  ||      }|j)                  |d          | j+                  |j,                  d       | j/                  |j,                  |j,                         |d   d= | j1                  d|d          | j3                  t4        d      5  |j)                  |d          d d d        | j/                  |j,                  d       y # 1 sw Y   &xY w)Nr  rj  rJ   r{   r  r   optimizer_state_dictcuda:0r4  rH  map_locationr  z]Loaded state dict does not contain a step counter for an averager. Setting step counter to 0.)expected_warningexpected_regex)rS   rN   rP  r!   r  r  rO  rn  r  r  rT   r1  r   r  rs  r  r9  r<  r  r   load_state_dictr  r  r  r  assertWarnsRegexUserWarning)rV   r  
chkpt_filer  r  r  r  r  dummy_post_localSGD_optr  r#  r  ru  r  
checkpoints                  rX   )_test_post_localSGD_optimizer_step_reloadzGDistributedTest._DistTestBase._test_post_localSGD_optimizer_step_reload  s>    !M*/((*;*;*S*Sg&++-499+ +T +' '(H $ D D+]H! ()I&*&J&J+]I'# KK 3 3 5 91=BBDE[[!4!4!6!:A>CCEFjjlG2Y **%/ yyA~

+->-I-I-KLj LLN$dii]&;<LJ\JJ#33J?U4VW 	2X]]INN; 126:VZ0F%GH &&!, - '  
 (7756 Y^^Q/ s   7I11I:c                     t         j                  j                  | j                         | j	                  | j
                  d       y NFr  rS   rn  r  r  r  r  r  s    rX   #test_post_localSGD_optimizer_parityzADistributedTest._DistTestBase.test_post_localSGD_optimizer_parityN  s9     JJ!!$)),5544" 6 rY   c                     t         j                  j                  | j                         | j	                  | j
                  d       y NTr  r  r  s    rX   0test_post_localSGD_optimizer_parity_grad_is_viewzNDistributedTest._DistTestBase.test_post_localSGD_optimizer_parity_grad_is_viewZ  s9     JJ!!$)),5544! 6 rY   c                 r    t        ddt        j                         fg      }t        j                  |d      S )N)rJ   rJ   r{   r  )r   r1  r   r  r  )rV   r  s     rX   #_create_hierarchical_model_averagerzADistributedTest._DistTestBase._create_hierarchical_model_averagerf  s8    %0&1d>Q>Q>S:T1U%V""<<'=A rY   c                     t         j                  j                  | j                         | j	                  | j
                  d       y r  rS   rn  r  r  r  r  r  s    rX   9test_post_localSGD_optimizer_parity_with_hierarchical_sgdzWDistributedTest._DistTestBase.test_post_localSGD_optimizer_parity_with_hierarchical_sgdl  s9     JJ!!$)),5588" 6 rY   c                     t         j                  j                  | j                         | j	                  | j
                  d       y r  r  r  s    rX   Ftest_post_localSGD_optimizer_parity_with_hierarchical_sgd_grad_is_viewzdDistributedTest._DistTestBase.test_post_localSGD_optimizer_parity_with_hierarchical_sgd_grad_is_viewy  s9     JJ!!$)),5588! 6 rY   c                     t         j                  j                  | j                         t	               5 }| j                  | j                  |       d d d        y # 1 sw Y   y xY wrk   )rS   rn  r  r  rg  r  r  )rV   rF  s     rX   (test_post_localSGD_optimizer_step_reloadzFDistributedTest._DistTestBase.test_post_localSGD_optimizer_step_reload  sP     JJ!!$)), " h>>88(  s   AA#c                     | j                  t        j                         | j                  t        j                         y rk   )>_test_DistributedDataParallel_SyncBatchNorm_with_memory_formatrS   channels_lastchannels_last_3dr  s    rX   8test_DistributedDataParallel_SyncBatchNorm_Channels_LastzVDistributedTest._DistTestBase.test_DistributedDataParallel_SyncBatchNorm_Channels_Last  s4     OO## OO&&rY   c                    | j                         \  }}}t        j                         }d}t        |dz        }t        |dz        }t        }	t        j                  |	      j                  |      }
t        j                  j                  |
|g      }|dddg|t        j                  u rg ndgz   }t        j                  |dt        j                  ij                  |      j                  |      }t        j                  |dt        j                  ij                  |      j                  |      }t        j                          }| j#                  |
|||||||d|t        j                         |       | j%                          y )NrJ   rj  r{   rk  r&  T)r  r1  r   r   ONLY_SBN_NETr  r  rn  rN   rP  r!   rS   r  rT   ra  rk  r  rI  r  )rV   r'  rZ  r[  r  r  r   	bs_offsetr!  r   rU  r>  shapes	input_gpu
target_gpur  s                   rX   r  z\DistributedTest._DistTestBase._test_DistributedDataParallel_SyncBatchNorm_with_memory_format  sk    '+&<&<&>#FIt //1MHD1HIMA-.I Ee,11$7I;;tf < I  Aq)#u':'::F
 V75;;7d-0  V75;;7d-0 
 ::<D   ##%+ !  MMOrY   c           
         | j                         \  }}}t        j                         }|g}d}t        |dz        }t        |dz        }| j	                  |||||       | j	                  |||||t        j                  d             |D 	cg c]#  }	t        j                  dt        |	      z         % }}	| j	                  |||||t        j                  d             y c c}	w )NrJ   rR  r  r   r!  rA  rn  )rR  r  r   r!  rA  rS  r4  )r  r1  r   r   r  rS   r  r   )
rV   rZ  r[  r  rK  r$  r   r  r!  r  s
             rX   *test_DistributedDataParallel_SyncBatchNormzHDistributedTest._DistTestBase.test_DistributedDataParallel_SyncBatchNorm  s     '+&<&<&>#FIt,,.J 6DHD1HIJN+I<<!#  =  <<!# #ll62 =  =AAqELL3q6!12ADA<<!# #ll62 =  Bs   (C#c                     | j                         \  }}}t        j                         }|g}d}t        |dz        }t        |dz        }| j	                  |||||d       y )NrJ   F)rR  r  r   r!  rA  r   r  r1  r   r   r  )	rV   rZ  r[  r  rK  r$  r   r  r!  s	            rX   4test_DistributedDataParallel_SyncBatchNorm_No_AffinezRDistributedTest._DistTestBase.test_DistributedDataParallel_SyncBatchNorm_No_Affine  su     '+&<&<&>#FIt,,.J 6DHD1HIJN+I<<!#  = rY   c                 \   | j                         \  }}}|g}t        j                  d      }t        j                  |      }|j                  |d          t        j                  j                  t        j                  |            }|j                  |d          t        j                  j                  ||      }t        |      dz  }t        j                         |z  }	t        j                  |	d      }
t        j                  |	d      }t        j                         }t        j                   j"                  j%                  d      5  | j'                  |||
j                  |d         |j                  |d         ||||	d	       | j)                          d d d        y # 1 sw Y   y xY w)NrJ   r   rj  FT)r  rN   r   r  r  rn  r  r  rP  r!   r  r1  r   rS   rT   r  rD  r  r  rI  r  rV   rZ  r[  r  r$  r   rU  r>  r   r!  r"  r#  r  s                rX   3test_DistributedDataParallel_SyncBatchNorm_2D_InputzQDistributedTest._DistTestBase.test_DistributedDataParallel_SyncBatchNorm_2D_Input  sc    '+&<&<&>#FIt 6DNN1%E e,INN47# ((??e@TUINN47#;;IRV;WI4y1}H++-8IIq1I[[A.F::<D
 %%++E2  $$NN47+KKQ(
      s   AF""F+c                 >   | j                         \  }}}|g}t        j                  d      }t        j                  |      }|j                  |d          t        j                  j                  t        j                  |            }|j                  |d          t        j                  j                  ||      }d}t        j                         }	t        j                  |	d      }
t        j                  |	d      }t        j                         }t        j                  j                   j#                  d      5  | j%                  |||
j                  |d         |j                  |d         ||||	d	       | j'                          d d d        y # 1 sw Y   y xY w)NrJ   r   rj  rH   FT)r  rN   r   r  r  rn  r  r  rP  r!   r1  r   rS   rT   r  rD  r  r  rI  r  r  s                rX   Ctest_DistributedDataParallel_SyncBatchNorm_Single_Input_Per_ProcesszaDistributedTest._DistTestBase.test_DistributedDataParallel_SyncBatchNorm_Single_Input_Per_ProcessE  sV    '+&<&<&>#FIt 6DNN1%E e,INN47# ((??e@TUINN47#;;IRV;WIH++-IIq1I[[A.F::<D
 %%++E2  $$NN47+KKQ(
      s   <AFFc                 R   | j                         \  }}}t        j                  j                  t        j                  |      |g      }g }t        t        j                               D ]t  }t        j                  t        j                  ddd|dz   z        d|dz
  z  z  t        j                  ddd|dz   z        d|dz
  z  z  gd      }|j                  |       v t        j                  |D cg c]A  }|j                  ddd      j                         j                  t        j                   d	      C c}d      j                  |      }	t        d
      D ]:  } |||   j                  |            }
|
j#                         j%                          < |j&                  j(                  |j&                  j*                  }}t        j,                  j/                  ||	j#                  d             t        j,                  j/                  ||	j1                  d             y c c}w )Nrj  rJ   rH   rE   r  r  r   r   r   rO  )r  rN   rP  r!   r  rn  rs  r1  r   rS   r.  r   r  permute
contiguousviewnum_featuresmeanr)  r   running_meanrunning_vartestingassert_closer   )rV   rZ  r[  r  r   r+  r  input_var_rankr^   all_input_varyr  r  s                rX   Itest_DistributedDataParallel_SyncBatchNorm_Diff_Input_Sizes_Running_ValuezgDistributedTest._DistTestBase.test_DistributedDataParallel_SyncBatchNorm_Diff_Input_Sizes_Running_Valuet  s    '+&<&<&>#FItKK77!!$'TF 8 E I4..01 1!&

1aA731q5>J

1aA731q5>J "   01 "II ' IIaA&113889R9RTVW  d4j  3Z $)D/..t45!!#$
 ))(( &L MM&&|]5G5G5JKMM&&{M4E4Ea4HI!s   3AH$c                     | j                         \  }}}|g}t        j                         }|dz   }t        |dz   |z  dz        }t        |dz   |z  dz        }| j	                  |||||       y )NrJ   rz   r  r  )	rV   rZ  r[  r  r$  r  r   r  r!  s	            rX   Dtest_DistributedDataParallel_SyncBatchNorm_Diff_Input_Sizes_gradientzbDistributedTest._DistTestBase.test_DistributedDataParallel_SyncBatchNorm_Diff_Input_Sizes_gradient  s     '+&<&<&>#FIt6D !//1MaxHTAX-12I]Q.-?!CDI<<!#  = rY   c                    | j                         \  }}}t        j                  t              }|j	                         }t
        j                  j                  j                  |      }t        j                  j                  |j                  |      |g      }t        j                  ddt
        j                  t        j                  |            } ||      }| j                  |j                   t
        j                         |j#                         j%                          |j'                         D ]6  }| j                  |j(                  j                   t
        j                         8 y )Nrj  rJ   )rk  r  )r  r  r  r  rc  rS   rN   r  r  rP  r!   rn  rT   r  r  r  rk  r  r)  rs  r  )rV   rZ  r[  r  r   r   r  r  s           rX   /test_DistributedDataParallel_SyncBatchNorm_halfzMDistributedTest._DistTestBase.test_DistributedDataParallel_SyncBatchNorm_half  s     '+&<&<&>#FItMM&)EJJLEHH**AA%HEKK77

48HVZU[7\E++a%--T@RSC*CSYY6GGI ))+ B  !1!15==ABrY   c           	         t        j                         }t        j                  t              }|r2t
        j                  j                  |j                  |      |g      }nt
        j                  j                  |      }d}| j                  |      \  }}}}|r"|j                  |      }|j                  |      }|j                  d       t        d      D ]  }	||z  }
| j                  |||
|
|z    ||
|
|z    |d       | j                  |       |j                         }|	dkD  r|	dk  s|	dz  dk(  r| j                  |j!                  d      d       | j                  |j!                  d      d       | j                  |j!                  d	      d       | j                  |j!                  d      |j!                  d
             | j                  |j!                  d	      |j!                  d
             | j#                  |j!                  d      |	       n&|	dkD  r!| j%                  |j!                  d      |	       |t'        j(                  |         } |S )Nrj  rJ   r  rH   r   rE   forward_compute_timebackward_compute_timebackward_comm_time"backward_compute_comm_overlap_timerF  )r1  rb  r  r  rO  rN   rP  r!   rn  r$  $_set_ddp_runtime_logging_sample_raters  r.  r  r  r  r   r  r  rS   r8  )rV   is_gpur  r>  r   r?  r  r#  r  rD  rA  r  s               rX   _test_ddp_logging_dataz4DistributedTest._DistTestBase._test_ddp_logging_data  sU   ==?Dg.IKK??NN4(dV @ 	 KK??	J	 H.2.F.Fx.P+Jvt

4(T*::1=Ry .: %%&6H#456FX$56 //	: $-#B#B#D 7bC!GqL++(,,-CDa ++(,,-DEq ++(,,-ABA ++(,,-DE(,,-QR ++(,,-AB(,,-QR $$%5%9%9+%FL1W ''(8(<(<[(I3O ennZ89].:` rY   c           
      2   d }t        j                  t         j                  j                         | j	                         \  }}}| j                  d      }|j                         }| j                  |j                  d      t        j                                | j                  |j                  d      t        j                                | j                  |j                  d      d       | j                  |j                  d      d	       | j                  |j                  d
      d       | j                  |j                  d      d       | j                  |j                  d      d       | j                  |j                  d      d       | j                  |j                  d      d       | j                  |j                  d      t        j                  |             | j                  |j                  d      d       t        |j                               }d}d}t        t        d |            }|D ]+  }	|dz  }||	j                         |	j!                         z  z  }- | j                  |j                  d      d       | j                  |j                  d      |       | j                  |j                  d      |       | j                  |j                  d      t#        |             | j                  |j                  d       |d             | j                  |j                  d       |d             | j                  |j                  d        |d!             | j                  |j                  d"       |d#             |j                  d      d$k(  rq| j                  |j                  d%       |d&             | j                  |j                  d'       |d(             d)}
| j                  |j                  d*      |
       | j                  |j                  d+      d        | j                  |j                  d,      d        | j                  |j                  d-      d        | j                  |j                  d.      d        | j                  |j                  d/      d        | j                  |j                  d0      d        | j                  |j                  d1d      d       | j                  |j                  d2      d       | j                  |j                  d3      t#        |             |j                  d4      }t        t%        t'        d5      D cg c]  }t#        |       c}            }| j                  |d6j)                  |             |j                  d7      }| j                  |d8j)                  |             | j+                  |j                  d9      d       | j+                  |j                  d:      d       | j+                  |j                  d;      d       | j+                  |j                  d:      |j                  d<             | j+                  |j                  d;      |j                  d<             |j                  d=      }|j                  d>      }|j                  d?      }|j                  d@      }|j                  dA      }| j+                  ||       | j+                  ||       | j+                  ||       | j+                  ||       t-               }|j/                          |j0                  j3                          t4        j6                  j9                  |dBC      }|j                         }t        |j                               }| j                  |j                  d      t;        dD             |d   j                         |d   j!                         z  |d   j                         |d   j!                         z  g}| j                  |j                  d      d6j)                  dE |D                     | j                  |j                  d      dF       y c c}w )GNc                 P    | t         j                  v rt         j                  |    S dS )Nr  r  r  s    rX   	parse_envzJDistributedTest._DistTestBase.test_ddp_logging_data_cpu.<locals>.parse_env  s     *-*;rzz#FFrY   Fr  rK  r  module_namer   rL  r  rS  r   broadcast_buffersrH   bucket_cap_bytesi  find_unused_parametersr   rM  backend_namerF  r  c                     | j                   S rk   r   )	parameters    rX   rB  zIDistributedTest._DistTestBase.test_ddp_logging_data_cpu.<locals>.<lambda>.  s    93J3J rY   r<  ra  total_parameter_size_bytesnum_parameter_tensorsbucket_sizesmaster_portr  master_addrr  torch_distributed_debugTORCH_DISTRIBUTED_DEBUGcuda_visible_devicesCUDA_VISIBLE_DEVICESr!  gloo_socket_ifnameGLOO_SOCKET_IFNAMEgloo_device_transportGLOO_DEVICE_TRANSPORTrJ   gloo_num_threadsnccl_socket_ifnamenccl_blocking_waitnccl_async_error_handling
nccl_debugnccl_nthreadsnccl_ib_timeoutunused_parameter_sizehas_rebuilt_bucketsrebuilt_bucket_sizes'prev_iteration_grad_ready_order_indicesrz   ,  rebuilt_per_bucket_param_indices avg_forward_compute_timeavg_backward_compute_timeavg_backward_comm_time&avg_backward_compute_comm_overlap_timeforward_compute_time_startbackward_compute_time_startbackward_compute_time_endbackward_comm_time_startbackward_comm_time_endg      ?)r6  g      8Ac              3   2   K   | ]  }t        |        y wrk   )r   )r  r^   s     rX   r  zJDistributedTest._DistTestBase.test_ddp_logging_data_cpu.<locals>.<genexpr>  s     7Q#a&7s   zdouble, float)r1  set_debug_levelr4  INFOr  r  r  r  r   r   rb  r  r  rs  filternumelelement_sizer   reversedrs  rR  r  r   ra  r   rb  rN   rP  r!   r   )rV   r  ru  r  r>  r  rl  
num_params
param_sizer   default_gloo_threadsgrad_ready_orderr^   expected_orderbucket_indicesfwd_host_side_timebwd_comp_start_host_side_timebwd_comp_end_host_side_timebwd_comm_start_host_side_timebwd_comm_end_host_side_timer   r  s                         rX   test_ddp_logging_data_cpuz7DistributedTest._DistTestBase.test_ddp_logging_data_cpu  s   G   !5!56!335NAx3353AI(>>@-11,?ATATAVW-11&94==?K-11-@%H-11,?D -11/BBG-112EFJ-112DEGWX-112JKQO-112KLaP $$^4d6F6Fx6P -11+>C)..01FJJ&!JFSTF ;a
aggi!..*:::
; -11(;WE $$%ABJ -112IJJW-11.A3z?S $$]3Y}5M  $$]3Y}5M  $$%>?34  $$%;<01  ##N3v=  $(()=>23   $(()@A56 ()$  $(();<(
 -112FGN-112FGN-112MNPTU-11,?F-11/BDI-112CDdK
 -112I1MqQ-112GH!L $$%;<c*o  0339  "(E!H+EqCF+E"FGN-tyy/HI-112TUN^SXXn-EF ##$4$8$89S$TVWX## $$%@A1 ##$4$8$89Q$RTUV## $$%@A $$%MN ## $$%=> $$%MN
 "2!5!56R!S,<,@,@--) +;*>*>++' -=,@,@*-) +;*>*>?W*X'##+-J ##-/L ##+-J ##$ACUV JEKKMII;;EQT;UI(>>@)..01F $$%78#>O:P q	!F1I$:$:$<<q	!F1I$:$:$<<L  $$^4		7,77 -11(;_My ,Fs   5bc           
         | j                         \  }}}| j                  d      }|j                         }| j                  |j	                  d      t        |             | j                  |j	                  d      |       |j	                  d      }t        t        t        d      D cg c]  }t        |       c}            }| j                  |dj                  |             |j	                  d      }	| j                  |	d	j                  |             | j                  |j	                  d
      d       | j                  |j	                  d      d       | j                  |j	                  d      |j	                  d             | j                  |j	                  d      |j	                  d             |j	                  d      }
|j	                  d      }|j	                  d      }|j	                  d      }|j	                  d      }| j                  ||       | j                  ||       | j                  ||       | j                  ||
       y c c}w )NTr  rL  rS  r  rz   r  r  r  r  rH   r  r  r  r  r  r  r  r  )r  r  r  r  r   r   r  r  rs  rR  r  )rV   rZ  r[  r  r>  r  r"  r^   r#  r$  r%  r&  r'  r(  r)  s                  rX   test_ddp_logging_data_gpuz7DistributedTest._DistTestBase.test_ddp_logging_data_gpu  sR    '+&<&<&>#FIt3343@I(>>@-11,?TK-11/BDI/339  "(E!H+EqCF+E"FGN-tyy/HI-112TUN^SXXn-EF ##$4$8$89S$TVWX## $$%MNPQ ## $$%@A $$%MN ## $$%=> $$%MN
 "2!5!56R!S,<,@,@--) +;*>*>++' -=,@,@*-) +;*>*>?W*X'##+-J ##-/L ##+-J ##$ACUVQ ,Fs   *Ic           	      |   t         j                  j                  t              }d}| j	                  t
        |      5  d}| j                  |      \  }}}}t        j                         |z  }| j                  |||||z    ||||z    |d       |j                          d d d        t        ||       y # 1 sw Y   xY w)Nz,should be called before training loop startsrJ   rH   )rN   rP  r!   rO  r  r  r$  r1  rb  r.  rQ  r5   )	rV   r>  expected_errr   _batch_sizer  r#  r  rA  s	            rX   test_static_graph_api_cpuz7DistributedTest._DistTestBase.test_static_graph_api_cpu  s     ;;GDIIL''lC .373K3KH3U0UFD83 %%&6H#456FX$56 ++-.  $I|<!. .s   A!B22B;c                 T   d}t         j                  j                  |g      }t        j                  j                         }t        j                  j                  t        j                  |      |      }|j                  d   j                  j                  }| j                  ||       y ra  )rS   r#  r  r  r  r  rN   r  r  r  r  layer1bn1rm  r  )rV   process_idsrm  res50_modelres50_model_syncprocess_group_syncs         rX    test_SyncBatchNorm_process_groupz>DistributedTest._DistTestBase.test_SyncBatchNorm_process_group  s     K!--77FM%,,557K!//FFk*M  "2!8!8!;!?!?!M!M/?rY   c                     |t         j                  k7  r|t        d| d      |5 ||||       t        j                         |k(  r| j	                  ||       y y  |||       | j	                  ||       y )NzReduction fn z must specify dst!)r1  r  r  rb  r  )rV   r   r  r  reduction_fnr  s         rX   _run_reduction_testz1DistributedTest._DistTestBase._run_reduction_test  sy     t.3; =>P!QRRVS"-==?c)$$V_= * VR(  9rY   c                    t         j                  j                  | j                         | j                  dz  dk(  }t        j
                  j                  t        j
                  j                  fD ]  }t        j                  ||g      j                  | j                        }| j                  |t        j                  ddg      j                  | j                        |       t        j                  ddg      j                  | j                        }|j                         }| j                  |||        t        j
                  j                  t        j
                  j                  fD ]s  }t        j                  ||g      j                  | j                        }| j                  |t        j                  ddg      j                  | j                        |       u y )NrJ   r   FT)rS   rn  r  r  r1  r  r  r  r   rk  r;  r%  r  r  )rV   elementr  r  r  s        rX    test_nccl_backend_bool_allreducez>DistributedTest._DistTestBase.test_nccl_backend_bool_allreduce  si    JJ!!$)),ii!mq(G}},,dmm.?.?@ 	L$||Wg,>?BB499M(( %,,u~">"A"A$))"Lb
  %||T4L9<<TYYG"."4"4"6((K	L }}(($--*;*;< $||Wg,>?BB499M(( %,,d|"<"?"?		"JBrY   c                    t         j                  j                  | j                         ddgddgd}t        j                  || j                  dz           j                  | j                        }|j                         }t        t        j                               D cg c]2  }t        j                  ddg      j                  | j                        4 }}t        j                  ||       | j                  t        |      t        j                                t        |      D ]K  \  }}t        j                  ||dz           j                  | j                        }| j                  ||       M | j                  ||       y c c}w )NTFr   rH   rJ   )rS   rn  r  r  r   rk  r%  rs  r1  r   r  r  r  r  )	rV   r   r  input_tensor_copyru  r  r  r   rp  s	            rX    test_nccl_backend_bool_allgatherz>DistributedTest._DistTestBase.test_nccl_backend_bool_allgather-  sA    JJ!!$)),Tlt}5C <<DIIM(:;>>tyyIL , 2 2 4 t2245 eU^,//		:K  OOK6S-t/B/B/DE!+. .1 <<AE
366tyyA  H-.
 .=s    7Fc                    t         j                  j                  | j                         ddgddgd}t        j
                  j                  t        j
                  j                  fD ]  }t        j                  || j                  dz   dz           j                  | j                        }t        j                  ddg      j                  | j                        }| j                  |||t        j                  d       t        j                  ddg      j                  | j                        }|j                         }| j                  |||t        j                  d        t        j
                  j                  t        j
                  j                  fD ]  }t        j                  || j                  dz           j                  | j                        }| j                  dk(  r0t        j                  ddg      j                  | j                        n|j                         }| j                  |||t        j                  d        y )NTFr@  rH   rJ   r   rz  )rS   rn  r  r  r1  r  r  r  r   rk  r;  r
   r%  r  r  )rV   r   r  r  rp  r  s         rX   test_nccl_backend_bool_reducez;DistributedTest._DistTestBase.test_nccl_backend_bool_reduceC  s    JJ!!$)),Tlu~6C}},,dmm.?.?@ $||CQ!0C,DEHHS <<7::499E((xT[[VW(X  %||T4L9<<TYYG"."4"4"6(( /2t{{ )  }}(($--*;*;< Y$||C		A,>?BB499M yyA~ LL$.11$))<%++- 
 ((xT[[VW(XYrY   c           	      r   d}t        j                  t        |      D cg c]*  }| j                  dk(  rt	        j                         dk  nd, c}      j                  | j                        }t        j                  |d       t        t        j                               D cg c]G  }t        j                  t        |      D cg c]  }d c}      j                  | j                        I }}t        j                  ||       |d   }|dd  D ]  }| j                  ||        y c c}w c c}w c c}w )NrE   r   rp  Fra  rH   )rS   r   rs  r  randomrk  r1  r  r   r  r  )rV   tensor_sizeru  bcast_tensorr  rp  r   s          rX    test_nccl_backend_bool_broadcastz>DistributedTest._DistTestBase.test_nccl_backend_bool_broadcast_  s    K << #;/ /3ii1nV]]_s*%G
 bm  NN<Q/ t2245 U;-?@e@ADDTYYOK  OOK6"1~H%ab/ 3  23 As   /D*$!D4	D/"D4/D4c                 V    t        j                         dz   dz   }t        |      D cg c]3  }t        j                  d      j                   j                        |z  5 }}t        dz  dz
  d      }t        |      D cg c]3  }t        j                  d      j                   j                        |z  5 }}t        |d      }|j                  |j                  }}|z  dk7  rt        j                  |z
  z        n|z  }	 j                  ||	        j                  ||z         t        t        |            }
 j                  t!        |
      |        fd} ||       t        |      }|j                  |j                  }} j                  |t        j                  |z                j                  ||z         t        t        |            }
 j                  t!        |
      |        ||       t        |      }|j                  |j                  }} j                  |t        j                  |z                j                  ||z         t        t        |            }
 j                  t!        |
      |        ||       y c c}w c c}w )	NrO  rH   rJ   T)dataset	drop_lastr   c                    t              D cg c]1  }t        j                  dg      j                  j                        3 }}t        j                  |t        j                  | g      j                  j                               |D cg c]  }|j                          }}j                  t        t        |            d       y c c}w c c}w Nr   rH   )rs  rS   
LongTensorrk  r  r1  r  r   r  r  r  r  )local_num_samplesru  world_samplessamplerV   rK  s       rX   validate_global_samplesz^DistributedTest._DistTestBase.test_DistributedSampler_padding.<locals>.validate_global_samples  s     BGzAR!<=E$$aS),,TYY7! ! !5<<1B0C#D#G#G		#R >K K6 K K  S%7!8!<! !Ls   6CC)rK  )r1  r   rs  rS   r   rk  r  rk  r?   num_samples
total_sizemathceilr  r  r  r  )rV   dataset_sizer  rK  dataset_tiny_sizedataset_tinydist_samplerrP  local_dataset_sizeeffective_dataset_sizeindices_listrS  dist_sampler_added_samplesdist_sampler_added_samples_tinyrK  s   `             @rX   test_DistributedSampler_paddingz=DistributedTest._DistTestBase.test_DistributedSampler_paddingt  s    ,,.J +a/L@El@ST1uzz!}''		2Q6TGT !$J!Oa$7 ;9>?P9Q45

1  +a/L 
 .gNL((''  2  *,1 		<*4
BC!J. #
 .0FG/1BZ1OP\ 23LS.0AB	= $$56 *<G)L&*66*55  2 .		,:S0TU/1BZ1OP%? @ALS.0AB $$56 /A.V+/;;/::  2 !499->-K#L /1BZ1OP%D EFLS.0AB#$56M Us   8J!8J&c           	      z   t         j                         }t        j                  d   }|dk(  rD| j                  dz   t        | j                        z  }t        j                  j                  |       |dk(  r0|j                  t        t        j                  ddd                   t        t        j                               D cg c]  }d  }}t        j                   ||| j                  t#        |      z     |       t%        |      D ](  \  }}||t#        |      z     }	| j'                  ||	       * y c c}w )Nr   r  rH   rz   r   r  r  )COLLECTIVES_OBJECT_TEST_LISTr  r   rJ  r  r   rK  rS   rn  r  r  ri   rT   rs  r1  r   r  r  r  r  )
rV   rE  gather_objectsr=  	next_rankru  output_gatheredr  r  rp  s
             rX   _test_allgather_objectz4DistributedTest._DistTestBase._test_allgather_object  s    :>>@Njj+G& !YY]c$//.BB	

%%i0 & %%c%++a1*E&FG-243F3F3H-IJtJOJ""tyy3~+>>? $O4 03)!c..A*AB  h/0 Ks   	D8gpur   )OFFr  r5  )levelsc                 "    | j                         S rk   )rg  r  s    rX   !test_all_gather_object_default_pgz?DistributedTest._DistTestBase.test_all_gather_object_default_pg  s     ..00rY   )r5  ri  r  c                     t               }t        j                  |      }t        j                  |      }| j	                  |      S )Nr<  )rE  )r   r1  r  r  rg  rV   r  r=  rE  s       rX   test_all_gather_object_subgroupz=DistributedTest._DistTestBase.test_all_gather_object_subgroup  s>     )*G&&w/G~~g6H...AArY   c           	         t         j                         }t        j                  |      }t        j
                  d   }|dk(  rD| j                  dz   t        | j                        z  }t        j                  j                  |       |dk(  r0|j                  t        t        j                  dd|                   t        t        j                   |            D cg c]  }d  }}d}t        j"                  || j                  t%        |      z     ||k(  r|nd ||       ||k7  r<| j'                  |t        t        j                                D cg c]  }d  c}       n6t)        |      D ](  \  }	}
||	t%        |      z     }| j'                  |
|       *  G d d	      } |       }t        t        j                                D cg c]  }| }}| j+                  t,              5  t        j.                  t        t        j                                D cg c]  }d  c}|| j                     |
       d d d        y c c}w c c}w c c}w c c}w # 1 sw Y   y xY w)Nr   r  rH   rz   r  r   )object_gather_listr  r  c                       e Zd Zy)>DistributedTest._DistTestBase._test_gather_object.<locals>.BarN)rc   rd   re   ry   rY   rX   Barrs    s    rY   rt  r  )rc  r  r1  rb  r   rJ  r  r   rK  rS   rn  r  r  ri   rT   rs  r   gather_objectr  r  r  r#  r  r  )rV   pgrd  my_rankr=  re  ru  rf  gather_on_rankr  r  rp  rt  rQ   s                 rX   _test_gather_objectz1DistributedTest._DistTestBase._test_gather_object  s   9>>@NmmB'Gjj+G& !YY]c$//.BB	

%%i0 & %%c%++a7*K&LM-243F3Fr3J-KLtLOLNtyy3~+>>?n, $3" .(  #E$:M:M:O4P%Qqd%Q (8 4FAs-a#n2E.EFH$$S(34
  A).t/B/B/D)EFAaFNF"">2 &&#()<)<)>#?@aT@"499- 3 M &R G A s0   	I 	I
	I
(/I	I
 IIIc                 "    | j                         S rk   )ry  r  s    rX   test_gather_objectz0DistributedTest._DistTestBase.test_gather_object#  s     ++--rY   c                     t               }t        j                  |      }t        j                  |      }| j	                  |      S Nr<  )r   r1  r  r  ry  rn  s       rX   test_gather_object_subgroupz9DistributedTest._DistTestBase.test_gather_object_subgroup+  s;     )*G&&w/G~~g6H++H55rY   c                 V   t        |j                  j                         j                               }|D ]n  }t	        t        j                               D cg c]  }t        j                  |       }}t        j                  ||       |D ]  }| j                  ||        p y c c}w rk   )r  r   r<  rN  rs  r1  r   rS   r   r  r  )rV   r  net_module_statesr   ru  r  r   s          rX   validate_net_equivalencez6DistributedTest._DistTestBase.validate_net_equivalence6  s     $SZZ%:%:%<%C%C%E F& 016t7J7J7L1M,-E$$Q'  Q/) 0F$$VQ/00s   B&c                 L   d}| j                   }d}t        j                  |       t        j                  ||d      }t        j                  j
                  j                  |j                  |      | j                   gd      }t        j                  ||d      j                  |      }t        j                  |      |_
        t        |j                  j                         j                               }|D ]  }t        t        j                                D 	cg c]  }	t        j"                  |       }
}	t        j$                  |
|       t'        |
      D ]/  \  }}||k(  r| j)                  ||       | j+                  ||       1  t-        |j                  |j.                  |j0                  ||j2                         | j5                  |       ||k(  rE|j                         j                         }t7        ||      D ]  \  }}| j)                  ||        y y c c}	w )NrJ   rH   FrF   rL  r6  )r   rm  broadcast_bucket_sizer  params_and_buffers_to_ignore)r  rS   r  rN   rO   rP  r!   rn  r  r  r   r  r<  rN  rs  r1  r   r   r  r  r  r  r   rm  r  parameters_to_ignorer  r  )rV   r   r  rank_to_broadcastr   r  	new_modelr  r   ru  r  r  r   expected_statesrp  s                  rX   test_ddp_sync_module_statesz9DistributedTest._DistTestBase.test_ddp_sync_module_statesB  s    C99D !d#IIc3U3E((##;;

4 dii[ < C 		#s7<<TBIy1CJ $SZZ%:%:%<%C%C%E F& 
716t7J7J7L1M,-E$$Q'  Q/!*;!7 7IAvDy((F3 ++Av67
7  zz!//&)&?&?%-0-E-E ))#. (("+"6"6"8"?"?"A#&'8/#J 2KAx$$Q12 )-s   H!c                    d}d}d}| j                   }t        j                  ||d      }t        j                  ||| j                         |z  }t        j                  j
                  j                  |j                  |      | j                   gd      }d}| j                   d	kD  r|d
z  }|j                  d      5  t        |      D ]  }	 ||      j                         }
|
j                          t        j                  ||| j                         |z  }t        t        |j                                     }| j                  ||j                          |j#                          t        j                  j%                  | j                           	 d d d        |j                  d      5  t        |      D ]  } ||      j                         }
|
j                          t'        j(                         }|dk\  r|dz  }t        j                  ||| j                         |z  |z  t'        j(                         z  }t        t        |j                                     }| j                  ||j                          |j#                          t        j                  j%                  | j                           	 d d d        y # 1 sw Y   0xY w# 1 sw Y   y xY w)Nr   rH   r   FrF   r  r  rz   r   rJ   )divide_by_initial_world_sizeT)r  rN   rO   rS   r   rP  r!   rn  rR  rs  r  r)  r  r  rs  r  r  rB  synchronizer1  r   )rV   r   r  
grad_scaler  r   r   r  n_itersru  r  r  r  r  effective_wss                  rX   test_ddp_grad_div_uneven_inputsz=DistributedTest._DistTestBase.test_ddp_grad_div_uneven_inputss  sK    CEJ99DIIc3U3E**UC		:ZGC((##;;

4 dii[ < C Gyy1}1u= =w =As8<<>DMMO
 %*JJsC		$JZ$WM cnn&6!78E$$]EJJ?MMOJJ**$))*<==" t< =w =As8<<>DMMO#'#6#6#8LAv$)

3DII>$%&' ++-	%.M
 !cnn&6!78E$$]EJJ?MMOJJ**$))*<== =#= ="= =s   >CK-D
KKKc                    d}d}d}t         j                  j                  | j                         t	        j
                  ||d      }t        j                  ||| j                        }t         j                  j                  j                  |j                  | j                        | j                  g      }|t        j                  |      }|5 }	t        |      D ](  }
 ||      j                         }|j                          * 	 ddd       t        j                          d	}t!        |	d
      }t        d |D              }| j#                  ||       |D ]9  }| j%                  |j&                         | j#                  |j(                  |       ; t        j                          d}t!        ||	d
      }t        d |D              }| j+                  |d       |D ]  }| j#                  |j(                  |         t         j                  j                  j                  |j                  | j                        | j                  gd
      }t        d      D ](  }
 ||      j                         }|j                          * |5 }	 ||      j                         }|j                          ddd       t!        ||	d
      }| j+                  t-        |      d       | j+                  |d   j.                  d       | j#                  |d   j(                  |       |D ]  }| j%                  |j&                          t!        d|	      }| j#                  t-        |      d       |	S # 1 sw Y   fxY w# 1 sw Y   xY w)a  Runs DDP based model training and captures profiles.
            This test will do two profiler runs.
            1. An inital basic run to check if profiler events are correctly captured.
            2. A second profiling pass after running some iterations of DDP, to check robustness of thread local state.

            args
                profiler_ctx : Profiler context manager for pass 1
                profiler_ctx2 : Profiler context manager for pass 2.
                    This can be left out as None, in which case a deepcopy
                    of profiler_ctx is used.
            Returns:
                prof: Instantiated profiler object that can be used for post analysis.
            rz   rE   ru  FrF   r  rj  Nr  Tr  c              3   4   K   | ]  }|j                     y wrk   r  r  s     rX   r  zDDistributedTest._DistTestBase._test_ddp_profiling.<locals>.<genexpr>  s     6!agg6r  r`  c              3   4   K   | ]  }|j                     y wrk   r  r  s     rX   r  zDDistributedTest._DistTestBase._test_ddp_profiling.<locals>.<genexpr>  s     @!agg@r  rH   rL  r  r   search_unused_parameters)rS   rn  r  r  rN   rO   randrP  r!   r  r  rs  r  r)  r1  r  r   r  r)  r  r   r  r  r  )rV   r  profiler_ctx2r  r   rB  r   r   r  r   ru  r  all_reduce_event_namer   r  r   broadcast_event_namebroadcast_eventss                     rX   _test_ddp_profilingz1DistributedTest._DistTestBase._test_ddp_profiling  s    ECIJJ!!$)),IIc3U3E**UC		:C((##;;

499% II; < C $ $l ; $y) $As8<<>DMMO$$
 (,'7'7'9&:+$F!()>`deF6v66K[)4 D/  -BCD '+&6&6&8%9#D 23Gimn@/?@@K##K3) C  -ABC ((##;;

499% II;'+ < C
 1X  3x||~    $3x||~  ))>`deF##CK3##F1IOOQ7VAY^^-BC 0/0 ))CTJFS[!,K_$ $B   s   7M.'M;.M8;Nz'Currently failing in NVIDIA internal CIc                 v    t         j                  j                  j                         }| j	                  |      S r  )rS   rz  r   r   r  r"  s     rX   $test_ddp_profiling_autograd_profilerzBDistributedTest._DistTestBase.test_ddp_profiling_autograd_profiler  s2     %*NN$;$;$C$C$E!++9N+OOrY   c                    t         j                  j                  j                  }t         j                  j                  j                  }t         j                  j                  ||g      }| j                  |      }t        j                         dk7  ry t        |      }| j                  t        |      d       | j                  |      }| j                  t        |d         d       | j                  t        |d         d       |d   d   }| j                  |d	   d
|        | j                  |d   d|        |d   d   }| j                  |d	   d|        | j                  |d   d|        y )Nr~  r  r  r   rV  rJ   r  rH   r  rO  r  rk  FloatInt)rS   r   r    r  r   r   r  r1  r  r   r  r  r  r  )	rV   cpu_actcuda_actr'  r   r  	nccl_metaa0a1s	            rX   !test_ddp_profiling_torch_profilerz?DistributedTest._DistTestBase.test_ddp_profiling_torch_profiler  sm    nn5599G~~66;;H!&!7!7GXCV!7!W++9K+LD!V+  6d;s#34a8==>NOI S;!78!<S6!23Q7 ;'*BR 013rdDR['"?;'*BR 011RDBR[%t=rY   et_filec                 D   t        |      5 }t        j                  |      }ddd       d   D cg c]  }|d   dk(  s| }}| j                  t	        |      d       |d   D cg c]  }|d   dk(  s| }}| j                  t	        |      d       t        t              }|D ]  }|j                  dg       }| j                  t	        |      d	       |D 	ci c]  }	|	d   |	d
    }
}	|
j                  dd      }| j                  |d       | j                  |
j                  dd      d       ||   j                  |
       |dv r| j                  |
d   d       | j                  |
d   d       | j                  |
d   d       | j                  |
j                  dd      d	       | j                  |
j                  dd      d	       | j                  d|
j                         v        | j                  d|
j                         v        | j                  |
j                  dd      d	       | j                  |
j                  dd      d        | j                  t	        |d         d       | j                  t	        |d         d       |d   d	   }| j                  |d   d|        | j                  |d   d |        |d   d   }| j                  |d   d|        | j                  |d   d!|        y# 1 sw Y   xY wc c}w c c}w c c}	w )"zTorch profiler includes nccl metadata in an inserted operator called "record_param_comms"
            We test for basic fields in theese nodes in the Execution Trace.
            Nnodesr   z## process_group:init ##rH   r   rz   attrsr   rq   collective_namer  rk  >   r  pg_namer  pg_descr  pg_sizerJ   in_msg_nelemsr   out_msg_nelemsin_split_sizeout_split_sizeglobal_rank_startglobal_rank_striderV  r  rO  r  r  r  )r   r   r   r  r  r  r   r  r   r  r  r  r)  keys)rV   r  r   etr  pg_cfg_nodenccl_meta_nodesr  
attrs_listrP   r  r  r  r  s                 rX   _validate_execution_trace_ncclz<DistributedTest._DistTestBase._validate_execution_trace_nccl"  s	    g "!YYq\"&(k]QvYB\5\1]K]##C$4a8*,W+[Q6FZ9Zq[O[S115'-M % IUU7B/
""3z?A68BC16AgJ.CC 99%6;##Hb1##EIIgr$:B?h'..u5x'  y!137  y!1<@  y!115''		/2(FJ''		2BB(GK5::< ?@ 0EJJL @A  +>!CQG  +?!DaH-I2 S{!;<a@Sv!67; {+A.BR 013rdDR['"?{+A.BR 011RDBR[%t=W" "][ Ds'   LLL'L5LLLzTests nccl metadata primarily.c                 X   | j                  t        j                         d       t        j                  ddd      }|j                          |j                  }t               j                  |      }t        j                  j                  t        j                  t        j                  g      }t        j                  j                  t        j                  t        j                  g|      }| j                  ||       t!        d	|j                          | j#                  |       y )
Nr  r   z.et.jsonF)r   r   r  )r~  execution_trace_observer)r  r  zExecution trace saved at )r  r1  r  r   r   r   r   r   register_callbackrS   r   r   r    r  r   r  r   r  )rV   fpr  r  torch_profiler_ctx1torch_profiler_ctx2s         rX   "test_ddp_profiling_execution_tracez@DistributedTest._DistTestBase.test_ddp_profiling_execution_traceT  s     T--/8,,U:eTBHHJggG');;GDB #(.."8"8,002B2G2GH #9 # #(.."8"8,002B2G2GH)+ #9 # $$01 % 
 -bggY78//8rY   c                     d}d}d}t        j                  ||d      }t        j                  ||| j                        }t        j                  |      }|j                  | j                        }t        t        j                               D ci c]  }|d|dz   z   }}t        |j                               }	t        j                  j                  |j                         |	      }
t        |	      D ]J  }|
j!                           ||      }|j                         }|j#                          |
j%                          L || j                     }t        j                   j&                  j)                  |j                  | j                        | j                  g
      }t        j                  j                  |j                         |t        j                         z  	      }|j+                         5  t        |      D ]t  }|j!                           ||      }|j                         }|j#                          t        j                  j-                  | j                         |j%                          v 	 d d d        t/        |j1                         j3                         |j4                  j1                         j3                               D ]  \  \  }}\  }}| j7                  ||        y c c}w # 1 sw Y   }xY w)Nrz   rE   r  FrF   r  rJ   rH   r  rj  )rN   rO   rS   r  r  r  r  rn  rs  r1  r   r  rN  r  r  rs  rB  r)  r  rP  r!   rR  r  r  r<  ru   r   r  )rV   r  r   r  r   r   local_modelr  rank_to_iter_mappinglocal_iterslocal_optimru  r  r  rB  r  	ddp_optimlocal_tensordist_tensors                      rX   test_ddp_join_model_equivalencez=DistributedTest._DistTestBase.test_ddp_join_model_equivalencev  sw    EC MIIc3U3E**UC		:C--.K%**4995K16t7J7J7L1M$)-a4!8n$$  $ 299;<K++//+*@*@*B}/UK;' #%%'!#&wwy  "# -TYY7I((##;;

499%499+ < C   "}t7J7J7L'L ( I  %y) %A'')c(C779DMMOJJ**$))*<NN$%% 8;&&(..0#**2G2G2I2O2O2Q8 <3!L#3A{   {;<?$*% %s   K/4BK44K=c           	         |j                   }|j                  }| j                  }|j                  }t        j
                  j                  |       t        j                          t        j                  j                  j                  |j                  |      |gd|      }|j                  >|j                  |j                  |j                         t        d|j                          ||   }	t	        j                   |	gt        j
                  j#                               }
t        j$                  |
t        j&                  j(                         |
j+                         }d}|j,                  rA||	k(  r%| j/                  t0        d| j                   d      }n!| j/                  t0        d	      }n
t3               }|5  |j5                  |j,                  
      5  t7        |	      D ]  }||z  dk7  r|j9                         }n
t3               }|5  t;        |t<              r || j?                         }n ||      j?                         }|jA                          | jC                  |       t        j
                  jE                  |       d d d        |dz  } 	 d d d        d d d        |j,                  r| jG                  ||       n| jI                  ||       t        j
                  jE                  |       |j,                  s6| jK                  |jL                         t	        j                   |jL                  g| j                        }t7        t        jN                               D cg c]  }t	        jP                  |       }}t        jR                  ||       t        jN                         dz
  }| jU                  |h|D ch c]  }|j+                          c}       | jW                  |       |jY                         }| jK                  |j[                  d             t        j                          y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY wc c}w c c}w )NrH   )rL  r6  r  zregistered hook r  )r  r   Rank  exhausted all inputs1Detected at least one rank that exhausted inputs.r   join_uneven_inputs).r   r   r  r   rS   rn  r  r1  r  rN   rP  r!   r   r  r   r   r   current_devicer  r  r  r  r   r  r  r   rR  rs  rA  rn   r   r  r)  r  r  r  r  r)  _authoritative_rankr   r   r  assertSetEqualr  r  r   )rV   	test_caseiteration_mappingfind_unused_paramsr   r   r  r   r  rB  num_iters_tensormin_num_iterstotal_itersr:  r  contextr  final_rank_tensorru  r  max_rankr   r  s                          rX   _run_uneven_inputs_testz5DistributedTest._DistTestBase._run_uneven_inputs_test  s    OOE--C99D%33MJJ!!$'LLN ((##;;

4  6'9	 < C ~~)&&y	G((89: *$/I  %||EJJ$=$=$?  OO,1B1BC,113MK33 I-$($:$:$dii[8M&N%M
 %)$:$:$K%M
 !, )XX/8/S/S   ) #9- ) },1&)kkmG&1mG$ 
@)#u5'*Cy}}'*3x||~ MMO ,,S1 "JJ22$2?
@ $q(%))). 33  m< '']C JJ""$"/ 77 7 78$)LL,,-dii%!
 #4#6#6#89 $$%67  ->?..014##J[ I6 I --c2#&#<#<#>  0 4 45I JK) 81
@ 
@) )) )R !JsI   >Q4QA<Q 	QQ/Q'Q,
 Q
QQ	QQ$c                     G d dt         j                  j                        }t         j                  j	                  | j
                         t        }t        j                  j                  t        j                  |            j                  | j
                        } |       j                  | j
                        }t        j                  dd      j                  t         j                  j                               }||fD ]$  }t         j                  j                  j                  || j
                  g      }d}| j
                  dk7  r'|}| j                  t         d| j
                   d	      }n|dz  }| j                  t         d
      }d}	|5  |j#                  d      5  t%        |      D ]>  }
 ||      j'                         }|j)                          | j+                  |       |	dz  }	@ 	 d d d        d d d        | j-                  |	|       | j/                  |       ' y # 1 sw Y   8xY w# 1 sw Y   <xY w)Nc                   &     e Zd Zd fdZd Z xZS )bDistributedTest._DistTestBase.test_ddp_uneven_inputs_stop_iteration_sync_bn.<locals>.ModelWithCommc                 \    t         |           t        j                  ddd      | _        y )NrJ   r   FrF   rL   rM   rN   rO   r  rU   s    rX   rM   zkDistributedTest._DistTestBase.test_ddp_uneven_inputs_stop_iteration_sync_bn.<locals>.ModelWithComm.__init__  s"    G$&!yyBU;DHrY   c                 R    | j                  |      }t        j                  |       |S rk   )r  r1  r  r]   s     rX   r_   zjDistributedTest._DistTestBase.test_ddp_uneven_inputs_stop_iteration_sync_bn.<locals>.ModelWithComm.forward"  s     AOOA&HrY   r`   rb   rg   s   @rX   ModelWithCommr    s    <rY   r  rE   rJ   rj  r   r   r  r  r  Tr  rH   )rS   rN   r   rn  r  r  r  r  r  r  r  rT   r  rP  r!   r  r  rR  rs  r  r)  r  r  r  )rV   r  model_bn
comm_modelmodel_inputr   r  rB  r:  r  ru  r  s               rX   -test_ddp_uneven_inputs_stop_iteration_sync_bnzKDistributedTest._DistTestBase.test_ddp_uneven_inputs_stop_iteration_sync_bn  s     JJ!!$)),H''>>h'd499o  '--dii8J++b!,11%**2K2K2MNK"J/ 5))AA $		{ B  !"99> -I$($:$:$dii[8M&N%M
 !. 1I$($:$:$K%M " #tD #!&y!1 #A#(#5#9#9#;D MMO ,,U3FA	###   M2--e4=5*# ## #s%   H8'AH,5H8,H51H88I	c                 z	   d}d}t        j                  t        j                  ddd      t        j                         t        j                  ddd      t        j                         t        j                  ddd      t        j                               }t        j                  ||d      }t               } G d	 d
t         j                        } |d      } |d      }| j                  }	t        d|t        j                  |d|	      d      t        d|t        j                  |||||	      d      t        d|t        j                  |||	      d      t        d|t        j                  |d|	      |	fd      t        d|t        j                  |d|	      |	fd      g}
t        d|t        j                  d t        j                  |||	      d      t        d|t        j                  t        j                  d dddd      t        j                  |||	      d      g}|
j!                  |       t"        rRt$        j&                  j)                         }|
j+                  t        d|t        j                  dddd      d             g }t-        |
      D ]D  \  }}|j+                  t        |j.                  |j0                  |j2                  |dz                F g }|
D ]I  }|j+                  t        |j.                  |j0                  |j2                  |j4                  d             K |
j!                  |       |
j!                  |       ddg}g d}dg}t7        j8                         dkD  r|j+                  d       g }|D ]  }|D ]  }|D ]  }t:        j=                  t?        d|      |      }|dkD  r+|jA                         D ]  }	|	dkD  s	||	xx   |dz  z  cc<    |jC                  t:        j=                  t?        |t7        j8                               ||z                |j+                  |          tE        jF                  |
|      D ]\  \  }}| j                  dk(  r(tI        d|j.                   d|j4                   d|        | jK                  ||d |j.                  v !       ^ y )"Nr   rH   r  r          FrF   c                   *     e Zd Z fdZd Zd Z xZS )ODistributedTest._DistTestBase.test_ddp_uneven_inputs.<locals>.UnusedParamModulec                 l    t         |           t               | _        t               | _        || _        y rk   )rL   rM   r   t0rv  unused_params_rank)rV   r  rW   s     rX   rM   zXDistributedTest._DistTestBase.test_ddp_uneven_inputs.<locals>.UnusedParamModule.__init__d  s(    G$&"fDG"fDG.@D+rY   c                 Z    | j                   j                  | j                  j                  fS rk   )r  r   rv  r  s    rX   task_parametersz_DistributedTest._DistTestBase.test_ddp_uneven_inputs.<locals>.UnusedParamModule.task_parametersj  s     GGIItwwyy11rY   c                     || j                   k7  r | j                  | j                  |            S | j                  |      S rk   )r  rv  r  )rV   r^   r  s      rX   r_   zWDistributedTest._DistTestBase.test_ddp_uneven_inputs.<locals>.UnusedParamModule.forwardm  s?      4#:#:: 
+ "WWQZrY   )rc   rd   re   rM   r  r_   rf   rg   s   @rX   UnusedParamModuler  c  s    A2rY   r  r   batch_norm_netrJ   r  )r   r   r   r   large_conv_modelsmall_model&unjoined_rank_with_unused_params_model$joined_rank_with_unused_params_modelsmall_model_allreduce_hook)r   r   r   r   r   r   small_model_power_sgd_hook)rm  r'  r(  r)  use_error_feedbackresnet_modelrz   T)r   r   r   r   r   )rJ   rz   rE   zRunning test: z' sync interval
                        z0 with iteration mapping
                        unused_params_model)r  )&rN   rq  rr  r   rO   r   r   r  r   rS   r   r  r  r  r   r*  extendr  r  r  r  r  r  r   r   r   r   r1  r   r  fromkeysrs  r  r{  r/  r  r   r  )rV   r   r  large_modelr  bn_netr  r  r  r  r  models_with_hookr  models_with_syncr  
test_inputthrow_on_early_term_testsbaseline_num_itersiteration_offsetsnum_uneven_ranksiteration_mappingsnum_early_join_ranksbaseline_iterrA  mappingr  r  s                              rX   r+  z4DistributedTest._DistTestBase.test_ddp_uneven_inputsO  s    CE--		!R#			"b!$			"c1%	K ))C59K!^FBII " 6Gq5I23DQ3G099D #) 

5!D9"#	 #+%

5%c$G"#	 #&%

5#d;"#	 #A@E1T:DA"#	 #?>E1T:DA"#	9"NL #5% //

5#d;"# #5%!//"00&*23 -.#(+0 

5#d;"# 4 !!"23 *11::<%%&+*!JJq!T48&'	  "!*>!: : ''&'__(..&NN&'!e	 )+%, 	
)00&'__(..&NN&0&>&>37	 !!"23!!";< #$Q * !s""$q( ''*!#
 )9 ;$%7 ;M"3 ;"&--a9M0NP]"^ 0!3(/ A#'!8$+DMVq[$@MA   MM%0DdFYFYF[*\^knt^tu +11':;;;  3<2C2C 23 .- 99>*9>>*: ;"001 2*+/
 ,,%(=(O - rY   c                    t        j                  | j                         t         j                  j                  j                  t         j                  j                  dd      j                  | j                        | j                  g      }t        j                  d      | j                  z  }d}t        j                         }|j                  d      5  t        |      D ]  }|j                  j                  j                  }|!|j!                  d       |j#                           ||      }|j%                         }|j'                          t%        d t        |      D              |z  }	| j)                  |j                  j                  j                  j+                         |	        	 d d d        |j,                  }
| j/                  |
j0                         | j3                  |       y # 1 sw Y   BxY w)NrH   rj  r   F)enablec              3       K   | ]  }|  y wrk   ry   r|  s     rX   r  zSDistributedTest._DistTestBase.test_ddp_uneven_input_join_disable.<locals>.<genexpr>&  r}  r~  )rS   r  r  rN   rP  r!   rO   rn  r   r1  r   rR  rs  r   r  r  r  r  r  r)  r  r  _join_configrH  r  r  )rV   r  r   r  rK  ru  r  r  r  r  join_configs              rX   "test_ddp_uneven_input_join_disablez@DistributedTest._DistTestBase.test_ddp_uneven_input_join_disable  s    dii(((##;;1%**4995499+ < C **Q-$))+CG,,.J' Sw SA::,,11D'++E2

c(C779DMMO %('E53D'E$E
$RM$$SZZ%6%6%;%;%@%@%BMRSS **K[//0))#.#S Ss   CG11G:c                    d G fddt         j                        } |       }t        j                   j                  j	                  |j                  | j                        | j                  g      }t        j                  d      }| j                  t              5  |j                         5   ||      }|j                         }|j                          d d d        d d d        y # 1 sw Y   xY w# 1 sw Y   y xY w)NzIntentional errorc                   ,     e Zd Zd fdZfdZ xZS )VDistributedTest._DistTestBase.test_ddp_uneven_input_exception.<locals>.ExceptionModulec                     t         |           t        j                  t	        j
                  dd            | _        y )NrH   Tr   )rL   rM   rN   r   rS   r   r  rU   s    rX   rM   z_DistributedTest._DistTestBase.test_ddp_uneven_input_exception.<locals>.ExceptionModule.__init__8  s)    G$&!#ejj$.O!PDJrY   c                     t              rk   )r  )rV   ru  	error_strs     rX   r_   z^DistributedTest._DistTestBase.test_ddp_uneven_input_exception.<locals>.ExceptionModule.forward<  s    $Y//rY   r`   rb   )rW   r  s   @rX   ExceptionModuler  7  s    Q0 0rY   r  rj  rH   )rN   r   rS   rP  r!   rn  r  r   r  r  rR  r  r)  )rV   r  exception_moduler  r   r  r  r  s          @rX   test_ddp_uneven_input_exceptionz=DistributedTest._DistTestBase.test_ddp_uneven_input_exception-  s     ,I0")) 0  /0((##;; %%dii0dii[ < C **Q-C''
I> $XXZ $c(C779DMMO$$ $$ $$ $s$    C71)C+C7+C4	0C77D c           	         t         j                         }| j                  dz   t        | j                        z  }t
        j                  d   }|dk(  rt        j                  j                  |       d}|dk(  r0|j                  t        t        j                  ddd                   t        r.|j                  t        t        j                  dd                   | j                  |k(  r|n|D cg c]  }d  c}}|dk7  rq|d   g}| j                  |k7  r| j                  |d   |d          t        j                   |d|t        j"                  d      	       | j%                  |d   |d          |dk7  rt        j                  j'                         t        | j                        k(  rq|d   g}| j                  |k7  r| j                  |d   |d          t        j                   |d|t        j"                  |      	       | j%                  |d   |d          |dk(  rt        j                  j'                         t        | j                        k(  rq|d   g}| j                  |k7  r| j                  |d   |d          t        j                   |d|t        j"                  |      	       | j%                  |d   |d          |d   g}| j                  |k7  r| j                  |d   |d          t        j                   |d|
       | j%                  |d   |d          | j                  |k7  r| j                  ||       t        j                   |d|
       | j%                  ||       y c c}w )NrH   r   r  r   rz   r  i
cpu)r  r  r  )r  r  )rc  r  r  r   rK  r   rJ  rS   rn  r  r  ri   rT   r;   r  r1  rd  r  r  r  )	rV   r  rd  re  r=  src_rankru  objectssingle_obj_lists	            rX   _test_broadcast_object_listz9DistributedTest._DistTestBase._test_broadcast_object_listJ  s   9>>@N Q#doo*>>Ijj+G& 

%%i0H& %%c%++a1*E&FG %%c%++a*C&DE 99( $23qd3  & #*1:,99(''(:N1<MN**#%U@S   !3^A5FG
 & UZZ%<%<%>#dooBV%V#*1:,99(''(:N1<MN**#%Y@W   !3^A5FG & UZZ%<%<%>#dooBV%V#*1:,99(''(:N1<MN**#%Y@W   !3^A5FG  'qzlOyyH$##OA$6q8IJ&&AUK_Q/1BC yyH$##G^<&&wAUCWn5] 4s   1	M0r5  zCTest is failing, see https://github.com/pytorch/pytorch/pull/113620c                 "    | j                         S rk   )r$  r  s    rX   test_broadcast_object_listz8DistributedTest._DistTestBase.test_broadcast_object_list  s     3355rY   c                     t               }t        j                  |      }t        j                  |      }| j	                  |      S r}  )r   r1  r  r  r$  rn  s       rX   $_test_broadcast_object_list_subgroupzBDistributedTest._DistTestBase._test_broadcast_object_list_subgroup  s;     )*G&&w/G~~g6H33H==rY   c                 @    G d dt         j                        }| j                  }t        j                  ddgddg      D ]  \  }} || j                        j                         j                  |      j                  j                  dt        j                  d| j                  z   | j                               t        j                  j                               }t        fdj                         D              }j                  j                         D 	cg c]  \  }}	| d	|  }
}}	j                  j!                         D 	cg c]  \  }}	| d	|  }}}	t        j                   j"                  j$                  j'                  |
|z          t        j                   j"                  j%                  |g|||
      }t        j(                  ddd      j                  |      |j*                  _        t-        j.                  |j*                        j1                  | j                        }t        j2                  dt        j
                        j                  |      | j                  dz   z  }t5        d      D ]Y  }	 ||      j7                         j9                           ||      j7                         j9                          t;        |j*                  j                  j                         |j                  j                               D ]+  \  }}| j=                  |j>                  |j>                         - t;        |j*                  j@                  j                         |j@                  j                               D ]-  \  }}| jC                  |j>                  |j>                  k(         / |D ]  }| jE                  |j>                  d u        ! \ t        j0                  jG                  | j                          y c c}	}w c c}	}w )Nc                   $     e Zd Z fdZd Z xZS )LDistributedTest._DistTestBase._test_ddp_ignore_params_arg.<locals>.TestModelc                     || _         t        | 	          t        j                  ddd      | _        | j                   dk(  rt        j                  ddd      | _        y t        j                  ddd      | _        y )NrH   FrF   r   rE   )r  rL   rM   rN   rO   r   r   rV   r  rW   s     rX   rM   zUDistributedTest._DistTestBase._test_ddp_ignore_params_arg.<locals>.TestModel.__init__  s^     $DIG$&!yyAE:DH yyA~#%99Q#?#%99R%#@rY   c                 J    | j                  |      }| j                  |      }|S rk   r   r]   s     rX   r_   zTDistributedTest._DistTestBase._test_ddp_ignore_params_arg.<locals>.TestModel.forward  s!    AAHrY   rb   rg   s   @rX   	TestModelr+    s    	ArY   r/  FTignore_bufferr   r  c              3   D   K   | ]  \  }}|j                   u r|  y wrk   )r   )r  r  r   r   s      rX   r  zLDistributedTest._DistTestBase._test_ddp_ignore_params_arg.<locals>.<genexpr>  s)      &+V*  &s    r#  )rL  r  r  rN  rH   rF   rj  ru  )$rN   r   r  r/  r  ra  rk  r   rR   rS   r  r  rs  r  named_modulesr  r  rP  r!   r  rO   r   r  r  rn  r   rs  r  r)  r  r  r  r   rH  r)  r  )rV   rN  r/  rp  find_unusedr  proxy_paramsmodel_fc2_name
param_nameru  proxy_param_namesbuf_nameproxy_buffer_namesr_  r  r   materialized_paramlocal_paramsynced_paramproxy_paramr   s                       @rX   _test_ddp_ignore_params_argz9DistributedTest._DistTestBase._test_ddp_ignore_params_arg  s~   BII " 		I4=4E4Et}5 B90/ "$)),22477	B 		))#U[[TYYtyy%Q  $EII$8$8$:;!% &/4/B/B/D& " */)C)C)E%%
A &&a
|4%! % (-yy'>'>'@&#! &&az2&" & !!99ee,/AA hh''?? ){+6&7!- @  "$1ae!<!?!?	!J

 #mmCJJ7<<TYYGjj%++699)D		TUVq BAHLLN++-$((*335 <?

113[__5O5O5Q< T7*K (();)@)@+BRBRST 69

113[__5O5O5Q6 P1k (():):k>N>N)NOP (4 B(8(8D(@AB%B. 

&&dii&8EB9%&s   !PPc                 L    | j                  d       | j                  d       y )NFrN  T)r>  r  s    rX   test_ddp_ignore_params_argz8DistributedTest._DistTestBase.test_ddp_ignore_params_arg  s&     ,,%,@,,$,?rY   c           
          G d dt         j                        }t        j                   j                  j	                   |       j                  | j                        | j                  g      }t        d      D ]y  }t        j                  dd      }|dkD  r8	  ||      j                         j                          | j                  dd	       V ||      j                         j                          { t%        j6                          y # t        $ r}t        |      }t        ||       t        t         t"        g}t%        j&                         t$        j(                  j*                  k(  r|j-                  t.               n3d
j1                  dg      }|j-                  d| j                   d|        |D ]  }	| j3                  |	|v d|	 d|         | j                  t4        |v        Y d }~xd }~ww xY w)Nc                   &     e Zd Zd fdZd Z xZS )`DistributedTest._DistTestBase.test_ddp_unused_params_rebuild_buckets_exception.<locals>.ToyModelc                     t         |           t        j                  ddd      | _        t        j                  ddd      | _        y r  rL   rM   rN   rO   rg  rh  rU   s    rX   rM   ziDistributedTest._DistTestBase.test_ddp_unused_params_rebuild_buckets_exception.<locals>.ToyModel.__init__  s6    G$& "		"bu =DI "		"bu =DIrY   c                 $    | j                  |      S rk   )rg  r]   s     rX   r_   zhDistributedTest._DistTestBase.test_ddp_unused_params_rebuild_buckets_exception.<locals>.ToyModel.forward  s    99Q<'rY   r`   rb   rg   s   @rX   ri  rD    s    >
(rY   ri  rj  rJ   rH   rE   r   Tz'DDP unused parameters error not raised.r  znet2.weightdid not receive grad for rank : 	Expected 
 to be in )rN   r   rS   rP  r!   rn  r  rs  r  r  r)  rH  r  r   r5   !ddp_prev_reduction_unfinished_str$ddp_recommend_find_unused_params_str ddp_outputs_not_used_in_loss_strr1  r3  r4  ri  r  ddp_suggest_debug_mode_strrR  r)  "ddp_find_unused_params_enabled_strr  )
rV   ri  r_  r  r   r   r  expected_strsunreduced_paramsss
             rX   0test_ddp_unused_params_rebuild_buckets_exceptionzNDistributedTest._DistTestBase.test_ddp_unused_params_rebuild_buckets_exception  s   (299 ( ((##;;
		*		{ < C 1X !.jjB'q5C//1, (( "K HLLN++-C!.F LLN7 ( T!!f/S9=@<)  //1T__5H5HH)001KL/3yy-/I,)00"@2N^M_ ` "/ VA OOAH	!Jse6TUV(()Ks)RSS'Ts   $D

	G6CG11G6c                 .    G d dt         j                        }t        j                  j	                  | j
                          |       j                  t        j                  j                               }dD ]  }t        j                   j                  j                  t        j                  |      | j
                  gd|      }t        j                  dd| j
                        }t        d	      D ]  } ||      }|dz  }|j                          !  y )
Nc                   &     e Zd Zd fdZd Z xZS )VDistributedTest._DistTestBase.test_ddp_shared_grad_acc_unused_params.<locals>.ToyModelc                 "   t         |           t        j                  ddd      | _        t        j
                  t        j                  d            | _        | j                  | j                  _        t        j                  dd      | _	        y )NrE   r   FrF   )
rL   rM   rN   rO   rg  r   rS   r  rG   rh  rU   s    rX   rM   z_DistributedTest._DistTestBase.test_ddp_shared_grad_acc_unused_params.<locals>.ToyModel.__init__=  s]    G$& "		"ae <DI "U[[^ <DI &*YYDIIN "		"a 0DIrY   c                 @    | j                  |      j                         S rk   )rh  r  r]   s     rX   r_   z^DistributedTest._DistTestBase.test_ddp_shared_grad_acc_unused_params.<locals>.ToyModel.forwardH  s    99Q<++--rY   r`   rb   rg   s   @rX   ri  rW  <  s    	1.rY   ri  r  T)rL  r  rN  r  rE   r  ru  )rN   r   rS   rn  r  r  rk  r  rP  r!   r  r  rT   rs  r)  )rV   ri  r   staticrx  r   ru  r  s           rX   &test_ddp_shared_grad_acc_unused_paramszDDistributedTest._DistTestBase.test_ddp_shared_grad_acc_unused_params7  s    
.299 . JJ!!$)),JMM%**";";"=>E' $!HH--EEMM%( $		{+/!'	 F 	 kk"b;q $A$S>DBJDMMO	$$rY   c                     d G d d      } fd} fd} fd} fd}||t         |t        |t        |t        |t        |i G  fdd	t
        j                  j                        }t
        j                  j                  j                   |       j                   j                         j                  g
      fd}t        d t              D              } ||t                t              D 	cg c]  }	t        j                  dd       }}	 ||t                |t        j                  dd            } |||       d}
d}t        j                  |
|      }t        j                  |
|      }t        ||      } ||t        |             t        ||      } ||t        |             t         d   |t         d   |i} ||t        |             y c c}	w )NrJ   c                       e Zd ZddgZd Zy)DDistributedTest._DistTestBase.test_ddp_device.<locals>.TensorWrapperr   moved_to_gpuc                      || _         d| _        y r  )r   r_  )rV   r   s     rX   rM   zMDistributedTest._DistTestBase.test_ddp_device.<locals>.TensorWrapper.__init__c  s    DF(-D%rY   N)rc   rd   re   	__slots__rM   ry   rY   rX   TensorWrapperr^  `  s     .1	.rY   rb  c           	      &   j                  t        |              j                  dt        | D ch c]  }|j                   c}             j                  | d   j                  j                  j
                         | d   | d   z   S c c}w NrH   r   )r)  r  r  r  indexr  )r^   r   expected_lenrV   s     rX   tuple_and_list_validatorzODistributedTest._DistTestBase.test_ddp_device.<locals>.tuple_and_list_validatorj  sr    A5  C1(=a(=$>?  1!2!2DII>tad{" )>s   Bc                    j                  | j                  t               j                  | j                  j                  j
                  | j                  j                  j
                         j                  | j                  j                  j
                  j                         | j                  | j                  z   S rk   )r  _fieldsEXPECTED_FIELDSrP   r  re  rQ   r  r^   rV   s    rX   namedtuple_validatorzKDistributedTest._DistTestBase.test_ddp_device.<locals>.namedtuple_validatorp  sw      O<  !1!1133::3C3CD  !1!1499=ssQSSy rY   c                     j                  | j                  xs" t        | j                  j                        dk(         | j                  j                  j                        | _        d| _        | j                  S )Nr   T)r)  r_  r   r   r  rk  r  rk  s    rX   custom_type_validatorzLDistributedTest._DistTestBase.test_ddp_device.<locals>.custom_type_validatorv  sQ     L3qsszz?e3KMccffTYY'!%ss
rY   c           	         j                  t        d   | j                         v        j                  t        d   | j                         v        j                  dt	        | j                         D ch c]  }|j                   c}             j                  | t        d      j                  j                  j                         | t        d      | t        d      z   S c c}w rN  )	r)  rj  r  r  r  rN  r  re  r  )r^   r   rV   s     rX   dict_validatorzEDistributedTest._DistTestBase.test_ddp_device.<locals>.dict_validator|  s     2affh >? 2affh >?  C188:(Fa(F$GH  ?1#5!6!=!=!C!CTYYO+,q1C/DDD )Gs   5C&c                   ,     e Zd Z fdZfdZ xZS )?DistributedTest._DistTestBase.test_ddp_device.<locals>.ToyModelc                 \    t         |           t        j                  ddd      | _        y r  r  r  rW   s    rX   rM   zHDistributedTest._DistTestBase.test_ddp_device.<locals>.ToyModel.__init__  s"    G$& "		"bu =EIrY   c                 r    j                  t        ||              |   |      }| j                  |      S rk   )r)  rn   r  )r  r^   expected_type
fwd_tensorrV   
validatorss       rX   r_   zGDistributedTest._DistTestBase.test_ddp_device.<locals>.ToyModel.forward  s8     OOJq-$@A!:M!:1!=J 99Z00rY   rb   )rW   rV   rx  s   @rX   ri  rr    s    >1 1rY   ri  rj  c                 t    t        d      D ])  } | |      }|j                         j                          + y )Nr{   )rs  r  r)  )r   
input_typeru  r  r   s       rX   
train_iterzADistributedTest._DistTestBase.test_ddp_device.<locals>.train_iter  s4    q )AZ0CGGI&&()rY   c              3   H   K   | ]  }t        j                  d d         yw)rE   N)rS   rT   )r  ru  s     rX   r  z@DistributedTest._DistTestBase.test_ddp_device.<locals>.<genexpr>  s     IB+Is    "rE   r   r   rH   )r   r  TestNamedTupleInput_0r   r  rS   rN   r   rP  r!   rk  r  rs  rT   r  typerj  )rV   rb  rg  rl  rn  rp  ri  r{  r   ru  r  r   rP   rQ   rf  r   rx  s   `             @@@rX   test_ddp_devicez-DistributedTest._DistTestBase.test_ddp_device[  s    L. .#!E 4/.%';%';nJ
1588?? 
1 HH%%==
dii(dii[ > E) IU<5HIICsE" 16l0CD15;;r2&DCDsD!  B 34CsM* EC

5#&A

5#&A'1-CsDI&'1-CsDI&  "A"AC sDI&1 Es   <G c                     d}d}t        j                  || j                        t        j                  || j                         G  fddt         j                  j                        }t         j                  j
                  j                   |       j                   j                         j                  g      }t              } ||t        |             t              } ||t        |             y )Nr   rE   r  c                   .     e Zd Z fdZfdZ xZS )KDistributedTest._DistTestBase.test_ddp_namedtuple.<locals>.NamedTupleModulec                 X    t         |           t        j                  dd      | _        y )NrE   rH   r  rt  s    rX   rM   zTDistributedTest._DistTestBase.test_ddp_namedtuple.<locals>.NamedTupleModule.__init__  s     G$& "		"a 0EIrY   c           	      |   j                  t        ||      d| dt        |              j                  |j                  t
               j                  |j                         j                  |j                         | j                  t        j                  |j                  |j                              S )NzExpected type r  )r)  rn   r~  r  ri  rj  rP   rQ   r  rS   r  )r  r  rv  rP   rQ   rV   s      rX   r_   zSDistributedTest._DistTestBase.test_ddp_namedtuple.<locals>.NamedTupleModule.forward  s    OO"5-8(yeN $$U]]OD$$Q0$$Q0 99UYYuww%@AArY   rb   )rW   rP   rQ   rV   s   @rX   NamedTupleModuler    s    1	B 	BrY   r  rj  )rS   r  r  rN   r   rP  r!   rn  r}  r~  r   )rV   r  r   r  r   r   rP   rQ   s   `     @@rX   test_ddp_namedtuplez1DistributedTest._DistTestBase.test_ddp_namedtuple  s     EC

5#dii8A

5#dii8AB B588?? B  HH%%== "''		2		{ > E (1-C#tCy!'1-C#tCy!rY   c                    | j                         \  }}}t        j                         }|dk  r| j                  d        G d dt        j
                        }t        j                  j                  j                   ||      d      }t	        j                         }d}t        |      D ]  }	|	|dz
  k  r|j                  }
nt        }
 |
       5  t        j                  d      }|j                  |      }t        j                  d      } |||      }|j                          d d d         | j!                  t#        d	 |j%                         D               d
       t        j&                  |j%                         D cg c]  }|j(                  j+                  d       c}      }|dk(  r,t        |      D cg c]  }t        j,                  |       }}ng }t        j.                  ||d       |dk(  r4|dd  D ]+  }| j!                  t        j0                  |d   |      d       - y y # 1 sw Y   xY wc c}w c c}w )NrJ   z&This test requires at least two ranks.c                   $     e Zd Z fdZd Z xZS )gDistributedTest._DistTestBase.test_grads_same_across_ranks_with_no_sync.<locals>.SimpleConditionalModelc                    t         |           || _        t        j                  dd      | _        t        j                  dd      | _        t        j                  dd      | _        t        j                  dd      | _        d| _	        y rd  )
rL   rM   r  rN   rO   nn1nn2nn3nn4r   r-  s     rX   rM   zpDistributedTest._DistTestBase.test_grads_same_across_ranks_with_no_sync.<locals>.SimpleConditionalModel.__init__  s`    G$& $DI!yyADH!yyADH!yyADH!yyADH!"DJrY   c                     | j                   dk(  r8d| _         | j                  dk(  r| j                  |      S | j                  |      S d| _         | j                  dk(  r| j	                  |      S | j                  |      S rN  )r   r  r  r  r  r  rV   r  s     rX   r_   zoDistributedTest._DistTestBase.test_grads_same_across_ranks_with_no_sync.<locals>.SimpleConditionalModel.forward  si    zzQ%&
99>#'88E?2#'88E?2%&
99>#'88E?2#'88E?2rY   rb   rg   s   @rX   SimpleConditionalModelr    s    #3rY   r  Tr  rH   )rH   c              3   8   K   | ]  }|j                   d u   y wrk   )r  )r  r   s     rX   r  zZDistributedTest._DistTestBase.test_grads_same_across_ranks_with_no_sync.<locals>.<genexpr>  s     C1$Cs   z0Gradients can't be None for any model parameter.r   r   )rt  r  z)Gradients are not the same for all ranks.)r  r1  r   skipTestrN   r   rS   rP  r!   r  rs  rA  r   r  r_   r)  r)  ry  rs  r.  r  r  r   ru  allclose)rV   rZ  r[  r  rK  r  r   r>  grad_accumulationmicrobatch_idxr  r  rc  r#  r  r   gradsru  gathered_gradsr  s                       rX   )test_grads_same_across_ranks_with_no_synczGDistributedTest._DistTestBase.test_grads_same_across_ranks_with_no_sync  s   &*&<&<&>#FIt,,.JA~FG3 38 HH%%==&t,T > E zz|H !"'(9": $!$5$99#mmG)GY $!JJu-E"]]51F"ZZ.F#FF3DMMO$ $$ OOC0@0@0BCCCB II8H8H8JK1qvv{{2KLE qyCHCT!Ua%"2"25"9!U!U!#KK>qAqy'+ AOO~a'8!<C +$ $ L "Vs   AH. "H;;I .H8	c           
      L   d}d}t        j                         }t        j                  j	                  | j
                         t        j                  j                  j                  t               j                  | j
                        | j
                  gd      }t        j                  ||| j
                        }t        j                  ||| j
                        }t        d      D ]  }|dz  dk(  r	 ||      }n ||      }|j                         }	|	j                          |j                  j!                         }
|dz  dk(  r3t        j"                  |dg| j
                  t        j$                  	      }n2t        j"                  ||g| j
                  t        j$                  	      }|
}| j'                  ||        t        j                  j                  j                  t               j                  | j
                        | j
                  gd
      }t        d      D ]g  }|dk(  r' ||      j                         }	|	j                          /	  ||      j                         }	|	j                          | j)                  dd       i t        jF                          y # t*        $ r}t-        |      }t/        ||       d}t0        t2        t4        d| j
                   d| g}t        j6                         t         j8                  j:                  k(  r|j=                  t>               n3djA                  dg      }|j=                  d| j
                   d|        |D ]  }| jC                  ||v d| d|         | j)                  tD        |v        Y d }~yd }~ww xY w)Nr  rE   Tr  r  ru  rJ   r   r  rk  FDDP error not raisedrH   6Parameter indices which did not receive grad for rank rI  r  lin2.weightrH  rJ  rK  )$r1  r   rS   rn  r  r  rN   rP  r!   r  rT   r   rs  r  r)  reducer_get_local_used_mapr   int32r  rH  r  r   r5   rL  rM  rN  r3  r4  ri  r  rO  rR  r)  rP  r  )rV   r  r   rK  r   random_input
ones_inputr  r  r  local_used_maprp  variable_usage_tensorr   r  unused_param_indexrQ  rR  rS  s                      rX   'test_ddp_control_flow_same_across_rankszEDistributedTest._DistTestBase.test_ddp_control_flow_same_across_ranks0  s%   
 EC,,.JJJ!!$)),HH%%==#%**4995 II;'+ > E
 !;;uc$))DLE3tyyAJ1X Bq5A:-C
+Cwwy "'!B!B!Dq5A:$||#Q		 H  %||#Z0%++ H
 )7%  !6A+B2 HH%%==#%**4995 II;', > E
 1X  G6 .224DMMOG$\26682 ((/EFA GD LLN5 ( T!!f/s;-.*=@<TUYU^U^T__abtauv	)  //1T__5H5HH)001KL/3yy-/I,)00"@2N^M_ ` "/ VA OOAH	!Jse6TUV(()Ks)RSS-Ts   &J$$	N#-C+NN#c                    t         j                  j                  | j                         t         j                  j
                  j                  t               j                  | j                        | j                  gd      }t        j                  dd| j                        }t        j                  dd| j                        }d}| j                  t        |      5  t        d      D ];  }|dz  dk(  r	 ||      }n ||      }|j                         }|j                          = 	 d d d        t        ||       | j                  t        d	      5  t        d      D ];  }|dz  dk7  r	 ||      }n ||      }|j                         }|j                          = 	 d d d        t        |d
       y # 1 sw Y   xY w# 1 sw Y   "xY w)NTrL  rN  r  rE   r  z1Your training graph has changed in this iterationrJ   r   af  Expected to have finished reduction in the prior iteration before starting a new one. This error indicates that your training graph has changed in this iteration, e.g., one parameter is used in first iteration, but then got unused in the second iteration. this is not compatible with static_graph set to True.
Parameter indices which did not receive grad forz#Expected to have finished reduction)rS   rn  r  r  rN   rP  r!   r  rT   r   r  r  rs  r  r)  r5   )rV   r   r  r  r.  r  r  r  s           rX   test_invalid_static_graphz7DistributedTest._DistTestBase.test_invalid_static_graph  s    JJ!!$)),HH%%==#%**4995 II;! > E
 !;;r2dii@LB499=J OL''lC $q $A1uz#L1#J/779DMMO$$ $E<8 ''C	 $ q $A1uz#L1#J/779DMMO$$$ $E+PQ?$ $$ $s   A
F3A
F?3F<?Gc           
         dd G fddt         j                        }t        j                         }t        j
                  j                  | j                         t        j                   j                  j                   || j                        j                  | j                        | j                  gd      }t	        j                  | j                        }t	        j                  | j                        }t        d      D ]  }|d	z  d
k(  r	 ||      }n ||      }|j                         }|j                          |j                  j!                         }	|d	z  d
k(  r3t	        j"                  |d
g| j                  t        j$                        }
n2t	        j"                  |dg| j                  t        j$                        }
|	}| j'                  ||
        t        j                   j                  j                   || j                        j                  | j                        | j                  gd      }t        d	      D ]g  }|d
k(  r' ||      j                         }|j                          /	  ||      j                         }|j                          | j)                  dd       i t        jF                          y # t*        $ r}t-        |      }t/        ||       d}t0        t2        t4        d| j                   d| g}t        j6                         t        j8                  j:                  k(  r|j=                  t>               n3djA                  dg      }|j=                  d| j                   d|        |D ]  }| jC                  ||v d| d|         | j)                  tD        |v        Y d }~yd }~ww xY w)Nr  rE   c                   ,     e Zd Z fdZfdZ xZS )\DistributedTest._DistTestBase.test_ddp_control_flow_different_across_ranks.<locals>.ToyModelc                     t         |           t        j                  ddd      | _        t        j                  ddd      | _        || _        y r  rL   rM   rN   rO   r  r  r  r-  s     rX   rM   zeDistributedTest._DistTestBase.test_ddp_control_flow_different_across_ranks.<locals>.ToyModel.__init__  =    G$& "		"bu =DI "		"bu =DI $DIrY   c                 D   t        j                  |t        j                  |j                              xr | j                  dk(  }|r3| j                  t        j                  | j                  |                  S t        j                  | j                  |            S )Nr  rH   )	rS   rp   r   r  r  r  r   r   r  )rV   r^   r  r  r   s      rX   r_   zdDistributedTest._DistTestBase.test_ddp_control_flow_different_across_ranks.<locals>.ToyModel.forward  su     Auzz%QXX'NO + IIN %
 (#yy		!)=>> vvdiil33rY   rb   )rW   r  r   s   @rX   ri  r    s    %4 4rY   ri  Tr  r  ru  rJ   r   r  rH   Fr  r  rI  r  r  rH  rJ  rK  )$rN   r   r1  r   rS   rn  r  r  rP  r!   rT   r   rs  r  r)  r  r  r   r  r  rH  r  r   r5   rL  rM  rN  r3  r4  ri  r  rO  rR  r)  rP  r  )rV   ri  rK  r   r  r  r  r  r  r  rp  r  r   r  r  rQ  rR  rS  r  r   s                     @@rX   ,test_ddp_control_flow_different_across_rankszJDistributedTest._DistTestBase.test_ddp_control_flow_different_across_ranks  sD   
 EC4299 4( ,,.JJJ!!$)),HH%%==#((3 II;'+ > E
 !;;uc$))DLE3tyyAJ1X Bq5A:-C
+Cwwy "'!B!B!Dq5A:$||#Q		 H  %||#Q		 H )7%   !6A/B6 HH%%==#((3 II;', > E
 1X G6 .224DMMOG$\26680 ((/EF?GB LLN3 ( T!!f/s;-.*=@<TUYU^U^T__abtauv	)  //1T__5H5HH)001KL/3yy-/I,)00"@2N^M_ ` "/ VA OOAH	!Jse6TUV(()Ks)RSS+Ts   &K	OC+OOc                 .   d}| j                   |k(  rt        nt        D cg c]  }d  c}}t        j                         }|d | }d}t	        |      |k  r(|j                  ||          |dz  }t	        |      |k  r(d g}t        j                  |||       | j                  |d   t        | j                   t	        t              z            | j                  t        d      5  t        j                  g ||       d d d        y c c}w # 1 sw Y   y xY w)Nr   rH   ra  zMExpected argument scatter_object_output_list to be a list of size at least 1.)
r  rc  r1  r   r  r  scatter_object_listr  r  r  )rV   r!  ru  r\  rK  r  output_obj_lists          rX   test_scatter_object_listz6DistributedTest._DistTestBase.test_scatter_object_list   s   H 99( -$@AqdA 
 ,,.J'4LAl#j0##LO4Q l#j0  $fO$$_lQ",II$@ AA ''_ I ((\xH	I I% B$I Is   	D$DDc           	      
   t        j                  dgt         j                        t        j                  dgt         j                        t        j                  dgt         j                        t        j                  dgt         j                        t        j                  dgt         j                        t        j                  dgt         j                        g}|D cg c]  }|j	                          }}|S c c}w )Nr   rj  r  )rS   rl  ra  rb  	to_sparse)rV   r  r   tensors_sparses       rX   3_generate_sparse_tensors_for_bucket_assignment_testzQDistributedTest._DistTestBase._generate_sparse_tensors_for_bucket_assignment_test8   s    RD4RD5RD4RD5RD4RD5G 6==akkm=N=!! >s   %D c                 D   t        j                  t        d      t         j                  j                        }dt
        j                  d<   t        j                  t        j                         t        d            }t        j                  j                  | j                         t        d      }t        j                  j                  j                  |j!                  | j                        | j                  g|	      }d
}| j#                  t$        |      5  | j'                         }|r$t        j(                  |dg|j*                         nt        j(                  |dg       d d d        |rt-        ||       t        j.                  |       y # 1 sw Y   -xY w)Nr*  r  r  r=  r  r  r   r=  r  r   r  zNo support for sparse tensors.i  )logger)r1  r  r	   r2  r3  r   rJ  r  rS   rn  r  r  r	  rN   rP  r!   rk  r  r  r  "_compute_bucket_assignment_by_sizer  r5   r  )rV   
use_loggerr  group_to_user  r.  r  s          rX   '_test_compute_bucket_assignment_by_sizezEDistributedTest._DistTestBase._test_compute_bucket_assignment_by_sizeE   sF   !"-t||7H7HJ
 69BJJ12>>((*Ia4HL JJ!!$)), .a0C((##;;tyy! II;* < C <L''lC LLN  ;;&cjj ;;& '\: LL$# s   AFFc                 (    | j                  d       y NFr  r  r  s    rX   Btest_compute_bucket_assignment_by_size_sparse_error_without_loggerz`DistributedTest._DistTestBase.test_compute_bucket_assignment_by_size_sparse_error_without_loggero   s     88E8JrY   c                 (    | j                  d       y NTr  r  r  s    rX   ?test_compute_bucket_assignment_by_size_sparse_error_with_loggerz]DistributedTest._DistTestBase.test_compute_bucket_assignment_by_size_sparse_error_with_loggert   s     88D8IrY   c                    |rd}| j                  t        |      }||fS t        j                         t        j                  j
                  k(  }| j                  dk(  rit        j                  |      t        j                  j                  k(  r|sd}| j                  t        |      }||fS d }| j                  t              }||fS d}| j                  t        |      }||fS )Nz'DDP expects same model across all ranksr   #caught collective operation timeoutzappears not to match)r  r  r1  r3  r4  r5  r  r  r2  r5  r#  )rV   r  r  r.  r  is_detail_dbg_modes         rX   2_determine_expected_error_verify_model_across_rankzPDistributedTest._DistTestBase._determine_expected_error_verify_model_across_ranky   s     H,,\<HL((!%!5!5!74??;Q;Q!QyyA~$$\2dll6G6GG.#HL00|LC $$ $(L++L9C $$  6,,\<H$$rY   c                 *   t        j                  t        d      t         j                  j                        }dt
        j                  d<   t        j                  t        j                         t        d            }t        j                  j                  | j                         | j                  |      \  }}t        d      }t        j                  j                  j!                  |j#                  | j                        | j                  g|	      }t        j$                  | j                  dk(  rd
ndd      |j&                  _        |5  |r9t+        |j,                  t/        |j1                               |j2                         n-t+        |j,                  t/        |j1                                      t        j4                  |       d d d        |r| j                  dk7  rt7        ||       t        j4                  |       y # 1 sw Y   <xY w)Nr*  r  r  r  r  r   r  r   r  rO  rE   rH   )r1  r  r	   r2  r3  r   rJ  r  rS   rn  r  r  r  r	  rN   rP  r!   rk  rO   r   r  r   rm  r  rs  r  r  r5   )rV   r  r  r  r  r.  r  s          rX   _test_verify_model_across_rankz<DistributedTest._DistTestBase._test_verify_model_across_rank   s   !"-t||7H7HJ
 69BJJ12>>((*Ia4HL JJ!!$)), $ W W!C
 .a0C((##;;tyy! II;* < C  YYdii1ns"aHCJJN  +8))40@+A3:: 9))40@+A
 \*+ dii1n'\: LL$)+ +s   A>H		Hc                 (    | j                  d       y r  r  r  s    rX   )test_verify_model_across_rank_with_loggerzGDistributedTest._DistTestBase.test_verify_model_across_rank_with_logger   s     //4/@rY   c                 (    | j                  d       y r  r  r  s    rX   ,test_verify_model_across_rank_without_loggerzJDistributedTest._DistTestBase.test_verify_model_across_rank_without_logger   s     //5/ArY   c                 &   |5  t         j                  j                  j                  |j	                  | j
                        | j
                  g|      }t        j                  |       d d d        t        j                  |       y # 1 sw Y   xY w)Nr  )rS   rN   rP  r!   rk  r  r1  r  )rV   r  r  	ddp_groupr  s        rX   $_run_test_ddp_model_with_diff_paramszBDistributedTest._DistTestBase._run_test_ddp_model_with_diff_params   sp     (hh''??FF499%499+Y @ 
 Y'( LL$( (s   A&BBc                    t        j                  t        d      t         j                  j                        }dt
        j                  d<   t        j                  t        j                         t        d            }t        j                  j                  | j                         | j                  |      \  }}t        | j                        }| j                  ||||       y )Nr*  r  r  r  r  rE   r  r1  r  r	   r2  r3  r   rJ  r  rS   rn  r  r  r  r	  r  rV   r  r  r  _expected_errr  s         rX   &test_ddp_model_diff_shape_across_rankszDDistributedTest._DistTestBase.test_ddp_model_diff_shape_across_ranks   s     !"-t||7H7HJ
 69BJJ12>>((*Ib4IL JJ!!$)),!%!X!X"C
 .dii8C55S,
rY   c                    t        j                  t        d      t         j                  j                        }dt
        j                  d<   t        j                  t        j                         t        d            }t        j                  j                  | j                         | j                  |d	      \  }}t        | j                  | j                  d
k(  	      }| j                  ||||       y )Nr*  r  r  r  r  rE   r  T)r  rH   r  r  s         rX   +test_ddp_model_diff_num_params_across_rankszIDistributedTest._DistTestBase.test_ddp_model_diff_num_params_across_ranks!  s     !"-t||7H7HJ
 69BJJ12>>((*Ib4IL JJ!!$)),!%!X!Xd "Y "C .		DIINC 55	rY   c                 
    |       }t        j                  |      }t        j                  j                  j                  t        j                  |      j                  | j                        | j                  gd      }t        j                  dd      }|t        k(  r ||      d   \  }} ||      d   \  }	}
n ||      \  }} ||      \  }	}
|
j                         }|j                          |t        k(  r| j                  |j                  j                  j                  j                  j                   d u        | j#                  |j                  j                  j                  j                  j                   |j                  j                  j                  j                          n| j                  |j                  j                  j                  j                   d u        | j#                  |j                  j                  j                  j                   |j                  j                  j                          d }d }|j%                          |j%                          t'        d      D ]  }|t        k(  r ||      d   \  }} ||      d   \  }	}
n ||      \  }} ||      \  }	}
|dk  r+||z  }|	|
z  }|j                         }|j                         }n |j                         }|
j                         }|j                          |j                          |dk(  r|t        k(  r_|j                  j                  j                  j                   }|j                  j                  j                  j                  j                   }nJ|j                  j                  j                   }|j                  j                  j                  j                   }| j#                  ||       n|dk\  r|t        k(  r| j#                  |j                  j                  j                  j                  j                   |       | j#                  |j                  j                  j                  j                   |       nj| j#                  |j                  j                  j                  j                   |       | j#                  |j                  j                  j                   |       t)        |j+                         |j+                               D ]/  \  }}|j                   }|j                   }| j#                  ||       1  t-        j.                          y )NTr  rE   r  ru  rJ   rH   )r  r  rS   rN   rP  r!   rn  r  rT   r   r  r)  r)  r   rP   r  r  r  rB  rs  r  rs  r1  r  )rV   
module_clsrM  r   	local_netr  r   rP   rQ   a_distb_dist	loss_distsaved_a_local_gradsaved_a_dist_gradr  r   t_distr  r;  
dist_param
local_grad	dist_grads                         rX   _test_output_unused_in_lossz9DistributedTest._DistTestBase._test_output_unused_in_loss&!  s-   LEe,I((##;;e$))$))4 II;'+ < C ++b"%C
 -- ~m41!$S-!8 ~1!$S

I  --

 1 1 3 3 : : ? ?4 GH  JJ%%''..33Y5E5E5G5G5N5N5S5S 

 3 3 8 8D @A  !4!4!9!99;;;M;M;R;RS!% $MMO!1X 1<!11$S>-8DAq%(Xm%<NFF$S>DAq%(XNFFq5 AA#f_F557D &

I 557D &

I""$6!%55-6-=-=-?-?-F-F-K-K*,/JJ,=,=,?,?,F,F,K,K)-6[[-?-?-D-D*,/JJLL,?,?,D,D)$$%79JK!V!%55((JJ--//66;;=N ((%,,..55::<N (()<)<)A)ACTU((););)@)@BTU 25((*CNN,<2 <-[* "-!1!1J *I$$Z;<Y1<f LLNrY   c                 B    t         }dD ]  }| j                  ||        y Nr  )r   r  rV   r  r  s      rX   'test_output_unused_in_loss_tuple_modulezEDistributedTest._DistTestBase.test_output_unused_in_loss_tuple_module!  s,     3J'4 R#00=PQRrY   c                 B    t         }dD ]  }| j                  ||        y r  )r   r  r  s      rX   &test_output_unused_in_loss_dict_modulezDDistributedTest._DistTestBase.test_output_unused_in_loss_dict_module!  s,     *J'4 R#00=PQRrY   c                 ^   t        j                  dd      j                  | j                        }t	               j                  | j                        }t        j                  |      }t         j                  j                  j                  || j                  gd      } ||      j                         } ||      j                         }t        j                  j                  j                         |      j                          t        j                  j                  j                         |      j                          t        |j!                         |j!                               D ]B  \  \  }}\  }}	|j"                  }
|	j"                  }| j%                  |
|d| d|
 d| d|        D y )	NrH   rJ   Tr  z
DDP param z with grad z0
                    does not match local param z with grad
                    )rS   r   rk  r  r   r  r  rN   rP  r!   r  _C
_functionsUndefinedGradr)  r  r  r  r  )rV   r^   r  r  r  	local_outdist_param_namer  local_param_namer;  r  r  s               rX   ,test_undefined_grad_parity_unused_parameterszJDistributedTest._DistTestBase.test_undefined_grad_parity_unused_parameters!  sn    

1a ##DII.A%((499%Cc*I((##;; II;'+ < C
 a&**,C!!((*IHH--/4==?HH--/	:CCERU$$&	(B(B(DS N-*/N0@+ 'OO	(--
  "?"3;yk J00@/A BL$rY   c                 0    G d dt         j                        }t        j                  d       t        j                  j                  | j                          || j                        j	                  | j                        }t        j                   j                  j                  || j                  g|d|      }t        j                  dd| j                        }t        d      D ]*  } ||      }|j                         }	|	j                          , |S )	Nc                   $     e Zd Z fdZd Z xZS )RDistributedTest._DistTestBase._test_different_graph_across_ranks.<locals>.ToyModelc                     t         |           t        j                  ddd      | _        t        j                  ddd      | _        || _        y r  r  r-  s     rX   rM   z[DistributedTest._DistTestBase._test_different_graph_across_ranks.<locals>.ToyModel.__init__!  r  rY   c                     | j                   dk(  r3| j                  t        j                  | j	                  |                  S t        j                  | j	                  |            S ra  )r  r  r   r   r  r]   s     rX   r_   zZDistributedTest._DistTestBase._test_different_graph_across_ranks.<locals>.ToyModel.forward!  sD    yyA~#yy		!)=>> vvdiil33rY   rb   rg   s   @rX   ri  r  !  s    %4rY   ri  rv  T)rL  r  rM  rN  r  rE   r  )rN   r   rS   r  rn  r  r  rP  r!   rT   rs  r  r)  )
rV   r  rN  ri  r   rx  r  ru  r  r  s
             rX   "_test_different_graph_across_ranksz@DistributedTest._DistTestBase._test_different_graph_across_ranks!  s    4299 4 e$JJ!!$)),TYY',,TYY7E))AA II;'=(,) B I !;;r2dii@L2Y  -wwy  rY   c                    | j                  d      }| j                  |j                         j                  dd             | j                  d      }| j	                  |j                         j                  dd             t        |j                         |j                               D ]  \  }}| j                  ||        y )NTr  r	  r   r@  )r	  rH  r  r   r)  r  rs  r  )rV   
base_modelstatic_modelr  r  s        rX   !test_different_graph_across_ranksz?DistributedTest._DistTestBase.test_different_graph_across_ranks!  s     @@'+ A J 002667LaP  BBPTBULOO224889NPQR J113\5L5L5NO '1  A&'rY   zUMacOS uses uv transport which does not have as robust error handling as tcp transportc                 6   t        j                  d      | j                  z  g}t        d      D ]*  }t	        j
                  t        j                  |             , t        d      }t	        j                  |       t        d      D ]*  }t	        j
                  t        j                  |             , t	        j                  |d       d}d}| j                  |k(  r:| j                  t        d	| d
      5  t	        j                  |       d d d        nV| j                  |k7  rGd	| j                   d| }| j                  t        |      5  t	        j                  |       d d d        | j                  d       y # 1 sw Y   xY w# 1 sw Y   (xY w)NrE   rJ   r  r@  Tr  wait_all_ranksrH   r   r    failed to pass monitoredBarrierb successfully reached monitoredBarrier, but received errors while waiting for send/recv from rank    )rS   r   r  rs  r1  r  r.  r	   monitored_barrierr  r  r  )rV   r  ru  r  failed_rankr!  	err_regexs          rX   test_monitored_barrier_glooz9DistributedTest._DistTestBase.test_monitored_barrier_gloo!  sj    zz"~		12G2Y 4		' 234  *G""732Y 4		' 234""74H KHyyH$++ E+6V"W < **7;< < k) DII; ' z# 
 ++L)D <**7;<
 MM"M%!< << <s   :FFFFc                 2   d}d}t        j                  ddg      }| j                  |k(  ry | j                  dk(  r:| j                  t        d| d      5  t        j
                  ||       d d d        y t        j
                  ||       y # 1 sw Y   y xY w)NrH   r  r   ro  r  r		  )r1  r  r  r  r  r	  )rV   r	  r  rE  s       rX   $test_monitored_barrier_gloo_subgroupzBDistributedTest._DistTestBase.test_monitored_barrier_gloo_subgroup"  s     KG~~QF3HyyK'yyA~++ E+6V"W > **8W=> > &&x9> >s   BBc           	         t        j                  t        t        t	        | j
                                    t        d      t         j                  j                        }t        j                  t        t        t	        | j
                                    t         j                  j                        }t        j                  d| j                        | j                  z  g}|j                  |      j                  t        d             | j                  dk7  r~t        j                         t         j                   j"                  k(  rd	}nd
}| j%                  t&        |      5  |j                  |      j                  t        d             d d d        n|rLdj)                  t        dt	        | j
                              D cg c]  }t+        |       c}      }d| d}nd}d| d}t        d      }	| j%                  t&        |      5  |j-                  |	|       d d d        | j/                  d       y # 1 sw Y   xY wc c}w # 1 sw Y   -xY w)Nr~  r  )ro  r  r=  rn  rE   r  r   r   zTimed out waitingr  r  r  rH   Ranks r		  r  r	  r	  r@  )r1  r  r  rs  r   rK  r	   r2  r5  r3  rS   r   r  rV  r  r3  r4  r5  r  r  rR  r   r	  r  )
rV   r	  nccl_pggloo_pgr  r	  r  rank_strexpected_first_fail_rank!monitored_barrier_timeout_secondss
             rX   &_test_monitored_barrier_allreduce_hangzDDistributedTest._DistTestBase._test_monitored_barrier_allreduce_hang+"  s   nn5T__!567 ""-))G nn5T__!567))G zz"TYY7$))CDG g&++Ia,@A yyA~
 '')T__-C-CC 3I EI++L)D L%%g.33Ic4JKL L
 "#yy).q#doo2F)GHAQH H #)
2R SI/0,"'(@'AAa bI4=c4J1++L)D --9. . 
 MM"M%'L L I s   &+IIII
Ic                 (    | j                  d       y )NFr	  )r	  r  s    rX   %test_monitored_barrier_allreduce_hangzCDistributedTest._DistTestBase.test_monitored_barrier_allreduce_hang`"  s     77u7MrY   c                 N    dt         j                  d<   | j                  d       y )Nr  TORCH_NCCL_DUMP_ON_TIMEOUTTr	  )r   rJ  r	  r  s    rX   4test_monitored_barrier_allreduce_hang_wait_all_rankszRDistributedTest._DistTestBase.test_monitored_barrier_allreduce_hang_wait_all_ranksh"  s&    
 8;BJJ34 77t7LrY   c           	      F   t        j                  t        t        t	        | j
                                          }t        d      }| j                  dk(  r?| j                  t        d| j                   d      5  |j                  |       d d d        y y # 1 sw Y   y xY w)Nr	  r   r  r  z timed out in monitoredBarrier)r1  r  r  rs  r   rK  r	   r  r  r  r	  )rV   rm  r  s      rX   *test_monitored_barrier_gloo_rank_0_timeoutzHDistributedTest._DistTestBase.test_monitored_barrier_gloo_rank_0_timeoutr"  s     !NNeC<P6Q1RSM*GyyA~++ E$))4R"S = "33G<= = = =s   ;BB c                    d}t        d      }d}| j                  |k(  r9| j                  t        d|       5  t	        j
                  |       d d d        y | j                  dk(  rHd| j                   d| }| j                  t        |      5  t	        j
                  |       d d d        y y # 1 sw Y   y xY w# 1 sw Y   y xY w)NrJ   r  r   r  r@  rH   r
	  )r	   r  r  r  r1  r	  )rV   expected_first_failed_rankr  r!  r	  s        rX   $test_monitored_barrier_failure_orderzBDistributedTest._DistTestBase.test_monitored_barrier_failure_order}"  s     *+&*GHyyH$++ E*D)E"F < **7;< < aDII; ' z# 
 ++L)D <**7;< <  	< << <s   B1B=1B:=Cc                 `   | j                   dk(  rt        d      }dj                  t        dt	        | j
                              D cg c]  }t        |       c}      }d| d}| j                  t        |      5  t        j                  |d	       d d d        y y c c}w # 1 sw Y   y xY w)
Nr   r  r  r  rH   r	  r		  Tr	  )r  r	   rR  rs  r   rK  r   r  r  r1  r	  )rV   r  r  r	  r	  s        rX   %test_monitored_barrier_wait_all_rankszCDistributedTest._DistTestBase.test_monitored_barrier_wait_all_ranks"  s    
 yyA~#C099eAs4???S6T%Uc!f%UV$XJ.NO	++L)D Q**74PQ Q	 %UQ Qs   B=B$$B-r  c                    t               }t        j                  j                  j	                  |j                  | j                        | j                  g      }ddd}|j                         \  }}|j                  |      }| j                  ||       t               }dg}t        j                  j                  j                  j                  ||       t        j                  j                  j	                  |j                  | j                        | j                  g      }ddi}|j                         \  }}|j                  |      }| j                  ||       t               }t        j                  j                  j	                  |j                  | j                        | j                  g      }|j                         \  }}| j                  dk(  rt        t        |d                |j                  t        j                  j                  t        j                  d            t        j                  j                  t        j                  d            g       | j!                  t"        d      5  |j                  |       d d d        |d d }| j!                  t"        d	      5  |j                  |       d d d        |j                  t        j                  j                  t        j                  d            t        j                  j                  t        j                  d            g       y # 1 sw Y   xY w# 1 sw Y   xY w)
Nrj  r  zb.weightr@  r   rH   zExpected param to name mappingrd  zParam with name)r  rS   rN   rP  r!   rn  r  _build_params_for_reducer"_build_debug_param_to_name_mappingassertDictEqualr  r   r~  r  r   r   r  r  )rV   r   r  expected_mapping
net_paramsru  param_to_name_mappingr  s           rX   *test_ddp_build_debug_param_to_name_mappingzHDistributedTest._DistTestBase.test_ddp_build_debug_param_to_name_mapping"  s    #$E((##;;

499% II; < C $.*=99;MJ$'$J$J:$V!  !13HI #$E *|HH55aa' ((##;;

499% II; < C !":99;MJ$'$J$J:$V!  !13HI
 #$E((##;;

499% II; < C  99;MJyyA~d:a=)*HH&&uzz!}5HH&&uzz!}5 ''
4TU C66zBC $CRJ''
4EF C66zBC HH&&uzz!}5HH&&uzz!}5C CC Cs   :L=/M	=M	Mc                 V    G d dt         j                        } |       }t        j                   j                  j	                  |j                  | j                        | j                  g      }ddi}|j                         \  }}|j                  |      }| j                  ||       y )Nc                   &     e Zd Zd fdZd Z xZS )cDistributedTest._DistTestBase.test_ddp_build_debug_param_to_name_mapping_requires_grad.<locals>.Netc                     t         |           t        j                  dd      | _        | j                  j
                  j                  d       y )NrE   F)rL   rM   rN   rO   r  rG   r  rU   s    rX   rM   zlDistributedTest._DistTestBase.test_ddp_build_debug_param_to_name_mapping_requires_grad.<locals>.Net.__init__"  s8    G$&!yyR0DH HHMM007rY   c                 $    | j                  |      S rk   r  r]   s     rX   r_   zkDistributedTest._DistTestBase.test_ddp_build_debug_param_to_name_mapping_requires_grad.<locals>.Net.forward"  s    88A;&rY   r`   rb   rg   s   @rX   r   r2	  "  s    8'rY   r   rj  r   z
lin.weight)
rN   r   rS   rP  r!   rn  r  r)	  r*	  r  )rV   r   r   r  r,	  r-	  ru  r.	  s           rX   8test_ddp_build_debug_param_to_name_mapping_requires_gradzVDistributedTest._DistTestBase.test_ddp_build_debug_param_to_name_mapping_requires_grad"  s    	'bii 	' EE((##;;

499%499+ < C <   99;MJ$'$J$J:$V!24DErY   c           
         t        j                         t         j                  j                  k(  } G d dt        j
                         G fddt        j
                        } |       }g }|r|j                         D ][  \  }}||j                  j                  j                  k(  s*|j                  d      D ]  \  }}	| d| }
|j                  |
        ] t        j                  j                  j                  j                  ||       |j                  j                  j                   |j                  j                   j"                  g}nOt%        |j                  j                  j'                               |j                  j                   j"                  gz   }g }g }i }d}|j                         D ]  \  }}|j                  d      D ]g  \  }}	| d| }
|||
<   |
|vr|d	z  }||v r|j                  |
       1|r$||j                  j                  j                  k7  sW|j                  |
       i  t        j                  j                  j                  |j)                  | j*                        | j*                  g
      }d\  }}t        j,                  ||      }t/        d      D ]k  }|dk(  r) ||      }|j1                         }|j3                          1	  ||      }|j1                         }|j3                          | j5                  dd       m y # t6        $ r}t9        |      }||j;                  d      d  }|D ]B  }| j5                  ||v xs |       | j5                  t9        ||         |v d||    d|        D |D ]  }| j=                  ||v         |D ]  }| j=                  ||v         Y d }~d }~ww xY w)Nc                   &     e Zd Zd fdZd Z xZS )^DistributedTest._DistTestBase._test_ddp_multiple_nested_unused_params_error.<locals>.SubModulec                     t         |           t        d      | _        t	               | _        t               | _        t        j                  ddd      | _
        y )Nr   r{   rE   FrF   )rL   rM   r	  embedding_netr  r  r   r   rN   rO   	lin_layerrU   s    rX   rM   zgDistributedTest._DistTestBase._test_ddp_multiple_nested_unused_params_error.<locals>.SubModule.__init__#  sB    G$&)DQ)GD&-/DH*nDG%'YYq"5%ADNrY   c                     | j                  |      }| j                  |      }| j                  j                  |      }|S rk   )r   r<	  r  rP   r]   s     rX   r_   zfDistributedTest._DistTestBase._test_ddp_multiple_nested_unused_params_error.<locals>.SubModule.forward#  s5    
Aq)A

1A HrY   r`   rb   rg   s   @rX   	SubModuler9	  #  s    BrY   r>	  c                   *     e Zd Zd fdZd Z xZS )\DistributedTest._DistTestBase._test_ddp_multiple_nested_unused_params_error.<locals>.MyModelc                 :    t         |                   | _        y rk   )rL   rM   
sub_module)rV   r>	  rW   s    rX   rM   zeDistributedTest._DistTestBase._test_ddp_multiple_nested_unused_params_error.<locals>.MyModel.__init__#  s    G$&&/kDOrY   c                 $    | j                  |      S rk   )rB	  r]   s     rX   r_   zdDistributedTest._DistTestBase._test_ddp_multiple_nested_unused_params_error.<locals>.MyModel.forward#  s    ??1--rY   r`   rb   )rW   r>	  s   @rX   r  r@	  #  s    2.rY   r  F)recurser#  r   rH   rj  )rE   rJ   rJ   zExpected error was not raised!zdid not receive gradzDid not find index z for )r1  r3  r4  ri  rN   r   r2  rB	  r;	  r  r  r  rS   rP  r!   r  r  rQ   r  modulesrn  r  r   rs  r  r)  r)  r  r   findrH  )rV   ignore_sparsedebug_mode_offr  r   sparse_embedding_fqnsr  r   parameter_name_paramfqnunused_modulesexpected_unused_param_fqnsused_param_fqnsfqn_to_param_indexre  r  r  r   r   r  r  r  r   unused_param_substrunused_param_fqnused_param_fqnsparse_param_fqnr>	  s                               @rX   -_test_ddp_multiple_nested_unused_params_errorzKDistributedTest._DistTestBase._test_ddp_multiple_nested_unused_params_error #  s   !113t7J7JJNBII  .")) . IE$&!+0+>+>+@ >'K!1!1!?!?!I!II6<6M6MV[6M6\ >2NF%0M>2B"CC188=>> !!99ee0 $$2266$$((**"
 "&e&6&6&D&D&L&L&N!O$$((**S " *,& O!#E',':':'< 8#V.4.E.Ee.E.T 8*NF(M>*:;C.3&s+"77
/299#> !.%)9)9)G)G)Q)QQ+223788 ((##;;

499% II; < C JE3**UC(C1X 'Q6c(C779DMMO!Q!#h"wwy< /OPO'Q ( VF./7M0N0P.Q+ 1K 	, OO 04G G !2#1 !OO #$67G$H I#6!7"56HIY6Z5[[`aq`r s	 /> TN ,,^?R-RST 1F V, ,,-=AT-TUV5Vs   %(L""	O+BOOc                 (    | j                  d       y )NFrG	  rU	  r  s    rX   ,test_ddp_multiple_nested_unused_params_errorzJDistributedTest._DistTestBase.test_ddp_multiple_nested_unused_params_errorr#  s     >>U>SrY   c                 (    | j                  d       y )NTrW	  rX	  r  s    rX   8test_ddp_multiple_nested_unused_params_err_ignore_paramszVDistributedTest._DistTestBase.test_ddp_multiple_nested_unused_params_err_ignore_paramsx#  s     >>T>RrY   c                 L   | j                   }t        j                  j                  |       t	               j                         }t        j                  |      }t        j                  j                  j                  ||g      }t        j                  ddd      j                         }t        j                  |      }t        j                  j                  j                  ||g      }t        j                  dd|      }t        j                  dddd|      }|||f|||fg}|D ]f  }	|	\  }
}}| j                   d	k(  s|
j                          |j                          t        d
      D ]   }| j                   |
|       ||             " h | j                  d       y )Nrj  rJ   r   Fr!  track_running_statsrE   r  r{   r   ru  r	  r@  )r  rS   rn  r  r   r  r  rN   rP  r!   r  rT   evalrs  r  r  )rV   r  r   r  syncbn_modellocal_syncbn_modelr   
inp_syncbnr  test
test_modeltest_local_modeltest_inpru  s                 rX   test_ddp_inferencez0DistributedTest._DistTestBase.test_ddp_inference#  s    99DJJ!!$'EJJLE--.KHH%%== 6 > E ++Dedf  "&|!< 88,,DD$ E L ++b!D1CRAq>JS)1:>E  9=6
,h99>OO%$))+"1X ((&x02B82L MM"M%rY   c                    | j                   }t        j                  j                  |       t	        j
                  ddd      j                  |      }t        j                  j                  j                  ||g      }t        j                  j                  j                         5 }t        d      D ]Q  }t        j                  dddd      j                  |      } ||      }|j                         }|j                          S 	 d d d        t        d	k(  rt!        d
      }nt!        d      }| j#                  g |       |j$                  }	| j                   dk(  r|	j'                          t        j                  j                  j                         5 }t        d      D ]Q  }t        j                  dddd      j                  |      } |	|      }|j                         }|j                          S 	 d d d        t        d	k(  rt!        d
|      }nt!        d|      }| j)                  g |       y y # 1 sw Y   0xY w# 1 sw Y   LxY w)NrJ   r   Fr]	  rj  ru  rE   r{   r  _all_gather_baser  r   )r  rS   rn  r  rN   r  rP  r!   rz  r   r   rs  rT   r  r)  r   r   r  r   r_	  r  )
rV   r  r   r   ru  r   r  r  all_gather_callsmodel_inferences
             rX   !test_ddp_sync_bn_training_vs_evalz?DistributedTest._DistTestBase.test_ddp_sync_bn_training_vs_eval#  s    99DJJ!!$' $$Q5QVVE HH%%==eQUPV=WE((002 $dq $A++b!Q277=C*C779DMMO	$$ & #67I4#P #6|T#J $45 $llOyyA~$$&^^,,446 ($"1X (#kk"aA6;;DA-c2"wwy	(( f$':;Mt'T$':<'N$  %56 %$ $(( (s   'A H6A I6I Ic                 L   t               j                  | j                        }t        j                  j
                  j                  || j                  g      }d}| j                  t        |      5  |j                  i i        d d d        t        ||       y # 1 sw Y   xY w)Nrj  zmust be callable)r  rn  r  rS   rN   rP  r!   r  	TypeErrorr  r5   )rV   r   r.  s      rX   test_ddp_python_error_loggedz:DistributedTest._DistTestBase.test_ddp_python_error_logged#  s     #$))$))4EHH%%== II; > E .L''	<@ 1((R01 $E<81 1s   2BB#c           	         | j                   }t        j                  j                  |        G d dt        j                  j
                        }fd |       j                  |      }t        j                  |      }t        j                  j                  j                  ||g      }t        j                  j                  j                  ||gd      }t        j                  dd      }t        t        t        d	}|j                         D ]  }t        d
      D ]  } |||      }	 |	      }
|
j!                          | j#                  |        |||      }| j%                  t'        |||                 |      }|j!                          | j#                  |       t)        |j+                         |j+                               D ]  \  }}| j-                  ||          y )Nc                   &     e Zd Zd fdZd Z xZS )\DistributedTest._DistTestBase.test_ddp_static_graph_nested_types.<locals>.NestedOutputModulec                 \    t         |           t        j                  ddd      | _        y )NrO  rH   FrF   r  rU   s    rX   rM   zeDistributedTest._DistTestBase.test_ddp_static_graph_nested_types.<locals>.NestedOutputModule.__init__#  s"    G$&!yyae<DHrY   c                 8   |dk(  r3| j                  |      | j                  |      | j                  |      ffS |dk(  r3| j                  |      | j                  |      | j                  |      ggS |dk(  r%| j                  |      d| j                  |      idS y )Nr   r  r  r   r   r5	  )rV   r   output_types      rX   r_   zdDistributedTest._DistTestBase.test_ddp_static_graph_nested_types.<locals>.NestedOutputModule.forward$  s    "g- HHSM $ $   %. HHSM $ $   %.!%# #TXXc]"   /rY   r`   rb   rg   s   @rX   NestedOutputModulerr	  #  s    =rY   rv	  c                 B   d}t        | t        j                        r| j                         S t        | t              r"| j                         D ]  }| |      z  } |S t        | t        t        f      r| D ]  }| |      z  } |S t        dt        |              )Nr  zUnknown model output type )
rn   rS   ro   r  r  rN  r   r  r  r~  )model_outputr  rq   r^   get_losss       rX   ry	  zRDistributedTest._DistTestBase.test_ddp_static_graph_nested_types.<locals>.get_loss$  s    lELL9'++--d3!-!4!4!6 0/0   udm<) ,+,  %'A$|BTAU%VWWrY   rj  Tr  rE   rO  )r  r   r  ru  )ru	  )r  rS   rn  r  rN   r   r  r  rP  r!   rT   r  r   r  r  rs  r)  r  r)  rn   r  rs  r  )rV   r  rv	  r   model_static_graphr   type_mappingru	  ru  r  r  
out_staticloss_staticr   p_staticry	  s                  @rX   "test_ddp_static_graph_nested_typesz@DistributedTest._DistTestBase.test_ddp_static_graph_nested_types#  s    99DJJ!!$'UXX__ < '(--d3E!%u!5HH%%== 6 > E "'!2!2!J!J 6! "K "
 ++b#&CL
  ,002 6q 6A=C#C=DMMO$$U+!3C[!QJOOJz<;T$UV"*:"6K((*$$%78),((*,>,I,I,K* 6H ((H5666rY   c                 X   t         j                  j                  | j                          G d dt        j
                        } |       j                  | j                        }t        j                  dd| j                        }t        j                  ddgddg      D ]  \  }}t        || j                  g| j                  ||      }t        d	      D ]Q  } ||      }| j                  |d
   j                         |d
   |d   z   j                         }	|	j                          S  y )Nc                   &     e Zd Zd fdZd Z xZS )SDistributedTest._DistTestBase.test_ddp_returns_tensor_with_no_grad.<locals>.MyModelc                     t         |           t        j                  ddd      | _        t        j                  ddd      | _        y r  r   rU   s    rX   rM   z\DistributedTest._DistTestBase.test_ddp_returns_tensor_with_no_grad.<locals>.MyModel.__init__T$  s6    G$&!yyRe<DH!yyRe<DHrY   c                     | j                  t        j                  | j                  |                  }|j	                         }|j                         }|j                  rJ ||fS rk   )r   r   r   r   r%  detachr   )rV   r^   r  s      rX   r_   z[DistributedTest._DistTestBase.test_ddp_returns_tensor_with_no_grad.<locals>.MyModel.forwardY$  sM    !45A	A
A ..q6MrY   r`   rb   rg   s   @rX   r  r	  S$  s    =
"rY   r  rH   rE   r  TF)rL  rS  r  rN  ru  r   )rS   rn  r  r  rN   r   rk  rT   r/  r  r!   rs  rH  r   r  r)  )
rV   r  r   r   r3  rN  r_  ru  r  os
             rX   $test_ddp_returns_tensor_with_no_gradzBDistributedTest._DistTestBase.test_ddp_returns_tensor_with_no_gradJ$  s     JJ!!$)),"")) " ILL+E++aDII6C/8/@/@ue}0 !+l . $		{"&))+6!- q !Ac(C$$SV%9%9:Q#a&--/AJJL	!!rY   c                     G d dt         j                        }t        j                  j	                  | j
                          |       j                         }dD ]  }t        j                   j                  j                  || j
                  g|      }t        j                  ddd      }t        d	      D ]V  } |||d
      }|j                         }|j                          | j                  |j                  j                                X  t        j                   j                  j                  || j
                  gd      }t        j                  ddd      }t        d	      D ]3  }	 ||d|	dz  dk(        }|j                         }|j                          5 | j                  |j                  j                                y )Nc                   &     e Zd Zd fdZd Z xZS )RDistributedTest._DistTestBase.test_detect_ddp_is_actually_static.<locals>.ToyModelc                     t         |           t        j                  ddd      | _        t        j                  dd      | _        y r  rF  rU   s    rX   rM   z[DistributedTest._DistTestBase.test_detect_ddp_is_actually_static.<locals>.ToyModel.__init__y$  s4    G$& "		"bu =DI "		"b 1DIrY   c                     |r3|r | j                  | j                  |            S | j                  |      S | j                  | j                  |            S rk   )rh  rg  )rV   r^   r3  dynamics       rX   r_   zZDistributedTest._DistTestBase.test_detect_ddp_is_actually_static.<locals>.ToyModel.forward~$  sD    ""#'99TYYq\#::#'99Q</#yy166rY   r`   rb   rg   s   @rX   ri  r	  x$  s    2
7rY   ri  r  r  rH   rE   rn  r  ru  F)r3  r	  TrJ   r   )rN   r   rS   rn  r  r  rP  r!   rT   rs  r  r)  r)  r  _ddp_graph_staticrH  )
rV   ri  r   r3  r_  r   ru  r  r  r  s
             rX   "test_detect_ddp_is_actually_staticz@DistributedTest._DistTestBase.test_detect_ddp_is_actually_staticr$  sz   7299 7  JJ!!$)),JOO%E, Ehh''?? $		{+6 @ 
 kk!R7q EAc{EJC779DMMOOOCKK$A$A$CD	EE ((##;; II;'+ < C
 ++aF3C1X  #4Q!Dwwy  S[[::<=rY   c           	          G d dt         j                        } |       j                  | j                        }dD ]7  }t	        || j                  g| j                  d||      }t        d      D cg c]  }d  }}t        d      D ]  }|j                          t        j                  dd	| j                  
      }	 ||	|d   |d   |d         \  }
|d<   |d<   |d<   t        t        |            D ]\  }t        j                  ||         r | j                  ||   j                  d        ;| j                  ||   d   j                  d        ^ |
j                         j                           : y c c}w )Nc                   ,     e Zd Zd fdZd Zd Z xZS )JDistributedTest._DistTestBase._test_ddp_new_tensor_in_fwd.<locals>.MyModelc                     t         |           t        j                  ddd      | _        t        j                  ddd      | _        | j                  j                  j                  | _        y r  )rL   rM   rN   rO   r   r   r  r  rU   s    rX   rM   zSDistributedTest._DistTestBase._test_ddp_new_tensor_in_fwd.<locals>.MyModel.__init__$  sK    G$&!yyRe<DH!yyRe<DH"&((//"8"8DKrY   c                 J    t        j                  dd| j                        }|S )NrH   rE   r  )rS   rT   r  )rV   r  s     rX   
__init_optzUDistributedTest._DistTestBase._test_ddp_new_tensor_in_fwd.<locals>.MyModel.__init_opt$  s    ++aDKK@CJrY   c                    t        j                  | j                  |            }| j                  |      }|| j	                         }|| j	                         }|t        j                  |      s| j	                         }|||d|ifS )Nr   )r   r   r   r   _MyModel__init_optrS   	is_tensor)rV   r^   opt_1opt_2
opt_nesteds        rX   r_   zRDistributedTest._DistTestBase._test_ddp_new_tensor_in_fwd.<locals>.MyModel.forward$  sz    txx{+AA} $ 1} $ 1!)1L%)__%6
 eUXz,BBBrY   r`   )rc   rd   re   rM   r	  r_   rf   rg   s   @rX   r  r	  $  s    9CrY   r  r  F)rL  rS  r  r  rN  rz   rJ   rH   rE   r  r   )r	  r	  r	  r   )rN   r   rk  r  r!   rs  rB  rS   rT   r  r	  r  grad_fnr  r)  )rV   rN  r  r   r3  r_  ru  r  r  r^   r  s              rX   _test_ddp_new_tensor_in_fwdz9DistributedTest._DistTestBase._test_ddp_new_tensor_in_fwd$  sY   C")) C0 ILL+E, *- $		{"&))&++6!- &+1X.t..q *AMMOAr$))<A25Qs1v#a&3/CQQQ #3s8_ M ??3q62 ,,SV^^TB ,,SVH-=-E-EtL	M
 HHJ'')** /s   3	E8c                 &    | j                  d      S )NFr@  r	  r  s    rX   test_ddp_new_tensor_in_fwdz8DistributedTest._DistTestBase.test_ddp_new_tensor_in_fwd$  s     333GGrY   c                 &    | j                  d      S )NTr@  r	  r  s    rX   'test_ddp_new_tensor_in_fwd_static_graphzEDistributedTest._DistTestBase.test_ddp_new_tensor_in_fwd_static_graph$  s     333FFrY   c                    | j                   }t        j                  j                  |       t        j                  |       t        j                  j	                  |       fd}t        j
                  j                  j                  j                  j                  }t        j
                  j                  j                  j                  j                  }||fD ]  }t               j                  |      }t        j
                  j                  j                  || j                   g      }|j                  |||       t        j
                  j                  j                  t        j                  |      | j                   gd      }	t        j                   dd|      }
t#        d      D ]&  } ||
      j%                         }||k(  r?t'        |	j(                  j+                               }|D ]  }t-        j.                  |         |	|
      j%                         }||k(  r?t'        |	j(                  j+                               }|D ]  }t-        j.                  |        t        j                  j1                          s| j3                  ||	       |j5                          |j5                          s||k(  s| j3                  ||	       ) t-        j6                           y )Nc                 <   |j                         D cg c]  \  }}|	 }}}|D cg c]2  }t        j                  || j                  d      j	                         4 }}r|S t
        j                  j                  |      j                          y c c}}w c c}w )NT)r  r  )	ru   r1  r  rm  rW  rS   r]  collect_allr  )r_  r  ru  rI   r  futsreturn_futuress         rX   buffer_comm_hookzWDistributedTest._DistTestBase._test_ddp_buffer_hook_allreduce.<locals>.buffer_comm_hook$  s    5B5H5H5JKkq&6KK
 #*	  OOc&7&7$ jl#  "KMM--d388: Ls
   B7Brj  F)rL  r  rJ   rE   r  )r  rS   rn  r  r  rN   rP  r#  _BufferCommHookLocationPRE_FORWARDPOST_FORWARDrB   r!   _register_buffer_comm_hookr  r  rT   rs  r  r  r   r  r1  r  r  r  r)  r  )rV   r	  r  r	  hook_pre_fwdhook_post_fwdhook_run_locationr   	model_ddpmodel_ddp_no_hookr   ru  r  model_no_hook_buffersr   loss_no_hooks    `              rX   _test_ddp_buffer_hook_allreducez=DistributedTest._DistTestBase._test_ddp_buffer_hook_allreduce$  su   99DJJ!!$'d#JJ""4(; !!--EEQQ  !!--EERR  & 2! '(--d3!HH--EE $		{ F 	 44/1B %*HH$5$5$M$MMM%( $		{&+ %N %!
 kk!R5q QA )# 2 2 4I )L8045F5M5M5U5U5W0X-&; 4F OOF34 $5S#9#=#=#?L(M9045F5M5M5U5U5W0X-&; 4F OOF34JJ**, *229>OP&&( ))+ &*;}*L229>OP?Q@ e2rY   c                 (    | j                  d       y )NTr	  r	  r  s    rX   ,test_ddp_buffer_hook_allreduce_return_futurezJDistributedTest._DistTestBase.test_ddp_buffer_hook_allreduce_return_future4%  s     000ErY   c                 (    | j                  d       y )NFr	  r	  r  s    rX   test_ddp_buffer_hook_allreducez<DistributedTest._DistTestBase.test_ddp_buffer_hook_allreduce<%  s     000FrY   c                 (   | j                   }t        j                  j                  |       t        j                  |       t        j                  j	                  |       d }t               j                  |      }t        j                  j                  j                  || j                   g      }|j                  ||       t        j                  j                  j                  t        j                  |      | j                   g      }t        j                  dd|      }t        d      D ]`  } ||      j                         } ||      j                         }	| j                  ||       |j!                          |	j!                          b y )Nc                 v    |j                         D cg c]  \  }}|	 }}}| j                  |       y c c}}w rk   )ru   _default_broadcast_coalesced)r_  r  ru  rI   r  s        rX   r	  zZDistributedTest._DistTestBase.test_ddp_broadcast_buffer_via_hook.<locals>.buffer_comm_hookQ%  s9     6C5H5H5JKkq&6KK009 Ls   5rj  rJ   rE   r  )r  rS   rn  r  r  rB   rN   rP  r!   r	  r  r  rT   rs  r  r  r)  )
rV   r  r	  r   r	  r	  r   ru  r  r	  s
             rX   "test_ddp_broadcast_buffer_via_hookz@DistributedTest._DistTestBase.test_ddp_broadcast_buffer_via_hookD%  sC    99DJJ!!$'d#JJ""4(: #$))$/E))AA II; B I 00<LM % 1 1 I Ie$ II; !J ! ++aD1C1X (%cN..0	0599;**96GH""$%%'(rY   c                     G d dt         j                  j                         G fddt        j                        } || j
                        }t        j                  ddd      j                  | j
                        }t         j                  j                  j                  || j
                  g      }| j                  t              5   ||      j                         j                          d d d        |j                          d	|_        t         j                  j                  j                  || j
                  g      } ||      j                         j                          y # 1 sw Y   {xY w)
Nc                   ,    e Zd Zed        Zed        Zy)SDistributedTest._DistTestBase.test_ddp_remove_autograd_hooks.<locals>.SimulateErrorc                     |S rk   ry   )r  r  s     rX   r_   z[DistributedTest._DistTestBase.test_ddp_remove_autograd_hooks.<locals>.SimulateError.forwardq%  s     LrY   c                     t         rk   )r  )r  grad_outputs     rX   r)  z\DistributedTest._DistTestBase.test_ddp_remove_autograd_hooks.<locals>.SimulateError.backwardu%  s    &&rY   N)rc   rd   re   staticmethodr_   r)  ry   rY   rX   SimulateErrorr	  p%  s(    ! ! ' 'rY   r	  c                   *     e Zd Z fdZfdZ xZS )MDistributedTest._DistTestBase.test_ddp_remove_autograd_hooks.<locals>.MyModelc                     t         |           d| _        t        j                  dd      j                  |      | _        y )NTrE   )rL   rM   errorrN   rO   rn  r   )rV   r  rW   s     rX   rM   zVDistributedTest._DistTestBase.test_ddp_remove_autograd_hooks.<locals>.MyModel.__init__z%  s2    G$&!%DJ!yyR055f=DHrY   c                 ~    | j                   r | j                  j                  |            S | j                  |      S rk   )r	  r   apply)rV   r   r	  s     rX   r_   zUDistributedTest._DistTestBase.test_ddp_remove_autograd_hooks.<locals>.MyModel.forward%  s2    zz#xx(;(;C(@AA#xx},rY   rb   )rW   r	  s   @rX   r  r	  y%  s    >
- -rY   r  rE   Tr   rj  F)rS   rz  FunctionrN   r   r  r  rn  rP  r!   r#  r  r  r)  _remove_autograd_hooksr	  )rV   r  r   r  
model_ddp1
model_ddp2r	  s         @rX   test_ddp_remove_autograd_hooksz<DistributedTest._DistTestBase.test_ddp_remove_autograd_hooksi%  s$   ' 7 7 '
-")) 
-  DII&EJJr2T:??		JE**BB II; C J
 ""<0 35!%%'0023 --/  EK**BB II; C J u!!#,,.3 3s   %E--E6zSTest is failing, tracking issue at https://github.com/pytorch/pytorch/issues/102751c                 j   t          G d d              G fddt        j                        } || j                        }t	        j
                  ddd      j                  | j                        }t        j                  j                  j                  || j                  gdd	
      }| j                  dk(  r* ||      \  }}|j                         j                          n: ||      \  }}|j                         |j                         z   j                          | j                  dk(  rW| j                  t        d      5  |j                          d d d        | j                  t        d      5   ||       d d d        y |j                           ||       y # 1 sw Y   JxY w# 1 sw Y   y xY w)Nc                   ,    e Zd ZU ej                  ed<   y)EDistributedTest._DistTestBase.test_ddp_has_finalized.<locals>.MyClassobjN)rc   rd   re   rS   ro   r   ry   rY   rX   MyClassr	  %  s    \\!rY   r	  c                   *     e Zd Z fdZfdZ xZS )EDistributedTest._DistTestBase.test_ddp_has_finalized.<locals>.MyModelc                     t         |           || _        t        j                  dd      j                  |      | _        t        j                  dd      j                  |      | _        y )N   i   )rL   rM   r  rN   rO   rn  r   r   r-  s     rX   rM   zNDistributedTest._DistTestBase.test_ddp_has_finalized.<locals>.MyModel.__init__%  sO    G$& $DI!yyt499$?DH!yyx8==dCDHrY   c                     | j                   dk(  r(| j                  |       | j                  |            fS | j                  |      | j                  |      fS ra  )r  r   r   )rV   r   r	  s     rX   r_   zMDistributedTest._DistTestBase.test_ddp_has_finalized.<locals>.MyModel.forward%  sI    yyA~#xx}gdhhsm.DDD#xx}dhhsm;;rY   rb   )rW   r	  s   @rX   r  r	  %  s    D< <rY   r  rE   r	  Tr   g      p?)rL  r  r6  r   r   )r   rN   r   r  rS   r  rn  rP  r!   r  r)  r  r  _check_reducer_finalized)	rV   r  r   r  r_  out1ru  out2r	  s	           @rX   r-  z4DistributedTest._DistTestBase.test_ddp_has_finalized%  sj    " " "<")) < DII&EJJr4t<AA$))LE((##;; II;'+5	 < C yyA~e*a
##% Z
ddhhj(224yyA~++L:vw 30023 ++L:vw J  ,,.E
3 3 s   F2	F)F&)F2c                    | j                   }t        j                  j                  |       t        j                  |       t        j                  j	                  |        G d dt
        j                        } |       j                  |      }t        j
                  j                  j                  || j                   g      }t        j                  dd|      }t        d      D ]  }|dk(  r(|j                  j                  dz   |j                  _         ||      j                         }|j                          t        t        j                                D cg c]+  }t        j"                  |j                  j                        - }}t        j$                  ||j                  j                         |d   }	|dd  D ]  }
| j'                  |	|
         y c c}w )	Nc                   &     e Zd Zd fdZd Z xZS )ODistributedTest._DistTestBase.test_ddp_broadcast_buffer.<locals>.NetWithBuffersc                     t         |           t        j                  ddd      | _        t        j                  ddd      | _        | j                  dt        j                  dd             y rD   rK   rU   s    rX   rM   zXDistributedTest._DistTestBase.test_ddp_broadcast_buffer.<locals>.NetWithBuffers.__init__&  sQ    G$&YYr2E:DFYYr159DF((5;;q!3DErY   c                 B    | j                  | j                  |            S rk   )rQ   rP   r]   s     rX   r_   zWDistributedTest._DistTestBase.test_ddp_broadcast_buffer.<locals>.NetWithBuffers.forward&  s    66$&&),,rY   r`   rb   rg   s   @rX   rB   r	  &  s    F-rY   rB   rj  rJ   rE   r  r   rH   )r  rS   rn  r  r  rN   r   rP  r!   rT   rs  r   rI   r  r)  r1  r   r  r  r  )rV   r  rB   r   r	  r   ru  r  bufs
rank_0_bufr  s              rX   test_ddp_broadcast_bufferz7DistributedTest._DistTestBase.test_ddp_broadcast_buffer&  s    99DJJ!!$'d#JJ""4(- - #$))$/E))AA II; B I ++aD1C1X 619.7.>.>.E.E.II$$+ ~))+ #4#6#6#89 $$Y%5%5%<%<=  i&6&6&=&=>!!W
8 6C$$Z566s   0Gz8Only Nccl & Gloo backend support DistributedDataParallelc                 d    G d dt         j                        }t        j                  j	                  | j
                         t        j                  dd| j
                  dz   z  z          |       j                  | j
                        }t        j                  |      }t        j                   j                  j                  || j
                  gd      }t        j                  dd	d
      }t        d      D ]  }|j                          |j                           ||      } ||      }|j                         |j                         z   }|j                          | j
                  dk(  su|j!                         }	t#        j$                         }
t        |
      D ]C  } ||	      } ||	      }|j                         |j                         z   }|j                          E |j'                         D ]   }|j(                  |
z  |j(                  _        " t-        |j'                         |j'                               D ]X  \  }}| j/                  t        j0                  |j(                  |j(                        |j(                   d|j(                          Z  t#        j2                          y )Nc                   &     e Zd Zd fdZd Z xZS )JDistributedTest._DistTestBase.test_static_graph_multi_forward.<locals>.Netc                     t         |           t        j                  dd      | _        t        j
                         | _        y r  )rL   rM   rN   rO   r  r   r   rU   s    rX   rM   zSDistributedTest._DistTestBase.test_static_graph_multi_forward.<locals>.Net.__init__&  s-    G$&!yyR0DH "	DIrY   c                 B    | j                  | j                  |            S rk   )r   r  r]   s     rX   r_   zRDistributedTest._DistTestBase.test_static_graph_multi_forward.<locals>.Net.forward&  s    99TXXa[11rY   r`   rb   rg   s   @rX   r   r	  &  s    *
2rY   r   *   r@  rH   Tr  rJ   rE   rn  r  rz   r   z vs )rN   r   rS   rn  r  r  r  r  r  rP  r!   r   rs  rB  r  r)  r%  r1  r   rs  r  r  r  r)  r  r  )rV   r   r   r  r   ru  rP   rQ   r  	inp_cloneitersr   p_ddpp_locals                 rX   test_static_graph_multi_forwardz=DistributedTest._DistTestBase.test_static_graph_multi_forward&  s   2bii 2 JJ!!$)),bDDIIM$::;EJJtyy)E--.KHH%%==499+D > E **Q62C1X !%%'#J#Juuw( 99> #		I //1E"5\ ('	2'	2 uuw0	( )335 5&'ffun5 +.((*#..0+ 	w !NN %

GLL  %zzl$w||n=			+@ LLNrY   c                 R   t         }| j                  }|j                  |      }t        j                  j
                  j                  t        j                  |      | j                  g      }|j                         }|j                  dd      }| j                  |       t        j                  j                  |      }t        j                  j
                  j                  || j                  g      }|j                         }|j                  dd      }| j                  |       y )Nrj  has_sync_bnTF)r  r  rn  rS   rN   rP  r!   r  r  r  r   rH  r  r  r)  )rV   r   r  rU  
no_sync_bnr  sync_bn_loggedr>  s           rX   test_sync_bn_loggedz1DistributedTest._DistTestBase.test_sync_bn_logged&  s     E99D

4(I**BBi( II; C J  *??A-11-FN^,((??	JI))AA II; B I  )>>@-11-GNOON+rY   c                     G d dt         j                  j                        }| j                  } |       j	                  |      }t         j                  j
                  j                  ||g      }t        j                  d      j	                  |      }t        j                  dgg|d      }t        j                  dg|d      }t        j                  dg|	      }|||d
}|j                  j                  j                  j                         }	|j                  j                  j                         }
t         j                  j                  |||      }| j!                  ||       |j                  j                  j                  }|j                  j                  }| j!                  ||	       | j!                  ||
       |j#                          | j%                  |j&                         | j%                  |j&                         | j)                  |j&                         | j)                  |j                  j                  j                  j&                         | j)                  |j                  j                  j*                  j&                         | j)                  |j                  j                  j&                         y )Nc                   &     e Zd Zd fdZd Z xZS )MDistributedTest._DistTestBase.test_stateless_api_with_ddp.<locals>.MockModulec                     t         |           t        j                  j	                  dd      | _        t        j                  d      }| j                  d|       y )NrH   rI   )rL   rM   rS   rN   rO   r  r   rR   )rV   rI   rW   s     rX   rM   zVDistributedTest._DistTestBase.test_stateless_api_with_ddp.<locals>.MockModule.__init__'  sA    G$&#hhooa3DG"ZZ]F((6:rY   c                 >    | j                  |      | j                  z   S rk   )r  rI   r]   s     rX   r_   zUDistributedTest._DistTestBase.test_stateless_api_with_ddp.<locals>.MockModule.forward'  s    771:33rY   r`   rb   rg   s   @rX   
MockModuler	  '  s    ;4rY   r	  rj  )rH   rH   rq  T)r  r   r  r  )zmodule.l1.weightzmodule.l1.biaszmodule.buffer)rS   rN   r   r  rk  rP  r!   r  r   r   r  r  r%  rI   rA  functional_callr  r)  assertIsNotNoner  rY  rG   )rV   r	  r  r   r^   r  rG   rI   rs  prev_weightprev_bufferro  
cur_weight
cur_buffers                 rX   test_stateless_api_with_ddpz9DistributedTest._DistTestBase.test_stateless_api_with_ddp'  s   4UXX__ 4 YYF\__V,FXX&&>>F8 ? F 

6"%%f-A\\C5'&MF<<fDID\\3%7F$*"&!'J
 !--**11779K --..446K**,,VZCCQ$))00J--JZ5Z5LLN  -  +fkk*fmm..55::;fmm..33889fmm22778rY   c                 H    G d dt         j                        }d }d }d } |       } |       }|j                  j                  |       |j                  j	                  |       |j                  j                  |       |j                  j	                  |       |j                  j                  |       |j                  j                  |       t        |j                  | j                        | j                  g      }t        j                  dd      } ||      } ||j                  | j                              }	| j                  ||	       |j                         j                          |	j                         j                          |j                         D 
cg c]  }
|
j                   }}
| j                  |d	   |j                  j                   j                         | j                  |d
   |j                  j"                  j                         y c c}
w )Nc                   &     e Zd Zd fdZd Z xZS )TDistributedTest._DistTestBase.test_ddp_forward_backward_hook.<locals>.DummyTestModelc                     t         |           t        j                  d       t	        j
                  dd      | _        y )Nr   rJ   )rL   rM   rS   r  rN   rO   r   rU   s    rX   rM   z]DistributedTest._DistTestBase.test_ddp_forward_backward_hook.<locals>.DummyTestModel.__init__B'  s-    G$&%%a( ii1oDGrY   c                 $    | j                  |      S rk   r   r]   s     rX   r_   z\DistributedTest._DistTestBase.test_ddp_forward_backward_hook.<locals>.DummyTestModel.forwardG'  s    771:%rY   r`   rb   rg   s   @rX   DummyTestModelr
  A'  s    .
&rY   r	
  c                 F    t         j                  j                  |d         S ra  )rN   
functionalr   )r   r  s     rX   	relu_hookzODistributedTest._DistTestBase.test_ddp_forward_backward_hook.<locals>.relu_hookJ'  s    }}))%(33rY   c                 @    t         j                  j                  |      S rk   )rN   r
  gelur   _inputrc  s      rX   	gelu_hookzODistributedTest._DistTestBase.test_ddp_forward_backward_hook.<locals>.gelu_hookM'  s    }}))&11rY   c                 H    t         j                  j                  |d         fS ra  )rN   r
  celur
  s      rX   	celu_hookzODistributedTest._DistTestBase.test_ddp_forward_backward_hook.<locals>.celu_hookP'  s    **6!9577rY   rj  r   rJ   r   rH   )rN   r   r   register_forward_pre_hookregister_forward_hookregister_backward_hookr!   rk  r  rS   r  r  r  r)  rs  r  r  rG   )rV   r	
  r
  r
  r
  r  rx  
input_dataoutput_local
output_ddpr   	ddp_gradss               rX   test_ddp_forward_backward_hookz<DistributedTest._DistTestBase.test_ddp_forward_backward_hook>'  s   & &428 )*K&(INN44Y?NN00;LL229=LL..y9NN11)<LL//	:/TYY'TYYKI Aq)J&z2L":==#;<J\:6'')NN%%')2)=)=)?@A@I@Yq\;>>+@+@+E+EFYq\;>>+>+>+C+CD As   #Hc                 .   t        j                  d       d}t        j                         dz   }| j                  }t        j
                  dd|      }t        j
                  dd|      }t         j                  j                  dd      j                  |      }t        t        j                  |      |g      }	t        t        j                  |      |g      }
t         j                  j                  |	j                         |	      }|	j                  ||       |	j!                          t#        d
      D ]P  }|j%                           |	|      }t'        j(                  ||      }|j+                          |j-                          R |	j/                         ||d}|dk(  r| j1                  d      5 }t        j2                  ||       d d d        | j5                  t7        j8                        d       | j5                  |j8                  d   j;                         d       t=        j>                          dd|di}| j1                  d      5 }t        j@                  ||      }d d d        | j5                  t7        j8                        d       | j5                  |j8                  d   j;                         d       |
jC                  d          |d   }|d   }t         j                  j                  |
j                         |	      }| j5                  |jD                  |jD                         | j5                  |jF                  |jF                         |jF                  D ]4  }|dk7  s	|dk7  s| j5                  tI        ||      tI        ||             6 | j5                  |jJ                  tM                      tO        |jP                  jS                         |jP                  jS                               D ]%  \  }}tT        jV                  jY                  ||       ' |
j                  ||       |
j!                          t#        d
      D ]  }|j%                          |j%                           |	|      } |
|      }t'        j(                  ||      }t'        j(                  ||      }|j+                          |j+                          |j-                          |j-                           tO        |	j                         |
j                               D ]+  \  }}| j5                  |jZ                  |jZ                         - t=        j>                          |dk(  rt]        j^                  |       y y # 1 sw Y   xY w# 1 sw Y   !xY w)Nr   g{Gz?z/checkpoint.ptr  rH   r  r   rj  r  rE   )r<  r  comm_hook_stateztorch.distributedzHNOTE: Process group is not serializable and excluded from a saved state.r  r4  rH  r  zNOTE: Process group will be set to a default group (i.e. the world size).                If a different group is desired, please set `self.process_group` after PowerSGD state is loaded.r<  r  r
  rm  rng)0rS   r  r   
gettempdirr  rT   rN   rO   rk  r!   r  r  r  r  rs  r  r(  rs  rB  r   r>  r)  r  r<  
assertLogsr9  r  r  records
getMessager1  r  r   r  re   ra  getattrrm  r   r  r
  	get_stater  r  assert_array_equalr  r   r   )rV   r   
hook_stater  r  r  r  r#  r  rx  dummy_ddp_modelr}  ru  r  r  r   capturedr  r  
dummy_hookdummy_hook_statedummy_optimizerentryentry1entry2
out_origin	out_dummyloss_origin
loss_dummy
orig_paramdummy_params                                  rX   _test_hook_picklingz1DistributedTest._DistTestBase._test_hook_picklingh'  s   a  M!,,.1AAJ99DKK1T2E[[Ad3F((//!Q'**40C/c0BPTvVI5c"vO 	(<(<(>=QI((T:OO2Y !##%&zz#v. ! (224!#-E qy__%89 2XJJuj12   X%5%5!6:  $$Q'224^
 LLN$d1X&67L!45 O"ZZ
N
O S!1!12A6  #..0r ++J|,DE#K0J)*;<#kkoo**, . O
 Z44d6G6GH Z113C3M3MN *33 O+$$ 0%8'*e:T -;;=O=QR #&((*,<,@,@,J,J,L# > 

--ff=>
 ../?L!!#2Y 
'##%))+&u-
+E2	jjV<ZZ	6:
$$&##% $$&
' ,/$$&(B(B(D, D'
K   +2B2BCD
 LLNqy		*% k2 2O Os   5U=V
=V
VzSkipped due to flakinessc                 x    t         j                  }t        j                  d dd      }| j                  ||       y )NrH   r{   )rm  r'  r(  )r  r   r*  r6
  )rV   r   r+  s      rX   test_ddp_hook_pickling_powerSGDz=DistributedTest._DistTestBase.test_ddp_hook_pickling_powerSGD'  s:     ))D%33"*+$%N
 $$T>:rY   c                    t        t        j                  d         }ddlm}  |d|f      }t               }t        j                  j                  | j                         t               j                         }t        j                  j                  j                  ||      }| j                  |j                  |       | j!                  t"        d      5  t        j                  j                  j                  |||      }ddd       | j!                  t"        d	      5   |dd
|d
z  f      }t        j                  j                  j                  ||      }ddd       y# 1 sw Y   bxY w# 1 sw Y   yxY w)zC
            Test DDP with device_mesh initialization.
            rG  r   )init_device_meshrn  )device_meshz<Cannot specify both process_group and device_mesh arguments.)rm  r;
  Nz!Only 1D device mesh is supported,rJ   )r   r   rJ  torch.distributed.device_meshr:
  r   rS   rn  r  r  r  rN   rP  r!   r  r;
  r  r  )rV   rK  r:
  r;
  rv  r   rx  s          rX   #test_ddp_device_mesh_initializationzADistributedTest._DistTestBase.test_ddp_device_mesh_initialization'  sK    RZZ56JF*6J=AK#%BJJ!!$)),"$))+E))AA%U`AaIY22K@''\  "HH--EE F 	 ''A  /v:?7KL!HH--EE{ F 		   s   -E# :E/#E,/E8c                    t         j                  j                  dd      j                  | j                        }t        j                  |      }t         j                  j                  j                  || j                  g      }t         j                  j                  j                  || j                  gd      }t        j                  |      }t        j                  |      }t        j                  dd      j                  | j                        }t        d      D ]  } ||      j                         } ||      j                         }| j                  ||       |j                          |j                          t        |j!                         |j!                               D ]+  \  }	}
| j                  |	j"                  |
j"                         -  y)z>Tests that DDP works with torch compile when static_graph=TruerE   rj  Tr  ru  N)rS   rN   rO   rn  r  r  r  rP  r!   compiler  rs  r  r  r)  r  rs  r  )rV   r   model_cloner_  
ddp_staticr  ru  out_ddpout_ddp_staticr  r  s              rX   test_ddp_compile_static_graphz;DistributedTest._DistTestBase.test_ddp_compile_static_graph(  sn    HHOOB+00;E--.K((##;; II; < C **BB II;! C J
 --$Cz2JJJr2&++DII6E1X 7e*..*!+E!2!6!6!8  .9  "'')!#.."2J4I4I4KL 7FB$$RWWbgg677rY   c                     G d dt               } G d dt        j                  j                        } |       j	                  | j
                        }t        j                  j                  j                  || j
                  gd      }|j                  d       t        j                  dd      j	                  | j
                        } |       5   ||      j                         j                          d	d	d	       y	# 1 sw Y   y	xY w)
z.Tests that we can configure DDP to avoid clonec                       e Zd ZddZy)FDistributedTest._DistTestBase.test_ddp_sink_noclone.<locals>.OpPatcherNc                     |j                   }|t        j                  j                  j                  k(  rt        d      |r|ni } ||i |S )Nzclone encountered!)_overloadpacketrS   opsatenr%  r  )rV   rA  typesr  r  func_packets         rX   __torch_dispatch__zYDistributedTest._DistTestBase.test_ddp_sink_noclone.<locals>.OpPatcher.__torch_dispatch__=(  sH    "&"6"6K"eiinn&:&::*+?@@'-V2F000rY   )ry   N)rc   rd   re   rN
  ry   rY   rX   	OpPatcherrG
  <(  s    1rY   rO
  c                   &     e Zd Zd fdZd Z xZS )DDistributedTest._DistTestBase.test_ddp_sink_noclone.<locals>.MyModelc                 l    t         |           t        j                  j	                  dd      | _        y r  )rL   rM   rS   rN   rO   r   rU   s    rX   rM   zMDistributedTest._DistTestBase.test_ddp_sink_noclone.<locals>.MyModel.__init__E(  s$    G$&#hhoob"5DGrY   c                 $    | j                  |      S rk   r   r  s     rX   r_   zLDistributedTest._DistTestBase.test_ddp_sink_noclone.<locals>.MyModel.forwardI(  s    775>)rY   r`   rb   rg   s   @rX   r  rQ
  D(  s    6*rY   r  Tr  FrE   N)r   rS   rN   r   rn  r  rP  r!   _set_ddp_sink_cloner  r  r)  )rV   rO
  r  r   r_  r  s         rX   test_ddp_sink_noclonez3DistributedTest._DistTestBase.test_ddp_sink_noclone3(  s    1- 1*%((// * INN499-E((##;; II;'+ < C
 ##E*JJr2&++DII6E ,E
 ))+, , ,s   %C??Drk   )FNF)FN)TN)rq  N)Nr   FNr   )NFFFr  r   )rO  )rJ   NFrh  )FF(  rc   rd   re   r  r  r  r  r  r  r  r  r  r$  r'  r*  r-  r/  r;  r>   r   r   r   r>  r+   rB  rD  r6   r;  rL  r-   rM  r/   rQ  rT  r]  ra  rf  rh  rx  r  r  r  r  r;   r<   r  r  r1   r  r  r  r  r  r  r  r  r  r  r  r
  r8   r9   r  r  r   r$  r(  r;  skip_collectiver=  r?  rA  rD  rF  rH  rJ  rU  rW  rY  r[  r]  ro  rq  rt  rv  rx  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  ri  rS   ra  r  r  r  r  r  r  r  r  r   r  r  r  r  r
  r  r  r  r  r  r  r"  r&  r	  r*  r-  r0  r3  r6  r>  rE  rA  rD  rF  rH  rJ  rM  rO  rQ  rS  rV  rX  rZ  r^  rd  rf  ri  rk  rn  rp  rr  rx  r{  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r*   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r  r  r  r  r  r  r  r  r$  r.  r4  rI  rV  rY  r\  r^  rb  r   r   rJ  rl  ry  r  r  r  r  r  r  r  r  r#   r  r  r  r
  r  r  r  r  r!  r%  r,  r2  r9  r;  rI  r.   rM  rO  r_  rh  rq  r   r  r  r  r  r  r  r  r  r  r  r  r,   r  r  r  r  r  r  r  r  r  r  r  r  r  r*  r,  r0  skipIfNoTorchVisionr8  r1  r  r;  r>  rB  rD  rI  ra  rg  r0   r4   rl  ro  ry  r{  r~  r  r  r  r  r=   r  r  r   r  unittestskipIfr  r  r  r  r+  r  r  r$  skipr&  r(  r>  rA  rT  r[  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r	  r	  r	  r	  r3   r	  r 	  r"	  r%	  r'	  r/	  r6	  rU	  rY	  r[	  rg	  rl	  ro	  r	  r	  r	  r	  r	  r	  r	  r	  r	  r	  r	  r-  r	  r	  r	  r
  r
  r6
  r8
  r=
  rD
  rU
  ry   rY   rX   _DistTestBaser    sA   	*	+	+	+	..	!6	.<	.	/ 
	'	1	>	1	G
	, 
(vD

 
(&&y110

	B



	B$ 
!	'vD

	>

 
!	> 
(vD

	>

	> 
(=88DD7)WX

 
A		!	
	5 
 
	


	5 
(=88DD7)WX

 
	( 
	


	( 
(=88DD7)WX

 
A		!		& 
 
	

	& 
(=88DD7)WX

 
A		!		5 
 
	

	5( 
(=88DD7)WX

 
A		!		 
 
	

	 
(=88DD7)WX

 
		 
	


		 
(=88DD7)WX

 
A		!		 
 
	

	 
(=88DD7)WX

 
!	"	M 
	


"	MH 
(=88DD7)WX

 
!		= 
	


	=< 
!	*	" 
*	"X 
(=88DD7)WX

 
!	%	= 
	


%	=N 
(=88DD7)WX

 
A		!	P	9 
 
	

P	9f 
	'v;;mT

		

 

	: 
	'v;;mT

		

 

	< 
	'6(9;V	W	y*H	I#	 
J 
X 
#	J 
	'6(9;V	W	y*H	I	 
J 
X 
	( 
	'6(9;V	W	y*H	I	 
J 
X 
	0 
	 	'6(9;V	W	y*H	I	 
J 
X 
! 
	> 
(6(9;U	V	 
W	, 
(6(9;U	V	 
W	, 
(6(9;V	W	y*H	I		6 
J 
X		6 
(6(9;V	W	y*H	I	3 
J 
X	3 
(6(9;V	W	y*H	I	I 
J 
X	I  
	'6(9;P	Q	y*H	I*	S 
J 
R 
*	SZ 
	'6(9;P	Q	y*H	I	( 
J 
R 
	( 
	'6(9;P	Q	y*H	I	4 
J 
R 
	4 
	'6(9;P	Q	y*H	I	'	3Q	R	'"
g

	4	

 
S 
J 
R 
	4%	SN 
(vM

	4

	4 
(vM

	E

	E 
(vM

 
(	3Q	R	'"
g

	I	

 
S

	I
K	 Z 
(}445IJJiBC

	?	

	? 
(}445IJJiBC

	P	

	P 
(}445IJJiBC

 
(	3V	W	'"
g

	T	

 
X	

	T
 	YD 
(vM

	=

	= 
(vM

	U

	U 
(vM

 
(	3V	W	'"
g

	R	

 
X

	R
+	XZ 
(v<

	0

	0 
(v<

	A

	A 
(v<

 
(	3V	W	'"
g

	>	

 
X

	>
 
(v<

	

	: F	P 
(vB

	?

	? 
(v3'V"3@

 
	R 
	


	R 
!	'vB

	?

 
!	? 
(vB

	?

	? 
(v=

 
	X 
	


	X8 	B 
(vB

 
(}44X>>i/0


		




	 
(v?

 
(}44X>>i/0

 
	 
	



	" 
(vB

 
(}44X>>i/0


		




	 
(vB

 
(}44X>>i/0

		



	 
(vB

 
(}44X>>i/0

		



	 
(vB

 
(}44X>>i/0

 
!
	 
!	




	 
(vB

 
(}44X>>i/0

 
!
	 
!	




	 
(vB

 
(}44X>>i/0

 
!	 
!	



	 
(vB

 
(}44X>>i/0

 
!	 
!	



	 
(vB

 
(}44X>>i/0


		




	 
(vB

 
(}44X>>i/0


		




	 
(vB

 
(}44X>>i/0

		



	 
(vB

 
(}44X>>i/0

		



	  '	R 
(vB

 
(}44X>>i/0


		




	 
(v?

 
(}44X>>i/0

 
	 
	



	" 
(vD

 
(}44X>>i/0

 
)	 
	



)	Z QU	* 
(vN

 
	 


	@ "4	@ ++6	p 
(vB


	


	 
(vB

	

	 
(v3'V"3I

 
	 
	


	  
(v3'V"3I

 
	 
	


	" 
(vB

	

	 
(vB

	

	$ 
(v3'V"3I

 
	 
	


	" 
(vB


	


	 
(vB

	

	 
(vB

	

	 
!	'vB


	

 
!
	 
!	'vB


	

 
!
	 
!	'vB

	

 
!	 
!	'vB

	

 
!	 
(vB


	


	 
(vB


	


	 
(vB

	

	 
(vB

	

			9 
(vL

	:

	: 
(vL

 
	I 


	I 

	 

	 
	 
	 
	 
	 
	 
	 
(vB

	

	 1	f 
&vh	/		 
0		 
&vh	/		 
0		 
&vh	/		 
0		 
&vh	/	 
0	 
!	%vh	/	 
0 
!	 
!	%vh	/		 
0 
!		 
!	%vh	/	 
0 
!	 
!	%vh	/	 
0 
!	 
&vh	/	 
0	 
&vh	/		 
0		 
&vh	/		 
0		 
&vh	/	 
0	 /4U[[	B 
(vB

 
(uF

	1



	1, 
(vB

 
(uF

	=



	= 
(v?

 
	P 


	P
 
(vB

 
(uF

	Q



	Q 
(v?

 
	 


	 
(vB

 
(uF

 
!	= 
!



	= 
(vB

 
(uF

	=



	= BF	: 
(vB

 
(uF

	(



	(, 
(vB

 
(uF

	<



	< 
(v?

 
	O 


	O
 
(vB

 
(uF

 
!	< 
!



	< 
(vB

 
(uF

	<



	< /4U[[	B 
(vB

	@

	@ 
(vC

 
	S 


	S
 
(vB

	T

	T 
(vC

 
	 


	 
!	'vB

	@

 
!	@ 
(vB

	@

	@ 
(v@

 
#	 


#	N QU	. 
(vO

 
	 


	" 
(vO

 
	 


	"	< 05{{%	N 
(}445JKKi=>

	J	

	J 
(}445JKKi=>

		

	 
!	'}445JKKi=>

	J	

 
!
	J 
(}445JKKi=>

	J	

	J 
(}445JKKi=>

 		

 	H /4U[[	> /4U[[	6 ++	B 
(uG

	S

	S 
(vJ

 
		 


		 
(uG

	

	 
(vJ

 
	 


	 
(uG

	U

	U 
(vJ

 
		 


		 
(uG

	

	 
(vJ

 

	 



	 
(u<

	@

	@ 
(vC

 
#	S 
#

	S
 
(u<

	T

	T 
(vC

 
#	 
#

	 
(uG

 
!	S 
!

	S 
(vJ

 
	 		 
! 



		 
(uG

 
!	U 
!

	U 
(vJ

 
	 		 
! 



		 
(u<

 
!	@ 
!

	@ 
(vJ

 
!	"	S 
# 
!


	S
 
(uG

	S

	S 
(vJ

 
		 


		 
(uG

	U

	U 
(vJ

 
		 


		 
(u<

	@

	@ 
(vC

 
#	S 
#

	S BF	&> 
	'u@

 
(u.0D

	P



 
	P
 
!		'u@

	P

 
 
!
	P
 
!		'u@

	P

 
 
!
	P
 
(}44]CCi45

	=	

	= 
!	'}44]CCi45

	=	

 
!
	= 
(}44]CCi45

	=	

	=	&	'	6 SW	S	/  E	YV $)#(<	|!	F 
(vH

	4

	4 
(vH

	P

	P 
(=88??7)LM

		

	 
(=88??7)LM

 
#bjj67	8	 
9	


	 
(6(9;K	L	 
M	* 
(=88??7)LM

 
#bjj67	8	 
9	


	B 
(=88@@7)[\

 
#bjj67	89	H 
9	


9	HvD	#LK	Z 
!		4 
	4> %)L	:\ 
!			 
		 
!		 
	 
!	*	G 
*	GX	o 	 
!	 	: 
 	:DA	<F 
!		 
	 
!		 
	 
!		 
	
 
!		 
	
7	r 
(=88@@7)[\

 
#bjj67	8	P 
9	


	P 
(=88@@7)[\

 
#bjj67	8	Y 
9	


	Y 
(=88@@7)[\

 
#bjj67	8
	 
9	



	 
(=88@@7)[\

 
#bjj67	8,	 
9	


,	j %*	3< %*		3$ LQO	Ab 
(uHF!2Hw&7H@

 
w	*	6 
+	


	6 
(uHF!2Hw&7H@

 
w	*	R 
+	


	R 
(uHF!2Hw&7H@

 
w	*	 
+	


	. 
(uHF!2Hw&7H@

 
w	*	 
+	


	: 
(uHF!2Hw&7H@

 
w	*	/ 
+	


	/& 
(=88??7)LM

 
)	 
	


)	V5	n 
(=88??7)LM

 
	' 
	


	'* ;	z-	<^	N		>	0@ 
!		'=88??7)LM

		

 

	 
!		'=88??7)LM

		

 

		 
!			'=88??7)LM

		

 
 
	 
!			'=88??7)LM

		

 
 
	 
!		'=88??7)LM

		

 

	 
(=88??7)LM

 
	 
	


	.	` 
(=88??7)LM

 
&	 
	


&	P 
(=88??7)LM

 
	 
	


	( 
(=88??7)LM

 
'	  
	


'	 R 
(=88??7)LM

 
	A	'	  
 
	

'	 R 
(=88??7)LM

 
$	J 
	


$	JL 
(=88??7)LM

 
	 
	


	& 
(=88??7)LM

 
	B 
	


	BC	J 
(vH

P	N

P	Nd 
(=88??7)LM

 
1	W 
	


1	Wf 
(vH

	=

	=* 
	@ 
	@ =AOOQU	: 
&vh	/	!		 
 
0	2 
&vh	/	!		> 
 
0	>( 
&vh	/	#bjj67	8	Y 
9 
0	Y4 
&vh	/	!		3 
 
0	3& 
(=88??7)LM

 
#bjj67	8L	7 
9	


L	7\	04 
&m&C&CE&J	K	(

<()2::i+@

 
 '@	A	1 
B

 
L
	1 
&m&C&CE&J	K	(

<()2::i+@

 
 '@	A	B 
B

 
L
	B-	^ 
(uF

 
&m&C&CE&J	K	'@	A	. 
B 
L


	. 
(uF

 
&m&C&CE&J	K	'@	A	6 
B 
L


	6
	0 
!		'=88??7)LM

*	2	

 

*	2X 
!		'=88??7)LM

2	=	

 

2	=hJ	X 
&m&C&CE&J	K	!		$%N	O	P 
P 
 
L	P 
&m&C&CE&J	K	!		'	3V	W	'"
g

	>	

 
X 
 
L	>8/	># /	>$ /	>d 
&m&C&CE&J	K	!		'	3V	W	'"
g

 
F*,L	M	9 
N	

 
X 
 
L	94 
!		'=88??7)LM

,	<	

 

,	<\i	V 
!		'=88??7)LM

4	5	

 

4	5l 
!		'=88??7)LM

u		

 

u	n 
!		'=88??7)LM

	/	

 

	/: 
!		'=88??7)LM

	$	

 

	$0D	6L 
&m&C&CE&J	K	(

<()2::i+@

 
 z	2	\	]	6 
^ 
3

 
L	6 
&m&C&CE&J	K	(

<()2::i+@

 
 z	2	> 
3

 
L
	>V	9p 
&m&C&CE&J	K	!		@ 
 
L	@ 
 '@	A	%m&C&CE&J	K	!	0	 
 
L 
B0	d 
&m&C&CE&J	K	!	 	$ 
 
L 	$D 
&m&C&CE&J	K	!	c	' 
 
Lc	'J 
&m&C&CE&J	K	!		" 
 
L	"B 
&vh	/H	 
0H	T 
 '@	A	%m&C&CE&J	K	!	N	 
 
L 
BN	` 
&m&C&CE&J	K	!	+	R 
 
L+	RZ 
 '@	A	%m&C&CE&J	K	!	c	 
 
L 
Bc	J 
&vh	/	I 
0	I:	"(	%T 
&m&C&CE&J	K	!		K 
 
L	K 
&m&C&CE&J	K	!		J 
 
L	J 16	%<3	%j 
&m&C&CE&J	K	'u.0D

 
!		A 


 
L
	A 
&m&C&CE&J	K	'u.0D

 
!		B 


 
L
	B	% 
&m&C&CE&J	K	'u.0D

 
!		 


 
L
	* 
&m&C&CE&J	K	'u.0D

 
!		 


 
L
	8^	@ 
(=88??7)LM

 
!		R 
	


	R
 
(=88??7)LM

 
!		R 
	


	R
 
(=88??7)LM

 
!		 
	


	< >C	B 
&m&C&CE&J	K	!		' 
 
L	' 
&vh	/	'"
c

!	&	

 
0
!	&F 
&vh	/	: 
0	:*3	&j 
!	%m&C&CE&J	K	#bjj67	8	N 
9 
L 
!	N
 
!	%m&C&CE&J	K	#bjj67	8	M 
9 
L 
!	M 
&vh	/	= 
0	= 
&vh	/	 	'"
c

	<	

 
! 
0	<* 
&vh	/	 	Q 
! 
0	Q 
&m&C&CE&J	K	x	0	!	:	 
 
1 
L:	x 
(=88??7)LM

 
 x	0	!		F 
 
1	

	F.p	Qd 
 '@	A	%m&C&CE&J	K	!		T 
 
L 
B	T 
 '@	A	%m&C&CE&J	K	!		S 
 
L 
B	S
 
(=88??7)LM

 
!	$	& 
	


$	&L 
(=88??7)LM

 
!		\	]*	7 
^ 
	

*	7X 
!		'=88??7)LM

	9	

 

	9  
!		'=88??7)LM

Q	6	

 

Q	6f 
!		'=88??7)LM

!	!	

 

!	!F 
!		'=88??7)LM

+	>	

 

+	>Z1	*f 
!		'=88??7)LM

	H	

 

	H 
!		'=88??7)LM

	G	

 

	GK	Z 
!		'=88??7)LM

	F	

 

	F 
!		'=88??7)LM

	G	

 

	G 
!		'=88??7)LM

	(	

 

	(@ 
!		'=88??7)LM

.	/	

 

.	/` 
!		'=88??7)LM

 
l	m+	 
n	

 
+	Z|	| 
!		'=88??7)LM

#	6	

 

#	6J 
!		'v3'V"3F

2		

 

2	h 
!		'v3'V"3F

	,	

 

	,* 
!		'=88??7)LM

+	9	

 

+	9Z 
&m&C&CE&J	K	!	&	E 
 
L&	EPu	&n 
(=88@@7)[\

 
#bjj67	8	',

	;

 
9	

	; 
&m&C&CE&J	K	!		 
 
L	@ 
!		A		'=88??7)LM

	7	

 
 
	74 
!		A		'=88??7)LM

	,	

 
 
	,rY   r[
  N)rc   rd   re   r[
  ry   rY   rX   r  r    s    UW, UW,rY   r  r  )r  r   r/  rV  r   rF  rS  r   r  collectionsr   r   r   
contextlibr   r   dataclassesr   datetimer	   	functoolsr
   typingr   r   r   r   rX
  numpyr  rS   
torch.cudatorch.distributedr#  r1  6torch.distributed.algorithms.model_averaging.averagers
algorithmsmodel_averagingr  Htorch.distributed.algorithms.model_averaging.hierarchical_model_averagerhierarchical_model_averagerr  2torch.distributed.algorithms.model_averaging.utilsutilsru  torch.nnrN   torch.nn.functionalr
  r   torch._utils_internalr   r  r   r  torch.utils._python_dispatchr   torch.autogradr   torch.cuda.ampr   r   +torch.distributed.algorithms.ddp_comm_hooksr   r0  r   r  r   r  r   r  torch.distributed.optimr   "torch.distributed.distributed_c10dr   r   r   torch.distributed.utilsr   r   torch.profilerr   r    torch.nn.parallelr!   torch.nn.parallel.distributedr"   r#   *torch.testing._internal.common_distributedr$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   $torch.testing._internal.common_utilsr7   r8   r9   r:   r;   r<   r=   r>   /torch.distributed.optim.post_localSGD_optimizerr  r  torch.utils.data.distributedr?   r  r  r  ImportErrorrT  rU  rY  r   rB   ri   r   barrT   foo_cpu_tensorrc  r2  r5  r3  r7  r9  r  r  r  rj  r}  r   rW
  rJ  r   getenvr   r&  r%  r   r   rL  rM  rP  rN  rO  r   r   r   r   r   r   r   r   r  r	  r  rO  r  r  r  r  r)  r  r  rE  rL  r_  rg  ra  rq  rv  r{  r  r  r  r  r[
  ry   rY   rX   <module>r
     s       	  
   < < 2 !   3 3       J J b b R R    A A : % /  A 

 6 V     *	 	 	 R Q ; O <<7	!RYY 	! $ G	[U[[A&' Hd#34	4+,   	LLLLLLLL	   	LLLLLLLL	% ! 	LLLLLLLL	* & "<A J 
 5)  **Y
biix04c: H* A "
 A % &V " A !
 T 
 299 #")) #$	ryy 	299 #299 #
		 

ryy 
	RYY 	")) &(")) ( %	u- rD1  
 " )*  
 4  &    #%++ W 15EKK V?$ $NH(* H(VVW, VW,tn << =o  Os   S S! S!