
    VhM                         U d dl Z d dlmZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZ  G d de	      Z G d	 d
e	      Z G d de	      Z G d de	      Zdaeeeej$                           ed<   dej(                  fdZy)    N)chain)Optional)_get_device_index)Function)commc                   ,    e Zd Zed        Zed        Zy)	Broadcastc                    t        d |D              sJ d       |D cg c]  }t        |d       }}|| _        t        |      dk(  ryt        |      | _        |d   j                         | _        t        j                  || j                        }g }t        | j                  dd        D ]"  \  }|r	|j                  fd|D               $  | j                  |  t        t        j                  |            S c c}w )Nc              3   N   K   | ]  }|j                   j                  d k7    ywcpuNdevicetype.0is     L/home/dcms/DCMS/lib/python3.12/site-packages/torch/nn/parallel/_functions.py	<genexpr>z$Broadcast.forward.<locals>.<genexpr>   "      
'(AHHMMU"
   #%z2Broadcast function not implemented for CPU tensorsTr       c              3   (   K   | ]	  }|     y wNr   )r   outputidxs     r   r   z$Broadcast.forward.<locals>.<genexpr>   s     *M66#;*Ms   )allr   target_gpuslen
num_inputs
get_deviceinput_devicer   broadcast_coalesced	enumerateneeds_input_gradextendmark_non_differentiabletupler   from_iterable)ctxr   inputsxoutputsnon_differentiablesinput_requires_gradr   s          @r   forwardzBroadcast.forward   s    
,2
 
 	@?	@ 
 <GGa(D1GG%v;!V!!9//1**63??C (1#2F2Fqr2J(K 	N$C$&#***MW*MM	N 	$##%89U((122 Hs   C>c                 ^    dt        j                  | j                  | j                  g| z   S )Nr   )ReduceAddCoalescedapplyr#   r!   r+   grad_outputss     r   backwardzBroadcast.backward   s4    +11cnn
/;
 
 	
    N__name__
__module____qualname__staticmethodr1   r7   r   r8   r   r	   r	      s(    3 3$ 
 
r8   r	   c                   ,    e Zd Zed        Zed        Zy)r3   c                    t        dt        |      |      D cg c]  }||   j                          c}| _        t        dt        |      |      D cg c]
  }||||z     }}t	        j
                  ||      S c c}w c c}w )Nr   )ranger    r"   r   r   reduce_add_coalesced)r+   destinationr!   gradsr   grads_s         r   r1   zReduceAddCoalesced.forward'   s     ,1CJ
+K
&'E!H!
 6;1c%j*5UV%A
N+VV((==
 Ws   A:A?c                 H    dt        j                  | j                  g| z   S )NNN)r	   r4   r   r5   s     r   r7   zReduceAddCoalesced.backward0   s(    
 OOCOO;l;< 	<r8   Nr9   r   r8   r   r3   r3   &   s(    > > < <r8   r3   c                   ,    e Zd Zed        Zed        Zy)Gatherc                     t        d |D              sJ d       |dk(  rd _        nt        |d      }| _        | _        t	        d |D               _        t        d |D              r4|dk(  r/t	        d |D              }t        j                  d	       d _        nd
 _        t	         fd|D               _	        t        j                  | j                   j                        S )Nc              3   N   K   | ]  }|j                   j                  d k7    ywr   r   r   s     r   r   z!Gather.forward.<locals>.<genexpr>;   r   r   z/Gather function not implemented for CPU tensorsr   Tc              3   <   K   | ]  }|j                           y wr   )r"   r   s     r   r   z!Gather.forward.<locals>.<genexpr>D   s     >!q||~>s   c              3   B   K   | ]  }|j                         d k(    ywr   N)dimr   ts     r   r   z!Gather.forward.<locals>.<genexpr>E   s     ,quuw!|,s   r   c              3   >   K   | ]  }|j                  d         yw)r   N)viewrO   s     r   r   z!Gather.forward.<locals>.<genexpr>F   s     5166!95s   zvWas asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.Fc              3   T   K   | ]  }|j                  j                         ! y wr   )sizerN   )r   r   r+   s     r   r   z!Gather.forward.<locals>.<genexpr>O   s     @Asww@s   %()r   target_devicer   rN   r)   
input_gpuswarningswarnunsqueezed_scalarinput_sizesr   gather)r+   rU   rN   r,   s   `   r   r1   zGather.forward9   s     
,2
 
 	=<	= 
 E! %C-mTBM -C>v>>,V,,5f55FMM'
 %)C!$)C!@@@{{6377C,=,=>>r8   c                     t         j                  | j                  | j                  | j                  |      }| j
                  rt        d |D              }d|z   S )Nc              3   &   K   | ]	  }|d      ywrM   r   )r   gs     r   r   z"Gather.backward.<locals>.<genexpr>X   s     #BQAaD#Bs   rF   )Scatterr4   rV   rZ   rN   rY   r)   )r+   grad_outputscattered_gradss      r   r7   zGather.backwardR   sK    !--NNCOOSWWk
   ##B/#BBOo--r8   Nr9   r   r8   r   rH   rH   8   s(    ? ?0 . .r8   rH   c                   ,    e Zd Zed        Zed        Zy)r_   c           	         |D cg c]  }t        |d       }}|| _        |j                  j                  dk7  r|j	                         nd| _        d }t        j                  j                         r;| j
                  dk(  r,|D cg c]!  }t        t        j                  d|            # }}t        j                  |||| j                  |      }|t        |      D ]s  \  }	}
t        j                  j                  ||	         5  t        j                  j                         }|j                  ||	          |
j                  |       d d d        u |S c c}w c c}w # 1 sw Y   xY w)NTr   cuda)r   rN   r   r   r"   r#   torchre   is_available_get_streamr   scatterr%   current_streamwait_streamrecord_stream)r+   r   chunk_sizesrN   inputr-   streamsr   r.   r   r   main_streams               r   r1   zScatter.forward]   sG   ;FGa(D1GG161B1Be1K5++-QS::""$)9)9R)? IT>DELL89G  ,,uk;Q&w/ 6	6ZZ&&{1~6 6"'**";";"=K++GAJ7((56 66
 # H6 6s   E&EAEE'	c                 `    d d d t        j                  | j                  | j                  g| fS r   )rH   r4   r#   rN   )r+   r`   s     r   r7   zScatter.backwardr   s+    T4c.>.>!V+!VVVr8   Nr9   r   r8   r   r_   r_   \   s*     ( W Wr8   r_   _streamsr   c                 6   | j                   dk(  ryt        t        | j                   d      }|yt        dg|j	                         z  at        | j
                     ,|j                  | j
                        t        | j
                  <   t        | j
                     S )zBGet a background stream for copying between CPU and target device.r   N)r   getattrrf   rr   device_countindexStream)r   
device_mods     r   rh   rh   {   s     {{eT2J6J3355%!+!2!26<<!@FLL!!r8   )rW   	itertoolsr   typingr   rf   torch._utilsr   torch.autogradr   torch.nn.parallelr   r	   r3   rH   r_   rr   listrw   __annotations__r   rh   r   r8   r   <module>r      s|        * # "
 
6< <$!.X !.HWh W8 48(4./
0 7" "r8   