
    Vh-                        U d dl Z d dlmZmZ d dlZd dlZd dlmZ d dlmZ ej                  j                  Z
i Zeej                  j                  ef   ed<   ej                  j                   Zej"                  j$                  Zd Zi Zd Zd(dZ eej,                  j.                        d	ed
efd       Z eej0                  j.                        d	ed
eeef   fd       Zdededee   defdZ eej<                        dededee   dededee   dee   dee   d
eee   ee   ee   f   fd       Zdee   fdZ eej@                        dededee   dee   dee   dee   dee   ded e!dee   d
eeee   ee   f   fd!       Z  eejD                        dedededee   dee   dee   d"ee   d#ed e!dee   d$ed
eeee   ee   f   fd%       Z" eej"                  j$                  j,                  j.                  d&'        eej"                  j$                  jF                  j.                          eej"                  j$                  jH                  j.                          eej"                  j$                  jJ                  j.                          eej"                  j$                  jL                  j.                          eej"                  j$                  j0                  j.                          eej"                  j$                  j<                  j.                          eej"                  j$                  j@                  j.                          eej"                  j$                  jN                  j.                          eej"                  j$                  jD                  j.                          eej"                  j$                  jP                  j.                         y))    N)CallableOptional)Tensor)_maybe_remove_out_wrapperdecomposition_table_for_jvpc                       fd}|S )Nc                 J    	  t              |       S # t        $ r | cY S w xY wN)register_decomposition	Exception)fops    T/home/dcms/DCMS/lib/python3.12/site-packages/torch/_decomp/decompositions_for_jvp.py	decoratorz/maybe_register_decomposition.<locals>.decorator*   s/    	-)"-a00 	H	s    "" )r   r   s   ` r   maybe_register_decompositionr   )   s         c                 $    t        | t              S )N)registry)r   r   )fns    r   register_decomposition_for_jvpr   9   s    !"/JKKr   c                    | t         v rt         }n| t        v rt        }nt        d|        ||    }t        |      }|rst        j
                  j                  |      }t        j                  |      }d } ||      }t        j
                  j                  |      j                  j                  }n)t        j
                  j                  |      j                  }t        j
                  j                  | |       y )Nz!could not find decomposition for c                    | j                   j                         D cg c]  }|  }}| j                   j                         D cg c]  }|  }}ddj                  |       ddj                  |       dS c c}w c c}w )Nzdef wrapped_decomp(z, z):
  return decomp_fn(z)
)
parametersvalueskeysjoin)sig	param_str	param_def	param_uses       r   get_function_defz=_register_jit_decomposition_for_jvp.<locals>.get_function_defT   s    9<9N9N9PQII;QIQ9<9L9L9NOII;OIO(9)=(>>UVZV_V_`iVjUkknoo ROs   
A8
A=)r   decomposition_tableRuntimeErrorr   torchjitignoreinspect	signatureCompilationUnitwrapped_decompgraphscript_register_decomposition)decomp
use_pythondecomposition_table_used	decomp_fnr   r"   f_strr,   s           r   #_register_jit_decomposition_for_jvpr4   =   s    ,,#> 	&	&#6 >vhGHH(0I
 *)4III$$Y/		*	p !%		))%0??EE		  +11	II%%fe4r   selfreturnc                 R    t        j                  t        j                  |             S r
   )r%   sumdiag)r5   s    r   tracer:   e   s    99UZZ%&&r   c                 *   t        j                  | j                  d      |       }t        j                  t        j                  |              }| j
                  s| j                  r| j                  d      }n|}|t        j                  |      z
  |fS )Nr   )r   )r%   minimum	new_zerosexpabsis_cudais_xpulog1p)r5   minzbuffers       r   log_sigmoid_forwardrF   j   sl    
--r*D
1C		599T?"#A||t{{%Q''r   inputrstdinner_dim_indiceskeepdimc                     t        j                  | ||      }t        j                  | |d|      }t        j                  d|z  d      |z
  }|j	                         }dt        j
                  ||z         z  }||fS )N)dimrJ   F)rL   unbiasedrJ         )r%   meanvarpowdetachsqrt)rG   rH   rI   rJ   rP   rQ   epss          r   recompute_mean_varrV   u   sq     ::e!2GDD
))E05'
RC
))AHa
 3
&C
**,Cuzz#)$$D:r   grad_outnormalized_shaperP   weightbiasoutput_maskc                 |   |j                   }|j                         }	|	t        |      z
  }
||
d  }|d |
 }t        t	        |
|	            }t        t	        d|
            }d}|D ]  }||z  }	 d}|D ]  }||z  }	 |dk  s|dk  r8|j                  |      |j                  ||
d        |j                  ||
d        fS t        |||d      \  }}||z
  |z  }|| |z  }n| }||z  }t        j                  ||d      }t        j                  ||      }t        j                  ||d      }t        j                  ||      }||z
  |z
  }|d   r	||z  |z  }nt        j                  |      }|d   r1|/t        |      dkD  rt        j                  | |z  |d      }n3| |z  }n-|t        j                  |      }nt        j                  d      }|d   r9|7t        |      dkD  rt        j                  | |d      }n>| j                         }n-|t        j                  |      }nt        j                  d      }|||fS )Nr   rN   TrJ   Fr   rO   )shaperL   lenlistranger=   rV   r%   r8   mul
zeros_likezerosclone)rW   rG   rX   rP   rH   rY   rZ   r[   input_shape
input_ndimaxis
inner_dims
outer_dimsrI   outer_dim_indicesNiMmean_rstd_x_hat
grad_x_hatabc1c2c3innerd_inputd_weightd_biass                                  r   native_layer_norm_backwardr|      sn    ++KJ,--DTU#JUd#JU445U1d^,	A 	Q	A 	QAvaOOK(OOK./OOK./
 	
 &eT3DdSLE5U]e#E&

QA		*/6A	:u	%B	2($	/B	5"	BEBJE1~%*QY%$7""5)1~&, !A%).5 "3U*H  %'H		##F+;;r?1~$* !A%',yy;Le'TF^^%F		!!$'RXv&&r   xc                 "    d}| D ]  }||z  }	 |S )NrN   r   )r}   rrm   s      r   prodr      s$    	A 	QHr   running_meanrunning_var	save_meansave_invstdtrainrU   c
                 P   |j                   }
|j                         }|dk\  sJ d       d}t        |
      |
|   z  }|}|}|rI||J d       dgt        t	        d|j                                     z   }|J t        |||d      \  }}n ||J |}t        j                  ||z         }||J dg|z  }|
|   ||<   g }t	        |      D ]  }||k7  s	|j                  |        t        j                  ||      }d|z  }t        j                  | |      }t        j                  | ||z
  z  |      }t        j                  ||z  |      }t        j                  t        j                  ||z  ||z        |      }|t        j                  ||      dz  }nt        j                  ||z  |      }|r||z
  |z  }| |z
  |z
  |z  }n| |z  }|	d   r||z  }n-|t        j                  |      }nt        j                  d	      }|	d   r|}nt        j                  |      }|||fS )
NrO   z$rank of the input must be at least 2rN   z7when train=True, save_mean and save_invstd are requiredr   Fr]   g      ?r   )r^   rL   r   r`   ra   rV   r%   rsqrtappendreshaper8   rb   rc   rd   )rW   rG   rY   r   r   r   r   r   rU   r[   rf   
input_rankrh   num_featuresrP   invstdreduciton_dimsbroadcast_maskreduction_axesrm   normgrad_output_sumdot_p	grad_mean
proj_scale
grad_scaleproj
grad_inputgrad_weight	grad_biass                                 r   native_batch_norm_backwardr      sn    ++KJ?BBB?D${4'88LDF$)@ 	
E	
@ tE!UYY[$9::!!!)%QVWf'K,CCC[3./$"222S:%N&t,N4 "N: %9!!!$% ==~.DDii.9OIIh%$,/@Eo4nEIuyyvGXJ~]]6>:S@
]]6F?NC

*$)3zA

*
1~fn		&&
 kk"o1~#	$$
	 Y//r   save_varupdatereservec                 *    t        | |||||||||	
      S r
   )r   )rW   rG   rY   r   r   r   r   r   rU   r[   r   s              r   batch_norm_backwardr   )  s/     & r   T)r0   )F))r(   typingr   r   r%   torch._decompr   torch._prims_common.wrappersr   _decompr#   r   dict_opsOperatorBase__annotations__r   opsatenr   r   r4   r:   defaultrF   tupler`   intboolrV   r|   r   r   floatr   nll_loss_backwardnll_loss2d_backward_log_softmax_backward_data_softmax_backward_datacudnn_batch_norm_backwardmiopen_batch_norm_backwardr   r   r   <module>r      s    %    B mm77 GI T%**"9"98"CD I== yy~~4 ! L!5P djj001' '6 ' 2' d66>>?(f (vv~)> ( @(48IHL   ? ?@I'I'I' 3iI' 	I'
 I' VI' 6
I' dI' 8FXf-x/??@I' AI'XDI    ? ?@N0N0N0 VN0 6"	N0
 &!N0 N0 &!N0 N0 
N0 dN0 68F#Xf%556N0 AN0b   8 89  6"	
 &!  v  
 d  68F#Xf%556 :6 $EIINN$8$8$@$@T R #EIINN$D$D$L$L M #EIINN$F$F$N$N O #EIINN$M$M$U$U V #EIINN$I$I$Q$Q R #EIINN$F$F$N$N O #EIINN$M$M$U$U V #EIINN$M$M$U$U V #EIINN$L$L$T$T U #EIINN$F$F$N$N O #EIINN$M$M$U$U Vr   