
    Vh                        d Z ddlZddlZddlZddlZddlZddlmZmZ ddl	m
Z
mZmZmZ ddlZddlmc mZ ddlmZmZmZ ddlmZmZ ddlmZmZmZmZ ddlm Z m!Z!m"Z" dd	l#m$Z$m%Z% dd
l&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5 ddl6m7Z7m8Z8 ddl9m:Z:m;Z; ddl<m=Z= ddl>m?Z? erddl@mAZA  e$eBd      ZC e$eBd      ZDd ZEd ZFd ZG G d d      ZH G d d      ZI eH       ZJg dZK e=eeej                  ej                  j                  j                  j                  g      ZP ej                         aRd ZS G d d       ZTd!aUd!aVd!aWej                  d'd"       ZYej                  d#        ZZd(d$Z[d% Z\d& Z]y))a  
Provides functionality for compiling PyTorch's autograd (automatic differentiation) system.

This module implements compiled autograd, which traces and optimizes backward pass
computations at runtime. The key components are:

- AutogradCompilerInstance: Traces and compiles autograd graphs using FX
- Context managers (_enable/_disable): Control when compiled autograd is active
- Utility functions: Support graph manipulation, tensor operations, and hooks

Compiled autograd can significantly improve backward pass performance by removing
Python overhead and enabling additional optimizations. It works by capturing
backward computations into an FX graph that can be compiled and optimized,
while maintaining the same semantics as eager mode autograd.
    N)Counterdefaultdict)AnyOptionalTYPE_CHECKINGUnion)call_backward	call_hookFakeCompiledAutogradEngineGetItemSourceLocalSource)countersget_chromium_event_loggerlazy_format_graph_codeset_locals_to_stealcompile_contextCompileContext	CompileId)getArtifactLoggertrace_structuredclone_preserve_strides)FakeTensorMode)GraphModule)BackwardState)	decomposedisable_autocast_cachedisable_proxy_modes_tracingfetch_object_proxyProxyTorchDispatchModePythonKeyTracertrack_tensor_tree)
DimDynamicShapeEnv)preserve_node_metaset_stack_trace)
OrderedSet)CapturedTraceback)Proxycompiled_autogradcompiled_autograd_verbosec                  h    t         j                  j                  j                  j	                  d      S )Nr-   )torch_logging	_internal	log_stateis_artifact_enabled     O/home/dcms/DCMS/lib/python3.12/site-packages/torch/_dynamo/compiled_autograd.py snapshot_verbose_logging_enabledr7   F   s(    >>##--AA# r5   c                  ^    t         j                  j                  j                  j                  S N)r/   	_inductorconfigtriton
cudagraphsr4   r5   r6   snapshot_cudagraph_enabledr>   L   s    ??!!((333r5   c                      | t        |       S | S r9   r   )xs    r6   maybe_clonerA   P   s    }%a((Hr5   c                       e Zd Zd Zd Zd Zy)OpNamespacec                 "    t               | _        y r9   )r   custom_function_name_counterselfs    r6   __init__zOpNamespace.__init__\   s    :A))r5   c                 ^   |r1d|z   }| j                   |   }| j                   |xx   dz  cc<   | | }t        | |      rJ t        |||      |r,t        | |t        j
                  j                               |S t        j
                  j                  fd       }t        | ||       |S )NCppNode   c                       | i |S r9   r4   )argskwargsresults     r6   run_non_traceable_cpp_in_eagerz7OpNamespace.add.<locals>.run_non_traceable_cpp_in_eagerm   s    t.v..r5   )rE   hasattrOpsetattrr/   _dynamoallow_in_graphdisable)rG   namefnis_custom_functionis_traceablecountrP   rO   s          @r6   addzOpNamespace.add_   s    t#D55d;E--d3q83VE7#D4&&&D"01D$ < <V DE  ]]""/ #/ D$ >?r5   c                     t        | |      S r9   )getattr)rG   rW   s     r6   getzOpNamespace.gett   s    tT""r5   N)__name__
__module____qualname__rH   r\   r_   r4   r5   r6   rC   rC   [   s    D*#r5   rC   c                       e Zd Zd Zd Zd Zy)rR   c                 <    || _         || _        || _        d| _        y )Nz#torch._dynamo.compiled_autograd.ops)rX   rY   r`   ra   )rG   rW   rX   rY   s       r6   rH   zOp.__init__y   s    "4?r5   c                 &     | j                   |i |S r9   )rX   )rG   rM   rN   s      r6   __call__zOp.__call__   s    tww'''r5   c                 :    | j                   dz   | j                  z   S )N.)ra   r`   rF   s    r6   __repr__zOp.__repr__   s    $t}}44r5   N)r`   ra   rb   rH   rf   ri   r4   r5   r6   rR   rR   x   s    @(5r5   rR   )inputssizesscalarshookspacked_datac           	      B    t        t        t        | d d                   S )N)compiled_autograd_idframe_idframe_compile_idr   )rp   s    r6   make_compile_contextrs      s(    %9!%	
 r5   c                   J   e Zd Zd8dZd Zedefd       Zdee	j                     dee   deeeef      d	eeeeef         fd
Zdee   fdZd Zdede	j&                  j(                  j*                  dee   fdZd Zd Zd Zd Zd Zd Zd Zd Zd Z d Z!defdZ"d Z#d Z$d  Z%dee   fd!Z&d" Z'd# Z(d$ Z)d% Z*d& Z+ed'        Z,ed(        Z-d) Z.d* Z/d+ Z0d, Z1d- Z2d. Z3d/ Z4d0 Z5	 d9d	eeeeef         fd1Z6d2efd3Z7d4ed5ed6ee	j&                  jp                     fd7Z9y):AutogradCompilerInstancereturnNc                 4   || _         t        j                         | _        | j                  j                  | _        t               | _        t        dd| j                        | _        t               | _
        t        | j                  d      | _        d | _        y )NT)allow_fallback_kernelsallow_non_fake_inputs	shape_envsymbolic)compiler_fn
contextlib	ExitStackstackcloser&   rz   r   fake_tensor_moder#   	fx_tracerr"   
proxy_modehooks_proxy)rG   r|   s     r6   rH   z!AutogradCompilerInstance.__init__   st    &))+
ZZ%%
! .#'"&nn!

 )*0L,0r5   c                 t    t        |t        j                        sJ | j                  j	                  ||      S )N)source)
isinstancer/   Tensorr   from_tensor)rG   r@   r   s      r6   	wrap_fakez"AutogradCompilerInstance.wrap_fake   s2    !U\\***$$0060BBr5   c                 ,    t        t        |       |      S r9   r   )rW   idxs     r6   r   zAutogradCompilerInstance.source   s    [.44r5   rj   rk   rl   originsc                 	    t         d   dxx   dz  cc<   t        t               _        t	         j                         _         j
                  j                          t        j                          _	        t               j                  d j                  d j                  id       d  _        i  _        t        j                  j!                          j"                  _        t        j&                  j)                  t*               j"                  _        i  j"                  _        i  _         fdt2        D        \  } _         _         _         _         j<                  j?                  tA                      |\  }}}tC        |      D 	
cg c]'  \  }	}
 jE                  |
 jG                  d	|	            ) }}	}
 jI                  |||       tC        |      D 	cg c]@  \  }	} jJ                  jM                  | jG                  d
|	      tN        jP                        B }}	} jI                  | j4                  |      }tC        |      D ]!  \  }}||    j0                  |jR                  <   # tC        |      D ]  \  }	} jG                  d|	      }tU        |tV              r/ jJ                  jM                  ||tN        jP                        ||	<   WtU        |tX              rL jJ                  j[                   jJ                  j]                  ||tN        jP                        ||      ||	<   t_        dta        |              jI                  | j6                  |       tC        |      D ]+  \  }} j6                  |    j0                  |jR                  <   -  j<                  j?                  tc        i               j<                  j?                   jd                          j<                  j?                   jf                          j<                  j?                  ti                       jd                  jJ                  J  jd                  jJ                  } j<                  j?                  t        j&                  jj                  jl                  jo                  |             tq        ts        jt                               |||fS c c}
}	w c c}}	w )Nr,   capturesrK   graph_idTlog_pt2_compile_event)
tracer_clsc              3   Z   K   | ]"  }j                   j                  d |di        $ yw)placeholderr4   N)r   create_proxy).0rW   rG   s     r6   	<genexpr>z9AutogradCompilerInstance.begin_capture.<locals>.<genexpr>   s.      
 NN''tRD
s   (+rj   rk   rl   )r   dynamic_dim)hintr   zUnexpected scalar type: );r   nextCOMPILE_COUNTERidrs   r   	__enter__timetime_nsstart_time_nsr   log_event_startaot_graph_cls_nameaot_graph_infosr/   nnModuler   rootfxGraphr#   graphtensor_attrssymnode_proxy_lookup_graph_placeholderssizes_proxyscalars_proxyr   packed_data_proxyr   enter_contextr'   	enumerater   r   bind_objects_to_proxiesrz   $create_unspecified_symint_and_symbolr%   DYNAMICnoder   intfloatcreate_symfloatnodecreate_unspecified_symbolAssertionErrortyper   r   r   r   experimentalsymbolic_shapes_suppress_guardsstrr   current_compile_id)rG   rj   rk   rl   r   
args_proxyinputs_originssizes_originsscalars_originsr   r@   valproxiesisymintr   symvalenvs   `                 r6   begin_capturez&AutogradCompilerInstance.begin_capture   s'    	$%j1Q61'3DGG<&&(!\\^!#33!"&	 	4 	
 26:<#hhoo/$xx~~~I&(#$&!
+
	
" 	

  !3!569@6 $F+
Q NN1dkk(C89
 
 	$$VZH &e,
 S NN??GS)""
 
 ..ud6F6FV"5) 	@IAv5<QZD%%fkk2	@ "'* 	LHC[[C0F#s##~~RR&& 
 C'#~~AANN<<%$.$6$6 = 
 !  B   %%?cKK'	L( 	$$Wd.@.@/R"7+ 	KIAv595G5G5JD%%fkk2	K 	

  2/

  !6!67

  1

  !7!9:$$..:::##--

  HH!!11BB3G	
 1134	
 	
m

s   ,S
 AScompile_reasonsc                 2    sJ t        dd fd       y )Nartifactc                      dddS )N!compiled_autograd_compile_reasonsjsonrW   encodingr4   r4   r5   r6   <lambda>z>AutogradCompilerInstance.log_compile_reasons.<locals>.<lambda>#  s    ;"! r5   c                       S r9   r4   )r   s   r6   r   z>AutogradCompilerInstance.log_compile_reasons.<locals>.<lambda>'  s     r5   metadata_fn
payload_fn)r   )rG   r   s    `r6   log_compile_reasonsz,AutogradCompilerInstance.log_compile_reasons  s"      /	
r5   c                 H    j                         D cg c]  } j                  |       }}j                  }	|	j                  |	j                  ~	t
        j                  j                  fd       }
 j                  j                  d|
||g|i       d | j                  |    fd} |       } fd}t
        j                  j                  j                  j                  ||      }t        j                    j                  |      }|S c c}w )Nc                 v    t        j                  j                  j                  j                  | |g| }|S r9   )r/   
_functorch_aot_autogradruntime_wrappers_backward_prologue_functional)ctx_saved_tensorsctx_symints	flat_argsoutmaybe_subclass_metadatametadatas       r6   call_aot_bwd_prologuezOAutogradCompilerInstance.proxy_call_aot_backward.<locals>.call_aot_bwd_prologueF  sB    ""00AA__!'	
 C Jr5   call_functionkindtargetrM   rN   c            	         d }  | j                   j                        }t        |t        d u      z
        D cg c]  }|   	 }}j	                         }t        |      t        j                        k(  sJ |D cg c]  }j                  |       }}||d t        |       |j                         d}i d }j                   j                  j                  D ]  }	|	j                  dk(  r||   j                  |	<   |dz  }+|	j                  dk(  rt        |	j                        dk(  sJ |	j                  d   D 
cg c]U  }
t        |
t        j                  j                         r-t        j                  j#                  |
   j$                        n|
W }}
|	j                  dk(  r|	j&                  }j$                  j)                  |      }t+        j$                  j,                  |t/        j                   |             j$                  j1                  d|di       }||	<   R|	j                  dk(  r0j$                  j                  j3                  |	fd	      }||	<   t5        d
       |J d }|D cg c]/  }t        |t        j                  j"                        r |       n|1 }}j7                  ||       |S c c}w c c}w c c}
w c c}w )Nc                 Z    d}| j                   D ]  }|j                  dk(  r|dz  } |S  |S )Nr   r   rK   )nodesop)r   num_argsr   s      r6   
num_inputszkAutogradCompilerInstance.proxy_call_aot_backward.<locals>.copy_paste_aot_backward_graph.<locals>.num_inputsb  sA    !KK Dww-/ A   r5   r   r   rK   outputget_attrr4   r   c                     |    S r9   r4   )nvalue_remaps    r6   r   ziAutogradCompilerInstance.proxy_call_aot_backward.<locals>.copy_paste_aot_backward_graph.<locals>.<lambda>  s    A r5   zshouldn't get herec                  t    t               5  t        j                  ddddd      cd d d        S # 1 sw Y   y xY w)Nr   {   r    r/   zerosr4   r5   r6   dummyzfAutogradCompilerInstance.proxy_call_aot_backward.<locals>.copy_paste_aot_backward_graph.<locals>.dummy  s1    02 8 ;;q!Q378 8 8s   .7)
_bw_moduler   ranger   _get_compiled_autograd_symintslensymintsto_proxyappendr   r   r   rM   r   r/   r   Noder+   r   r   get_fresh_qualnamerS   r   r^   create_node	node_copyr   r   )r   r   r   	pall_argsr   epsymintsargs_idxpoutputsr   r   rW   qualnamerO   r   ooutputsr   ctxpbackward_statepgradsrG   s                    @r6   copy_paste_aot_backward_graphzWAutogradCompilerInstance.proxy_call_aot_backward.<locals>.copy_paste_aot_backward_grapha  s     "#.."6"67H#(Ct8S4T)T#Uq	I  88:Gw<3s{{#333329:Qa(:H:(0InG%*  1 HK7;H,,22 ?77m+(1((;(@(@K%MHWW(tyy>Q...
 "&1	   &a7 {1~t~~F H   WW
*;;D#~~@@FH++Xws~~t7T "^^77
HbRTUF(.K%WW/!^^11;;6F )/K%()=>>5?6 '''8
 JRDE:a8a?G  (((;Nq ;" :s   KKAK4K$c                 *    t         j                  j                   fd       }t        j                  j
                  |      }j                  j                  d|t        |      i       }j                         }j                  |g|g       |S )Nc                  *    j                  |       S )N)
is_runtime)creation_fn)unwrapped_argsr  subclass_metas    r6   make_subclasszkAutogradCompilerInstance.proxy_call_aot_backward.<locals>.proxy_subclass_constructor.<locals>.make_subclass  s    $00J0WWr5   r   r   )r/   rT   rU   pytreetree_mapr  r   r   tupleallocate_dummyr   )r  r  r  r  punwrapped_argspoutputr   rG   s   ``     r6   proxy_subclass_constructorzTAutogradCompilerInstance.proxy_call_aot_backward.<locals>.proxy_subclass_constructor  s    ]]))X *X %oodmm^LOnn11$$?+	 2 G ((*F((&G9=Mr5   )make_subclass_override)r   r  _forward_clsr   r   r/   rT   rU   r   r   r   r   r   r   _backward_epilogue_functionalr  r  )rG   pinputspsaved_tensorssaved_tensorspctxr  maybe_backward_state_idxr  r	  CompiledFunctionr   r  r  r   resultspresultsr   r   r  r  s   `    `          @@@@r6   proxy_call_aot_backwardz0AutogradCompilerInstance.proxy_call_aot_backward*  s5   ( /2.P.P.RSDMM!$SS ++#,,"2"J"J		%	%	 
&	 ,, ( 
  - 	
 #/"../GHOF	P 01	$ ""00AA__##=	 ` 
 ??4==':M Ts   Dbackward_idxr  r(  c           
      x   | j                   J | j                   |   }| j                  |      }| j                  |      }	t        |j                  d      r| j	                  ||	||||      }
n(| j
                  j                  dt        ||	g|i       }
|
J t               5  g }t        |      D ]M  \  }}||
|   |j                  d        |\  }}}}|j                  t        j                  ||||             O | j                  ||
       d d d        t        |      S # 1 sw Y   t              S xY w)N_aot_idr   r   )sizedtypelayoutdevice)r   r  rQ   r"  r,  r   r   r	   r    r   r  r/   emptyr   r  )rG   rj   output_metadatasr&  r-  r  r(  r'  r$  r%  r   grad_insr   output_metadatar2  r3  r1  r0  s                     r6   proxy_call_backwardz,AutogradCompilerInstance.proxy_call_backward  sf    +++---'}53##Y/22(G nn11$$" 
  2 	G """(* 	<57H(12B(C $_"*gcl.BOOD).=+tKKTvfU ((7;	< X	< Xs   #A0D&&D9c                     ||j                         |j                         |j                         |j                         |j                         |j                         f}| j                  t        |d gdz        S )N   )rk   stridesstorage_offset
proxy_callcopy_slices_prologue)rG   rj   baseviewrM   s        r6   call_copy_slices_prologuez2AutogradCompilerInstance.call_copy_slices_prologue  sd    JJLLLN!JJLLLN!
 3TD6A:FFr5   c                 R    | j                  t        ||||fd gt        |      z        S r9   )r=  copy_slices_epiloguer   )rG   needs_input_gradrO   res
grad_slices        r6   call_copy_slices_epiloguez2AutogradCompilerInstance.call_copy_slices_epilogue  s2     vsJ7FS)**
 	
r5   c                 p    t               5  t        j                  ddg      cd d d        S # 1 sw Y   y xY w)Nr   i[r   rF   s    r6   r  z'AutogradCompilerInstance.allocate_dummy  s-    (* 	/;;9~.	/ 	/ 	/s   ,5c                 2    t         j                  ||||      S )zBinds ops.fn_name = fn)opsr\   )rG   fn_namerX   rY   rZ   s        r6   bind_functionz&AutogradCompilerInstance.bind_function  s    www$6EEr5   c                 Z    t         j                  |      }| j                  ||g||      S )z:Proxies a call to ops.fn_name(grads, *args) into the graph)rJ  r_   r=  )rG   rK  gradsrM   r7  r   s         r6   apply_functionalz)AutogradCompilerInstance.apply_functional  s)    WWWrE>D>?CCr5   c                 `    t        j                  |      \  }}t        j                   fd|      } j                  j	                  d||i       }|D cg c]  } j                          }} j                  |t        t        |            D 	cg c]  }	||	   	 c}	       |S c c}w c c}	w )z*Proxies a call to fn(*args) into the graphc                 &    j                  |       S r9   r  )r  rG   s    r6   r   z5AutogradCompilerInstance.proxy_call.<locals>.<lambda>  s    t}}Q/? r5   r   rM   rN   )	r  tree_flattenr  r   r   r  r   r   r   )
rG   rX   rM   r7  r   _
proxy_args	proxy_outrO   r   s
   `         r6   r=  z#AutogradCompilerInstance.proxy_call  s    **40	1__%?F
NN//Rj 0 
	 2AAA$%%'AA$$VE#f+DV-Wqil-WX B-Ws   B&B+
c                    t         j                  d      }t        j                  | j                  |g|      }| j
                  j                  d||i       }t        |      t        |      k(  sJ | j                  ||       |S )zEProxies a call to ops.validate_outputs(outputs, *args) into the graphvalidate_outputsr   rS  )	rJ  r_   r  r  r  r   r   r   r   )rG   rU  r  rM   r7  r   rV  new_proxy_outputss           r6   rY  z)AutogradCompilerInstance.validate_outputs%  s    WW'(__T]]W4Dt4DE
 NN77Rj 8 
 ?#s7|333$$W.?@r5   c                     | j                  |      }| j                  |      }| j                  j                  dt        j                  ||fi       }| j                         }| j                  |g|g       |S )Nr   rS  )r  r   r   r/   r\   r  r   )rG   old_varnew_varold_var_proxynew_var_proxyrW  rO   s          r6   
accumulatez#AutogradCompilerInstance.accumulate0  ss    g.g.NN//UYYm]-KTV 0 
	 $$&$$fX	{;r5   c                     | j                   j                  dt        |g|D cg c]  }| j                  |       c}|      S c c}w Nr   )r   r   r
   r  )rG   hookrM   rN   r@   s        r6   proxy_call_hookz(AutogradCompilerInstance.proxy_call_hook:  sM    ~~**,01q$--"1 
 	

 2s   Ac                     | j                   J | j                   |   }| j                  |   }| j                  ||d      }| j                         }| j	                  |g|g       |S )Nunpack_hook	hook_type)r   r   rd  r  r   )rG   hook_iddata_idrc  dataproxyr   s          r6   rf  z$AutogradCompilerInstance.unpack_hookE  s{    +++(%%g.$$# % 

 !!#$$cUUG4
r5   r   c                     | j                   J | j                   |   }| j                  |||   d      }t               5  t        ||         ||<   | j	                  ||   g|g       d d d        |S # 1 sw Y   |S xY w)Ntensor_pre_hookrg  r   rd  r    rA   r   )rG   rj   ri  r   rc  rl  s         r6   rn  z(AutogradCompilerInstance.tensor_pre_hookR  s    +++($$1I' % 

 )* 	?#F1I.F1I((&)ug>	? 	? s   )A22A<c                    | j                   J | j                   |   }| j                  ||d      }t               5  |D cg c]  }t        |       }}| j	                  ||       d d d        |S c c}w # 1 sw Y   |S xY w)Npre_hookrg  ro  )rG   rj   ri  rc  r   r@   s         r6   rq  z!AutogradCompilerInstance.pre_hook_  s    +++(&&  ' 

 )* 	:.45k!n5F5((9	:  6	: s   A6A1A61A66B c                    | j                   J | j                   |   }| j                  |||d      }t               5  |D cg c]  }t        |       }}| j	                  ||       d d d        |S c c}w # 1 sw Y   |S xY w)N	post_hookrg  ro  )rG   r  rj   ri  rc  r   r@   s          r6   rs  z"AutogradCompilerInstance.post_hookl  s    +++(&&!	 ' 
 )* 	;/67!{1~7G7((':	;  8	; s   A7A2A72A77Bc                    t        |t        j                        sJ | j                  J | j                  |   }| j	                  ||d      }t               5  t        |      g}| j                  ||g       d d d        |S # 1 sw Y   |S xY w)Npost_acc_grad_hookrg  )r   r/   r   r   rd  r    rA   r   )rG   inputri  rc  rl  s        r6   ru  z+AutogradCompilerInstance.post_acc_grad_hookz  s    %...+++($$* % 

 )* 	9 '(E((8	9 	9 s    BBc                    i }d}t        |j                        }|d   j                  dk(  sJ |d   }t        |j                  j	                               }t        t              }||   |d   k(  sJ |t        |      z   dz
  }||   |d   k(  sJ t        |      D ]  \  }	}
|s)|
j                  d   j                  j                  dk(  rd}1|
j                  d   j                  j                  d	k(  }t        |
j                  d   j                               dk(  }|s|st        |
j                  j	                               }t        d
 |D              s|
||	<    |rn|j                         D ]B  }
t        j                  d|
       |
j                  d   j!                         |
j                  d<   D t        |j	                               S g S )NFr   rj   rK   r   cudaTcpuc              3     K   | ]  }t        |j                  t        j                  j                        xr |j                  j
                  d v xs3 t        |j                  t              xr |j                  j                     yw))primsatenN)r   r   r/   _ops
OpOverload	namespacerR   rY   r   users     r6   r   zDAutogradCompilerInstance.move_graph_nodes_to_cuda.<locals>.<genexpr>  sw      
  #4;;

0E0EF G KK115FF
 #4;;3 ? $ > >>
s   BB	zMoving node %s from cpu to cuda)listr   r   userskeysr   r   r   metar3  r   r0  allvaluesverbose_logdebugry  )rG   r   to_movehas_cuda_inputsr   rj   inputs_usersfirst_getitem_idxlast_getitem_idxr   r   is_cpu	is_scalar
node_userss                 r6   move_graph_nodes_to_cudaz1AutogradCompilerInstance.move_graph_nodes_to_cuda  s   ,.U[[!Qx(***qFLL--/0 34&'<?:::,s</@@1D%&,r*:::: . 	&GAt"tyy'7'>'>'C'Cv'M"&YYu%,,11U:FDIIe,11349I)!$**//"34
 
 !+
 
 "&GAJ-	&4 ( ;!!"CTJ#'99U#3#8#8#:		% ;
 ''	r5   c                 @   t        |t        j                  j                        xry |j                  dk(  xrh |j
                  t        j                  j                  j                  j                  t        j                  j                  j                  j                  fv S rb  )r   r/   r   r  r   r   rJ  r}  sym_sizer   	sym_numeldefault)rG   r   s     r6   is_sym_nodez$AutogradCompilerInstance.is_sym_node  sn    tUXX]]+ O?*O		''++UYY^^-E-E-M-MNO	
r5   c                 2   t               t        | j                  j                  j	                  d            D ].  \  }}j                  |j                  j                                0 t        t              dz
  k(  sJ fd}t        | j                  j                  j                        }| j                  j                  j                  |       t        | j                  j                  j                        }t        j                  d||z
         y )Nr   )r   rK   c                     | v xsE | j                   dk(  xs4 | j                   dk(  xs# | j                   dk(  xr | j                  t        v S )Nr   r   r   )r   r   _impure_targets)r   unpack_nodess    r6   	is_impurez/AutogradCompilerInstance.dce.<locals>.is_impure  sV    $ S77m+S77h&S GG.Q4;;/3Q	r5   zDCE removed %d nodes)r)   r   r   r   
find_nodesupdater  r  r   r   r   eliminate_dead_coder  r  )rG   r   r   r  beforeafterr  s         @r6   dcezAutogradCompilerInstance.dce  s    
 3=, !5!5!@!@M!@!RS 	3GAt

 12	3C+,q0000	 T^^))//000;DNN((../0&5.Ar5   c                 l    t        | j                  j                  | j                  j                  |      S r9   )r   r   r   r   )rG   r   s     r6   create_graph_modulez,AutogradCompilerInstance.create_graph_module  s%    4>>..0D0DbIIr5   c           	      >     j                   j                  dt        j                  di         j                  j                           j                   j                  dd j                   j                   j                  |            fi        g t               r% j                   j                   j                         j                   j                  j                  D ]%  }dD ]  }||j                  v s|j                  |=   ' t        dd  fd        j                           j!                           j#                           j%                           j'                           j)                           j+                           j-                           j/                           j1                  d	 j2                         t5        d
g       t7        dddd      }t8        j;                  d|       t<        j?                  d|       t        dfd        fd}tA               jC                  dtE        jF                         d j2                  i jH                  d        jJ                  jM                  d d d        | jO                        fS )Nr   r4   r   )tensor_metaexample_valuer   r   c                      dddS )N&compiled_autograd_graph_pre_reorderingstringr   r4   r4   r5   r6   r   z6AutogradCompilerInstance.end_capture.<locals>.<lambda>  s    @$! r5   c                      t         j                  j                   j                  j                  d j                   d      j                  d      S )NCompiledAutogradPreReorderingFprint_output)r   r   r   r   r   print_readablerF   s   r6   r   z6AutogradCompilerInstance.end_capture.<locals>.<lambda>   sG    {##$$"477)=9  n%n0	 r5   r   r  rj   zCompiled autograd graphT)include_deviceinclude_stridecoloredz%scompiled_autograd_graphc                  (     j                  d      S )NFr  )r  )r   s   r6   r   z6AutogradCompilerInstance.end_capture.<locals>.<lambda>'  s    u333G r5   )r   c           	      >   	 da D ](  }||   j                         j                  d      ||<   * t               5  t	        j
                        5   | |||||      cd d d        cd d d        da S # 1 sw Y   nxY wd d d        da y # 1 sw Y   da y xY w# da w xY w)NT)non_blockingF)in_compiled_autograd_region
pin_memoryry  _disablers   r   )	compiled_fnrj   rk   rl   rm   packed_inputsr   runtime_inputs_to_moverG   s	          r6   runtime_wrapperz=AutogradCompilerInstance.end_capture.<locals>.runtime_wrapper*  s    4.2+/ OA &q	 4 4 6 ; ; ; NF1IO Z U!5dgg!> U&vugumTU U U /4+U U U /4+U /4+e+sF   9B B
A3	B
'	B 3A<	8B
?B 
BB B Br,   r   r   )(r   r   r   _exec_final_callbacks_stubr   r   r  
create_argr  r>   r  r   r   r  r   rename_aot_dispatcher_nodesdelay_unpack_hook_nodesreorder_tensor_pre_hook_nodes'reorder_pre_hook_nodes_to_schedule_asapreorder_accumulate_grad_nodes%reorder_pre_hook_nodes_to_mimic_eager reorder_post_acc_grad_hook_nodesreorder_post_hook_nodesr  r  r   r   r   compiled_autograd_loginfor  r  r   log_event_endr   r   r   r   __exit__r|   )rG   r  r   fieldlazy_graph_coder  r   r  s   `     @@r6   end_capturez$AutogradCompilerInstance.end_capture  sI   ##&AA		
 	

""^^&&t}}W'=>@		
 -/%'%)%B%B4>>CWCW%X"
 NN((.. 	)D@ )DII%		%()	)
 	1	
 	((*$$&**,446**,224--/$$& 	
((+;DGG9)EFEH:.0%
 	""49$0%G	


	4 	"#11LLN!"& 	2 	
 	%%dD$7 0 0 777r5   c           	      n   | j                   ydt        j                  j                  j                  dt        j                  j                  j                  fd}t        t              }| j                  j                         D ]  \  }}|d   }|d   }d}|d   j                  }||   rd	||    }||xx   d
z  cc<   t        |j                        }	t        |	      }
|
J 	 |
j                  dk7  rt        |	      }
|
j                  dk7  r	 t        | j                  j                  j                        }t!        |      D ]  }t        |        t        |      }|j                  dk7  r- |||
      s$t        |      }|j                  dk7  r
 |||
      s$|
j                  dk7  r|j                  dk7  r|j"                  st        |      }6 |||
      st        d| | d	|
j$                   |_        t'        |
j(                        D ]+  \  }}d| | d	|j$                   |j(                  |   _        - t        |	      }
t        |      }|
j                  dk7  r|j                  dk7  r y# t        $ r Y w xY w# t        $ r' t*        j-                  d| j                   |||       Y 0w xY w)z
        Renames nodes as they appear in the AOTDispatcher backward graphs, prefixed by AOT id
        e.g. AOTDispatcher backward graph X's `sin_Y` -> `aotX_sin_Y`
        Ncaaotc                    | j                   |j                   k(  }|s]t        | j                   d      xrE t        |j                   d      xr- | j                   j                  |j                   j                  k(  }|st        | j                   d      rt        |j                   d      rz|j                   j                         dk(  r]t        |j                  j                  d      d      r8| j                   j                         |j                  d   j                         k(  }|xra | j                  |j                  k(  xrF | j                  |j                  k(  xr+ t        | j                        t        |j                        k(  S )Nr`   rW   zaten::reshapeoriginal_aten)
r   rQ   r`   rW   r  r_   r   r   r   all_input_nodes)r  r  target_matchs      r6   
is_similarzHAutogradCompilerInstance.rename_aot_dispatcher_nodes.<locals>.is_similarH  s1   99

2L BIIz2 B

J7B		**cjj.A.AA  !BIIv.CJJ/JJOO%8CHHLL96B  "yy~~/388O3L3Q3Q3SS  HEESVVOHGGsxx'H **+s33F3F/GG	r5   ca_node_start_idxaot_id aot_gmrU  rK   r   r   zIFailed to match %s%s (NodeCall %s) nodes with AOT backward graph %s nodes)r   r/   r   r   r  r   r   r   itemsr   iterr   r   r   StopIterationr   r   r  rW   r   r  r  r  )rG   r  aot_id_counternodecall_indexr  r  r  aot_id_postfix	aot_graphaot_itaot_nodeca_itrU  ca_noder   inps                   r6   r  z4AutogradCompilerInstance.rename_aot_dispatcher_nodes@  s   
 ""*	588==-- 	EHHMM4F4F 	8 *5S)9$($8$8$>$>$@ ;	 ND $%8 9(^FNX,,If%#$^F%;$<!=6"a'" )//*FF|H'''kk_4#F|H kk_4
(T^^1177801  AK u+ jjH,Z5R #5kG jjH,Z5R kkX-'**2H"=="&u+ %gx8 ,+%(0@(--#QGL"+H,D,D"E K3 $'vh~.>az!J  //K
  $F|H"5kG% kkX-'**2HC;	  ! H ! !!_++"s,   $)I4B
JCJ4	J J,J43J4c                 z    | D cg c]*  }t        |      t        j                  j                  u s)|, }}|S c c}w r9   )r   r/   r   r  )rM   r   r   s      r6   get_all_nodesz&AutogradCompilerInstance.get_all_nodes  s4     !=qDGuxx}}$<== >s   *88c                     | j                   dk(  sH| j                   dk(  r:| j                  t        j                  k(  r| j                  d   j                   dk(  ryy)Nr   r   r   TF)r   r   operatorgetitemrM   )r   s    r6   is_placeholderz'AutogradCompilerInstance.is_placeholder  sF    77m#GG&x///		!=0r5   c                    | j                   j                  j                  dt        j                  j
                  j                  j                        D ]  }|j                  d   |j                  d   }}d}|j                  t        j                  k(  r|}|j                  d   }t        ||g      }||j                  usm| j                  |      r|j                  |       ||j                  |        y)a  
        Usage of AOTAutograd causes all the accumulate_grad_ nodes to get pushed to the end of
        the graph.  This differs from eager mode, which schedules them as soon as possible. This
        pass attempts to reorder the graph to mimic eager behavior.
        r   r   r   r   rK   N)r   r   r  r/   rJ  inductoraccumulate_grad_r  rM   r   r  r  maxprevr  r  )rG   r   
param_node	grad_nodegetitem_nodeargs         r6   r  z6AutogradCompilerInstance.reorder_accumulate_grad_nodes  s     NN((33uyy'9'9'J'J'R'R 4 
 	-D %)IIaL$))A,	JL8#3#33((--a0	z9-.C$))#D,?,?,D

4 +JJ|,	-r5   c                     | j                   j                  j                  dt              D ]H  }|j                  j                  dd      dk7  r#t        |j                        }|j                  |       J y)zp
        We can delay unpack hooks until they are needed, even later than in the eager autograd engine.
        r   r  rh  Nrf  )	r   r   r  r
   rN   r_   minr  prepend)rG   r   
first_users      r6   r  z0AutogradCompilerInstance.delay_unpack_hook_nodes  si     NN((33y 4 
 	%D {{{D1]BTZZJt$	%r5   c                 f   | j                   j                  j                  dt              D ]  }|j                  j                  dd      dk7  r#|j                  d   }|j                  d   }||j                  usP| j                  |      rb|j                  |       |j                  |        y)a  
        Usage of AOTAutograd causes all the tensor_pre_hook nodes to get pushed
        to the end of the graph. This differs from eager mode, which schedules
        them as soon as possible. This pass attempts to reorder the graph to
        mimic eager behavior.
        r   r  rh  Nrn  r   rK   )
r   r   r  r
   rN   r_   rM   r  r  r  )rG   r   r  
input_nodes       r6   r  z6AutogradCompilerInstance.reorder_tensor_pre_hook_nodes  s     NN((33y 4 
 	*D {{{D15FF99Q<L1J*43F3Fz3R!!,/##D)	*r5   c                    | j                   j                  j                  dt              D ]Y  }|j                  j                  dd      dk7  r$|j                  d   }| j                  |j                  d         }g }g }|g}|D ]p  }|j                  dk(  s|j                  t        j                  k(  s1|j                  |j                  d          |j                  |       |j                  |       r t        ||      D ]'  \  }}	|j                  |       |j                  |	       ) t        |      }
|
|j                   us| j#                  |
      r1|
j                  |       |D ]  }|j                  |        \ y)a  
        In this function, we schedule the pre hooks as soon as possible. This
        does not match eager behavior (schedule pre hook right before its
        registered node), but it can make acc grad be scheduled properly when
        the pre hooks are registered to them. After reordering acc grad node, we
        will reorder the pre hooks again to mimic eager behavior.
        r   r  rh  Nrq  r   rK   )r   r   r  r
   rN   r_   rM   r  r   r   r  r  r  zipremover  r  r  )rG   r   r  input_nodes	to_remove	to_append
hook_blockr   abr  s              r6   r  z@AutogradCompilerInstance.reorder_pre_hook_nodes_to_schedule_asap  sh    NN((33y 4 
 	+D {{{D1Z?99Q<L,,TYYq\:KIIJ  )44?*qxx8;K;K/K$$QVVAY/$$Q'%%a(	)
 Iy1 &1""1%""1%& k"C$))#D,?,?,D

<(# +A ''*+3	+r5   c                    g }| j                   j                  j                  dt              D ]3  }|j                  j                  dd      dk7  r#|j                  |       5 t        |      D ]  }|j                  d   }t        |j                  j                               }t        |      dk(  rDt        d |D              sJ t        t        |d   j                  j                                     }||j                  us|j!                  |       |j!                  |       |D ]  }|j!                  |         y)a%  
        Usage of AOTAutograd causes all the pre_hook nodes to get pushed to the
        end of the graph. This differs from eager mode, which schedules them
        right before their registered node execution. This pass attempts to
        reorder the graph to mimic eager behavior.
        r   r  rh  Nrq  r   c              3   x   K   | ]2  }|j                   d k(  xr |j                  t        j                  k(   4 yw)r   N)r   r   r  r  r  s     r6   r   zQAutogradCompilerInstance.reorder_pre_hook_nodes_to_mimic_eager.<locals>.<genexpr>#  s8       ?*Nt{{h>N>N/NNs   8:)r   r   r  r
   rN   r_   r  reversedrM   r  r  r  r   r  r   r  r  )rG   	pre_hooksr   hook_getitem_noder  registered_noder  s          r6   r  z>AutogradCompilerInstance.reorder_pre_hook_nodes_to_mimic_eager  s1    	NN((33y 4 
 	#D {{{D1Z?T"	# Y' 	5D $		!*+E5zQ  !    #4a(;(;(=#>?Odii/''(9:''-$ 5G#++G45#	5r5   c                 n   g }| j                   j                  j                  dt              D ]3  }|j                  j                  dd      dk7  r#|j                  |       5 t        |      D ]  }|j                  d   }|j                  d   }d}t        |j                  j                               D ]Q  }|j                  dk(  s|j                  t        j                  j                   j"                  j$                  k(  sO|} n |J d       |j                  |       |j                  |        y)	a  
        Usage of AOTAutograd causes all the post_acc_grad_hook nodes to get
        pushed to the end of the graph. This differs from eager mode, which
        schedules them as soon as possible. This pass attempts to reorder the
        graph to mimic eager behavior.
        r   r  rh  Nru  r   rK   z8post_acc_grad_hook must have corresponding acc grad node)r   r   r  r
   rN   r_   r  r
  rM   r  r  r  r   r   r/   rJ  r  r  r  )rG   post_acc_grad_hooksr   r  r  acc_grad_noder   s          r6   r  z9AutogradCompilerInstance.reorder_post_acc_grad_hook_nodes/  s*    !NN((33y 4 
 	-D {{{D15II&&t,	- 01 	&D99Q<L1J !M***//12 DDO+EII$6$6$G$G$O$OO$%M !, J,
   .%)	&r5   c           	         g }| j                   j                  j                  dt              D ]3  j                  j                  dd      dk7  r#|j                         5 t        |      D ]  j                  d   }j                  d   }j                  d   }t        |      dkD  r@g }|j                  t        |             |D ]=  }|j                  fd	t        |j                  j                               D               ? t        |      }|j                  dk(  r|j                   t"        j$                  j&                  j(                  j*                  k(  r|j                  d   }d}	t        |j                  j                               D ]H  }
|
j                  dk(  s|
j                   t        k(  s'|
j                  j                  dd      d
k(  sG|
}	J |	$|	j                  |       |j                         |j,                  us| j/                  |      r|j                  |       |j                          y)a  
        Usage of AOTAutograd causes all the post_hook nodes to get pushed to the
        end of the graph. This differs from eager mode, which schedules them as
        soon as possible. This pass attempts to reorder the graph to mimic eager
        behavior.
        r   r  rh  Nrs  r   rK      c              3      K   | ]G  }|j                   d k(  r2|j                  t        k(  rj                  j	                  dd      dk(  s| I yw)r   rh  Nrs  )r   r   r
   rN   r_   )r   r  r   s     r6   r   zCAutogradCompilerInstance.reorder_post_hook_nodes.<locals>.<genexpr>p  sH      -?2 KK94 KKOOK>+M -s   AAru  )r   r   r  r
   rN   r_   r  r
  rM   r   extendr  r  r  r  r   r   r/   rJ  r  r  r  r  r  )rG   
post_hooksr  output_nodesr  input_nodes_and_usersr  r  r  post_acc_grad_hook_noder   r   s              @r6   r  z0AutogradCompilerInstance.reorder_post_hook_nodesV  s    
NN((33y 4 
 	$D {{{D1[@d#	$ Z( +	*D99Q<L99Q<L))A,K< 1$$&!!((k):;) 	
%,, - $Z%5%5%:%:%< =- 	 +,C/)JJ%))"4"4"E"E"M"MM XXa[
*.'j..3356 4A/HH	1HHLLd;?SS23/4 +6+22<@ ''-$))#D,?,?,D

<(##D)W+	*r5   c                 0    |y t        |t              r|D cg c]  } j                  |       c}S t        |t              rt         fd|D              S t        |t        j
                  t        j                  f      r j                  |j                     S t        |t        j                        s|S t         j                  |      }t        |t        j                  j                  j                  j                        sJ |j                   S c c}w )Nc              3   @   K   | ]  }j                  |        y wr9   rR  )r   r@   rG   s     r6   r   z4AutogradCompilerInstance.to_proxy.<locals>.<genexpr>  s     5aq)5s   )r   r  r  r  r/   SymIntSymFloatr   r   r   r!   r   r   r   proxy_tensor_ProxyTensorrl  )rG   tr@   r  s   `   r6   r  z!AutogradCompilerInstance.to_proxy  s    9a./0DMM!$00a51555a%,,78,,QVV44!U\\*H)$..!<,(=(=(J(J(W(WXXX!!! 1s   Dc                    t        |t        j                  j                        r|rft	        |      t	        |      k(  sJ g }t        t	        |            D ]1  }||   \  }}| j                  ||d        |j                  ||          3 |}n$t        t	        |            D cg c]  }||   	 }}t	        |      t	        |      k(  sJ t        ||d | j                         |S c c}w N)constanttracer)
r   r/   r   r+   r   r   set_node_originr  r$   r   )rG   objectsr   r   bound_proxiesr   r  	node_names           r6   r   z0AutogradCompilerInstance.bind_objects_to_proxies  s     guxx~~.7|s7|333 "s7|, 5A07
-NI((NDI!((45 (/4S\/BC!71:CC7|s7|+++'7T$..Q	 Ds   #C%indexc                     | j                   J | j                   |   }t               }t        ||d | j                         |S r!  )r   r   r$   r   )rG   r(  rl  bw_states       r6   bind_backward_statez,AutogradCompilerInstance.bind_backward_state  sB    +++  ' ?(EDPr5   r'  r  pyobjc                    d}||j                   }t        |d      rx|j                  t        d      || _        |j
                  }t        | j                  j                  j                        ||j                  j                  d| j                  |<   | | d| d}t        j                         j                         d   }|j                  d|      }t!        |       y )	Nr  r/  zThis compiled backward function was saved by AOTAutogradCache, which does not support
                    compiled autograd. Please turn off AOTAutogradCache using `TORCHINDUCTOR_AUTOGRAD_CACHE=0`.)r  r  r  z (NodeCall )rx  z:raw_stack_trace = CapturedTraceback.extract().format()[-1])r"  rQ   _lazy_backward_infoRuntimeErrorr   r/  r   r   r   r   	bw_moduler   r*   extractformatreplacer(   )	rG   r'  r  r,  maybe_aot_idforward_clsnew_coderaw_stack_tracenew_stack_traces	            r6   r$  z(AutogradCompilerInstance.set_node_origin  s     ,,K{I.22:&s  +4'*22),T^^-A-A-G-G)H*)==GG8$$^4  [k.9IK+335<<>rB)11H(
 	(r5   rv   Nr9   ):r`   ra   rb   rH   r   staticmethodr   r   r  r/   r   r   r   r   r  r   r   r   r,  autogradfunctionBackwardCFunctionr   r8  rA  rG  r  rL  rO  r=  rY  r`  rd  rf  rn  rq  rs  ru  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r+  Functionr$  r4   r5   r6   ru   ru      s   1C 5] 5 5a
U\\"a
 Cya
 eCJ'(	a

 d5c?+,a
F
c
Zx1
 1 ^^$$661 #+3-1f
G
/
FD
			
# &/c /b
B.J^8@`D  
  -*%*("+H!5F%&N:*x"" LP)1$uS#X2G)H& )) ) //0	)r5   ru   Fc              #     K   |rt        |      t        u sJ ddlm} |j                  j
                  dk(  r
da	 d  day dd l}|j                  j                  j                  j                  t        j                  t        |       |      \  }}t               r3|j                  j                  j                  j!                  t"               da	 |j&                  j)                  d      5  d  d d d        |sda|j                  j                  j                  j                  ||       y # daw xY w# 1 sw Y   ExY w# |sda|j                  j                  j                  j                  ||       w xY ww)Nr   )
eval_frameforce_eagerTF)r   booltorch._dynamorA  _stancestance%compiled_autograd_enabled_force_eagertorch._inductor.cudagraph_trees_CrT   r,   set_autograd_compiler	functoolspartialru   r7   set_verbose_loggerr  compiled_autograd_enabledr<  set_multithreading_enabled)r|   dynamicrA  r/   prior_compilerprior_dynamics         r6   _enablerS    sL    G}$$$(  M1 15-	:491 	/
 HH..DD6Dg
	
 ,-HH..AA+N$(!	::5A  ",1)HH..DD- 5:1"  ",1)HH..DDsL   7E9D- BE9E  +D40E  85E9-D11E94D=9E   6E66E9c               #   r  K   t         j                  j                  j                  j	                  d d      \  } }da	 d  | rdat         j                  j                  j                  j	                  | |       y # | rdat         j                  j                  j                  j	                  | |       w xY ww)NFT)r/   rI  rT   r,   rJ  rN  )rQ  rR  s     r6   r  r    s     
 	**@@uM !&
(,%**@@M	
 (,%**@@M	
s   :B7A: 9B7::B44B7c                  r   da t        rJ t        j                  j                  j
                  j                  d d       t        j                  j                  j
                  j                  d        t        j                  j                  j
                  j                          t        j                         ay )NF)rN  r  r/   rI  rT   r,   rJ  rM  clear_cache	itertoolsr[   r   r4   r5   r6   resetrX  #  sw     %***	HH&&<<T5I	HH&&99$?	HH&&224oo'Or5   c                     | d   }|j                  ||      }|J |j                  |       ||z
  }	|j                  |||	      }
||
|
j                  t        j
                        gS )Nr   )memory_format)new_empty_stridedcopy_
as_stridedcloner/   contiguous_format)rj   
base_sizesbase_stridesbase_storage_offset
view_sizesview_stridesview_storage_offsetgradrO   offsetrF  s              r6   r>  r>  0  sw     !9D##J=F
LL #66F"":|VDJJ
 0 0u?V?V 0 WXXr5   c                     d gt        |       z  }t        t        |             D ]5  }| |   s	||   |dk(  r|j                  ||          |||<   .||   ||<   7 |S )Nr   )r   r   r\  )rD  rO   rE  rF  grad_inputsr   s         r6   rC  rC  D  sy    &3/00K3'() (A1v~Av  Q(!'A!$QA( r5   )Fr:  )^__doc__r}   rK  rW  r  r   collectionsr   r   typingr   r   r   r   r/   torch.utils._pytreeutils_pytreer  torch._dynamo.external_utilsr	   r
   r   torch._dynamo.sourcer   r   torch._dynamo.utilsr   r   r   r   torch._guardsr   r   r   torch._loggingr   r   torch._prims_commonr   torch._subclassesr   torch.fxr   %torch.fx.experimental._backward_stater   "torch.fx.experimental.proxy_tensorr   r   r    r!   r"   r#   r$   %torch.fx.experimental.symbolic_shapesr%   r&   torch.fx.tracebackr'   r(   torch.utils._ordered_setr)   torch.utils._tracebackr*   torch.fx.proxyr+   r`   r  r  r7   r>   rA   rC   rR   rJ  r   r  r  r  r  r  r[   r   rs   ru   rN  rG  r  contextmanagerrS  r  rX  r>  rC  r4   r5   r6   <module>r     s         , 6 6  $ $ 
 <  E D > 6 ,   ?   G B / 4 $ *(4GH *EF4# #:5 5 m M "==		++33	 ")//#	w) w)v! "  ). % $  & &R 
 
$(Y(r5   