
    Vhn                      d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlmZ d dlmZ d dlmZmZmZmZmZmZ d dlZd dlmZ d dlZd dlZd dlZd dlmZmZ d dlmZ d d	lm Z m!Z! d d
l"m#Z# d dlm$Z$m%Z% d dl&m'Z'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z.m/Z/ d dl0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7 d dl8m9Z9 d dl:m;Z; d dl<m=Z= d dl>m?Z? ddl@mAZAmBZBmCZC ddlDmEZEmFZFmGZGmHZHmIZImJZJmKZK ddlLmMZMmNZNmOZOmPZP ddlBmQZQmRZRmSZSmTZTmUZUmVZVmWZWmXZXmYZYmZZZm[Z[ ddl\m]Z]m^Z^m_Z_m`Z`maZambZbmcZcmdZdmeZemfZfmgZg ddlhmiZi ddljmkZk ddllmmZm ddlnmoZompZpmqZqmrZrmsZsmtZtmuZumvZvmwZwmxZx ddlymzZzm{Z{ er.d d l|m}Z}m~Z~mZ d d!lmZ d d"lmZ d d#lmZ d d$lmZ dd%lmZ dd&lmZ d d'lmZ  ej                  e      Zej                   j#                  ed(      Zej&                  j(                  Z ej*                         Z eAj.                         rd d)lmZ nd4d*Zd5d+Zd6d,Z	 	 	 	 	 	 d7d-Zd8d.Z	 	 	 	 	 	 d9d/Z G d0 d1ej>                  j@                        Z G d2 d3e      Zy):    )annotationsN)defaultdict)contextmanager)AnyCallableNoReturnOptionalTYPE_CHECKINGUnion)Expr)deviceTensor)get_decompositions)defakedynamo_timed)FakeScriptObject)
LazyStringtrace_structured)compute_required_storage_lengthmake_channels_last_strides_for)
FakeTensor)BackwardState)magic_methodsmethod_to_operator)free_unbacked_symbolshas_free_symbolsresolve_unbacked_bindingsRuntimeAssertShapeEnvSympyBooleanSymTypes)Node)no_dispatch)
OrderedSet)int_oo   )configirmetrics)BackendFeatureDeviceOpOverridesget_backend_featuresget_device_op_overridesget_wrapper_codegen_for_deviceinit_backend_registrationWorkspaceArg)CppWrapperCodegenErrorLoweringExceptionMissingOperatorWithDecompMissingOperatorWithoutDecomp)ConstantDonatedBufferFixedLayoutget_device_typeGraphPartitionSignatureInputBuffer	Pointwise	Reduction
StorageBox	TensorBoxTorchBindObject)constrain_to_fake_tensorsconstrain_to_fx_stridesFALLBACK_ALLOW_LISTfallback_handler%fallback_node_due_to_unsupported_type	loweringsmake_fallbackmaybe_layout_constraintsneeds_realized_inputsrequire_contiguousunsupported_output_tensor)autotune_cache)AutotuneCacheBundler)SizeVarAllocator)
convert_shape_to_inductorgather_origins get_cloned_parameter_buffer_nameget_donated_idxsget_sympy_Expr_dtypeis_same_tensor#maybe_get_suppress_shape_guards_ctxnormalize_nameshould_assume_input_alignedValueWithLineMap)NullHandlerV)IterableIteratorSequence)
ModuleType)_EffectType)GraphModule)Graph)PythonWrapperCodegen)BaseSchedulerNode)output_code_log
perf_hints)log_module_codec                      y N )argskwargss     E/home/dcms/DCMS/lib/python3.12/site-packages/torch/_inductor/graph.pyre   re      s        c                   t        | t        j                  t        j                  t        j                  j
                  j                  f      sJ d       t        | t        j                  j
                  j                        rt        j                  S t        | t        j                        rt        |       S | j                  rt        j                  S | j                  rt        j                  S y )Nzgget_constant_buffer_dtype only supports input of sympy.Symbol, sympy.Expr or sympy.core.numbers.Integer)
isinstancesympySymbolr   corenumbersIntegertorchint64rR   
is_integeris_floatfloat32)constant_buffers    rk   may_get_constant_buffer_dtyperz      s    %,,

EJJ4F4F4N4NO  	r 
 /5::#5#5#=#=>{{/5::.#O44!!{{		!	!}}rl   c                6    t        d t        D              }| |v S )Nc              3  2   K   | ]  }t        |        y wrg   )r   ).0ms     rk   	<genexpr>z"is_magic_method.<locals>.<genexpr>   s     HQ-a0Hs   )r$   r   )op	magic_opss     rk   is_magic_methodr      s    H-HHI?rl   c           	         |j                  d      }| }t        |      D ]=  \  }}t        ||      s t        ddj	                  |d |              t        ||      }? |S )N.z#Node referenced nonexistent target )split	enumeratehasattrRuntimeErrorjoingetattr)objtargettarget_atomsattr_itriatoms         rk   getattr_recursiver      sv     <<$LH\* +4x&5chh|BQ?O6P5QR  8T*+ Orl   c                    i }| j                  d      d   }d|j                  vr|S t        |j                  d         D ],  \  }}||j                  d   v s|j                  d   |   ||<   . |S )Noutputr   r   user_visible_output_idxsoriginal_output_strides)
find_nodesmetar   ri   )gretoutput_nodeidxnodes        rk   get_user_visible_output_stridesr      s    ')C,,(,+A.K!)9)99
{//23 I	T+""#=>>#(()BCCHCII Jrl   c                   t         j                  syt        t        j                  t        j
                  t        j                  g      }t        t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                   t        j"                  t        j$                  g      }	 	 	 	 dd}t'        | j(                        D ]  }t+        |j,                  t.        j0                  j2                  j4                        rd|j6                  d<   K ||      }|sV||v rd|j6                  d<   |j6                  j9                  dd      r0|j:                  D ]!  } ||      }|s||vsd|j6                  d<   # t         j<                  r||v sd|j6                  d<    y)a  
    Nodes like convolution/convolution_backward want its input to be dense.
    If we pad their inputs, we result in extra calls to copy kernels!  On the other hand, padding usually helps reduction.

    The pass finds nodes that dislike padding. These are nodes that can be reached
    from a convolution/convolution_backward in the backward direction without
    going thru a reduction.
    Nc                    | j                   dk(  rZt        | j                  t        j                  j
                        r,t        | j                  d      r| j                  j                  S d S )Ncall_function_overloadpacket)r   rn   r   rt   _ops
OpOverloadr   r   )r   s    rk   _get_overload_packetz8mark_nodes_dislike_padding.<locals>._get_overload_packet   sU    
 ww/)4;;

(=(=>%67	 KK''	
 	
rl   Tdislike_paddingF)r   torch.fx.Nodereturnz%Optional[torch._ops.OpOverloadPacket])r'   comprehensive_paddingr$   atenconvolutionconvolution_backward
_scaled_mmvar_meansummeanprodanyaminamaxminmaxargminargmaxscatter_reducereversednodesrn   r   rt   _higher_order_opstriton_kernel_wrapTritonKernelWrapperMutationr   getall_input_nodespad_outputs)	r   user_visible_output_stridesops_dislike_paddingops_like_paddingr   curr   priorprior_ops	            rk   mark_nodes_dislike_paddingr      s    ''$%%OO	
 "MMHHIIIIHHIIIIHHHHKKKK	
"



	.

   /JJ##66RR
 +/CHH&'!#&$$*.CHH&'88<<)51,, 9/6#3348EJJ019 !!c-H&H*.CHH&'1/rl   c                      e Zd ZU ded<   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d?	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d@ fdZdAdZ	 	 	 	 dBdZ	 	 	 	 dCdZ	 	 	 	 dDd	Z	 	 	 	 dEd
Z		 	 	 	 	 	 dFdZ
dGdZej                  dHd       ZdIdZedJd       ZdKdZ	 	 	 	 	 	 	 	 dLdZdMdZdNdZdOdZedPd       Z	 	 	 	 dQdZdRdZ	 	 	 	 dSdZdTdZdUdZdV fdZdWdZdddXdZ dYdZ!	 	 	 	 dZd Z"dNd!Z#d[d"Z$	 	 	 	 	 	 d\d#Z%	 d]	 	 	 	 	 d^d$Z&d_d%Z'	 	 	 	 	 	 	 	 d` fd&Z(da fd'Z)edbd(       Z*	 	 	 	 	 	 	 	 dcd)Z+ddd*Z,ddd+Z-	 	 	 	 	 	 	 	 de fd,Z.dAd-Z/edfd.       Z0edgd/       Z1	 	 	 	 	 	 	 	 	 	 	 	 dhd0Z2di fd1Z3dAd2Z4	 	 	 	 dj	 	 	 	 	 	 	 	 	 dkd3Z5	 	 dld4Z6dAd5Z7dld6Z8dmd7Z9	 	 dnd8Z:dZ;d9ed:<   dod;Z<dod<Z=dpd=Z>dqd>Z? xZ@S )rGraphLoweringzlist[ir.IRNode]graph_outputsNFc                	   t         |   |       || _        ||n| j                  ||	      | _        d| _        |	| _        |
| _        || _        || _	        || _
        || _        || _        d| _        |t               }d| _        nd| _        || _        |j"                  j%                         | _        t)        t*        j,                            | _        t1        |      | _        g | _        i | _        i | _        t)        t:                  | _        |r|j>                  n	t)               | _        |r|j@                  n	t)               | _         d| _!        i | _"        g | _#        g | _$        |r|ni | _%        |rt)        |jM                               n	t)               | _'        |r|jP                  ni | _(        i | _)        i | _*        i | _+        t)        t:                  | _,        t)        t:                  | _-        t)        t:                  | _.        t)        t:                  | _/        t)        t:                  | _0        t)        t:                  | _1        d | _2        d | _3        g | _4        ddl5m6} to        jp                         r|r|n|| _9        d | _:        i | _;        t)        t:                  | _<        g | _=        i | _>        t        t              | _A        i | _B        t        j                         | _D        || _E        || _F        || _G        i | _H        || _I        || _J        t        t              | _M        d | _N        d | _O        | j                  r| j                         n	t)               | _Q        t)        dg      | _R        t        |j                        | _U        t        |j                  | j                         d| _W        d| _X        g | _Y        d | _Z        i | _[        |j                         | _]        | j                  j                  j                  d	i       | _a        ||j                  ni | _b        t                 t        j                  d       t              | _f        i | _g        t)               | _h        t)        t:                  | _i        t)               | _j        t)               | _k        t)        t:                  | _l        t        j                         | _o        d
| _p        t               | _r        y )N)is_inferencer   FTcpu)extern_node_json_serializerzaten.convolution_backward  dynamo_flat_name_to_original_fqn)ssuper__init__example_inputsdecide_layout_opt
layout_optnum_channels_last_convr   is_backwardis_const_graphconst_wrapper_codeconst_kernel_codeconst_moduleinputs_to_checkextra_tracebackr   reuse_shape_env
_shape_envdeferred_runtime_assertscopyras_by_symbolr$   ro   rp   bound_unbacked_symbolsrM   sizevarsgraph_input_namesgraph_inputsgraph_inputs_originalstrzero_dim_cpu_tensor_listdevice_typesdevice_idxsdevice_typebuffer_to_padded_sizebuffers
operationsconst_output_indexkeysfolded_constants	constantstorchbind_constantsseen_subgraphsconstant_reprsremoved_operationsremoved_buffersremoved_inplace_buffersmutated_buffersnever_reuse_buffersinplaced_to_remove
device_opswrapper_codeextern_kernel_nodes&torch._inductor.extern_node_serializerr   r'   	is_fbcodeextern_node_serializercurrent_nodelistsmutated_inputsmutated_input_idxsname_to_bufferr   listname_to_users
name_to_optimecreation_timenamecpp_wrapperrecord_multi_kernel_choicemulti_kernel_to_choiceaot_modegraph_idnext_post_grad_graph_counterpost_grad_graph_id	schedulercurrent_devicefind_nodes_prefer_channels_lastnodes_prefer_channels_last_warned_fallbackr   graphr   r   	cache_key
cache_pathcache_linemapdisable_cudagraphs_reasondevice_node_mapping__copy__orig_gmmoduler   r   r   allocated_constant_namer/   	functools	lru_cacher,   effectful_opsaligned_inputsno_fuse_buffer_nameslow_precision_codegen_opsinvoke_quant_opsall_codegen_kernel_names	itertoolscountworkspace_idplaceholder_idxrQ   bw_donated_idxs)selfgmr   	shape_envr  r  r  r   r  r   r   r   r   r   r   r   r  r   r   	__class__s                      rk   r   zGraphLowering.__init__  sf   , 	, % '''F 	
 '(#(&,"4!2(.$ 
I#(D #'D # ..335 	 '1&>&@#(3,.VX=?"(23(9%)5L%%:< 	 )5L$$*, 	 ! <>"(*.0"4" 	
 " )..01 	 '3L"" 	 FH 68.0",S/"3)#0'1#'8$)#0#-c?#4 ",S/"3-126>@ V !&< #, 	# ,0+-
(o/-/46@KD@Q35!YY[	&
 +6'68#  "&'?"@>B 7; 7;ooD002:< 	' !+,G+H I+J288+T("288T-M-MN !  	 9=& GI -/[[]040@0@0D0D.1
- 5A4LL00RT 	$ 	"#$=I$7$7$=>R$S!;=/9|$.sO$5!:D,&1; )33(9% &OO-  "/1rl   c                8    | j                   j                          y rg   )r   freeze_runtime_assertsr4  s    rk   r9  z$GraphLowering.freeze_runtime_asserts  s    ..0rl   c                2   | j                   r2t        |j                               t        |j                               fS ddlm}  |dt        | j                  j                               }| j                  j                  ||      \  }}}|D cg c]4  }t        |t        j                        r|j                  j                  n|6 }}|D cg c]4  }t        |t        j                        r|j                  j                  n|6 }	}||	fS c c}w c c}w )z
        Support dynamic shapes and dynamic strides by assigning variables
        to each dimension.  We duck-shape tensors, so if two tensors
        have the same size they get assigned the same symbolic variable.
        r   )ConstantSource__inductor_unknown_tensor_)r   rN   sizestridetorch._dynamo.sourcer<  lenr   
var_to_val,create_symbolic_sizes_strides_storage_offsetrn   rt   SymIntr   expr)
r4  exr<  sourcer>  r?  _r   r_sizer_strides
             rk   symbolic_sizes_stridesz$GraphLowering.symbolic_sizes_strides  s     ,RWWY79R		:   < $,S1K1K-L,MNF LL	 NRRAu||!<!&&++!CRROUV!:a#>AFFKKAEVVx SVs   9D9Dc                    |j                         D cg c]  }t        j                  |       }}|j                         D cg c]  }t        j                  |       }}||fS c c}w c c}w )z+
        Primarily used to weights
        )r>  ro   rs   r?  )r4  rF  r   r>  r?  s        rk   static_sizes_stridesz"GraphLowering.static_sizes_strides  sZ     +-'')4Qa 44,.IIK8q%--"88V| 58s   A%A*c                P   t        |t        j                        r|j                  }t        |t        j                        r|j                  }t        |t        j
                        r1|j                  | j                  v r| j                  |j                     S |j                         S rg   )	rn   r(   r>   datar=   ComputedBufferr  r   get_size)r4  r   s     rk   get_allocation_sizez!GraphLowering.get_allocation_size  su     dBLL)99DdBMM*99DtR../		T777--dii88==?"rl   c                    |j                         }| j                  |      }|j                  }|j                  }t	        |||      S rg   )
get_layoutrR  r?  offsetr   )r4  r   layoutr>  r?  rU  s         rk   get_allocation_storage_sizez)GraphLowering.get_allocation_storage_size  sA     "''-.tVVDDrl   c                h    t        |t              sJ |       || j                  t        |            v S rg   )rn   r*   r,   r8   )r4  r   features      rk   has_featurezGraphLowering.has_feature  s4    
 '>2;G;2$33OF4KLLLrl   c                8    | j                   x}r|S t        d      )NzNo current device)r  r   r4  r   s     rk   get_current_device_or_throwz)GraphLowering.get_current_device_or_throw  s$    (((6(M233rl   c              #  b   K   | j                   }|| _         	 d  || _         y # || _         w xY wwrg   )r  )r4  r   r   s      rk   set_current_devicez GraphLowering.set_current_device  s1     ##$	("'D%Ds   /# /	,/c                8    | j                   ry| j                  ryy)N	inferencebackwardforward)r   r   r:  s    rk   get_training_phasez GraphLowering.get_training_phase  s    rl   c                  t         j                  syt         j                  ry| j                  j                  D cg c]@  }|j
                  t        j                  j                  j                  j                  k(  s?|B }}t        |      }|dk(  ryt        j                  j                  j                  r;t        j                  j                  j                         rt!        d |D              ryt        t#        | j                  j                              d|z  k\  rt$        j'                  d       yt)        d |D              rt$        j'                  d       ydd	}dd
}dd}|rBddlm} t/        t0              }	|D ]  }
t        j2                  j4                  j7                  |
      \  }}}|r~ |d      5 }t8        j:                  5   |
j
                  |i | ddd       ddd       j=                         } ||
      rd}n ||
      rd}n ||
      rd}nd}|	|xx   |z  cc<   t$        j'                  d        d}d}d}d}t?        |	jA                               }|	d   |z  |	d   |z  z   |	d   |z  z   |	d   |z  z   }||k  }|st$        j'                  d||       |S t)        tC        ||            rt$        j'                  d       yt)        tC        ||            rt$        j'                  d       yt!        tC        ||            rt$        j'                  d       yyc c}w # 1 sw Y   UxY w# 1 sw Y   ZxY w)zl
        Decide if we should enable layout optimization for this graph based on
        heuristics.
        FTr   c              3     K   | ]G  }d D ]@  }|j                   |   j                  d   j                  t        j                  d      k(   B I yw)r   r&   valr   N)ri   r   r   rt   r}   nr   s      rk   r   z2GraphLowering.decide_layout_opt.<locals>.<genexpr>=  sT      !  s  '..%,,u2EEEs   AAi,  z*Skipped layout opt because only a few convc              3  t   K   | ]0  }d D ])  }t        |j                  |   j                  d          + 2 yw)rg  rh  N)r   ri   r   ri  s      rk   r   z2GraphLowering.decide_layout_opt.<locals>.<genexpr>L  sE      

  QVVC[--e45
5
s   68zeSee perf regression with dynamic shape. Follow up in https://github.com/pytorch/pytorch/issues/102670c                    | j                   d   j                  d   }t        |t        j                        sJ | j                   d   dkD  xr |j                  d      dkD  S )Nr&   rh  r   )ri   r   rn   rt   r   r>  )rj  meta_vals     rk   
is_groupedz3GraphLowering.decide_layout_opt.<locals>.is_groupedV  sQ    vvay~~e,Hh55566":>:hmmA&6&::rl   c                   | j                   d   j                  d   j                  d      dz  | j                   d   j                  d   j                  d      k  xr. | j                   d   j                  d   j                  d      dkD  S )Nr&   rh  r      ri   r   r>  rj  s    rk   is_in_out_channelz:GraphLowering.decide_layout_opt.<locals>.is_in_out_channel[  sv    q	u%**1-1QVVAY^^E5J5O5OPQ5RR 6FF1INN5)..q1A5rl   c                    | j                   d   j                  d   j                  d      dk  xr. | j                   d   j                  d   j                  d      dk  S )Nr&   rh  r   @   rq  rr  s    rk   is_small_channelz9GraphLowering.decide_layout_opt.<locals>.is_small_channela  sT    q	u%**1-3 8FF1INN5)..q1R7rl   )FlopCounterMode)displayNgroupedsmallin_outdefaultzConv inputs meta not foundg|?5^?gtV?g333333?guV?zhSkipped layout opt in inference because weighted flops indicate slowdown, default: %d, channels last: %dzFSkip layout opt because found grouped convolution with >1 in_channels!zBSkip layout opt because some convolutions have smaller out_channelz>Skip layout opt because all convolution channels are too small)rj  r   r   bool)rj  r   r   r}  )"r'   layout_optimizationforce_layout_optimizationr  r   r   rt   opsr   r   r|  rA  backendsmkldnnenabledis_availableallr
  logdebugr   torch.utils.flop_counterrw  r   float	_inductorfx_utilsget_fake_args_kwargsrY   	fake_modeget_total_flopsr   valuesmap)r5  r   rj  
conv_nodesnconvrn  rs  rv  rw  flop_countsr   successri   rj   flop_counter_modecounted_flops	node_typeGROUPED_MULTIPLIERDEFAULT_MULTIPLIERIN_OUT_MULTIPLIERSMALL_MULTIPLIERtotal_flopsweighted_flopsdo_layout_opts                           rk   r   zGraphLowering.decide_layout_opt%  s5    ))++ xx~~
UYY^^5O5O5W5W)WA

 
 JA: NN!!))%%224 #  
 tBHHNN#$e3IIBC 

 

 IIw 	;
		 @,7,>K" <(-(@(@(U(U)%v (7 9;L[[ 9'DKK8899 %6$E$E$GM!$'$-	)$/$+	*40$,	$-		*m;*II:;-<6 "'!& %$k0023K I&);;g&)99:h'*;;< i(+==>  +k9M 		~"
 ! & s:z*+IIX  s$j12IIT  s#Z01IIVWm
D9 99 9s0   A L1<L1?ML6#M6M ;MM	c                @    | j                   | j                    d| S |S )z2Prepend the given name with the graph name if any.rH  )r  r4  r  s     rk   qualify_namezGraphLowering.qualify_name  s&    99 ii[$((rl   c                    t        | ||| j                  | j                  | j                  | j                  | j
                  | j                  | j                  |      
      S )a  
        Make a subgraph of the current graph with all inherited parts, except
        the graph module (`gm`) and `example_inputs`.  The subgraphs are lowered
        separately and lifted into a separate function in the parent output
        wrapper code.  The subgraph name is qualified by the parent graph's
        name. Note that the lifting of subgraph is supported for python wrapper
        only. For cpp wrapper, we inline the subgraphs in the parent wrapper.
        )
parentr5  r   r6  r  r  r  r   r   r  )SubgraphLoweringr   r  r  r  r   r   r  )r4  r5  r   subgraph_names       rk   make_subgraphzGraphLowering.make_subgraph  sZ      )oo((]]#'#>#>**((""=1
 	
rl   c                   t        t                  }t        | j                  j                  j
                        D ]w  }|j                  t        j                  j                  j                  j                  k(  r|j                  |       P|j                  D ]  }||v s|j                  |        w y | j                  j                  j
                  D ]"  }||v s|j                  |j                         $ |S )aC  
        The rule to decide if an node prefer channels last is simple.
        1. if it's input/output of a convolution
        2. if one of its user prefers channels last

        We have rule 1 because cudnn runs a faster convolution kernel for channels last inputs;
        Rule 2 is also important. It makes sure that indirect inputs to convolution also prefers
        channels last.

        Consider the scenario: conv -> batch-norm -> relu -> conv
        Without rule 2, batch-norm output may use a contiguous layout. That will cause 2 extra copies:
        1. the output of batch-norm should be channels last initially since its input is a conv's output.
           Forcing the batch-norm's output to be contiguous results in the first copy
        2. The second conv's input is initially contiguous. This layout is propagated from the batch-norm's output.
           We need convert it to channels last layout which results in the second copy.
        With rule 2, we makes sure all the tensors in the chain uses channels last layout. So both copies
        can be saved.
        )r$   r"   r   r%  r  r   r   rt   r  r   r   r|  addusersupdate)r4  
output_setrj  users       rk   r  z-GraphLowering.find_nodes_prefer_channels_last  s    &  %'
$++++112 	Axx599>>55===q! :%NN1%	0 ""(( 	+AJ!!!''*	+ rl   c                    || j                   vr2| j                   j                  |       t        j                  d|       y y )NzUsing FallbackKernel: %s)r  r  perf_hint_loginfor  s     rk   warn_fallbackzGraphLowering.warn_fallback  s:    t,,,!!%%d+94@ -rl   c                R   | j                   j                  |j                         |j                  %| j                  j                  |j                         t
        j                  j                  r7|| j                  vr(t
        j                  j                  | j                  |<   y y y rg   )	r   r  typeindexr   rY   r  r  r"  r\  s     rk   add_device_infozGraphLowering.add_device_info!  sy    fkk*<<#  .77F$2J2J$J/0ww/C/CD$$V, %Krl   c                "    t         j                  S rg   )rY   r  r:  s    rk   r  zGraphLowering.fake_mode(  s    {{rl   c           	        || j                   v r| j                   |   S || j                  v r| j                  |   S || j                  v rzt        j                  j                  |   }t        j                  |t        j                  |j                  |j                  gt        j                  j                  |             S y Nr  rV  )r	  r   r   rY   r  r(   ConstantBufferr7   r   dtyperM  )r4  buffer_namerO  s      rk   try_get_bufferzGraphLowering.try_get_buffer,  s     $---&&{33$+++$$[11$..(77$$[1D$$ ~~KK./gg.J.J4.P  rl   c                    t        d      )Nz'Should not be called for the main graph)r   )r4  symbols     rk   add_symbol_graph_inputz$GraphLowering.add_symbol_graph_input>  s    DEErl   c                H    | j                  |      }||S t        d|       )Nz$Failed to find buffer matching name )r  r   r4  r  bufs      rk   
get_bufferzGraphLowering.get_bufferA  s1     !!+.?JA+OPPrl   c                   || j                   v r| j                   |   j                  S t        | j                  d      r|| j                  j                  v ro| j                  j                  |   }|| j
                  v r| j
                  |   j                         S || j                  v r| j                  |   j                         S || j
                  v r| j
                  |   j                         S || j                  v r| j                  |   j                         S t        j                  d|      }|r | j                  |j                  d            S t        d|       )Nmutation_real_namez1(as_strided|reinterpret_tensor)\(([a-zA-Z0-9_]+),r&   could not find )r   r  r   r  r  r	  	get_dtyper   rematchgroupKeyError)r4  r  mutated_bufr~   s       rk   r  zGraphLowering.get_dtypeI  s7   $..(>>+.444 DNN$89t~~@@@..;;KHKd111**;7AACCd///((5??AA$---&&{3==??$+++$$[1;;==HHI;W>>!''!*--677rl   c                F   || j                   v r| j                   |   j                         S || j                  v r0| j                  |   }|j                         sy|j	                         S || j
                  v r| j
                  |   j	                         S t        d|       )Nr&   r  )r   numelr	  has_tensor_output	get_numelr   r  r  s      rk   r  zGraphLowering.get_numel_  s    $..(>>+.4466$---%%k2C((*==?"$+++$$[1;;==677rl   c                ^    t        d      5  t        |   | cd d d        S # 1 sw Y   y xY w)NzGraphLowering.run)r   r   run)r4  ri   r7  s     rk   r  zGraphLowering.runk  s-    -. 	&7;%	& 	& 	&s   #,c                   |j                   
J d|        t        |t        j                        sJ | j	                  dt        | j                               }| j                  j                  |       || j                  |<   ||_         |S )NzOperation registered twice: r   )	operation_namern   r(   	Operationr  rA  r   appendr  )r4  r   r  s      rk   register_operationz GraphLowering.register_operationo  s      (M,H*MM("bll+++  2c$//&:%;!<=r" " rl   set_namec                  | j                  dt        | j                               }| j                  j                  |       || j                  |<   |j                         }|St        |t        j                        r(|j                         r|t        j                  d      k(  s| j                  |       |r||_        |S )Nr  r   )r  rA  r   r  r	  
get_devicern   r(   rP  is_zero_elementsrt   r   r  r  )r4  bufferr  r  r   s        rk   register_bufferzGraphLowering.register_bufferx  s      3s4<<'8&9!:;F#$*D!""$ 62#4#45++-ell511   (FKrl   c                j    | j                  ddj                  |      z         }|| j                  |<   |S )Nlist_rH  )r  r   r  )r4  operation_namesr  s      rk   register_operation_listz%GraphLowering.register_operation_list  s3      388O+D!DE*

4rl   c                &     d fd |       y )Nc                    t        | t        t        f      r| D ]
  } |        t        | t        j                        r4| j                         D ]   }j                  |   j                  |        " y y rg   )rn   r
  tupler(   r>   get_read_namesr  r  )valuex	read_nameregisterr4  s      rk   r  z1GraphLowering.register_users_of.<locals>.register  sm    %$/  AQK %.!&!5!5!7 @I&&y188?@ /rl   )r  %Union[Iterable[ir.IRNode], ir.IRNode]r   Nonerh   )r4  node_outputr  s   ` @rk   register_users_ofzGraphLowering.register_users_of  s    	@ 	rl   c                    t        |t              sJ | j                  j                  |       || j                  vry| j                  |   D ]  }|j                           y)z
        When a buffer is mutated we need to make sure all the reads to
        the old version are realized before the mutation happens.
        N)rn   r   r   r  r  realize)r4  r  r  s      rk   mark_buffer_mutatedz!GraphLowering.mark_buffer_mutated  s\    
 $$$$  &t)))&&t, 	DLLN	rl   c                    || j                   v r|| j                  v s
J d|z          t        | j                   |         }|| j                  j                  v r| j                  j                  |   S | j                  |   S )z
        In AOTI, module buffers may have been mutated during the tracing and compilation.
        Thus we need to read from previously stored original buffers, to make sure the
        generated model.so uses correct initial values.
        z$Can not find the original value for )r&  r   rP   r%  r   )r4  r  	orig_names      rk   get_original_value_of_constantz,GraphLowering.get_original_value_of_constant  s     t3338N 	
2T9	
N 5T5Q5QRV5WX	 DKK,,, KKY'	
 %	
rl   c                   t         j                  j                  s2| j                  j	                         D ]  \  }}t        ||      s|c S  |dt        | j                         }|}|d   j                         rd| }| j                  |      }t        |      }|}d}|| j                  v r| d| }|dz  }|| j                  v r|| j                  |<   |j                  d|j                  dt        |j                               dt        |j                               dt        |      d	| j                   |<   || j"                  |<   |S )Nconstantr   	constant_rH  r&    r  )r'   aot_inductoruse_runtime_constant_foldingr   itemsrS   rA  isdigitr  rU   r   r  r  r>  r?  hashr   r&  )r4  r  rO  constant_namer  r  prefixcnts           rk   allocate_non_dup_const_namez)GraphLowering.allocate_non_dup_const_name  sZ    ""??(,(<(<(> )$u!$.(() <c$..123D	7??tf%D  &  %dnn$XQse$D1HC dnn$  $t{{oQtzznATYY[!$AeDKKM&:%=QDz!n 	D!
 .7$$T*rl   c                    | j                  ||      }t        j                  t        j                  |t        |j                  |j                  g| j                  |                   S r  )	r  r>   creater(   r  r7   r   r  rM  )r4  rO  r  new_names       rk   add_tensor_constantz!GraphLowering.add_tensor_constant  sb     33D$?"KK.2.G.G.M
 	
rl   c                X   | j                   |   j                  |k(  s||S t        j                  j                  j                         5  | j                  | d|j                   |j                  xs d | j                   |   j                  |            cddd       S # 1 sw Y   yxY w)z
        We AOT copy constants to the devices they are needed on.
        If device_override doesn't match the constant's device, then
        copy it and return a different name.
        NrH  r   )
r   r   rt   utils_python_dispatch_disable_current_modesr  r  r  to)r4  r  device_overrides      rk   r  zGraphLowering.constant_name  s     >>$&&/9_=TK[[))@@B 	 33&/../0E0E0J/KLt$''8	 	 	s   	AB  B)c                   | xj                   dz  c_         t        | 	  |||      }| j                  |      }t	        |t
              rB|j                  j                  }|| j                  |<   | j                  j                  |       |S t	        |t        t        t        f      rAt        j                  |      }|| j                  |<   | j                  j                  |       |S t	        |t               r9t#        ||      }|| j                  |<   | j                  j                  |       |S || j                  j                  |       y t	        |t$              r| j                  j                  |       y t	        |t&        j(                        rt+        t,        j.                  j0                  j2                        dk(  rdt5        t7        t,        j.                  j0                  j2                              j8                  t&        j:                  j<                  j>                  u sJ tA        jB                  ||jD                        }|| j                  |<   | j                  j                  |       |S t	        |t&        jF                        sJ |       |jH                  s| jK                  |      \  }}	n| jM                  |      \  }}	| jN                  re| jP                  rY| j                   | jP                  v rAtS        jT                  tW        |tY        |jD                  |jZ                  ||	                  }
n@tS        jT                  t]        |tY        |jD                  |jZ                  ||	                  }
|
| j                  |<   | j                  j                  |       |
j^                  j^                  | j`                  |<   | j0                  j2                  r| jc                  |jD                         te               5  tg        |      r| jh                  jk                  |       d d d        |
S # 1 sw Y   |
S xY w)Nr&   r  r  )r  r   r  )6r2  r   placeholderr  rn   r!   r   rE  r   r   r  intr}  r  ro   sympifyr   r?   r   rt   	GeneratorrA  rY   r  r  r  r  iterr   _prims	rng_primsgraphsafe_run_with_rng_stater(   GeneratorStater   r   _has_symbolic_sizes_stridesrM  rK  r   r3  r>   r  r6   r7   r  r:   rO  r   r  rT   rV   r*  r  )r4  r   ri   rj   examplerE  r   gensizesstridestensorr7  s              rk   r  zGraphLowering.placeholder  sp    	!'%fdF;""6*gx(<<$$D(,Df%""))&1K#tU!34==)D(,Df%""))&1K!12!vW=C(+Df%""))&1J_""))&1g}- ""))&11AGG((../14agg22889:AA<<))FFGG ##GC(+Df%""))&1J'5<<09'90
 22!66w?NE7!88ANE7 $$$$(<(<<%%&w~~w}}eWUF %%&w~~w}}eWUF %+&!%%f--3[[-=-=""6*""  0 12 	0*73##''/	0 	0 s   'P77Qc                   |t         j                  u r/t        |d   t        t        t
        f      rt        |   |||      S t        |t        j                  j                        st        |d      r ||i |S |t        vrt        |t        j                  j                        s
J | d       |j                         j                  d      d   }|t         v rt#        |dd       n+t$        j&                  rt)        |g      rt*        nt,        }t.        j1                  d|j3                  |||             t        j4                  j6                  j8                  |j:                  v rt<        }nqt        j4                  j6                  j>                  |j:                  v rd }n>t        j@                  jB                  jE                  |      r| jF                  rtH        }nd }t#        ||	       n&t)        |g      rt+        |||      t-        |||      	 t.        jK                  d
t        |          | jL                  }tO        |      }|r||}
}	 ||g|i |\  }}t        |   |i |}|r| jQ                  |	
||       |S # tR        $ r-}tU        ||||      jW                  |jX                        d d }~ww xY w)Nr   _inductor_lowering_functionz is not an OpOverloadr   FT)warnoverride_decompz"Creating implicit fallback for:
%s)layout_constraintz  via %s)-operatorgetitemrn   r
  r  dictr   r   rt   r   OpOverloadPacketr   rE   r   r  r   rB   rF   r'   implicit_fallbacksr   r3   r4   r  r  operator_str_CTagneeds_fixed_stride_ordertagsrA   flexible_layout_libraryr  
is_builtinr   rI   r  r  rG   propagate_mutation	Exceptionr2   with_traceback__traceback__)r4  r   ri   rj   	base_nameerrordecided_constraintrj  layout_constraintsold_args
old_kwargsouter7  s                rk   r   zGraphLowering.call_functionZ  s   X%%%*T!WtUD>Q*R7(v>> &%**"="=>71D
 4*6**"fejj&;&;< (/0< ++C03I//f5$G** *6(3 .5 
 9&&vtV< 88<<88FKKG)@&XX\\11V[[@)-& ~~++66v>4CSCS-?* .2* f8JK#VH- 0fEE264HH	IIj)F"34!!A!9&!A!'+V*1!EdEfEfF#T4V4C! ''8ZvNJ 	#AvtV<KK	s   A/J 	J>(J99J>c                Z    t        | j                        dk(  xr | j                  d   dk  S )zM
        True if this is a small constant attr that will be inlined.
        r&   r      )rA  shape)ts    rk   can_inline_constantz!GraphLowering.can_inline_constant  s(    
 177|q 4QWWQZ1_4rl   c                   t        | j                  |      }t        |t        j                  j
                        rE|| j                  v r| j                  |   S t        j                  ||      }|| j                  |<   |S t        |t        j                  j                        r+|| j                  |<   d| j                  |<   t        ||      S t        |t              r?|j                  | j                  |<   d| j                  |<   t        ||j                        S t        |t        j                         sJ t"        j$                  j&                  st"        j(                  st+        |      r| j-                  ||      S t/               5  |j0                  dk(  r9t3        |j5                         |j6                  |j8                        cd d d        S | j;                  |      r[t<        j?                  dtA        |             ddl!m"}  ||jG                         |j6                  |j8                  	      cd d d        S 	 d d d        | j-                  ||      S # 1 sw Y   xY w)
N)r  graph_moduler   r  rh   )r  r  r   zInlining constant: %s r&   )r  )r  r   )$r   r%  rn   rt   fxr_   r   r(   Subgraphr(  ScriptObjectr   r   r?   r   real_objr   r'   r  r  always_keep_tensor_constantsrJ   r  r#   r=  r5   itemr  r   r?  r  r  r   loweringr  tolist)r4  r   ri   rj   r  r9  r  s          rk   get_attrzGraphLowering.get_attr  s    "$++v6eUXX112,,,**622++6>C*-D'JeUXX223/4D$$V,*,D'"e<</0/4~~D$$V,*,D'"ennEE%...<<22(/++E6::] 
	V{{b **,ekk%,,
	V 
	V
 ''.		2CK@,ellnEKKU
	V 
	V
 /
	V ''v66
	V 
	Vs   ?IA"II$c                    t         rg   AssertionErrorr4  r   ri   rj   s       rk   call_modulezGraphLowering.call_module      rl   c                    t         rg   rL  rN  s       rk   call_methodzGraphLowering.call_method  rP  rl   c                d   t         |   |||      }t        |t        t        f      s|f}t        |t        t        f      sJ t        |             t        d |D              sJ |       t        j                  j                  j                  d   }t        |t        t        f      s|f}|D cg c]!  }t        j                  j                  |      # }}g }t        |      t        |      k(  sJ t        ||      D ]C  \  }}	t        |t        j                   t        j"                  f      s|j%                  |       Ct        |j'                         t        j(                        r/|j%                  t        j                  j+                  |             t,        j.                  j                  j1                  |      sJ |	j2                  d   j5                         D 
cg c]4  }
t        |
t,        j6                        r|
j8                  j:                  n|
6 }}
|j%                  t        j<                  ||             F || _        | j@                  jC                         D ]_  \  }}t        |tD              rt        |t         tF        jH                  t,        j.                  j                  jJ                  f      sJ dt        |              t        |t               s|jM                          t        |t               sJ |jN                  }t        |t        jP                        sJ |}|jN                  }t        |tR              r|jU                         |k7  st        jV                  jY                  || jZ                  |          	 | j>                  j]                  |      }| jZ                  |   | j>                  |<   b | ja                          tb        je                  d| jf                  | jh                  | jh                         y d       y c c}w c c}
w # t^        $ r Y w xY w)Nc              3  8  K   | ]  }t        |t        t        j                  t	        d       t        j
                  t        j                  t        j                  j                  j                  t        t        j                  t        j                  f	        y wrg   )rn   r>   r(   r5   r  r  ro   r   logicboolalgBooleanr  EffectfulKernelShapeAsConstantBuffer)r}   r  s     rk   r   z'GraphLowering.output.<locals>.<genexpr>  sp      
  KKJ%%JJKK''//&&,,

s   BBr   rh  z'Unsupported inductor graph input type: zGForce channels last inputs for %d conv for the current graph with id %dr   )5r   r   rn   r  r
  r  r  rY   r  r  ri   r(   ExternKernelrealize_inputrA  zipr>   BaseViewr  get_output_specCommBufferLayout
copy_inputrt   r  is_storage_and_layoutr   r?  rD  r   rE  try_match_insignificant_stridesr   r   r  r?   ro   r   r  r  rO  r=   r:   get_nameMutationLayoutSHOULDREMOVErealize_intor   r  
ValueErrorfinalizer  r  r   r  )r4  r   ri   rj   resultfx_node_argsr  result_correct_stridesrfx_nodesmeta_stridesr  r  value_storage_boxindr7  s                   rk   r   zGraphLowering.output  s    f5&5$-0YF&5$-0>$v,>0 
 
 
 	  !	 
$ ww++003,6(?L<BCq"////2CC!#< CK///fl3 	JAwa",,!<=&--a0A--/1D1DE '--boo.H.H.KL ))??BBB %\\%0779  $.a#>AFFKKAE    '--66q,G#	* 4,,224 	KD%%1	5::u/A/A/P/PQ G8eFG  eY/MMOeY///JJEeR]]333 %JJEe[1U^^5E5M--::455d;,,223DEC.2.H.H.ND&&s+-	4 			U''!]]6DMM	
 =?	
m D" B " s   /&P"9P7P""	P/.P/c                F    | j                   D ]  }|j                           y rg   )r   decide_layout)r4  r  s     rk   rg  zGraphLowering.finalizeM  s!    << 	 C	 rl   c              #  b   K   | j                   }	 || _         d  || _         y # || _         w xY wwrg   )r  )r4  r   olds      rk   set_current_nodezGraphLowering.set_current_nodeQ  s1     	$ $D #DDs   /# /	,/c              #  T   K   | j                   }	 d  || _         y # || _         w xY wwrg   r   )r4  rt  s     rk   set_current_wrapper_codez&GraphLowering.set_current_wrapper_codeZ  s)     	$ #DDs   ( (	%(c                    t        |      t        |      k(  sJ t        |      t        |      k(  sJ |j                  t        j                  j                  j
                  u r|j                  d   }t        |t              sJ t        j                  j                  j                  |d   |d   |j                         D ci c];  \  }}|t        |t        j                  j                        r|j                  d   n|= c}}      }	|	D ]X  }
|d   |
   }|d   |
   }||u r j!                  t        j                  j"                  j$                  j&                  ||fi        Z yt        |j                  t        j(                  j*                        sJ 	 	 	 	 	 	 	 	 d fd}|j                  j,                  }t/        t1        ||            D ]!  \  }\  }}|j2                  |   } ||||       # |j2                  D ci c]  }|j4                  | }}|j7                         D ]  }||   }||   }||   } ||||        yc c}}w c c}w )ax  Propagate mutations on new_args/new_kwargs back to old_args/old_kwargs.

        Assumes we may have cloned old_args/old_kwargs into new_args/new_kwargs
        and then called fx_node(*new_args, **new_kwargs).

        If fx_node mutates any of new_args/new_kwargs, and they are different from
        old_args/old_kwargs, then we need to update the original tensor.
        rj   
kernel_idxconstant_args_idxrh  Nc                L   ||u ry | j                   | j                   j                  r{t        |t        j                        r|f}|f}t        ||      D ]K  \  }}||u rj                  t        j                  j                  j                  j                  ||fi        M y y y rg   )
alias_infois_writern   r(   IRNoder\  r   rt   r  r   copy_r|  )
schema_argold_argnew_argold_arg_itemnew_arg_itemr4  s        rk   maybe_propagatez9GraphLowering.propagate_mutation.<locals>.maybe_propagate  s     '!$$0Z5J5J5S5S gryy1&jG&jG25gw2G .L,#|3 &&		,,44|\6RTV 6T0rl   )r  ztorch._C.Argumentr  	ir.IRNoder  r  r   r  )rA  r   rt   r  higher_ordertriton_kernel_wrapper_mutationrj   rn   r$  r   r   get_mutated_tensorsr  rB  r"   r   r   r   r  r|  r   r   _schemar   r\  	argumentsr  r   )r4  rl  r7  r8  new_args
new_kwargsrj   kvmutatedr  r  r  r  schemar   r  argschema_kwargskeys   `                   rk   r/  z GraphLowering.propagate_mutationb  sJ     8}H---:#j/111>>UYY33RRR^^H-Ffd+++--@@TT<(./ !'1 
1ehhmm(Dqvve}!KG   Y$X.t4$X.t4g%""599>>#7#7#?#?'7ASUWXY '..%***?*?@@@	)	4=	HQ		( '''0Xx1H'I 	:#C#'7))#.JJ9	: 392B2BC33CC??$ 	:C oG oG&s+JJ9		:WR Ds   A I
Ic                $   -./ d*fd}ddl m} t         j                        .t         j                        /t        g      }j                  dk(  }|r# j                        \  }}|t        ||      z  }t        j                  j                  |      5   j                        5  t        j                        5  j                  dk(  rdj                  t        j                   urHt#              s|j%                  ddfd      r' |d        t'        j                  d	
      i }nj                  dk(  rj                  t(        j*                  j,                  j.                  u rt0        j2                  dk7  r |d       t0        j2                  dk(  r}	}
j4                  j7                  d      x}r|d   }|d   }t9        ||||      \  }}nt;        g|i |\  }} j=                  j                  ||      } j?                  |	|
||       ntA        dt0        j2                         tC        j                        r |d       tE        j4                  d   t(        jF                  t(        jH                  t(        jJ                  f      r$j4                  d   jL                  jN                  }n'tP        0         }n |d       tP        0         }t(        j*                  jT                  jV                  jX                  t(        j*                  jT                  jZ                  jX                  t(        j*                  jT                  j\                  jX                  t(        j*                  jT                  j^                  jX                  t(        j*                  jT                  j`                  jX                  g-tc        d jd                  D              } jf                  v }tc        -fdjd                  D              }j4                  j7                  dd	      rtE        |th              r|jk                          j4                  d   jm                         }t)        jn                  jp                  jr                  | }|ju                         |k7  r7|s5t        jv                  |      }t        jx                  j{                  ||      }|rDtE        |th              r4tE        |j|                  t        j~                        r|jk                          |s|r%tE        j4                  d   t(        j                        r|r jf                  j7                        }nj4                  d   jm                         }|t        |      dkD  rt0        j                  xs | xr | }t(        j                  j                  j4                  d         }t        t        |            dkD  }|sl|rjt        |j                               dk(  rN j                  v r@|s>|s<t        j                  j                  |j                         t(        j                        }|st        |      r؉j4                  d   j                         s$tE        |j|                  t        j~                        r6t        jx                  j{                  |t        jv                  |      |      }na|D cg c]4  }tE        |t(        jF                        r|jL                  jN                  n|6 }}t        jx                  j                  |||      }t        t        jd                              }|dkD  rtE        |th              rjd                  D ]  }|j                  t        v rn|j                          t(        j*                  jT                  j                  jX                  t(        j*                  jT                  j                  jX                  t(        j*                  jT                  j                  jX                  g}g } j                  s=|j                  t(        j*                  jT                  j                  jX                         t(        j                  j                  r|t(        j*                  j                  j                  jX                  t(        j*                  j                  j                  j                  t(        j*                  jT                  j                  jX                  t(        j*                  j                  j                  jX                  t(        j*                  j                  j                  j                  t(        j*                  j                  j                  j                  t(        j*                  j                  j                  j                  gz  }|t(        j*                  j                  j                  jX                  t(        j*                  j                  j                  j                  t(        j*                  j                  j                  j                  t(        j*                  j                  j                  jX                  t(        j*                  j                  j                  jX                  t(        j*                  j                  j                  j                  gz  }t(        j                  j                  r2|t(        j*                  j                  j                  jX                  gz  }|j                  |v rPt        jx                  j{                  |t        jv                  j4                  d   jm                               d      }|j                  |v rd|j                  d   u rSt        jx                  j{                  |t        jv                  t        j4                  d   j                                    }|j                  dk(  stE        |j|                  j|                  t        t        f      s|jk                           |j                  t        jd                               tE        |th              r |j                         r|j                          tE        |th              rbtE        |j|                  t              rH|j|                  j|                  }tE        |t              r"|j                  d      r|jk                          d d d        d d d        d d d        tE        th              r#tE        |j|                  t        j                        rtE        |j|                  j|                  t        j                        r(|j|                  j|                  j                  d       ntE        |j|                  j|                  t        j                        ry|j|                  j|                  j                  d       tE        |j|                  j|                  t        j                        ritE        |j|                  j|                  j|                  t        j                        r1|j|                  j|                  j|                  j                  d       ntE        |j|                  j|                  t        j                        r|j|                  j|                  j                  sntE        |j|                  j|                  j                  d   t        j                        r3|j|                  j|                  j                  d   j                  d        j                  |       t        t        j                            } j                  .d  D ]  }||j                         z  }  j                  /d  D ]  }||j                         z  } d+./ fd} j                  dk7  r2t        j                  j                  j                  }!d, fd }"|D ]R  }# j                  j                  |#g       }$|!j                  |#   }%|!j                         j                  |%      sxd-d!}& |&|%j                        r' |"|#|%j                  k\  |# d"|%j                           |&|%j                        r' |"|#|%j                  k  |# d#|%j                          |$D ]  }'t        |'jN                        }(|( j                  z
  })|)r@t	        |)t
        $      }* j                  j                  |*g       j                  |'       j |"|'jN                  |'jN                           U  xj                  |z  c_        t        t        j                  j                  j                  j4                  j7                  d%i             }+|+J t        d& |+j                         D              },||,k\  s'J d'| d"|, d(j                          d) |                |S c c}w # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   #xY w).Nc                Z    t         j                  dt        j                        |        y )Nzlowering %s %s)r  r  r   format_node)msgrj  s    rk   r  z%GraphLowering.run_node.<locals>.debug  s    II&
1==(A3Grl   r   )CompilerBisectorr   inductorrE   c                     t               S rg   )reprrr  s   rk   <lambda>z(GraphLowering.run_node.<locals>.<lambda>  s    a rl   rC   F)add_to_fallback_setr,  -user_defined_triton_kernel_layout_constraintsr*  arg_kwarg_valsr&   z1Unknown triton_kernel_default_layout_constraint: r   rh  r   c              3  :   K   | ]  }|j                   d k(    yw)r   Nr   )r}   r  s     rk   r   z)GraphLowering.run_node.<locals>.<genexpr>  s     DDDGGx/Ds   c              3  :   K   | ]  }|j                   v   y wrg   )r   )r}   r  as_strided_opss     rk   r   z)GraphLowering.run_node.<locals>.<genexpr>	  s      *26~-*   inductor_realize_to_strides   )allow_paddingTr   d   )	thresholdorigin_nodec                     j                   d  D  cg c]  } d| j                          d|  d }} |j                  d j                  d  D               dj	                  |      S c c} w )Nunbacked_symbol_defs= in:

c              3  J   K   | ]  }d |j                          d| d  yw)r  r  r  N)get_unbacked_symbol_defs)r}   r   s     rk   r   zBGraphLowering.run_node.<locals>.format_new_defs.<locals>.<genexpr>  s2       ((C(C(E'FfRDPRSs   !#z***
)r   r  extendr   r   )r  rk  buffer_watermarkoperation_watermarkr4  s     rk   format_new_defsz/GraphLowering.run_node.<locals>.format_new_defs  s      <<(8(9: ((D(D(F'GvcURTUA  HH //*=*>?  <<?"s   A)r  c                z    t        j                  | |      }j                  |d       j                  |       y )NTr  )r(   AssertScalarr  r  )rE  r  	assert_opr4  s      rk   make_assertz+GraphLowering.run_node.<locals>.make_assert  s4    OOD#6	$$Y$>''	2rl   c                Z    | t         t          fv ry	 t        |        y# t        $ r Y yw xY w)NFT)r%   r  	TypeError)rm  s    rk   is_convertiblez.GraphLowering.run_node.<locals>.is_convertible  s5    & 11#()F#'( )#()s    	**z >= z <= )r  unbacked_bindingsc              3     K   | ]8  }t         j                  j                  j                  j	                  ||       : y wrg   )rY   r  r6  unbacked_renamingsr   )r}   rm  s     rk   r   z)GraphLowering.run_node.<locals>.<genexpr>  s5      3 %%88<<QB3s   >A zfailed z (inductor >= fx)
fx node is: z
new operations are:

)r  r   r   r  r   r   )rE  r    r  r   r   r  )rm  r   r   r}  )!torch._inductor.compiler_bisectorr  rA  r   r   r$   r   fetch_args_kwargs_from_envrO   r(   r  current_originsru  rY   r   r"  r#  rD   disable_subsystemrC   rt   r  r  r  r'   'triton_kernel_default_layout_constraintr   r   r@   rA   r   r/  r   r   rn   rD  SymFloatSymBoolr   rE  r   run_noder   
as_stridedr|  as_strided_as_strided_scatterresize	resize_asr   r  r   r>   r  r?  r  r  any_is_symbolicmaybe_get_strideget_stride_orderrZ  require_stride_orderrO  r]  r   r   _prims_commonis_non_overlapping_and_denser   rQ  r  FlexibleLayout stride_ordered_for_memory_formatchannels_last_is_viewrequire_exact_stridesrH   realize_hintr   mm_int_mmr   r  r   r(  _has_mkldnnr  _linear_pointwisebinarymkldnn_rnn_layeronednnqlinear_pointwiser  binary_tensor_convolution_pointwise_convolution_pointwise_ _convolution_transpose_pointwiseqconv2d_pointwisehas_mklmkl_mkl_linearri   r   r=  r;   r<   
mark_reusehas_exceeded_max_readsr=   has_large_inner_fnLoops_post_init_setattrBufferrP  MultiOutputindicesinputsr  ro   rp   r  r  r   r6  r   popvar_to_range _default_unspecified_value_rangeissubsetlowerupperr   r   r   
setdefaultr   r   r  )1r4  rj  r  r  originsis_call_functionri   rj   rh  r7  r8  r  inp_args
inp_kwargs	is_outputis_user_visibleis_input_for_as_stridedr  sym_stridesstride_orderr  denseunbacked_symbols_in_stridesrm  	num_usersr  need_fixed_layoutneed_fixed_channels_last_layoutcurrnew_unbacked_defsr  r   r  r6  r  i0rasvrr  rafvsmissingi1r  renamed_unbacked_bindingsr  r  r  r7  s1   ``                                           @@@rk   r  zGraphLowering.run_node  s   	H 	Gt||,!$//2 $.qc?44?2::1=LD&~dF33GII%%g.g	)!!!$g	) q!g	) 'HHH$4$449!<'99"K
 ()N)!((N
 'HH		 6 6 U UUBBFWWEFBB12  $H!'J)*4D)EE~E#1!#4%3A%6
'@ &(J(f (?q'R4'R6'Rf!//$GF++AxT6R&KFLzLzK{|  !* '(FF5MELL%..%--#P VVE]//44F"W-a0Fb	)!, 		))11		**22		1199		%%--		((00N DAGGDDI4#C#CCO&) *:;''* '# vvzz7?J	E  &&-..0#oo33CCWM**,7#%#6#6w#?L__AA&,WFvy1v{{BKK8  4*uu||; #">>BB1EGffUm224G&3w<!+;**A/.A%655 " "//LLuE 1':;a? 0 8! 12a7!@!@@ / 7"$"3"3"T"T"OO-u/B/B# 73w< 66%=113z"KK8 &(__%I%I & " 3 3G <.; &J &F *1'$% 0:!U\\/JPQ Q'G ' &(__%J%J &} &K &F Jqww/0I1}FI!>GG ;-D{{&;;++- "IINN??GG!IINN--55!IINN22::-)
 ;=7#-44UYY^^5O5O5W5WX 88//- %		 0 0 B B J J %		 0 0 B B I I %		 ? ? G G %		 0 0 B B J J %		 0 0 B B I I %		 0 0 B B I I %		 0 0 B B P P2 - < %		 0 0 G G O O %		 0 0 G G N N %		 0 0 H H O O %		 0 0 Q Q Y Y %		 0 0 B B J J %		 0 0 B B I I@ ;  %xx// 1eiimm6O6O6W6W5X X 1;;*;;%'__%I%I & " 3 3AFF5M4H4H4J K.2 &J &F !KK+JJ !TYYq\ 1%'__%I%I & " 3 3$B166%=CVCV$W!"&F ww(*%fkk&6&6I8NO"NN,w;-| !!#agg,/ &),1N1N1P ##% &),FKK1T{{''dI....=(Og	) g	) g	)b fi(ZR]]-S&++**BHH5  33M1EFKK,,bii8  33M1Efkk..0A0ABzKK$$))288H KK$$))<<]AN v{{//@"KK,,44!&++"2"2"9"9!"<biiH((//2EEmUVWv&&u||46<< 0 12 	@C!=!=!??	@//"5"67 	?B!<!<!>>	?		# 44= * ((22I3
 ( ;((,,R4++B/ AACLLRP) &bhh/#B"((Nrd$rxxj4IJ%bhh/#B"((Nrd$rxxj4IJ ;B/8C!D$?$??G c2**55b"=DDRH#BGGy:;);: ''+<<' 9  **AFFJJ7JB,O! %000 )3 3*//13 )% %(AA +,D1J0K L }}/ 0**9*;)<>A '[g	) g	) g	) g	) g	) g	)sr   AI".AIX4AI89AI
1S9AI,*AICAI1AI9AI"IAIIAIIAIIAI	IAI"I"AI,c                    t         j                  rt        d      t        j                  dvrt        dt        j                         y )NzC++ codegen is disabled)linuxdarwinwin32zUnsupported platform )r'   disable_cpp_codegenr1   sysplatformr:  s    rk   !validate_can_generate_cpp_wrapperz/GraphLowering.validate_can_generate_cpp_wrapper)  s@    %%()BCC<<;;(+@)OPP <rl   c                   | j                   j                         }|j                  d       |j                  d       t        |      dk  s%J dj	                  dj                  |                   t        |      dk(  }|rdn|j                         | _        | j                  r| j                          t        | j                        | _        t        | j                  | j                        }|J d| j                   d       |j                  ||||      | _        | j                  r_| j                  j                  j                   | j                  _        | j                  j                  j"                  | j                  _        y y )	Nr   r   r&   zDoes not support mixing {}+r   zDevice z not supported)r   r   discardrA  formatr   r  r   r  r  r-   r   r.   r  r   r   _names_itersrc_to_kernel)r4  is_subgraphr  parent_wrapper_codepartition_signaturesr   only_cpuwrapper_code_gen_clss           rk   init_wrapper_codezGraphLowering.init_wrapper_code0  so    ((--/U#V$< A% 	
'C'J'JHH\"(
 	
% |$)$,5,2B2B2D2241$2B2BC=d.. 
 $/ 	
d&&'~6	
/ 177 	
  -1,=,=,J,J,V,VD)!!..<< +	 rl   c                    t         fddD              r1t        j                  j                  r j	                         S d _         j                         j                  }	 	 	 	 d
d}t        j                  j                  j                         }|t        t        j                  t              sy|j                   r|j                   j#                          |j$                  D cg c]  }|| }}t'        j(                  |t        j                        D cg c]
  } ||       }}nMt        t        j                  t              r j*                  nt        j                  D cg c]
  } ||       }} j,                  rddlm} t3         j4                        D 	
cg c]2  \  }	}
|
 j,                  v rt        ||	   t        j6                        r|	4 }}	}
|D ]/  }	||	   }t        |t        j6                        sJ  ||      ||	<   ~1 t        j8                  j:                  j=                         5   ||       ddd       ~d _         j>                  j#                           j@                  j#                           jB                  j#                          t        jD                  jF                  jH                  j#                          t        jD                  jF                  jJ                  j#                          tM        jN                          t        jP                  d	di      5   j	                         cddd       S  j	                         S c c}w c c}w c c}w c c}
}	w # 1 sw Y   .xY w# 1 sw Y   yxY w)zQ
        For GPU, Triton kernels are autotuned and stored as cubin files
        c              3  :   K   | ]  }|j                   v   y wrg   )r   )r}   r   r4  s     rk   r   z9GraphLowering.codegen_with_cpp_wrapper.<locals>.<genexpr>`  s     Ivv***Ir  )cudaxpuFc                .   | y t        | t        j                  t        j                  f      r| j                  j
                  S t        | t              rt        |       S t        | t        j                        sJ dt        t        |             z          | S )Nz&Unknown type when creating real inputs)rn   rt   rD  r  r   hintr   r   r   r   r  )r  s    rk   materializez;GraphLowering.codegen_with_cpp_wrapper.<locals>.materializej  sx     y##Aenn'EF vv{{*#Az2%ay()!U\\: Ds4PQ7|S:  !rl   Nr&   )clone_preserve_stridesTztriton.autotune_at_compile_time)r  z1Union[torch.SymInt, torch.SymFloat, torch.Tensor]r   zUnion[int, float, torch.Tensor]))r   r'   tritonautotune_at_compile_timecodegenr  compile_to_modulecallrt   _guardsTracingContexttry_getrn   rY   real_inputsrX   output_stridesclearparams_flatr/  chainr   r  
compile_fxr-  r   r   r   r  r  r	  r   r   r   r  r   precomputed_replacementsinv_precomputed_replacementsr)   resetpatch)r4  compiledr,  tracing_contextparamr9  r  r6  r-  r   r  r  mutated_inps   `            rk   codegen_with_cpp_wrapperz&GraphLowering.codegen_with_cpp_wrapperZ  s    III}}55 ||~% $) 11388!H!4!  #(--">">"F"F"H".zMM;8 '55'66<<> &5%@%@#! , #K # "+amm!L# $A#K #  *!--E !//!"# $A#K # &&B *343D3D)E*%C4#6#66&{3'7F *& *  2 ( '2#&6)+u||DDD+A++NC('( [[11HHJ *[)* $( $$**,''--/''--/  99??A  ==CCE\\#De"LM *<<>* * <<>!y#
##*&* ** *s0   &MM'M""7M'8	M-.M:-M7:Nc                    ddl m} t        j                  dd      5   || j                        | _         ddd       y# 1 sw Y   yxY w)z
        (Re)initializes the scheduler member.  When initializing the scheduler, no CUBIN
        files should be generated (to avoid biasing any benchmarks and pessimizing
        fusion decisions).
        r&   )	Schedulerztriton.store_cubinFN)r  rF  r'   r?  r   )r4  rF  s     rk   _update_schedulerzGraphLowering._update_scheduler  s;     	)\\.6 	8&t7DN	8 	8 	8s	   >Ac                   t        dd      5  | j                          | j                          t        j                  j                  | j                  | j                  j                         | j                  j                  |        | j                  j                          t        j	                  dt        j                  j                         t        j                  j                         }|r)|\  t!        dd fd       t!        dd	 fd
       | j                  j#                  | j$                        }| j                  j'                          |cd d d        S # 1 sw Y   y xY w)NzGraphLowering.codegenTlog_pt2_compile_eventzFFinished codegen for all nodes. The list of kernel names available: %sartifactc                     dddS )N)inductor_triton_kernel_to_post_grad_nodesjsonr  encodingrh   rh   rl   rk   r  z'GraphLowering.codegen.<locals>.<lambda>  s     K$*) rl   c                 .    t        j                         S rg   rN  dumps)
debug_infos   rk   r  z'GraphLowering.codegen.<locals>.<lambda>  s    tzz*'= rl   )metadata_fn
payload_fnc                     dddS )N*inductor_provenance_tracking_node_mappingsrN  rO  rh   rh   rl   rk   r  z'GraphLowering.codegen.<locals>.<lambda>  s     L$*) rl   c                 .    t        j                         S rg   rR  )node_mappingss   rk   r  z'GraphLowering.codegen.<locals>.<lambda>  s    tzz-'@ rl   )r   r%  rG  rY   r  draw_orig_fx_graphr$  r  r   r   push_codegened_graphr0  r  r  r.  1log_inductor_triton_kernel_to_post_grad_node_infor   generater   pop_codegened_graph)r4  provenance_inforh  rT  rZ  s      @@rk   r0  zGraphLowering.codegen  s*   1N *	""$""$GG&&t||T^^5I5IJ2248NN""$IIX00 IIK   $! !  > !!  A &&//0A0ABF113U*	 *	 *	s   EE%%E.c                   t        dd      5  |j                  | _        |j                  | _        |j                  | _        | j	                          | j
                  j                          ddd       y# 1 sw Y   yxY w)a  
        This is a more compact version of the `codegen()` above
        where we codegen this graph as a subgraph of some parent
        graph. The parent graph is passed as an argument: the
        intention is to inline codegening of the subgraph in
        the parent graph's wrapper code (including the generated
        kerenls). The wrapper code is not finalized (via `.generate()`
        call), as this will be done in the parent graph's `codegen()`.
        zGraphLowering.codegen_subgraphTrI  N)r   r   r   r  rG  r  r0  )r4  parent_graphs     rk   codegen_subgraphzGraphLowering.codegen_subgraph  sj     :RVW 	% , 9 9D*55DO+77D""$NN""$	% 	% 	%s   AA55A>c                    d}g }g }| j                   j                  D ]N  }|j                         }||z  }|j                  ||dz  f       |j                  ||j	                         f       P |||fS )Nr   r  )r  r   get_read_write_buffers_sizesr  get_estimated_runtime)r4  total_bytesnode_countsnode_runtimesr   	num_bytess         rk   count_byteszGraphLowering.count_bytes	  s    
 NN(( 	GD99;I9$Ki1n56  $(B(B(D!EF		G K66rl   zOptional[Callable[[str], None]]save_output_codec                l    t        dddd      5  | j                         cd d d        S # 1 sw Y   y xY w)NzGraphLowering.compile_to_modulecode_genT,inductor_code_gen_cumulative_compile_time_us)
phase_namerJ  dynamo_compile_column_us)r   _compile_to_moduler:  s    rk   r1  zGraphLowering.compile_to_module  s9    -!"&%S	
 	- **,	- 	- 	-s   *3c                  
 ddl m} | j                  r| j                         n| j	                         \  }t
        j                  j                  rcd| j                  j                  j                         z   | j                  j                  j                         z   dz   }|j                  z   _        t        j                  t        j                  j                         t        j                   dj                         t#        j$                         }t'        j(                  |j                         	 j*                  D cg c]  \  }}||j,                  f }}}|j/                  j                        \  }
t        j                   d
       t1        d
fd	fd
       t5        dd      5  |j7                  |
|i | j8                  | j:                        }	d d d        || _        
| _        || _         t
        jB                  r#t
        jD                  r	jG                  dd       	jH                  J tK        |	jH                         tL        j!                  d|	jH                         t        jN                  d|	jH                         t
        jP                  r(tS        d|	jH                   tT        jV                         tX        j                   j[                  |	jH                         tX        j                   j]                  t^        j`                  jc                  |	jH                        d   dz          |	S c c}}w # t2        $ r t1        dfd        w xY w# 1 sw Y   xY w)Nr&   )PyCodeCachez%"""
Compile-time auto-tuning block: 
z"""
zOutput code: 
%s)codezOutput code written to: %sinductor_output_codec                     d iS )Nfilenamerh   )paths   rk   r  z2GraphLowering._compile_to_module.<locals>.<lambda>N  s    T* rl   c                      j                   S rg   r  rw  s   rk   r  z2GraphLowering._compile_to_module.<locals>.<lambda>O      <#5#5 rl   )rV  c                      j                   S rg   r{  rw  s   rk   r  z2GraphLowering._compile_to_module.<locals>.<lambda>H  r|  rl   zPyCodeCache.load_by_key_pathTrI  )linemapattrs)timesrepeatzCompiled module path: )filer   z.debug)2	codecachert  r  rD  r0  r'   r.  r/  r   kernel_autotune_defsgetvaluekernel_autotune_callsr  r   rl  rc   r  rK   inductor_meta_from_configrL   begin_compileline_mapstack_tracewriter   r0  r   load_by_key_pathr   r   r  r  r   benchmark_harnessprofile_bandwidth_outputbenchmark_compiled_module__file__re   r  r  benchmark_kernelprintr  stderrrY   output_coder   osry  splitext)r4  rt  rH  tuning_codeinductor_metaline_nor   r~  r  modry  r   s             @@rk   rr  z GraphLowering._compile_to_module%  s   *
 04/?/?D))+T\\^ 	a ==117##88AACD ##99BBDE 	  "-|/A/A!AL))5**<+=+=>1<3E3EF&@@B**=|?Q?QR	 &2%:%:!GT $**+G  $)),*<*<=IC!!">E &*5
 8PTU 	..DD4+C+CD	 / C	 $##(G(G))!)< ||'''%		.=93<<H""*3<<.9

K	CLL)	RWW%%cll3A6AB
Y  	&5
 		 	s*   :M 	M	!7M 9.M-	M M*-M7c                   g }t        j                  d      }t        j                  d      }| j                  D ]  }t        |t        j
                        r*|j                  | j                   dt        |              Gt        |t        j                        r*|j                  | j                   dt        |              |j                  |j                                 |S )Nr   _none_shape)r/  r0  r   rn   r(   NoneAsConstantBufferr  r  r  rY  rc  )r4  namesshape_counternone_counterr   s        rk   get_output_nameszGraphLowering.get_output_namesl  s    !* q)&& 	.D$ 7 78		{%\0B/CDED"":":;		{&m1D0EFGT]]_-	. rl   c                .   || j                   j                         v xrh | j                   |   j                         dk(  xrF t        | j                   |   j	                               dk(  xr t        | j                   |         dk(  xs || j                  v S )Nr&   r   r   )r   r   r  rA  rQ  r8   r   r  s     rk   is_unspec_argzGraphLowering.is_unspec_argy  s     D%%**,, B!!$'113q8BD%%d+44671<B   1 1$ 78EA	3
 T222	3rl   )NNNFFNNFFFNNNNNN)$r5  torch.fx.GraphModuler   zOptional[Sequence[object]]r6  zOptional[ShapeEnv]r  zOptional[int]r  r}  r  r}  r   zOptional[bool]r  z4Optional[Callable[[list[ir.ExternKernelNode]], Any]]r   r}  r   r}  r   r}  r   zOptional[dict[str, int]]r   Optional[str]r   r  r   zOptional[GraphLowering]r  r  r   zOptional[Sequence[int]]r   r  )r   r  )rF  torch.Tensorr   z=tuple[Sequence[Union[int, Expr]], Sequence[Union[int, Expr]]])rF  r  r   z)tuple[list[sympy.Expr], list[sympy.Expr]])r   zOUnion[ir.TensorBox, ir.StorageBox, ir.Buffer, WorkspaceArg, ir.TorchBindObject]r   zSequence[Expr])r   z2Union[ir.Buffer, WorkspaceArg, ir.TorchBindObject]r   r   )r   z.Union[torch._inductor.ir.IRNode, device, None]rY  r*   r   r}  )r   torch.device)r   r  r   Iterator[None]r  )r5  r_   r   r}  r   r}  )r  r   r   r   )r5  r  r   zlist[torch.Tensor]r  r   r   r  )r   zOrderedSet[Node])r  r   r   r  )r   r  r   r  )r   z,torch._subclasses.fake_tensor.FakeTensorMode)r  r   r   z<Optional[Union[ir.TensorBox, ir.Buffer, ir.TorchBindObject]])r  
sympy.Exprr   r  )r  r   r   z2Union[ir.TensorBox, ir.Buffer, ir.TorchBindObject])r  r   r   ztorch.dtype)r  r   r   zUnion[int, Expr])ri   r   r   r   )r   zir.Operationr   r   )r  z	ir.Bufferr  r}  r   r   )r  	list[str]r   r   )r  r  r   r  )r  r   r   r  )r  r  rO  zUnion[Tensor]r   r   rg   )rO  r   r  r  r   r>   )r  r   r  zOptional[torch.device]r   r   )r   r   ri   tuple[object]rj   dict[str, object]r   zUnion[Expr, TensorBox, None])r   r   ri   r   rj   dict[str, Any]r   r   )r>  r  r   r}  )r   r   ri   z	tuple[()]rj   r  r   z8Union[Constant, TensorBox, ir.Subgraph, TorchBindObject])r   r   ri   r   rj   r   r   r   )r   r   ri   r  rj   r  r   r  )r   r   )r   r  )rl  r   r7  
tuple[Any]r8  r  r  r  r  r  r   r  )rj  r   r   objectFNNN
r   r}  r  r  r!  zOptional[PythonWrapperCodegen]r"  z!Optional[GraphPartitionSignature]r   r  )r   z)tuple[ValueWithLineMap, ValueWithLineMap])rb  r   r   r  )r   zVtuple[int, list[tuple[BaseSchedulerNode, int]], list[tuple[BaseSchedulerNode, float]]])r   r]   )r   r  )r  r   r   r}  )A__name__
__module____qualname____annotations__r   r9  rK  rM  rR  rW  rZ  r]  
contextlibr   r_  rd  staticmethodr   r  r  r  r  r  propertyr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r?  rJ  rO  rR  r   rg  ru  rx  r/  r  r  r%  rD  rG  r0  rc  rk  rl  r1  rr  r  r  __classcell__r7  s   @rk   r   r   
  s   ""
 6:(,"&!%) "!$7;,0+/04"37)k2 k2 3k2 &	k2
  k2 k2 k2 #k2!
k2 k2 k2 k2 5k2  *!k2" )#k2$ .%k2& 'k2( 1)k2* 
+k2Z1# # 	F# J	2#
#
 
#$EFE	EM>M  M 
	M4 ( ( a aF
 
 +
 	

 

60dA
D  	E$FQQ	;Q8,
8& FK (
@	
 !)6	@ 37

"/
	
 `` ` "	`
 
&`DVp 5 5/7/7 /7 "	/7
 
B/7b[
[
 [
 "	[

 
[
z  $ $ $ $H:H: H: #	H:
 H: #H: 
H:T{zQ "'+>BBF(( %( <	(
 @( 
(Tc"	2c"J	8+Z%$7
7" 9=5<-EN3rl   r   c                  L     e Zd ZdZd fdZ	 	 	 	 d	 	 	 	 	 	 	 	 	 d fdZ xZS )r  z
    Mostly a helper class for the subgraph lowering. The main goal is to call
    init_wrapper_code with the subgraph related arguments.
    c                2    || _         t        |   |i | y rg   )r  r   r   )r4  r  ri   rj   r7  s       rk   r   zSubgraphLowering.__init__  s    $)&)rl   c                f    t         |   d| j                  | j                  j                         y )NT)r   r  r!  )r   r%  r  r  r   )r4  r   r  r!  r"  r7  s        rk   r%  z"SubgraphLowering.init_wrapper_code  s.     	!)) $ 8 8 	" 	
rl   )r  r   ri   r   rj   r   r   r  r  r  )r  r  r  __doc__r   r%  r  r  s   @rk   r  r    sV    
* "'+>BBF

 %
 <	

 @
 

 
rl   r  )ri   r   rj   r   r   r  )ry   r  r   zOptional[torch.dtype])r   r   r   r}  )r   r_   r   r   r   z1Union[Tensor, torch._C.ScriptObject, GraphModule])r   r`   r   dict[Node, tuple[int, ...]])r   r`   r   r  r   r  )
__future__r   r  r'  r/  rN  loggingr"  r  r  r  r  collectionsr   r   typingr   r   r   r	   r
   r   ro   r   rt   torch._loggingtorch.fxr   r   torch._decompr   torch._dynamo.utilsr   r   "torch._library.fake_class_registryr   r   r   torch._prims_commonr   r   torch._subclasses.fake_tensorr   %torch.fx.experimental._backward_stater   torch.fx.experimental.sym_noder   r   %torch.fx.experimental.symbolic_shapesr   r   r   r   r   r    r!   torch.fx.noder"   torch.utils._mode_utilsr#   torch.utils._ordered_setr$   torch.utils._sympy.numbersr%   r   r'   r(   r)   codegen.commonr*   r+   r,   r-   r.   r/   r0   excr1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   rH  r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   runtimerK   runtime.autotune_cacherL   r   rM   r  rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   virtualizedrX   rY   collections.abcrZ   r[   r\   typesr]   torch._higher_order_ops.effectsr^   r_   torch.fx.graphr`   codegen.wrapperra   r  rb   torch._inductor.codecacherc   	getLoggerr  r  _logginggetArtifactLoggerr  r  r   r0  r  r  torch._inductor.fb.utilsre   rz   r   r   r   r   rB  Interpreterr   r  rh   rl   rk   <module>r     s   "       	 	 
  # % J J        , 4 ? 7 5 ? L    / / - ! !          $ 8 &   ( << ;$$5, 5 g!00<Hyy~~*9??, 68(
	!6
J/J/+FJ/	J/Zw3EHH(( w3t;
} 
rl   