
    Vh&V                      U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZmZmZ d dlmZmZ d dlmZ d dlmZ d dl	mZmZmZmZmZmZmZmZmZ d dlmZmZmZ d d	l m!Z! d dl"Z"d d
l"m#Z#m$Z$m%Z% d dl&m'c m(c m)Z* d dl+m,c m-Z. d dl/Z0d dl1Z0d dl2m-c m3Z4 d dl5m6Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z>m?Z?m@Z@mAZAmBZB d dlCmDZD d dlEmFZFmGZGmHZHmIZImJZJmKZK d dlLmMZM d dlNmOZOmPZPmQZQ d dlRmSZS ddlTmUZUmVZV ddlWmXZXmYZYmZZZ ddlVm[Z[m\Z\m]Z]m^Z^m_Z_ ddl`maZa ddlbmcZcmdZdmeZemfZf ddlgmhZh ddlimjZjmkZk ddl-mlZlmmZmmnZnmoZompZpmqZqmrZrmsZsmtZtmuZumvZvmwZwmxZxmyZymzZzm{Z{ ddl|m}Z}m~Z~mZ erd dlmZ dd lmZ dd!lmZ dd"l-mZ neZd#ed$<   	 d dlZej                  Zd%Z ed'      Z ed(      Z ed)      Zeee#f   Zd#ed*<   eeee#f   Zd#ed+<    ej*                  e      Z ej$                  ej0                  d,-      Ze0j                  j2                  Z	 eed.eed.f   d/d0eeeeeed.f   d.d/d0f         f   Zd#ed1<    ej:                  d%2       G d3 d4             Zdd5Zdd6Zd	d7Zd	d8Z	 	 	 	 	 	 d
d9Zg d:Zg d;Z	 d	 	 	 	 	 dd<Zdd=Z	 d	 	 	 	 	 dd>Zeddd?       Zeddd@       Z	 d	 	 	 	 	 ddAZ	 	 	 	 ddBZ	 	 	 	 ddCZddDZddEZddFZ	 	 	 	 	 	 	 	 ddGZ	 	 	 	 	 	 ddHZ G dI d0      Z etd&2       G dJ dK             Zet G dL dMe             ZddNZet G dO dPe             Zet G dQ dRe             Z edS       edT       edU       edV       edW       edX      dYZdZed[<   	 d	 	 	 	 	 	 	 dd\Zet G d] d^e             Zeee#   ee#   ge~f   Z G d_ d`e      Z G da dbe      Z G dc dde      Zet G de dfe             Zet G dg dhe             Zet G di dje             ZddkZddlZ	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 ddmZ	 	 	 	 	 	 ddnZet G do dpe             Zet G dq dreū             Zet G ds dteū             Zet G du dveū             Zet G dw dxeū             Zet G dy dzeɫ             Zet G d{ d|eū             Zet G d} d~eū             Z G d deʫ      Zet G d de             Zet G d deΫ             Zet G d deΫ             Z	 	 	 	 	 	 ddZѐddZ G d d      Zet G d deӫ             Z G d deԫ      Z G d deԫ      Z G d deԫ      Z G d de      Z G d deի      Zet G d deӫ             Z G d deԫ      Z etd&2       G d de             Z etd&2       G d dee             Z G d deܫ      Z G d deޫ      Z G d deޫ      Zet G d de             Zet G d de             Z etd&2       G d deݫ             Z G d deݫ      Z G d de      Zeeeeeeeeeeef      f   Z G d d      Z G d de      Z G d de      Z G d de      Z G d de      Z etd&2       G d deݫ             Z G d de      Z G d de      Z etd&2       G d de             Z etd&2       G d de             Z G dÄ de      Z G dń de      Z G dǄ deܫ      Z G dɄ de      Z G d˄ de      Z G d̈́ de      Z G dτ de      Z G dф de      Z G dӄ de      Z G dՄ de      Z G dׄ de      Z G dل de      Z G dۄ de      Z G d݄ de      Z  G d߄ de      Z etd&2       G d d             Z G d de      Z etd&2       G d de             Zet G d deӫ             Z G d de      Zej:                   G d de             Z G d d.e      Z G d de      Z	 etd&2       G d de             Z
d dZ etd&2       G d de             Z etd&2       G d de             Z	 	 	 	 d!dZ etd&2       G d de             Z G d de      Z G d de      Zet G d de             Zet G d  de             Z G d de      Z G d de      Zd"dZy# e$ r dZd&ZY ,w xY w(#      )annotationsN)	GeneratorIterableSequence)AbstractContextManagernullcontext)Enum)partial)	AnyCallableClassVarLiteralOptionaloverloadTYPE_CHECKINGTypeVarUnion)assert_neverNever	TypeAlias)patch)ExprIntegerSymbol)identity)GraphModuleSerializer)can_auto_functionalize)metrics)compute_required_storage_lengthis_boolean_dtypeis_float_dtypemake_channels_last_strides_for
StrideType)get_schema_info)compute_unbacked_bindingsfree_unbacked_symbolsrebind_unbackedresolve_unbacked_bindingsShapeEnvSymTypes
OrderedSet)CleanDivFloorDivModularIndexing)SymT   )configdependencies)BackendFeatureget_scheduling_for_deviceindex_prevent_reordering)Depextract_free_unbacked_symbols#extract_input_node_reduction_rangesextract_read_writesvar_builder)LoopBody)OpCounterCSEOpCountResultReductionType	StoreMode)benchmarker)DevicePropertiesReductionHint)argsortargsort_symcache_on_selfceildivconvert_shape_to_inductorconvert_shape_to_symintdeveloper_warningget_kernel_metadatair_dataclass
is_dynamicis_gpu	sympy_dotsympy_index_symbolsympy_index_symbol_with_prefixsympy_product
sympy_subs)opsOpsValueV)Node)CUDATemplate)GraphLowering)IndentedBufferr   rX   TF_T_U_V_IntLike_NumLikez  prefix	TensorBoxr   IRNode_NodeOrNodes)frozenc                  6    e Zd ZU ded<   ded<   ded<   ded<   y	)
GraphPartitionSignaturez5dict[str, Union[IRNode, sympy.Expr, TorchBindObject]]input_nodeslist[IRNode]output_nodeszdict[str, bool]input_deallocationboolskip_cudagraphN__name__
__module____qualname____annotations__     B/home/dcms/DCMS/lib/python3.12/site-packages/torch/_inductor/ir.pyrg   rg      s     GF ('rt   rg   c                "    dfd |        y )Nc                   | y t        | t        t        f      r| D ]
  } |        y t        | t              r| j	                         D ]
  } |        y t        | t
        t        t        t        t        j                  j                  j                  t        t        t        t         f	      sJ dt#        |        d       y )NzFound zE, which is not a supported top level IR node. See [Note: Inductor IR])
isinstancelisttupledictvalues
ExpandViewDynamicScalarAssertScalarrb   sympylogicboolalgBooleanr   intEffectfulKernelShapeAsConstantBuffertype)nodesnode_check_tensorboxs     ru   r   z%validate_ir.<locals>._check_tensorbox   s     =e}- ' &'t$ ' &' ! KK''//#)
  e%jk rt   )r   Optional[_NodeOrNodes]returnNoners   )node_or_nodesr   s    @ru   validate_irr      s    < ]#rt   c                8     t         t              sJ d fd}|S )Nc                 0     t        t              | i |S N)getattrrT   )argskwargsnames     ru   fnzops_wrapper.<locals>.fn   s    !wsD!42622rt   )r   objectr   r   r   rU   )rx   str)r   r   s   ` ru   ops_wrapperr      s    dC   3 Irt   c           
     b    t        t        | t        t        |                         dfd}|S )Nc                    t        |       t              k(  sJ t        t        |             D cg c]
  }| |       c}S c c}w r   lenrange)indexi	inv_orders     ru   reindexz inverse_reorder.<locals>.reindex   s?    5zS^+++-23u:->?il#???   Ar   Sequence[_T]r   r   )r{   zipr   r   )orderr   r   s     @ru   inverse_reorderr      s*    Sc%j 123I@ Nrt   c                     d fd}|S )Nc                    t        |       t              k(  sJ t        t        |             D cg c]
  }| |       c}S c c}w r   r   )r   r   r   s     ru   r   zsame_reorder.<locals>.reindex   s>    5zSZ''').s5z):;AeAh;;;r   r   rs   )r   r   s   ` ru   same_reorderr      s    < Nrt   c                     d fd}|S )Nc                       |             S r   rs   )r   reindex1reindex2s    ru   r   z fuse_reindexing.<locals>.reindex  s    ((rt   )r   r   r   zSequence[_V]rs   )r   r   r   s   `` ru   fuse_reindexingr     s    ) Nrt   )   r      r1   )   r   r   r   r1   c                <    |t        |       }|S t        ||       }|S )z1
    Convert strides to fill order (argsort)
    )rD   rE   )seq	shape_env
sorted_idxs      ru   get_fill_orderr     s/     $+CL
  !C0
rt   c                    t        |       D ci c]  \  }}||
 }}}t        t        |             D cg c]  }||   	 }}|S c c}}w c c}w )z
    Convert stride order to fill order
    For channel last format,

    stride order = [3, 0, 2, 1] and fill order = [1, 3, 2, 0]
    )	enumerater   r   )r   idxposlookupr   
fill_orders         ru   stride_order2fill_orderr     sR     (1'7883c3h8F8%*3u:%67&)7J7 97s
   AAc                    t        | |      }t        t        |             D cg c]  }d }}t        |      D ]
  \  }}|||<    |S c c}w )z)
    Convert strides to stride order
    r   )r   r   r   r   )r   r   r   _outr   elems          ru   get_stride_orderr   *  sV     !/sI >JCHo
&1
&C
&Z( 4D	J 's   	Ac                     y r   rs   xguard_shapes     ru   ir_node_to_tensorr   7  s    KNrt   c                     y r   rs   r   s     ru   r   r   ;  s    LOrt   c                   | y |s%t         j                  j                  j                  }nt        }| j                         D cg c]
  } ||       }}t        |       r.| j                         j                  D cg c]
  } ||       }}nt        j                  |      }| j                         }| j                         }t        |      }t        |      }t         j                  j                  j                  j                         5  t!        j"                  ||||      j%                         }d d d        |S c c}w c c}w # 1 sw Y   S xY w)N)sizestridedtypedevice)rV   graphsizevars	size_hintr   get_sizeis_storage_and_layout
get_layoutr   FlexibleLayoutcontiguous_strides	get_dtype
get_devicerI   r   suppress_guardstorchempty_stridedzero_)	r   r   shape_fnsr   r   r   r   ts	            ru   r   r   ?  s    	y 77##--!".AHQK.D.Q'(||~'<'<=!(1+==2248KKME\\^F"4(D$V,F	
			#	#	3	3	5 fE&

%' 	
 H / > Hs   D<;E
(EEc                0    t        | t              r| sd gS | S r   )rx   ry   values    ru   may_convert_to_optionalr   [  s     %u vLrt   c                    t        | t              s| | S t        | t        j                        r| j                  S t        | t
        t        f      rt        | j                               S t        d|  dt	        |       j                   d       y )Nzget_device_type(: ))rx   r   r   r   r   rc   
OutputSpecget_device_typer   r   ro   r   s    ru   r   r   e  sq     !SQY	Au||	$vv	A
+	,q||~..#A3ba)9)9(:!<=rt   c                    t        |       }|dv rt        t        | d      dk(  ryy|t        |      x}yddlm} t        |t              sJ t        ||      S )N)cpucuda_backendtritonTFr1   )TritonScheduling)	r   r   r2   r5   codegen.tritonr   rx   r   
issubclass)r   r   device_schedulingr   s       ru   	is_tritonr   q  sn    QF  6fXX./8;!:6!BBK0'...')9::rt   c                    t        |       dk(  S )Nr   )r   r   s    ru   is_cpur     s    1&&rt   c           	         t         t              r j                         yt         fdt	        t         j                               dz
        D              }t        j                  j                  j                   j                         d         dk(  xs= t        j                  j                  j                   j                         d         dk  }|xr |S )NFc              3     K   | ]D  }t         j                  j                  j                  j	                         |         z  d k(   F yw)r   N)rV   r   r   r   
get_stride).0r   	alignmentr   s     ru   	<genexpr>z-is_aligned_realized_tensor.<locals>.<genexpr>  sC       
			#	#ALLN1$5	6	BqHs   A
Ar1   )rx   rc   maybe_get_strideallr   r   r   rV   r   r   r   r   )r   r   aligned_stridesaligned_last_dims   ``  ru   is_aligned_realized_tensorr    s    a A$6$6$8$@ s1<<>*Q./ O 	
""1<<>"#56!; 	=77%%ajjl2&671<  //rt   c                   t        |      t        |       k(  rt        |       t        |      k(  sJ t        || |      D ]  \  }}}t        j                  j                  j                  |d      r2t        j                  j                  j                  ||      r]t        j                  j                  j                  |      t        j                  j                  j                  |      k(  r y y)zP
    Returns true if the strides are equal, ignoring dimensions of size 1 .
    r1   FT)r   r   rV   r   r   statically_known_leqstatically_known_equalssymbolic_hint)strides1strides2shapedims1s2s         ru   significant_strides_equalr    s     u:X&3x=CM+III5(H5 	R7700a8ww77
''""0048H8H8V8V9
 
 	 rt   c                D   t        |       s| S t        d t        || j                               D              r| S t	        || j                         | j                               s| S t        |       \  }}g |j                  }t        | j                               D ]8  \  }}t        j                  j                  j                  |d      s1||   ||<   : t        |j                  |j                  |j                   ||j"                        }t%        t'        ||            S )a  
    Tries to match the strides of the tensor to those in the meta_strides. Strides of insignificant
    dimensions - size 0 or 1 - will be updated.

    If there are real stride differences (NHWC vs NCHW), or the tensor is not realized, then the input will be returned
    c              3  v   K   | ]1  \  }}t         j                  j                  j                  ||       3 y wr   rV   r   r   r  r   r  r  s      ru   r   z2try_match_insignificant_strides.<locals>.<genexpr>  s3      B 	
00R8   79r1   datalayout)r   r   r   r   r  r   as_storage_and_layoutr   r   rV   r   r   r  FixedLayoutr   r   r   offsetrb   ReinterpretView)tensorstridesstorage
old_layout
new_strider   r   
new_layouts           ru   try_match_insignificant_stridesr!    s    !(
 '6#4#4#67  $Wf.?.?.A6??CTU/7GZ%:$$%J&//+, '17700A6#AJJqM' J _'*EFFrt   c                     e Zd ZU  e       Zded<    ej                  d      Zded<    ej                  d      Z	ded<    ej                  d      Z
d	ed
<   eej                  dCd              ZdDdZdEdZdFdZdGdZdHdZdIdZdJdKdZ	 dL	 	 	 	 	 	 	 dMdZdNdZdOdZdPdZdQdZdRdZdSdZdTdZdUdZdVdZe dWd       Z!dXdZ"dTdZ#dYd Z$dZd[d"Z%d\d#Z&d]d$Z'dTd%Z(d^d&Z)d_d'Z*d`d(Z+dVd)Z,dad*Z-dYd+Z.dZdbd,Z/dcd-Z0dEd.Z1ddd/Z2dEd0Z3	 de	 	 	 	 	 dfd1Z4dgd2Z5dhd3Z6	 de	 	 	 	 	 did4Z7djd5Z8dkd6Z9dld7Z:dmd8Z;dnd9Z<dYd:Z=dod;Z>dTd<Z?dTd=Z@dpd>ZAdqd?ZBdad@ZCdqdAZDeEr
e dNdB       ZFy!y!)rrc   zClassVar[OrderedSet[Any]]_current_originsF)initOrderedSet[Any]originsOptional[list[str]]	tracebackOptional[torch.fx.Node]origin_nodec              #     K   t         j                  }|| z  t         _        	 d  |t         _        y # |t         _        w xY wwr   )rc   r#  )r&  olds     ru   current_originszIRNode.current_origins  s7      %%"%-	*&)F#cF#s   A2 A?Ac                2    t         j                  | ||       y r   )r   __setattr__)selfattrr   s      ru   _post_init_setattrzIRNode._post_init_setattr  s     	4u-rt   c                    | j                  dt        | j                               | j                  dt        j                  rt        j                         nd        | j                  dd        y )Nr&  r(  r*  )r2  r,   r#  r2   debug_ir_tracebackr(  format_stackr0  s    ru   __post_init__zIRNode.__post_init__  sV    	:d6K6K+LMV5N5N//1TX	
 	t4rt   c                B    t        d | j                         D              S )Nc              3  4   K   | ]  }|j                     y wr   r   r   deps     ru   r   z(IRNode.get_read_names.<locals>.<genexpr>       ?s#((?   r,   	get_readsr6  s    ru   get_read_nameszIRNode.get_read_names      ?dnn.>???rt   c                    | j                   S r   )r(  r6  s    ru   get_tracebackzIRNode.get_traceback  s    ~~rt   c                    | j                   S r   r*  r6  s    ru   get_origin_nodezIRNode.get_origin_node      rt   c                     y r   rs   r6  s    ru   get_defining_opzIRNode.get_defining_op      rt   c                X    dt        | dd       }|rt        |      dkD  r|d d  d}|gS )Nzorigins=r&   @   =   z...)r   r   )r0  shortenr&  s      ru   common_reprzIRNode.common_repr  s@    WT9b9:;s7|b( "c*Gyrt   c                $   t        |      t        | j                  |            z   }t        t        t        |            }|r5t	        dj                  |            }t        |       j                   d| dS t        |       j                   d| dS )Nz,
z(
z
)(r   )ry   rQ  mapr   indentjoinr   ro   )r0  linesrP  	multiline	new_liness        ru   
str_helperzIRNode.str_helper  s     Ud4#3#3G#<==Se_%uzz%01I4j))*#i[<<4j))*!E7!44rt   c                    | j                   S r   r   r6  s    ru   r   zIRNode.get_dtype      zzrt   c                B    	 | j                         S # t        $ r Y y w xY wr   )r   NotImplementedErrorr6  s    ru   maybe_get_dtypezIRNode.maybe_get_dtype  s&    	>>##" 		    	c                2    t        dt        |        d      )Nz#get_layout() is not implemented by !r_  r   r6  s    ru   r   zIRNode.get_layout  s    !$GT
|ST"UVVrt   c                B    	 | j                         S # t        $ r Y y w xY wr   )r   r_  r6  s    ru   maybe_get_layoutzIRNode.maybe_get_layout  &    	??$$" 		ra  c                "    | j                         S r   )r   r6  s    ru   get_output_speczIRNode.get_output_spec%  s      rt   c                B    	 | j                         S # t        $ r Y y w xY wr   )ri  r_  r6  s    ru   maybe_get_output_speczIRNode.maybe_get_output_spec(  s(    	''))" 		ra  c                >    t        | j                         t              S )z4True for single tensor output (excludes MultiOutput))rx   rk  Layoutr6  s    ru   has_tensor_outputzIRNode.has_tensor_output.  s    $446??rt   c                2    t        dt        |        d      )Nz!get_size() is not implemented by rc  rd  r6  s    ru   r   zIRNode.get_size2  s    !$Ed4j\QR"STTrt   c                B    	 | j                         S # t        $ r Y y w xY wr   )r   r_  r6  s    ru   maybe_get_sizezIRNode.maybe_get_size5  %    	==?"" 		ra  c                "    | j                         S r   r   r6  s    ru   r
  zIRNode.shape;  s    }}rt   c                4    t        | j                               S r   )rR   r   r6  s    ru   	get_numelzIRNode.get_numel?  s    T]]_--rt   c                    t         j                  j                  j                  t	        j
                  | j                         d            S Nr   rV   r   r   is_expr_static_and_truer   Eqrv  r6  s    ru   is_zero_elementszIRNode.is_zero_elementsB  0    ww77AQST8UVVrt   c                0    t        dt        |              )a)  
        If the IRNode refers to data which has not been materialized (e.g.,
        it is a Pointwise/Reduction that could potentially have more
        compute fused into it), realize the IRNode into physical memory,
        ending the possibility of fusing into it, but allowing, e.g., multiple
        users to access the data without having to recompute.

        Check StorageBox.realize for a particularly notable implementation.

        TODO(ezyang): I think, in principle, every IRNode should have an
        implementation of this, and most of the time no-op is OK, but you
        really do have to audit each IRNode for this, so for now, raise
        an error if it's not implemented.  Note that some code in graph.py
        will catch this thrown error and suppress it with a warning.
        zrealize NYI on rd  r6  s    ru   realizezIRNode.realizeE  s      "ODJ<"@AArt   Nc                0    t        dt        |              )Nzcodegen_reference NYI on rd  r0  writers     ru   codegen_referencezIRNode.codegen_referenceW  s    !$=d4j\"JKKrt   c                     y r   rs   r6  s    ru   r   zIRNode.get_deviceZ  rK  rt   c                .    | j                         }|J |S r   )r   r0  r   s     ru   get_device_or_errorzIRNode.get_device_or_error]  s    "!!!rt   c                     yNFrs   r6  s    ru   has_exceeded_max_readszIRNode.has_exceeded_max_readsb      rt   c                >    t        t        |       j                        r   r_  r   ro   r6  s    ru   make_loaderzIRNode.make_loadere      !$t*"5"566rt   c                >    t        t        |       j                        r   r  r6  s    ru   make_indexerzIRNode.make_indexerh  r  rt   c                >    t        t        |       j                        r   r  r6  s    ru   r   zIRNode.get_stridek  r  rt   c                B    	 | j                         S # t        $ r Y y w xY wr   )r   r_  r6  s    ru   r   zIRNode.maybe_get_striden  rg  ra  c                >    t        t        |       j                        r   r  r6  s    ru   get_namezIRNode.get_namet  r  rt   c                B    	 | j                         S # t        $ r Y y w xY wr   )r  r_  r6  s    ru   maybe_get_namezIRNode.maybe_get_namew  rr  ra  c                     yr  rs   r0  	thresholds     ru   has_large_inner_fnzIRNode.has_large_inner_fn}  r  rt   c                     y r   rs   r0  userss     ru   
mark_reusezIRNode.mark_reuse      rt   c                     y r   rs   r6  s    ru   realize_hintzIRNode.realize_hint  r  rt   c                >    t        t        |       j                        r   r  r6  s    ru   unwrap_viewzIRNode.unwrap_view  r  rt   c                >    t        t        |       j                        r   r  r6  s    ru   freeze_layoutzIRNode.freeze_layout  r  rt   c                >    t        t        |       j                        r   r  r0  r   allow_paddings      ru   freeze_layout_with_stride_orderz&IRNode.freeze_layout_with_stride_order       "$t*"5"566rt   c                >    t        t        |       j                        r   r  r0  r   s     ru   freeze_layout_with_fill_orderz$IRNode.freeze_layout_with_fill_order  r  rt   c                >    t        t        |       j                        r   r  r0  r   s     ru   freeze_layout_with_same_orderz$IRNode.freeze_layout_with_same_order  r  rt   c                >    t        t        |       j                        r   r  r0  exact_stridesr  s      ru    freeze_layout_with_exact_stridesz'IRNode.freeze_layout_with_exact_strides  r  rt   c                >    t        t        |       j                        r   r  r6  s    ru   get_read_writeszIRNode.get_read_writes  r  rt   c                6    | j                         j                  S r   r  readsr6  s    ru   r@  zIRNode.get_reads      ##%+++rt   c                4    t        | j                               S r   )r   r@  r6  s    ru   	num_readszIRNode.num_reads  s    4>>#$$rt   c                >    t        t        |       j                        r   r  r6  s    ru   get_storage_numelzIRNode.get_storage_numel  r  rt   c                >    t        t        |       j                        r   r  r6  s    ru   get_unbacked_symbol_useszIRNode.get_unbacked_symbol_uses  r  rt   c                >    t        t        |       j                        r   r  r6  s    ru   get_reduction_typezIRNode.get_reduction_type  r  rt   c                >    t        t        |       j                        r   r  r6  s    ru   get_reduction_sizezIRNode.get_reduction_size  r  rt   c                     yr  rs   r6  s    ru   	is_externzIRNode.is_extern  r  rt   c                     yr  rs   r6  s    ru   is_no_opzIRNode.is_no_op  r  rt   c                >    t        t        |       j                        r   r  r  s     ru   constant_to_devicezIRNode.constant_to_device  r  rt   c                >    t        t        |       j                        r   r  r6  s    ru   get_mutation_nameszIRNode.get_mutation_names  r  rt   c                >    t        t        |       j                        r   r  r6  s    ru   get_operation_namezIRNode.get_operation_name  r  rt   c                >    t        t        |       j                        r   r  r6  s    ru   get_inputs_that_alias_outputz#IRNode.get_inputs_that_alias_output  r  rt   c                     y r   rs   r6  s    ru   r   zIRNode.dtype  s    (+rt   )r&  zOrderedSet[Node]r   zGenerator[None, None, None])r1  r   r   r   r   r   r   r   r   OrderedSet[str])r   r'  r   r)  r   zOptional[Operation]T)rP  rl   r   Sequence[str])TT)rW  zSequence[object]rP  rl   rX  rl   r   r   r   torch.dtype)r   zOptional[torch.dtype]r   rm  )r   zOptional[Layout]r   r   )r   zOptional[OutputSpec]r   rl   r   Sequence[Expr])r   Optional[Sequence[_IntLike]])r   z.Union[_IntLike, sympy.Rel, Sequence[_IntLike]]r   r   r   Optional[str]r   r  zOptional[IndentedBuffer]r   r   r   Optional[torch.device]r   torch.devicer   $Callable[[Sequence[Expr]], OpsValue]r    Callable[[Sequence[Expr]], Expr]r   Sequence[_IntLike]r   r   r  Optional[int]r   rl   r  r   r   r   r   rc   Fr   	list[int]r  rl   r   r   r   r  r   r   r   list[_IntLike]r   r   r  r  r  rl   r   r   r   zdependencies.ReadWritesr   zOrderedSet[Dep]r   r   r   r^   r   OrderedSet[Symbol]r   Sequence[sympy.Expr]r   r  r   rc   r   r  )Gro   rp   rq   r,   r#  rr   dataclassesfieldr&  r(  r*  staticmethod
contextlibcontextmanagerr-  r2  r7  rA  rD  rG  rJ  rQ  rZ  r   r`  r   rf  ri  rk  rn  r   rq  propertyr
  rv  r|  r  r  r   r  r  r  r  r   r   r  r  r  r  r  r  r  r  r  r  r  r  r@  r  r  r  r  r  r  r  r  r  r  r  r   r   rs   rt   ru   rc   rc     s   2<,/>  1{00e<G_<%6[%6%6E%BI"B+<;+<+<%+HK(H*  *.5@  PT	5%	504	5HL	5		5W!@U  .WB$L
777777 7<77/37	7
77 DI7+7<@7	7
7,%77777777 	+ 
+ rt   c                      e Zd ZddZddZddZddZddZddZddZ	ddZ
dd	Zdd
ZddZddZddZddZddZy)	Operationc                    d | _         y r   operation_namer6  s    ru   r7  zOperation.__post_init__  s
    -1rt   c                    t         r   r_  r6  s    ru   r   zOperation.get_device      !!rt   c                6    t        | d      sJ | j                  S Nr*  )hasattrr*  r6  s    ru   rG  zOperation.get_origin_node  s    t]+++rt   c                6    t        | d      sJ | j                  S )Nr&  )r  r&  r6  s    ru   get_originszOperation.get_origins  s    tY'''||rt   c                6    | j                   J | j                   S r   r  r6  s    ru   r  zOperation.get_operation_name  s     ""..."""rt   c                     yr  rs   r6  s    ru   r  zOperation.is_extern  r  rt   c                     yr  rs   r6  s    ru   r  zOperation.is_no_op  r  rt   c                    t         r   r  r6  s    ru   r  zOperation.get_read_writes  r  rt   c                &    || j                         v S r   )rA  )r0  r   s     ru   
is_user_ofzOperation.is_user_of  s    t**,,,rt   c                B    t        d | j                         D              S )Nc              3  4   K   | ]  }|j                     y wr   r:  r;  s     ru   r   z+Operation.get_read_names.<locals>.<genexpr>  r=  r>  r?  r6  s    ru   rA  zOperation.get_read_names  rB  rt   c                6    | j                         j                  S r   r  r6  s    ru   r@  zOperation.get_reads  r  rt   c                    t         r   r  r6  s    ru   get_outputszOperation.get_outputs  r  rt   c                    t               S r   r+   r6  s    ru   get_unbacked_symbol_defsz"Operation.get_unbacked_symbol_defs  
    |rt   c                    t               S )a  
        Returns the unbacked symbols which are required to be in scope in
        order to successfully perform codegen for this buffer.  For example,
        a buffer that corresponds to an extern kernel call that takes i0 as
        an argument would return {i0} here.  This is used to generate necessary
        dependencies that ensure we actually bind i0 in codegen before you
        try to use it.

        Note that this is NOT transitive; in particular, if this buffer takes
        in as input another buffer with dynamic shape (e.g., (i0,)), we will
        not report it here, because you will already have a dependency
        on that buffer, which will eventually have a dependency on i0 if
        necessary.
        r+   r6  s    ru   r  z"Operation.get_unbacked_symbol_uses  s     |rt   c                     y)z
        Gets extra global memory size needed by this buffer.
        Some algorithms (e.g. group gemm) may require extra global memory in the generated code.
        r   rs   r6  s    ru   get_workspace_sizezOperation.get_workspace_size  s    
 rt   Nr  r  r  )r   r%  r  r  r  )r   r   r   rl   r  r  r   list[Buffer]r   zOrderedSet[sympy.Symbol]r  )ro   rp   rq   r7  r   rG  r  r  r  r  r  r  rA  r@  r#  r%  r  r)  rs   rt   ru   r  r    sN    2" #"-@,""rt   r  c                  0    e Zd ZU ded<   ded<   ded<   ded<   dd	Zdd
Zd  fdZd!dZeZd"dZ	d#dZ
d$dZd$dZed%d       Zeej"                  fd&d       Zed'd       Zd(dZed!d       Zd)d*dZddZd+dZd,dZd-dZd.dZd/dZd0dZ xZS )1Loopsr  r   r  r   Callable[..., Any]inner_fnr  rangesc                x     t               j                  g d | j                  D        | j                          S )Nc              3  2   K   | ]  }t        |        y wr   r&   r   es     ru   r   z1Loops.get_unbacked_symbol_uses.<locals>.<genexpr>  s     <1#A&<   )r,   unionr1  inner_fn_free_unbacked_symbolsr6  s    ru   r  zLoops.get_unbacked_symbol_uses  s;    !z|!! 
<<
//1
 	
rt   c                   | j                  d| j                  j                   dt        | j                        | j                         g|D cg c]  }| dt        | |        c}z   d| j                  gz         S c c}w )N'=origin_node=)rZ  r   r   r   r   inner_fn_strr   r*  )r0  namesr   s      ru   _to_strzLoops._to_str  s    DKK$$%Q'DJJ!!#
 <AA4$qt,-.AB d..1234
 	
 Bs   A?
c                "    t         |           y r   )superr7  r0  	__class__s    ru   r7  zLoops.__post_init__&  s    rt   c                $    | j                  d      S )Nr1  r@  r6  s    ru   __str__zLoops.__str__)  s    ||K((rt   c                    | j                   S r   r   r6  s    ru   r   zLoops.get_device.      {{rt   c                    | j                   S r   rF  r6  s    ru   rG  zLoops.get_origin_node1  rH  rt   c                    | j                   S r   rF  r6  s    ru   r   zLoops.get_size4  rK  rt   c                    | j                   S r   rF  r6  s    ru   get_pointwise_sizezLoops.get_pointwise_size7  rK  rt   c                    |j                  dd       }|j                  dd       } | |i |}|j                  d|       |j                  d|xs |j                         t        j	                  |      S )Nr*  r(  )popr2  r(  rb   create)clsr   r   r*  tbrs         ru   rR  zLoops.create:  so    jj5ZZT*    	
]K8	["*;<""rt   c                    t        |       D cg c]0  \  }}|dk(  rt        j                  j                  nt	        ||      2 c}}S c c}}w Nr1   )r   r   SZerorQ   )r1  ra   nr   s       ru   _indexzLoops._indexH  sH     "&)
1 FEGGLL(Fvq(QQ
 	
 
s   5Ac                `   t        t        j                               }t        j                  |      5  t	        j
                  t        dd      5   | j                  | j                           |j                         cd d d        cd d d        S # 1 sw Y   nxY wd d d        y # 1 sw Y   y xY wNallow_indexingT)
r=   rV   MockHandlerset_ops_handlerr   r   r   r0  inner_fn_argsgetvalue)r0  	opcounters     ru   inner_fn_opcountzLoops.inner_fn_opcountO  s     1	i(	(LL)94@	( DMM4--/0%%'	( 	( 	( 	( 	( 	( 	(s#   B$-B<	B$B	B$$B-c                :    | j                  | j                        fS r   )r[  r1  r6  s    ru   ra  zLoops.inner_fn_argsY  s    DKK(**rt   c                r    t        j                  j                  | j                  g| j	                          S r   )rV   KernelFormatterHandlerir_to_stringr0  ra  r6  s    ru   r>  zLoops.inner_fn_str\  s3    ''44MM
 ..0
 	
rt   c                x    |d}t        |t        j                        }| j                         j                  |kD  S rx  )maxr2   realize_opcount_thresholdrd  num_opsr  s     ru   r  zLoops.has_large_inner_fnb  s9    I	6#C#CD	$$&..::rt   c                d    | j                  | j                        }t        | j                  |      S r   )r[  r1  r8   r0  )r0  r   s     ru   r9  z$Loops.inner_fn_free_unbacked_symbolsh  s%    DKK(,T]]EBBrt   c                |   t        j                  t        dd      5  | j                         rJt	        | j                         | j                         | j                               j                  cd d d        S t	        | j                         | j                               j                  cd d d        S # 1 sw Y   y xY wr]  )	r   r   r   r  r:   r  r   r  r  r6  s    ru   r@  zLoops.get_readsl  s    \\.*:DA 	&&(*$$&MMO++- %	 	 +$$&MMO %	 	 	s   AB271B22B;c                H    t        | j                         j                        S r   )r,   rd  read_buffersr6  s    ru   rA  zLoops.get_read_namesz  s    $//1>>??rt   c                H    t        | j                         j                        S r   )r   rd  rp  r6  s    ru   r  zLoops.num_reads}  s    4((*7788rt   c                2    t        dt        |        d      )Nz+get_reduction_size() is not implemented by rc  rd  r6  s    ru   r  zLoops.get_reduction_size      !9$t*QG
 	
rt   c                2    t        dt        |        d      )Nz+get_reduction_type() is not implemented by rc  rd  r6  s    ru   r  zLoops.get_reduction_type  rs  rt   c                2    t        dt        |        d      )Nz+constant_to_device() is not implemented by rc  rd  r  s     ru   r  zLoops.constant_to_device  rs  rt   r   )r?  r  r   r   r  r  r  r  r  )r   r   r   r   r   rb   )r1  r  ra   r0   r   r  )r   r>   r   zSequence[Sequence[_IntLike]]r   r  r  r  r  r  r  r  ) ro   rp   rq   rr   r  r@  r7  rH  __repr__r   rG  r   rO  classmethodrR  r  r0   INDEXr[  rF   rd  ra  r>  r  r9  r@  rA  r  r  r  r  __classcell__rD  s   @ru   r.  r.    s      
	
 ) H  # # :>** 
 
 ( (+ 
 

;C@9




rt   r.  c                   |j                   rt        j                  t        d      |      S t        j                  d|      S )Nnanr   )is_floating_pointrT   constantfloat)r   r   s     ru   nop_loader_fnr    s1    ||E%L%00||Au%%rt   c                  D    e Zd ZddZddZd	dZ	 	 	 	 	 	 	 	 d
dZddZy)	Pointwisec                p    | j                         rt        t        | j                        S | j                  S Nr\  )r|  r
   r  r   r0  r6  s    ru   r  zPointwise.make_loader  s)      "=

;;}}rt   c                    g S r   rs   r6  s    ru   r  zPointwise.get_reduction_size  s    	rt   c                     y r   rs   r6  s    ru   r  zPointwise.get_reduction_type  rK  rt   c                p    | j                         }t        j                  |xs d ||       ||            S Nunnamed)r  rT   storer0  output_nameindexervarsloaders        ru   store_outputzPointwise.store_output  s2     !!#yy1	74=&,OOrt   c                    | j                         } t        j                  t        d|      |      }t	        || j
                  || j                        S FMove this to a given device. Requires that all reads are to constants.override_devicer   r   r0  r1  )r  r   r   ConstantBufferr  r   r1  r0  r   r  s      ru   r  zPointwise.constant_to_device  sH    !!#Hn.?HPfT[[
 	
rt   Nr  r  r  r  r  r  !Callable[[Sequence[Expr]], Never]r  r  r   r   r  )ro   rp   rq   r  r  r  r  r  rs   rt   ru   r  r    sF    P"P 3P 	P
 
P
rt   r  c                  F    e Zd ZU ded<   dZded<   ddZ	 	 	 	 	 	 	 	 d	dZy)
Scatterr  output_indexerNr@   scatter_modec                    | j                         } t        j                  t        d|      |      }t	        || j
                  || j                  | j                  | j                        S )r  r  )r   r   r0  r1  r  r  )	r  r   r   r  r  r   r1  r  r  r  s      ru   r  zScatter.constant_to_device  s]    !!#Hn.?HP**;;..**
 	
rt   c                    | j                         }|d}t        j                  | || j                  |             ||      | j                        S )Nr  )mode)r  rT   r  r  r  r  s        ru   r  zScatter.store_output  sT     !!##KyyD''-.4L""	
 	
rt   r  r  )ro   rp   rq   rr   r  r  r  rs   rt   ru   r  r    sB    44"L)"

"
 3
 	

 

rt   r  
logical_ormaximumminimummuladdbitwise_xor)anyrj  minprodsumxor_sumz"dict[str, Callable[..., OpsValue]]REDUCTION_COMBINE_FNc                      t         v r	t             S  dv r	 	 	 	 	 	 d fd}|S  dk(  r	 	 	 	 	 	 dd}|S t        d        )Nargmaxargminc                   | \  }}|\  }}dk(  rt        j                  ||      }nt        j                  ||      }t        j                  ||      }t	              rt        j
                  ||      }t        j
                  ||      }	t        j                  |t        j                  ||	            }t        j                  |t        j                  ||	            }rt        j                  ||      nt        j                  ||      }
t        j                  |t        j                  ||
            }t        j                  |||      t        j                  |||      fS )Nr  )	rT   ltgteqr!   ner  logical_andwhere)aba_valuea_indexb_valueb_indexmaskequala_isnanb_isnantiearg_break_ties_leftr   reduction_types              ru   argmax_combine_fnz3get_reduction_combine_fn.<locals>.argmax_combine_fn  s     !GW GW)vvgw/vvgw/FF7G,Ee$&&'2&&'2~~dCFF7G,DEucoogw.OP ' w(VVGW- 
 >>$s(CDD		$1		$1 rt   welford_combinec                l    | \  }}}|\  }}}||z
  }||z   }	||	z  }
|||
z  z   ||z   ||z  |z  |
z  z   |	fS r   rs   )r  r  a_meana_m2a_weightb_meanb_m2b_weightdelta
new_weight	w2_over_ws              ru   welford_combine_fnz4get_reduction_combine_fn.<locals>.welford_combine_fn  sm     &'"FD(%&"FD(VOE!H,J :-I**teemh6BB rt   zunknown reduction_type=)r  tuple[object, object]r  r  r   tuple[OpsValue, OpsValue])r  #tuple[OpsValue, OpsValue, OpsValue]r  r  r   r  )r  r_  )r  r   r  r  r  s   ```  ru   get_reduction_combine_fnr    s     --#N33	/	/	$	)>	&	: ! 	,	,	2	2	 1	  "! "$;N;K"LMMrt   c                      e Zd ZU ded<   ded<   ded<   ded<   dd	ZeZd fd
Zd dZd!dZ	 	 	 	 	 	 	 	 	 	 d"dZ	d#dZ
d$dZddZd%dZe	 d&	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d'd       Ze	 	 	 	 	 	 	 	 	 	 d(d       Zeej&                  df	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d)d       Ze	 	 	 	 	 	 d*d       Ze	 	 	 	 	 	 d*d       Ze	 	 	 	 	 	 	 	 d+d       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 d,d       Ze	 	 	 	 	 	 	 	 	 	 	 	 d-d       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d.d       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d/d       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d0d       Z xZS )1	Reductionr  reduction_rangesr?   r  r  	src_dtyperC   reduction_hintc                $    | j                  d      S )N)r1  r  r  rG  r6  s    ru   rH  zReduction.__str__,  s    ||LMMrt   c                r    t         |           t               j                  d | j                  D         z  S )Nc              3  2   K   | ]  }t        |        y wr   r4  r5  s     ru   r   z5Reduction.get_unbacked_symbol_uses.<locals>.<genexpr>3  s     F1#A&Fr7  )rB  r  r,   r8  r  rC  s    ru   r  z"Reduction.get_unbacked_symbol_uses1  s8    w/14FJL4F4FF0E0EF5
 
 	
rt   c                    | j                   S r   )r  r6  s    ru   r  zReduction.get_reduction_size6  s    $$$rt   c                    | j                   S r   )r  r6  s    ru   r  zReduction.get_reduction_type9      """rt   c           	         t        j                  | j                  | j                  | j                  | j                  ||            }t        j                  |xs d ||      |      S r  )rT   	reductionr   r  r  r0  store_reduction)r0  r  r  r  reduction_varsr   s         ru   r  zReduction.store_reduction<  sW     JJNNMM$/	
 "";#;)WT]ERRrt   c                X    t        | j                        t        | j                        z   S r   )r   r1  r  r6  s    ru   index_lengthzReduction.index_lengthK  s!    4;;#d&;&;"<<<rt   c                    | j                  | j                        }| j                  | j                  t        j                        }||fS r   )r[  r1  r  r0   R0_INDEXr0  r   rindexs      ru   ra  zReduction.inner_fn_argsN  s8    DKK(T22DMMBvrt   c                    | j                  | j                        }| j                  | j                  t        j                        }t        | j                  ||      S r   )r[  r1  r  r0   r  r8   r0  r  s      ru   r9  z(Reduction.inner_fn_free_unbacked_symbolsS  sA    DKK(T22DMMB,T]]E6JJrt   c           
     
   | j                         } t        j                  t        d|      |      }t	        || j
                  || j                  | j                  | j                  | j                  t        j                        S )r  r  r   r   r0  r1  r  r  r  r  )r  r   r   r  r  r   r1  r  r  r  rC   DEFAULTr  s      ru   r  zReduction.constant_to_deviceX  sm    !!#Hn.?HP**;;!22..nn(00	
 		
rt   Nc	           
        dd}	t         j                  j                  j                  |      }
t         j                  j                  j                  t	        |            }|dk(  xsG t         j                  j                  | t        j                         xr |dvxr t        j                  } |	|
      r |	|      st        j                  dfS t        j                  |       }|j                  }d}|rat        j                   t         j"                  j$                  | d      }t        j                   t         j"                  j$                  | d      }n	 	 	 	 	 	 dd	}|}|dk(  r ||
|      }|dk(  rt        j&                  |fS |t)        |t*              rt-        j.                  t0        d
d      5  t3        |      \  }}d d d        hft         j                  j                  j                  t	        ||z               }|
|k(  r,t4        j7                  d|||||       t        j&                  dfS t        j&                  |fS |
|k  s||dz  dz  k\  rt        j                  dfS t9        | |||||dk7  r|nd|t        j                        }dd} ||      \  }}|r ||      \  }}t;        |      dk(  rt        j                  dfS t=        j>                  |jA                         |jC                               \  \  }}}d}d}|D ]  }t         j                  j                  jE                  ||      } t         j                  j                  jG                  | |tI        |jK                                     }!tM        d |!D              }"|"r|dz  }|dz  } ||kD  rt        j&                   ||
|      fS t        jN                   ||
|      fS # 1 sw Y   xY w)Nc                .    t        | t        t        f      S r   )rx   r   r   r   s    ru   
_is_staticz(Reduction.num_splits.<locals>._is_statics  s    a#w00rt   scanr  r1       T)inner_reductionFc                     yrW  rs   )reduction_numel_hint
numel_hints     ru   inner_reduction_splitsz4Reduction.num_splits.<locals>.inner_reduction_splits  s     rt   r^  zUse previous IRNode's range and reduction_ranges instead of split. current ranges: %s, current reduction ranges: %s, current split: %d, new ranges: %s, new reduction ranges: %sr   r   r  r  c           	     &   t        d t        | j                         | j                         | j	                               |       }|j                         }|j                  J |j                  D  cg c].  } t        | t              rt        | t        j                        s| 0 }} g }d}t        |j                  d       D ]  t        fd|D              s|j                  j                         j                   t"        j$                  j&                  v sZt"        j$                  j&                  j                      }t)        |j*                  dd       }|j-                          t)        |j*                  dd       |k7  sd} ||fS c c} w )	Nr   r   r   r   r  r  Fc                    | j                   S r   r:  r   s    ru   <lambda>z@Reduction.num_splits.<locals>.get_read_indices.<locals>.<lambda>  s
    aff rt   keyc              3  N   K   | ]  }|j                   j                  v   y wr   )r   free_symbols)r   rU  mds     ru   r   zAReduction.num_splits.<locals>.get_read_indices.<locals>.<genexpr>  s      FaqBHH111Fs   "%r   T)ComputedBufferr   r   r   r   r  
range_varsrx   r   r   Numbersortedr  r   appendr   r   rV   r   name_to_bufferr   r  decide_layout)	rU  cbread_writesr  indiceschangedbuforiginal_strider  s	           @ru   get_read_indicesz.Reduction.num_splits.<locals>.get_read_indices  sW   %<<>++-
 B ,,.K ))555 %//a&z!U\\/J J 
 GG[..4DE +F:FFNN288,ww!''"8"88gg44RWW=*1#**h*M))+"3::x>/Q&*G+ G##!s   23Fr   c              3  &   K   | ]	  }|d kD    ywr1   Nrs   r   r   s     ru   r   z'Reduction.num_splits.<locals>.<genexpr>  s     /!A/   )r   r   r   rl   )r  r   r  r   r   r   )rU  r  r   ztuple[Sequence[Expr], bool])(rV   r   r   r  rR   has_featurer4   REDUCE_TO_SINGLE_ELEMENTr2   split_reductionsrC   r  rB   rR  multi_processor_count	functoolsr
   choicesreduction_split_factorINNERrx   rb   r   r   r   r9   logdebugr  r   r3   index_vars_squeezer   r  simplify_with_rangesstride_hintsry   keysr   OUTER)#r   	dst_dtyper  r0  r1  r  r  reduction_numel
input_noder  r  r  should_splitpropsnum_smmin_elements_per_threadr  outer_reduction_splitssplit
new_rangesnew_reduction_rangesextracted_numel_hintrU  r  r  r  r   r  ranges1	num_outer	num_innerr   jr  outers#                                      ru   
num_splitszReduction.num_splitsg  s   	1  !ww//==oNWW%%33M&4IJ
%/ 
##FN,S,STT (( '' 	 /0Z
5K ((!++ ''/,,"$@I@Q@Q		00&$A" AJ@Q@Q		00&%A"
&)  &<" ?*+?LEz$**E11%*Z*K\\.2BDI H <JG",H
 ).B.N+,77+;+;+I+I%j3G&GH,( ,/CC		G #,!&0	  -22B66 &&-- $;;VaZ"_, ((!++--;v-E>5(00	
	$B ,A.)!,JGQw<1 ((!++'3'F'FJJL!..0(
$NW 		 		A  55aAAgg&&33>4#7G /w//EQ	Q			 y  &&(>$j)   !&&(>$j)  MH Hs   4OOc                @    D cg c]+  }t         j                  j                  j                  |      - c}t	        ||      dfd|dv rAt        ddt        j                              j                         	 	 	 	 	 	 d fdfdS  S c c}w )z1Convert inner_fn from a reduction to an pointwisec                     t        j                   fdt        j                  D cg c]  }t	        |       c} D              S c c}w )Nc              3  0   K   | ]  } |        y wr   rs   )r   r  r   value_fns     ru   r   z=Reduction._unroll_reduction_fn.<locals>.fn.<locals>.<genexpr>  s        UF+   )r  reduce	itertoolsproductr   )r   r   
combine_fnr  r:  s   ` ru   r   z*Reduction._unroll_reduction_fn.<locals>.fn  sH    ##"+"3"3,<=q%(=# 
 >s   A
r  r  Nc                    |D cg c]  }t        j                  |       }} | |      t        j                   |      t        j
                        fS c c}w r   )r   expandrT   
index_exprr   int64)r   r  r   flatten_indexr0  s      ru   r:  z0Reduction._unroll_reduction_fn.<locals>.value_fn-  sP     4::a%,,q/::UF+NN=#8%++F  ;s   Ac                     |       d   S rW  rs   )r   r   s    ru   r  z0Reduction._unroll_reduction_fn.<locals>.<lambda>6  s    E1 rt   )r   r  r   r   )r   r  r  r  r   r  )	rV   r   r   evaluate_static_shaper  r  r   r   r  )	r0  r  r  r  r   r?  rE  r   r:  s	   ``   @@@@ru   _unroll_reduction_fnzReduction._unroll_reduction_fn  s     @P
:;AGG2215
 .niH
		 11' 112BC	
 ln )3E* .-HIM
s   0Bc
                <   t         j                  j                  j                  t	                    }
|
dk(  rkdfd} |d       |d       |d       |d      dj                         v s
J  d       dfd}t        j                  |||t        |            S |
dk(  r+dv rdfd	}ndfd
}t        j                  |||      S t        |
t              rt         j                  j                  j                  |
      t        j                  k  rNt	        |      dk7  st        |j                        r+t        j                  || j!                  |      |      S | j#                  ||||
|		      \  }}|t$        j&                  k(  r|}|dk(  r4|	J t)        |	      \  }}|J |J | j+                  ||||||
      S |dkD  r| j-                  |||||	      S t.        j                  t1        ||||            S )Nr   c                    t         j                  k(  rt        |       S j                  r't        | t        j
                        sJ t        |       S t        | t        j                        sJ t        |       S r   )	r   rl   r~  rx   typingSupportsFloatr  SupportsIntr   )valr%  s    ru   py_cnstz!Reduction.create.<locals>.py_cnstN  sa    

*9$00%c6+?+?@@@ :%%c6+=+=>>>s8Ort   r1   )r  r  r  r  z* not supported for zero-dimension tensors!c                6    t        j                           S r   rT   r  )r   r%  r  rtypes_to_initss    ru   const_fnz"Reduction.create.<locals>.const_fnd  s    ||ON$CYOOrt   r  r@  c                0    t        j                  d      S rx  rQ  )r   r%  s    ru   r   zReduction.create.<locals>.fnr  s    <<955rt   c                n    D cg c]  }t         j                  j                   }} | |      S c c}w r   r   rX  rY  )r   r   reduction_indexr0  r  s      ru   r   zReduction.create.<locals>.fnw  s1    =M&Nuww||&NO&N#E?;; 'O   !2r   r  )rN  r   r   zUnion[bool, float, int])r   r   r   rU   )rV   r   r   simplifyrR   r#  r  rR  ry   rx   r   r   r2   unroll_reductions_thresholdrN   r   rH  r6  rC   r  r9   !create_multilayer_existing_rangescreate_multilayerrb   r  )rS  r   r%  r  r0  r1  r  r  r  r'  r&  rO  rS  r   hintr-  r.  r/  rR  s     ` ` ``          @ru   rR  zReduction.create;  s    ''**33MBR4STa$ qz"1:
qz	O "_%9%9%;; !""LM;P ##!F|	 $   a!556
< ##YF $  
 0  **?;001v&!+vfkk/B ##11.	  $   nn

e ]222!NB;)))/R0,J, )))'33388 $  QY(( 
 
 !!1-#-	
 	
rt   c           	        | dv rAt        |      rt        d      S t        |      ryt        j                  |      j
                  S | dv rAt        |      rt        d      S t        |      ryt        j                  |      j                  S t        |      rdnd}t        |      rdnd}|||||||f|||ft        d      |fd	|    S )
N)rj  r  z-infF)r  r  infTr   r1   )r  r  r  r  welford_reducer  online_softmax_reduce)r!   r  r    r   iinfor  rj  )r  r   zeroones       ru   default_accumulatorzReduction.default_accumulator  s     ..e$V}$!%({{5)---..e$U|#!%({{5)---(/uQ&u-d1#T40 $dD1&+FmT%:
  	rt   c                :    | dk(  ryt         j                  | |      S )Nr`  r   )r  re  r  r   s     ru   default_valuezReduction.default_value  s#     --,,^UCCrt   c                    | dk(  r|S | dk  r(|dk  r#|t         j                  k(  rt         j                  S | dk  r(|dk  r#|t         j                  k(  rt         j                  S |S )Nr      i      )rC   r$  
OUTER_TINY)r-  r  r  s      ru   _multilayer_second_step_hintz&Reduction._multilayer_second_step_hint  sg     B;!!C<J#-.MDWDW2W +++TMc!-"5"55 +++rt   c                   	 t         j                  |g      	t        j                  j                  j                  t        j                  |z  d             	 	 	 	 	 	 d	fd}|S )Nr   c                0   |\  }| ^ }|z  |z   d	fd}
rqt        j                  t        j                  t        j                        t        j                  t        j                              }t        j
                  ||      S  |       S )Nc                 $       g            S r   rs   )r  r  	new_indexr   s   ru   bodyzCReduction._multilayer_wrap_loader.<locals>.wrapper_fn.<locals>.body  s    i');<<rt   )r   rU   )rT   r  rC  r   int32masked)r   rW  reduction_blockrr  r  r  rq  
block_sizedefaultr  	need_maskr&  r   s        @@ru   
wrapper_fnz5Reduction._multilayer_wrap_loader.<locals>.wrapper_fn  s     "1_*/'Y ?2_DG= = vvNN7EKK8NN?EKK@ zz$g66vrt   )r   Sequence[Symbol]rW  rz  r   rU   )Viewdynamic_reshape_indexerrV   r   r   rz  r   r{  )
rS  r  r  r&  r-  rv  rw  ry  rx  r   s
    ` ` `` @@ru   _multilayer_wrap_loaderz!Reduction._multilayer_wrap_loader  sw     ../?/ARS((@@HH_u,a0
 
		#	6F		 	& rt   c                    t        d D              s
J d       t        j                  |t        |      t        |      z         	 	 	 	 	 	 dfd}|S )Nc              3  &   K   | ]	  }|d k(    ywr  rs   r   rU  s     ru   r   zDReduction._multilayer_wrap_loader_existing_ranges.<locals>.<genexpr>4  s     3a163r  z8Only enabled for numel_hint == 1, found original_ranges=c           	         | d t               }| t              d  } | t        |      t        |      z               S r   )r   rz   )merged_indexnew_reduction_indexoriginal_idxrq  r  original_rangesr   s       ru   ry  zEReduction._multilayer_wrap_loader_existing_ranges.<locals>.wrapper_fn;  sQ     ((>#o*>?L$S%9%;<Ii(51D+EEF rt   )r  r  r  r  r   rU   )r   r{  r|  rz   )rS  r  r  original_reduction_rangesr.  r/  ry  r   s    ``    @ru   '_multilayer_wrap_loader_existing_rangesz1Reduction._multilayer_wrap_loader_existing_ranges+  sy     3?33 	
G6HI	
3 ..%uZ'85AU;V'V
		.		!5		 		 rt   c                   |t         j                  t         j                  fvr|nt         j                  }t        j                  |||||||	|      }|j                          |j                         	 	 	 	 	 	 dfd}t        j                  j                  j                  t        |            }| j                  |
||      }||dt        |       k(  sJ t        j                  t	        |||||t        |      d |	||            S )a
        Break a large reduction up into multiple smaller reductions
        recursively
        c                     g | |      S r   rs   )r   rW  intermediate_loaders     ru   intermediate_fnz;Reduction.create_multilayer_helper.<locals>.intermediate_fnp  s     ''A'A'ABBrt   Nr  )r   r  rW  r  r   rU   )r   float16bfloat16r  r  rR  r  r  rV   r   r   r   rR   rm  r   rb   )rS  r   r%  r  ry  r  r  r.  r/  r  r-  r  intermediate_dtypeintermediater  r  r  s                   @ru   create_multilayer_helperz"Reduction.create_multilayer_helperH  s(   0  ??  	
 !'' 	
 	*668	C%	C8J	C	C
 WW%%//o0NO
99:~
 *-Cs?/C"DDDD(&!+C,@,B!C-#-	
 	
rt   c
                    t        |      }
t        |
|dz
  z   |      }| j                  ||      }| j                  |||
|||      }| j	                  ||||||g |||g|||	      S )r  r1   )rR   r.   rh  r}  r  )rS  r   r%  r  r0  r1  r  r  r-  r  r&  rv  rw  ry  s                 ru   r\  zReduction.create_multilayer  s    $ ((89o;UC
##NI>00&
G

 ++feL
 	
rt   c                j    | j                  |||||      }| j                  ||||||g ||||	d|
      S )r  r   )r  r  )rS  r   r%  r  r0  r  r  r.  r/  r  r  ry  s               ru   r[  z+Reduction.create_multilayer_existing_ranges  sc    $ @@% 

 ++%+o+
+ 
 	
rt   r  r   r  r  
r  r  r  r  r  r  r  rz  r   r   r  r   zSequence[Sequence[Expr]]r  r   )r   r  r%  r  r  r  r0  Callable[..., OpsValue]r1  r  r  r  r  z%Union[ReductionType, Literal['scan']]r&  r   r'  Optional[IRNode]r   tuple[ReductionHint, _IntLike])
r0  z<Callable[[Sequence[_IntLike], Sequence[_IntLike]], OpsValue]r  r  r  r   r  r  r   z(Callable[[Sequence[_IntLike]], OpsValue])r   r  r%  r  r  r  r0  r/  r1  r  r  r  r  r?   r  rC   r'  r  r   rb   r  r   r   r  r   #Union[_NumLike, Sequence[_NumLike]])r-  r^   r  r   r  rC   r   rC   )r  r  r  r  r&  r^   r-  r^   rv  r^   rw  r  r   Callable[..., object])r  @Callable[[Sequence[sympy.Expr], Sequence[sympy.Expr]], OpsValue]r  r  r  r  r.  Sequence[Integer]r/  r  r   r  )r   r  r%  r  r  r  ry  r/  r  r  r  r  r.  
list[Expr]r/  list[Integer]r  r?   r-  r^   r  rC   r   rb   )r   r  r%  r  r  r  r0  r/  r1  r  r  r  r  r?   r-  r^   r  rC   r   rb   )r   r  r%  r  r  r  r0  r/  r  r  r  r  r.  r  r/  r  r  r?   r  rC   r   rb   )ro   rp   rq   rr   rH  rw  r  r  r  r  r  ra  r9  r  r  r6  rH  rx  rC   r  rR  re  rh  rm  r}  r  r  r\  r[  rz  r{  s   @ru   r  r  $  s   ((!!!!N H

%#S"S 3S 	S
 )S 
S=
K

  (,aaa a *	a
 #a -a >a a %a 
(a aF -N-,- - 	-
 
2- -^  )6(=(='+Q
Q
 Q
 	Q

 %Q
 Q
 )Q
 &Q
 &Q
 %Q
 
Q
 Q
f $/	, < DD$/D	,D D %(:G	   !'! -! "	!
 ! ! 5! 
! !F P ( $2	
 & 0 
J 8 =
=
 =
 	=

 '=
 (=
 $2=
 =
 ,=
 &=
 =
 &=
 
=
 =
~ $
$
 $
 	$

 %$
 $
 )$
 &$
 $
 &$
 
$
 $
L $
$
 $
 	$

 %$
 ($
 $2$
 "$
 ,$
 &$
 &$
 
$
 $
rt   r  c                  l     e Zd ZU ded<   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ	 	 	 	 	 	 	 	 	 	 ddZ xZS )MultiOutputReductionr   output_indexc
           
         t              rft              dk(  rd   }
n	 	 	 	 	 	 dfd}
t        |   |||
|||||       |	| _        y )Nr1   r   c                2     t         fdD              S )Nc              3  0   K   | ]  } |        y wr   rs   )r   r   r   reduction_idxs     ru   r   z@MultiOutputReduction.__init__.<locals>.loader.<locals>.<genexpr>  s     HR]3Hr;  )rz   )r   r  	inner_fnss   ``ru   r  z-MultiOutputReduction.__init__.<locals>.loader  s     HiHHHrt   r  )r   r  r  r  r   ztuple[OpsValue, ...])callabler   rB  __init__r  )r0  r   r%  r  r1  r  r  r  r  r  r  rD  s      `       ru   r  zMultiOutputReduction.__init__  s     I"I y>Qq\FI#I4BI%I
 	-)) 	 		
 )rt   c           	     <   t        j                  | j                  | j                  | j                  | j                  ||            }t        |t        t        f      sJ t        |              || j                     }t        j                  |xs d ||      |      S r  )rT   r  r   r  r  r0  rx   rz   ry   r   r  r  )r0  r  r  r  r  r|   r   s          ru   r  z$MultiOutputReduction.store_reduction  s     JJNNMM$/	
 &5$-0CT&\NC0t(()"";#;)WT]ERRrt   )r   r  r%  r  r  z)Union[INNER_FN_TY, Sequence[INNER_FN_TY]]r1  r  r  r  r  r?   r  r  r  rC   r  r   r  )ro   rp   rq   rr   r  r  rz  r{  s   @ru   r  r    s    #)#) #) =	#)
 "#) ,#) &#) #) &#) #)JS"S 3S 	S
 )S 
Srt   r  c                  ^    e Zd Zeej
                  df	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Zy)OnlineSoftmaxReductionNc
           	         t        fdt        |      D              }
|
D ]  }|j                           |
S )z>
        Create the reduction disregarding splitting.
        c              3  j   K   | ]*  }t         j                  t        d |	             , yw)ra  N)rb   rR  r  )	r   
output_idxr   r%  r0  r1  r  r  r  s	     ru   r   z0OnlineSoftmaxReduction.create.<locals>.<genexpr>%  sI      
  $$+"

s   03)rz   r   r  )rS  r   r%  r  r0  r1  r  
num_outputr  r'  resultsr   s    `````` `   ru   rR  zOnlineSoftmaxReduction.create  sG       
 
 $J/
 
   	AIIK	rt   )r   r  r%  r  r  r  r0  r/  r1  r  r  r  r  r   r  rC   r'  r  r   Sequence[TensorBox])ro   rp   rq   rx  rC   r  rR  rs   rt   ru   r  r    s     )6(=(='+!! ! 	!
 %! ! )! ! &! %! 
! !rt   r  c                      e Zd Zeej
                  f	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Ze	 	 	 	 	 	 dd       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Z	y)WelfordReductionc                   |dv sJ t         j                  j                  j                  t	                    }d
fd}	|dk(  r |	d      }
 |	d      } |	d      }|
||fS |dk(  r@	 	 	 	 dfd|dk(  r |d          |	d       |	d      fS t        fd|D              S t        j                  |d   ||      \  }}|t        j                  k(  r|}|dkD  r| j                  ||||      S t        d	      D cg c](  }t        j                  t        ||||	            * }}|D ]  }|j                           |S c c}w )N)r`  r  c                X     d fd}t         j                  |t                    S )Nc                0    t        j                        S r   rQ  )r   r   rN  s    ru   r0  z8WelfordReduction.create.<locals>.const.<locals>.inner_fnK  s    || rt   r  r   r  r   rU   r  rR  ry   )rN  r0  r   r   r1  s   ` ru   constz&WelfordReduction.create.<locals>.constJ  s2     ##!F|	 $  rt   r   r1   c                X     d fd}t         j                  |t                    S )Nc                n    D cg c]  }t         j                  j                   }} | |      S c c}w r   rV  )r   r   rW  r  r  s      ru   r0  z7WelfordReduction.create.<locals>.copy.<locals>.inner_fnc  s1    =M&Nuww||&NO&N!#77 'OrX  r  r  r  )r  r0  r   r   r1  r  s   ` ru   copyz%WelfordReduction.create.<locals>.copy`  s2    8 !''!%<	 (  rt   r`  c              3  .   K   | ]  } |        y wr   rs   )r   r   r  s     ru   r   z*WelfordReduction.create.<locals>.<genexpr>q  s     :"T"X:   )r  r&  r   )rN  r   r   rb   )r  z4Callable[[Sequence[Expr], Sequence[Expr]], OpsValue]r   rb   )rV   r   r   rY  rR   rz   r  r6  rC   r  r\  r   rb   rR  r  r  )rS  r   r   r  r1  r  r  r  r&  r  meanm2weightr]  r-  r  r  r   r  s    `` ``            @ru   rR  zWelfordReduction.create;  s    !FFFF''**33MBR4ST	 a8DqB1XFV##aL  !11IaL)58U1X==:	:::&  **aL)+ + 	
e ]222!N19(( 	 	2 $Ah
   $""

 
   	AIIK	%
s   -Ec                     y)N)r   r   r   rs   rg  s     ru   rh  zWelfordReduction.default_value  s     rt   c	                    t              t        j                  j                  j	                  t        j                  z  d             }	|	rH|dk7  rC	 	 	 	 	 	 	 	 d
fd}
 j                  ||d   t        |
d      t        |
d      f|d|      S t        dz
  z         t        j                  |t         fd|D              g |g||      }|D ]  }|j                           	 	 	 	 	 	 	 	 ddt        j                  j                  j                  t        |            } j                  ||      }t        j                  |t        fd	|D              |gd|      S )r  r   r  c                0    t        j                  |      S r   rQ  )r   r  r   r   s      ru   r  z4WelfordReduction.create_multilayer.<locals>.constant  s     ||E511rt   r   r1   )r   r   r  r1  r  r  r-  r  c           	   3  L   K   | ]  }j                  |d         yw)r   )rw  N)r}  )r   r  rv  rS  r&  r  r-  s     ru   r   z5WelfordReduction.create_multilayer.<locals>.<genexpr>  s=      
  ++$# , 
s   !$c                     |g | |      S r   rs   )r   rW  r  s      ru   intermediate_loader_fnzBWelfordReduction.create_multilayer.<locals>.intermediate_loader_fn  s    
 4E4O455rt   c              3  T   K   | ]  }t        |j                                 ! yw))r  N)r
   r  )r   r   r  s     ru   r   z5WelfordReduction.create_multilayer.<locals>.<genexpr>  s*       .q}}GG   %()r   r  r  r  r   r   r   rU   )r   r  rW  r  r  r  r   rU   )rR   rV   r   r   rz  r   r{  r\  r
   r.   r  rR  rz   r  r   rm  )rS  r   r   r  r1  r  r  r-  r  rx  r  intermediatesr   r  rv  r  r&  s   ` `  ` `      @@@ru   r\  z"WelfordReduction.create_multilayer  s     ((89((@@HH_u,a0
 
	 +<<2#24B2KN22
 ((aLHA.HA.
 !10- )   o;UC
(// 
 (
 
 feL#
&  	AIIK		6!	6+	6 9	6 		6 WW%%//f0EF
99:~
  && &  G
 	
rt   N)r   r  r   r  r  Sequence[Callable[..., Any]]r1  r  r  r  r  r?   r  rC   r   r  r  )r   r  r   r  r  r  r1  r  r  r  r  r?   r-  r^   r  rC   r   r  )
ro   rp   rq   rx  rC   r  rR  r  rh  r\  rs   rt   ru   r  r  :  s    )6(=(=vv v 0	v
 v (v &v &v 
v vp $/	, 
 Z
Z
 Z
 0	Z

 Z
 (Z
 &Z
 Z
 &Z
 
Z
 Z
rt   r  c                  ^    e Zd ZU ded<   ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   d fdZd fdZ	 	 	 	 	 	 	 	 	 	 d dZd!dZd"dZd#dZ	d#dZ
d$dZd%dZddZeej                   fdd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d&d       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d'd       Z xZS )(Scanr  scan_rangesr   =Callable[[tuple[Any, ...], tuple[Any, ...]], tuple[Any, ...]]r?  zFCallable[[Sequence[_IntLike], Sequence[_IntLike]], Sequence[_IntLike]]r   rC   r  r   r  tuple[torch.dtype, ...]dtypestuple[Callable[..., Any], ...]r  c                    t         |           t               j                  d | j                  D         z   t               j                  d | j
                  D         z  S )Nc              3  2   K   | ]  }t        |        y wr   r4  r5  s     ru   r   z0Scan.get_unbacked_symbol_uses.<locals>.<genexpr>,       "V#8#;"Vr7  c              3  2   K   | ]  }t        |        y wr   r4  r5  s     ru   r   z0Scan.get_unbacked_symbol_uses.<locals>.<genexpr>-       "O#8#;"Or7  )rB  r  r,   r8  r  r   rC  s    ru   r  zScan.get_unbacked_symbol_uses&  s[    
 G,. jl  "VTEUEU"VWX jl  "OTYY"OPQ	
rt   c                    t        | j                        t        | j                        z   t        | j                        k(  sJ t        |           y r   )r   r1  r  r   rB  r7  rC  s    ru   r7  zScan.__post_init__0  =    4;;#d&6&6"773tyy>IIIrt   c                   | j                  ||      t        fd| j                  D              }t        j                  | j
                  | j                  |      }t        j                  |xs d |      || j                           S )Nc              3  .   K   | ]  } |        y wr   rs   r   r0  r   s     ru   r   z'Scan.store_reduction.<locals>.<genexpr><       Dx}Dr  r  )	r   rz   r  rT   r  r  r?  r  r  )r0  r  r  r  	scan_varsr|   resultr   s          @ru   r  zScan.store_reduction4  so     ll4+DT^^DD$++t?yy$9gclF4;L;L4M
 	
rt   c                     y)Ncustomrs   r6  s    ru   r  zScan.get_reduction_typeB  s    rt   c                    | j                   S r   )r  r6  s    ru   r  zScan.get_reduction_sizeF  rH  rt   c                    | j                   S r   r   r6  s    ru   r   zScan.get_sizeI      yyrt   c                    | j                   S r   rF  r6  s    ru   rO  zScan.get_pointwise_sizeL  rK  rt   c                X    t        | j                        t        | j                        z   S r   )r   r1  r  r6  s    ru   r  zScan.index_lengthO  !    4;;#d&6&6"777rt   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }|fS r   )r[  r1  r  r0   r  r   r0  r   r  r   s       ru   ra  zScan.inner_fn_argsR  E    DKK(T--t}}=ll5&)vrt   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }t        | j                  |      S r   )r[  r1  r  r0   r  r   r8   r0  r  s       ru   r9  z#Scan.inner_fn_free_unbacked_symbolsX  O    DKK(T--t}}=ll5&),T]]C@@rt   T)can_fallback_to_atenc                  g |d  |dz   d  |   gt         j                  j                  |t        j                        sd gt        |      z  S t        |      dkD  r=t         j                  j                  |t        j                        sd gt        |      z  S t         j                  j                  }
|
j                  t                    }t        |      t        |      k(  sJ |
j                  t        j                  |d            r?t        t        |            D cg c]!  }t        j                  |||   ||   |      # c}S | j!                  ||d   |d   ||      \  }}t"        }|dkD  r[t$        j&                  j(                  d u xs t*        xr	 t,        dk\  xr t        |      dk(  }|s|rd gt        |      z  S d}nt.        }dfd}t        t        |            D cg c]0  }t0        j                   |d	|||   |||   ||||||d|	      2 }}|D ]  }|j3                           |S c c}w c c}w )
Nr1   r  r   )r   r   r0  axispointwise_rangesr  r?  
scan_numelz3.3.0c                    t        |      t              k(  sJ t        |       t              k(  sJ g | d  || d  S r   r   )r   
scan_indexr  r  r  s     ru   r   zScan.create.<locals>.reindex  S    z?c+&6666u:%5!6666>U5D\>J>tu>>rt   )r   r   r  r0  r  r   r1  r  r?  r   r  r  )r   r  r  r  r   r  rs   )rV   r   r  r4   SCANr   TUPLE_REDUCTIONr   rY  rR   rz  r   Ler   r  rR  r6  r  r   versionhip
has_tritontriton_version	SplitScanrb   r  )rS  r   r  r  r   r  r?  r  r  r   r   r  r  r6  	scan_typesupports_splitr   r  r  r  r  s        `             @@ru   rR  zScan.create^  s    =T%4[<4q
+;<Dzlww""6>+>+>?6CK''v;?177#6#6N22$
 6CK''77##&&}['AB
6{c)n,,, ++EHHZ,CD %*#f+$6 !   ! .&|4	 !   &)^^)q\-#!! &4 	&
"
 	>!!T)Wj.V^w=V%v;!#  "' 6CK//!"J%		?. !&c&k 2%
$ #  ! .!&|4'+ +)##1!- 
 
*  	FNN	 N
s   )&I85Ic	           
     N    dfd}	t         j                  ||||	||d|      S )Nc                ,     g | d  || d        S r   rs   )r   r  r  r0  s     ru   ry  z#Scan.num_splits.<locals>.wrapper_fn  s*    Fc%4jF=F3tu:FGGrt   r  )r   r%  r  r0  r1  r  r  r&  )r   r  r  r  r   rU   )r  r6  )
rS  r   r   r0  r  r  r  r?  r  ry  s
      ``     ru   r6  zScan.num_splits  s;    	H ###(!& $ 	
 		
rt   r   r  )
r  r  r  z%Callable[[Sequence[_IntLike]], Never]r  r  r  rz  r   r   r  r  r  r  rv  )r   r  r  r  r  z+tuple[Callable[[Sequence[Expr]], Any], ...]r   r  r  r   r?  r  r  rC   r  rl   r   r   r   Sequence[Optional[TensorBox]])r   r  r   r  r0  r  r  r   r  r  r  r  r?  r  r  r   r   r  )ro   rp   rq   rr   r  r7  r  r  r  r   rO  r  ra  r9  rx  rC   r  rR  r6  rz  r{  s   @ru   r  r    s   
MMSS!!##--
 
"
 7
 	

 $
 

 8A  )6(=(=_ &*__ (_ ?	_
 _ _ R_ &_ #_ _ 
'_ _B 

 
 7	

 
 (
 #
 R
 
 
(
 
rt   r  c                      e Zd Zy)r  Nro   rp   rq   rs   rt   ru   r  r    s    rt   r  c                  ,    e Zd ZU ded<   ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ded<   d fdZd fdZ	 	 	 	 	 	 	 	 	 	 ddZddZd dZd dZ	d dZ
d!dZd"dZddZeej                   f	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d#d       Z xZS )$Sortr  sort_rangesr   z:Callable[[Sequence[Expr], Sequence[Expr]], Sequence[Expr]]r   rC   r  r   r  r  r  r  r  rl   stable
descendingc                    t         |           t               j                  d | j                  D         z   t               j                  d | j
                  D         z  S )Nc              3  2   K   | ]  }t        |        y wr   r4  r5  s     ru   r   z0Sort.get_unbacked_symbol_uses.<locals>.<genexpr>  r  r7  c              3  2   K   | ]  }t        |        y wr   r4  r5  s     ru   r   z0Sort.get_unbacked_symbol_uses.<locals>.<genexpr>  r  r7  )rB  r  r,   r8  r	  r   rC  s    ru   r  zSort.get_unbacked_symbol_uses  sY    G,. jl  "VTEUEU"VWX jl  "OTYY"OPQ	
rt   c                    t        | j                        t        | j                        z   t        | j                        k(  sJ t        |           y r   )r   r1  r	  r   rB  r7  rC  s    ru   r7  zSort.__post_init__  r  rt   c                ,   | j                  ||      t        fd| j                  D              }t        j                  | j
                  || j                  | j                        }t        j                  |xs d |      || j                           S )Nc              3  .   K   | ]  } |        y wr   rs   r  s     ru   r   z'Sort.store_reduction.<locals>.<genexpr>	  r  r  r  )
r   rz   r  rT   sortr  r
  r  r  r  )r0  r  r  r  r  r|   r  r   s          @ru   r  zSort.store_reduction  su     ll40DT^^DD$++vt{{DOOLyy$9gclF4;L;L4M
 	
rt   c                     y)Nr  rs   r6  s    ru   r  zSort.get_reduction_type	  s    rt   c                    | j                   S r   )r	  r6  s    ru   r  zSort.get_reduction_size	  rH  rt   c                    | j                   S r   r  r6  s    ru   r   zSort.get_size	  r  rt   c                    | j                   S r   rF  r6  s    ru   rO  zSort.get_pointwise_size	  rK  rt   c                X    t        | j                        t        | j                        z   S r   )r   r1  r	  r6  s    ru   r  zSort.index_length	  r  rt   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }|fS r   )r[  r1  r	  r0   r  r   r  s       ru   ra  zSort.inner_fn_args	  r  rt   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }t        | j                  |      S r   )r[  r1  r	  r0   r  r   r8   r0  r  s       ru   r9  z#Sort.inner_fn_free_unbacked_symbols!	  r  rt   c	                   g |d  |dz   d  |   gt         j                  j                  |t        j                        sd gt        |      z  S t         j                  j                  }
|
j                  t                    }d}t        j                  j                  xr% |
j                  t        j                  ||            }|sd gt        |      z  S t        |      t        |      k(  sJ |
j                  t        j                  |d            r?t        t        |            D cg c]!  }t         j#                  |||   ||   |      # c}S dfd}t        t        |            D cg c]4  }t$        j#                  t'        d|||   |||   |||||||d|	      6 }}|D ]  }|j)                           |S c c}w c c}w )Nr1   rj  r  c                    t        |      t              k(  sJ t        |       t              k(  sJ g | d  || d  S r   r  )r   
sort_indexr  r  r	  s     ru   r   zSort.create.<locals>.reindexV	  r  rt   )r   r   r  r0  r  r   r1  r	  r   r  r  r
  r  )r   r  r  r  r   r  rs   )rV   r   r  r4   SORTr   r   rY  rR   r2   r   persistent_reductionsrz  r   r  r   r  rR  rb   r  r  )rS  r   r  r  r   r  r
  r  r  r   r   
sort_numel
max_rblockis_persistent_kernelr  r   r  r  r  r	  s        `            @@ru   rR  zSort.create'	  s    =T%4[<4q
+;<Dzlww""6>+>+>?6CK''77##&&}['AB
 
MM// S00*j1QR 	 $6CK''6{c)n,,, ++EHHZ,CD %*#f+$6 !   ! .&|4	 !  	?0 !&c&k 2'
& %  ! .!&|4'+ +##1!-!) 
 
,  	FNN	 Q
s   2&G99Gr   r  )
r  r  r  r  r  r  r  r  r   r   r  r  r  r  )r   r  r  r  r  z'tuple[Callable[[list[Expr]], Any], ...]r   r  r  r   r
  rl   r  rl   r  rC   r   r   r   r  )ro   rp   rq   rr   r  r7  r  r  r  r   rO  r  ra  r9  rx  rC   r  rR  rz  r{  s   @ru   r  r    s*    
GG!!##--L
 
"
 2
 	

 '
 

 8A  )6(=(=LL (L ;	L
 L L L L &L L 
'L Lrt   r  c                >    	 t        | d       y# t        $ r Y yw xY w)NFfreezeT)r  r_  r   s    ru   r   r   w	  s&    a. s    	c                    	 t        | d      \  }}|j                         r|j                          |j                         S # t        $ r Y yw xY wNFr#  )r  should_pad_stridespad_stridesis_contiguousr_  )r   _bufferr  s      ru    is_contiguous_storage_and_layoutr+  	  sR    /%@ $$& ##%% s   ?A 	AAc                   t        | t              rt        | j                  |||||      S t        | t              r:t        | j                  |||||      \  }}| | j                  j                         fS t        | t              r|rn|r0| j                          | j                         j                         s>J || j                  ||       n&|| j                  ||       n| j                          t	        |       | j                         fS t        | t              r(t        | j                  |      \  }}|| j                  fS t        )z
    Try to simplify x into a StorageBox and a Layout.

    allow_padding only affect how we apply stride_order. When allow_padding
    is True, we have the freedom to add padding when applying the stride_order.
    r$  want_contiguousstride_orderr  r  r  r#  )rx   rb   r  r  
StorageBoxr   Bufferr  r)  r  r  r
  r  r  r_  )	r   r$  r.  r/  r  r  r   r  buffers	            ru   r  r  	  sG    !Y$FF+%''
 	
 !Z )FF+%''
	6 !&&##%%%!V!||~33555)11  2  *22! 3  !!}alln,,!_% *FF
	 qxx
rt   c                d    	 t        | d      \  }}|j                  |      S # t        $ r Y yw xY wr&  )r  is_stride_orderedr_  )r   r/  r*  r  s       ru   "is_stride_order_storage_and_layoutr6  	  s:    /%@''55 s    # 	//c                      e Zd ZU ded<   ddZddZddZddZedd       Z	ddZ
dd	Zd d
Zd!dZd"dZd#dZd$dZd%dZd Zd Zd$dZd$dZd&dZd'dZd Zd(dZy))BaseViewrc   r  c                6    | j                   j                         S r   r  r  r6  s    ru   r  z!BaseView.get_unbacked_symbol_uses	      yy1133rt   c                    t        d|        )Nzmake_reindexer NYI on r  r6  s    ru   make_reindexerzBaseView.make_reindexer	  s    !$:4&"ABBrt   c                l    | j                   j                         | j                         dfd}|S )Nc                       |             S r   rs   r   innerr   s    ru   r  z&BaseView.make_indexer.<locals>.indexer	      &&rt   )r   r  r   r   )r  r  r=  )r0  r  rA  r   s     @@ru   r  zBaseView.make_indexer	  s/    		&&(%%'	' rt   c                l    | j                   j                         | j                         dfd}|S )Nc                       |             S r   rs   r@  s    ru   r  z$BaseView.make_loader.<locals>.loader	  rB  rt   r  )r  r  r=  )r0  r  rA  r   s     @@ru   r  zBaseView.make_loader	  s/    		%%'%%'	' rt   c                6    | j                   j                         S r   )r  r   r6  s    ru   r   zBaseView.dtype	  s    yy""$$rt   c                6    | j                   j                         S r   r  r   r6  s    ru   r   zBaseView.get_layout	      yy##%%rt   c                6    | j                   j                         S r   r  r   r6  s    ru   r   zBaseView.get_device	  rH  rt   c                     y r   rs   r6  s    ru   rG  zBaseView.get_origin_node	  rK  rt   c                6    | j                   j                         S r   r  r  r6  s    ru   r  zBaseView.get_name	      yy!!##rt   c                "    | j                         S r   rt  r6  s    ru   rO  zBaseView.get_pointwise_size	      }}rt   c                8    | j                   j                  |      S r   r  r  r  s     ru   r  zBaseView.mark_reuse 
      yy##E**rt   c                6    | j                   j                         S r   r  r  r6  s    ru   r  zBaseView.has_exceeded_max_reads
      yy//11rt   c                6    | j                   j                         S r   r  r  r6  s    ru   r  zBaseView.realize
      yy  ""rt   c                6    | j                   j                         S r   r  r  r6  s    ru   r  zBaseView.realize_hint	
      yy%%''rt   c                6    | j                   j                         S r   r  r  r6  s    ru   r  zBaseView.get_storage_numel
      yy**,,rt   c                6    | j                   j                         S r   r  r  r6  s    ru   r  zBaseView.is_extern
      yy""$$rt   c                6    | j                   j                         S r   )r  is_module_bufferr6  s    ru   rd  zBaseView.is_module_buffer
      yy))++rt   c                6    | j                   j                         S r   r  rA  r6  s    ru   rA  zBaseView.get_read_names
      yy''))rt   c                    t        j                  t        dd      5  t        | j	                         | j                               j                  cd d d        S # 1 sw Y   y xY wr]  )r   r   r   r:   r  r   r  r6  s    ru   r@  zBaseView.get_reads
  sL    \\.*:DA 	&  " e		 	 	s   2AA!c                d    | }t        |t              r|j                  }t        |t              r|S r   )rx   r8  r  )r0  r   s     ru   r  zBaseView.unwrap_view
  s+    H%A H%rt   c                    | j                         } t        j                  t        d|      |      }t	        || j                         || j                               S r  )r  r   r   r  r  r   r   r  s      ru   r  zBaseView.constant_to_device%
  sP    !!#Hn.?HP.."==?	
 	
rt   Nr   )r   z*Callable[[Sequence[Expr]], Sequence[Expr]]r  r  r  r  r  r  r  r  r  r  r  r  r  r  )ro   rp   rq   rr   r  r=  r  r  r  r   r   r   rG  r  rO  r  r  r  r  r  r  rd  rA  r@  r  r  rs   rt   ru   r8  r8  	  s    
L4C % %&&$+2#(-%,*	
rt   r8  c                  F    e Zd ZU ded<   ed        Zed        ZddZd Z	y)	r}   r  r   c                r   t         j                  j                  }t        t	        t
        j                  |            }| j                         }dgt        |      t        |      z
  z  t        |      z   }t        |      t        |      k(  sJ t        t        |            D ]  }||   dk(  r||   J ||   ||<   ||   Lt         j                  j                  j                  j                  t        j                  ||   d      d      rm|j                  ||   ||   z
  d      dk(  rJ d        |S )	zReplace `-1` with correct sizesNr   r1   Tsize_obliviousr   fallbackzKBroadcast failed in ExpandView({x.get_size()}, {new_size}) on dimension {i})rV   r   r   ry   rT  r   rB  r   r   r   r   evaluate_exprr{  r   )r   new_sizer   old_sizer   s        ru   _normalize_sizezExpandView._normalize_size5
  s1    77##ELL(34::<6S]S]:;d8nL8}H---s8}% 	A{b {...&qk!$(8(8(B(B(P(P!a( )Q )   ))(1+*Ca)PTUU aU	" rt   c           	        | j                  ||      }t        |      r3t        |      \  }}t        |      t        |j                        z
  }|dk\  sJ t
        j                  j                  g|z  }t        |j                  |j                        D ]y  \  }}|j                  t        j                  j                  j                  j                  t        j                   |d      d      s|nt
        j                  j                         { t#        |j$                  |j&                  t)        |      ||j*                        }	t-        ||	      S t/        ||      S )Nr   r1   Trn  r  )r  r   )ru  r   r  r   r   r   rX  rY  r   r   r  rV   r   r   r   rr  r{  r  r   r   ry   r  r  r}   )
rS  r   rs  r  r  skipr  r   r   r   s
             ru   rR  zExpandView.createP
  s'   &&q(3 #"7":GZx=3z#77D199'',,$.J #J$5$5z G !!77++55CCq)$ D    %!!  X!!J #
CCqx00rt   c                    | j                   S r   r  r6  s    ru   r   zExpandView.get_sizel
  r  rt   c                    | j                         }| j                  j                         t        |      t              z
  fd}|S )Nc                    t        | d        } t        |       t              k(  sJ t        t                    D ](  }|   dk(  st        j                  j
                  | |<   * | S rW  )ry   r   r   r   rX  rY  )r   r   actualrw  s     ru   r   z*ExpandView.make_reindexer.<locals>.reindext
  sd    tu&Eu:V,,,3v;' ,!9>$ww||E!H, Lrt   )r   r  r   )r0  targetr   r{  rw  s      @@ru   r=  zExpandView.make_reindexero
  s>    ##%6{S[(	 rt   Nr  )
ro   rp   rq   rr   r  ru  rx  rR  r   r=  rs   rt   ru   r}   r}   1
  s8    
 4 1 16rt   r}   c                  F    e Zd ZU ded<   ed        Zed        ZddZd Zy)	PermuteViewr  dimsc           
        | j                  |      }t        |      t        t        t        |                  k(  sJ t	        |      r~t        |      \  }}t        |j                  |j                  |D cg c]  }|j                  |    c}|D cg c]  }|j                  |    c}|j                        }t        ||      S t        ||      S c c}w c c}w )Nr  )r  r  )_map_neg_dimsr,   r   r   r   r  r  r   r   r   r   r  r  r~  )rS  r   r  r  r  r   r   s          ru   rR  zPermuteView.create
  s      &$:eCI.>#???? #"7":GZ$!!  -12#2/34!""1%4!!J #
CC-- 34s   5CCc                R    |D cg c]  }|dk\  r|nt        |      |z    c}S c c}w rx  r  )rS  r  r  s      ru   r  zPermuteView._map_neg_dims
  s)    @DEsaxSY_4EEEs   $c                   t        | j                  | j                              t        t        t	        | j                                    k(  sJ | j
                  j                         }| j                  D cg c]  }||   	 c}S c c}w r   )r,   r  r  r   r   r  r   )r0  r   r   s      ru   r   zPermuteView.get_size
  so    $,,TYY78J#dii.!=
 
 	
 
 yy!!#!%+AQ+++s   7Bc                B   t        | j                        D ci c]  \  }}||
 c}}t        t        | j                              D cg c]  }|   	 c}t	              t	        t        t        | j                                    k(  sJ fd}|S c c}}w c c}w )Nc                4    D cg c]  }| |   	 c}S c c}w r   rs   )r   r   invs     ru   r   z+PermuteView.make_reindexer.<locals>.reindex
  s    &)*E!H***s   )r   r  r   r   r,   )r0  r   r4  r   r  s       @ru   r=  zPermuteView.make_reindexer
  s}     )$)) 451q!t5$S^45!s1v5#*U3tyy>-B"CCCC	+  65s   BBNr  )	ro   rp   rq   rr   rx  rR  r  r   r=  rs   rt   ru   r~  r~  
  s:    
. ." F F,rt   r~  c                  <    e Zd Zeddd       Zedd       ZddZy)SqueezeViewN)r  c          	        t        |      rt        |      \  }}g }g }|6t        |t              sJ d       d|k  r|t	        |j
                        k  sJ t        t        |j
                  |j                              D ]g  \  }\  }}	|)|dk7  s|j                  |       |j                  |	       4||k7  r#|j                  |       |j                  |	       \|dk(  rbJ d        t        |j                  |j                  |||j                        }
t        ||
      S |8t        j!                  ||j#                         D cg c]
  }|dk7  s	| c}      S |j#                         |   dk(  sJ t        j!                  |t        |j#                               D cg c]  \  }}||k7  s| c}}      S c c}w c c}}w )Nzexpected integer dim argumentr   r1   zexpected squeezed size to be 1r  )r   r  rx   r   r   r   r   r   r   r  r  r   r   r  r  r{  rR  r   )rS  r   r  r  r  rs  r  r   r   r   r   r   s               ru   rR  zSqueezeView.create
  s    #"7":GZHJ!#s+L-LL+CxC#joo*>$>>>%.s:??JDUDU/V%W 
K!>D&;qy -"))&1Cx -"))&1#qyJ*JJy
K %!!  !!J #
CC;;;qajjl"Ea1f1"EFF::<$)));;q1::<1H"UAAQTH1"UVV #F #Vs   
F<
F<
"G0Gc                    | D cg c]
  }|dk7  s	| }}t        |       D cg c]  \  }}|dk7  s| c}}t        |       dfd}||fS c c}w c c}}w )Nr1   c                    t        |       t              k(  sJ |  d        t        j                  j                  gz  }t	        |       D ]
  \  }}|||<    t        |      S )N )r   r   rX  rY  r   rz   )r   rq  r   r   lengthnot_ones       ru   r   z%SqueezeView.squeezer.<locals>.reindex
  sk    u:W-C%'/CC-/Igu- #Q!"	####rt   )r   zlist[sympy.Expr]r   ztuple[sympy.Expr, ...])r   r   )r   r   rs  r   r   r  r  s        @@ru   squeezerzSqueezeView.squeezer
  s]    #.!qAvA..!*4;AAF1;T	$    /;s   
AAAAc                    t        d      )Nzuse SqueezeView.create())AssertionError)r0  r  s     ru   r  zSqueezeView.__init__
  s    788rt   )r   r  r  )ro   rp   rq   rx  rR  r  r  r  rs   rt   ru   r  r  
  s3    " #W #WJ ! !9rt   r  c                  T    e Zd ZU ded<   ded<   d ZddZddZeZed        Z	dd	Z
y
)GenericViewr  r   r/  r   c                    | j                   S r   )r   r6  s    ru   r=  zGenericView.make_reindexer
      ||rt   c                   t        t        | j                              D cg c]  }t        t        j
                  |       }}t        | j                  |            }ddj                  t        t        |             d| S c c}w )Nzlambda , r   )r   r   r   rQ   r0   ry  ry   r   rV  rT  r   )r0  rZ  	index_old	index_news       ru   reindex_strzGenericView.reindex_str
  ss    CHTYYCX
>?*4::q9
	 
 i01	3sI#6789+FF	
s   !Bc                z    | j                  | j                  d| j                   d| j                          g      S )Nsize=zreindex=)rZ  r  r   r  r6  s    ru   rH  zGenericView.__str__
  s=    YY%		{+x8H8H8J7K-LM
 	
rt   c                *     | |t        |      |      S )Nr  r   r   )ry   )rS  r   rs  r   s       ru   rR  zGenericView.create
  s    X@@rt   c                    | j                   S r   r  r6  s    ru   r   zGenericView.get_size  r  rt   Nr  r  )ro   rp   rq   rr   r=  r  rH  rw  rx  rR  r   rs   rt   ru   r  r  
  s>    
G

 HA Art   r  c                  \    e Zd Zed        Zed        Zed        Zed        Zed        Z	y)r{  c                    t        j                  |       } t        j                  |      }t        j                  j                  j
                  j                  } |t        j                  | d            r| |z   } | S rx  )r   rB  rV   r   r   r   rr  Lt)r   r   rr  s      ru   handle_negative_indexzView.handle_negative_index  sZ    ll3||D!((22@@#q)**C
rt   c                  	 t        |t        t        f      sJ | j                  |j	                         |      \  	}t
        j                  j                  j                  	|      r|S d}t        t        	            dkD  st        t        |            dkD  rd}d|v r	fd} | |t        |      |      S t        |      s|r|rBt        |      s7t        j                  |t        j                  |j	                                     }t!        |d      \  }}t#        |j$                  |j&                  |t        j                  |      |j(                        }t+        ||      S | j-                  	|      } | |t        |      |      S )NFr   Tc                4    t        dgt              z        S rx  )rz   r   )r   rt  s    ru   fake_reindexz!View.create.<locals>.fake_reindex"  s    aS3x=011rt   r  )r.  r  )rx   rz   ry   resolve_negative_sizer   rV   r   r   statically_known_list_equalsr   r&   r+  ExternKernelrequire_exact_stridesr   r   r  r  r   r   r  r  r|  )
rS  r   rs  unbacked_symbols_in_sizesr  r  r  r   r   rt  s
            @ru   rR  zView.create  sd   (UDM222 66qzz|XN( 77888LH$)!%h/014(23a7(,%=2 ADNLII-a04M(2RST2U !66~88F #84"PGZ$!!  11(;!!J #
CC--hAX@@rt   c                F   |D cg c]+  }t         j                  j                  j                  |      - }}| D cg c]+  }t         j                  j                  j                  |      - } }t	        |      }t        t        |            D ]J  }||   dk(  st        j                  j                  ||<   t        t        |       t        |            ||<    n t         j                  j                  j                  t        |       t        |             | |fS c c}w c c}w )Nr   )rV   r   r   rY  ry   r   r   r   rX  Oner-   rR   guard_equals)rt  rs  r   r   s       ru   r  zView.resolve_negative_size?  s    :BCQAGG$$--a0CC:BCQAGG$$--a0CC>s8}% 	A{b #ggkk&}X'>h@WX		 	
%%mH&=}X?VW!! DCs
   0D0Dc                    	 | j                  ||      }|S # t        t        f$ r@ t        |      g}| j                  ||      }| j                  ||      }t	        ||      }Y |S w xY wr   )_dynamic_reshape_indexerr  
IndexErrorrR   r   )rS  rt  rs  r   flatr   r   s          ru   r|  zView.dynamic_reshape_indexerN  sx    	:228XFG  
+ 	:!(+,D33HdCH33D(CH%h9G	:s    AA%$A%c                F   t         j                  j                  j                  }t	        t        |            D cg c]  }t        t        j                  |       c}t        t        |            }t        |       }g |r=|r:|j                         }|j                         \  }}|dk(  r>j                  t        j                  j                         |j                  ||f       n|dk(  r|j                  |       n ||       ||      k(  r=j                  |       t         j                  j                  j!                  ||       nh ||       ||      k  r ||       ||      k  r2|j                         \  }	}
|	|z  |z   }||
z  } ||       ||      k  r2j                  |       t         j                  j                  j!                  ||       n ||       ||      kD  rt        j                  j"                  }|}j                  t%        |||             ||z  } ||       ||      kD  rH|j                         }j                  t%        |||             ||z  }||z  } ||       ||      kD  rHt         j                  j                  j!                  ||       nt&        |r|r:|rf|j                         }t         j                  j                  j!                  |d       j                  t        j                  j                         |rf|r@|j                         \  }}t         j                  j                  j!                  |d       |r@j)                          t              t        |       k(  sJ fd}|S c c}w )zG
        Perform a reshape entirely by modifying indexing math
        r1   c                    t        |       t              k(  sJ t        |       t              f       t        t        |             t        fdD              S )Nc              3  6   K   | ]  }t        |        y wr   )rS   )r   r   replacementss     ru   r   zAView._dynamic_reshape_indexer.<locals>.reindex.<locals>.<genexpr>  s     HA|4Hs   )r   r{   r   rz   )r   r  r  	view_exprs    @ru   r   z.View._dynamic_reshape_indexer.<locals>.reindex  sM    u:T*CSZT,CC*D% 01LHiHHHrt   )rV   r   r   r   r   r   rQ   r0   VIEWry   r   rQ  r  r   rX  rY  r  r  r/   r  reverse)rt  rs  r   r   	stack_new	stack_oldsize_oldvarsize_newvar2	size_new2divisormodulusr   r  r  s                 @@ru   r  zView._dynamic_reshape_indexerZ  s	   
 GG$$..	 CHHBV
=>*499a8
 T8,-	N		I }}H%MMOMC1}  .  #x1Q  *8$	((;;  %  --hA8$y'::)Ih,??&/mmoOD)/C/C')3H  )Ih,??   %  --hA8$y'::''++"  gw!GH!G+)Ih,??'mmoG$$_S'7%KL%/G''1H	  )Ih,??
   --hA$$= I@  }}HGG))(A6UWW\\* 
 %MMOMCGG))(A6  	9~X...	I
 s
s   !NN)
ro   rp   rq   r  r  rx  rR  r  r|  r  rs   rt   ru   r{  r{    si      ,A ,A\ " " 	 	 @ @rt   r{  c                       e Zd ZU dZded<   d fdZddZeZd ZddZ	ddZ
ed	        Zdd
Zd ZddZddZddZd ZddZdddZddZ xZS )r  z*Pretend our storage has a different layoutrm  r  c                    t         |           t        | j                  t              r0t
        j                  | d| j                  j                                y y )Nr  )rB  r7  rx   r  r8  r   r/  r  rC  s    ru   r7  zReinterpretView.__post_init__  s@    dii*tVTYY-B-B-DE +rt   c                P    | j                  | j                  | j                  g      S r   )rZ  r  r  r6  s    ru   rH  zReinterpretView.__str__  s&    		
 	
rt   c                6    | j                   j                         S r   rM  r6  s    ru   r  zReinterpretView.get_name  rN  rt   c                .    | j                   j                  S r   )r  r   r6  s    ru   r   zReinterpretView.get_device  s    {{!!!rt   c                     y r   rs   r6  s    ru   rG  zReinterpretView.get_origin_node  rK  rt   c                .    | j                   j                  S r   )r  r   r6  s    ru   r   zReinterpretView.dtype  s    {{   rt   c                @    t        | j                  j                        S r   )ry   r  r   r6  s    ru   r   zReinterpretView.get_size  s    DKK$$%%rt   c                @    t        | j                  j                        S r   )ry   r  r   r6  s    ru   r   zReinterpretView.get_stride  s    DKK&&''rt   c                     d fd}|S )Nc                T   j                   j                         }t        j                  j	                          ||             }j                   j
                  j                  j
                  k7  r5t        j                  |j
                  j                  j
                        S |S r   )r  r  rT   loadr  r   r  to_dtype_bitcast)r   r  
tmp_loaderr0  s      ru   r  z+ReinterpretView.make_loader.<locals>.loader  sp    kk..0G$--/75>BJ{{  DIIOO3++J

DIIOOTT!!rt   r   r  r   rU   rs   r0  r  s   ` ru   r  zReinterpretView.make_loader  s    	" rt   c                6    | j                   j                         S r   )r  r  r6  s    ru   r  zReinterpretView.make_indexer      {{''))rt   c                    | j                   S r   r  r6  s    ru   r   zReinterpretView.get_layout  rK  rt   c                     y r   rs   r6  s    ru   r  zReinterpretView.freeze_layout  r  rt   c                    t        | j                  j                        t        | j                  j                        z  t        | j                  j                        z  S r   )r&   r  r   r   r  r6  s    ru   r  z(ReinterpretView.get_unbacked_symbol_uses  sG    !$++"2"23#DKK$6$678#DKK$6$678	
rt   c                t   t         j                  j                  j                  | j                  | j
                  j                  | j
                  j                  | j
                  j                  ||j                  n#t         j                  j                  j                  | j
                  j                        S r  )rV   r   wrapper_codecodegen_reinterpret_viewr  r  r   r   r  	writeliner   r  s     ru   r  z!ReinterpretView.codegen_reference  s     ww##<<IIKKKKKK & 2F8L8L8V8V++## = 
 	
rt   c                     yrW  rs   r6  s    ru   r  zReinterpretView.num_reads      rt   r  r  r  r  r  r  r  r  r,  r   r  r  )ro   rp   rq   __doc__rr   r7  rH  rw  r  r   rG  r  r   r   r   r  r  r   r  r  r  r  rz  r{  s   @ru   r  r    sm    4NF

 H$" ! !&(	*

rt   r  c                  X    e Zd ZU dZded<   ed        Zd
dZeZe	d        Z
ddZddZy	)	DtypeViewz(Pretend our storage has a different typer  target_dtypec                    t        |      rRt        |      \  }}t        |j                  ||j                  |j
                  |j                        }t        ||      S t        ||      S )Nr  )r  r  )	r   r  r  r   r   r   r  r  r  )rS  r   	new_dtyper  r  r   s         ru   rR  zDtypeView.create  sd     #"7":GZ$!!!!!!J #
CCai88rt   c                P    | j                  | j                  | j                  g      S r   )rZ  r  r  r6  s    ru   rH  zDtypeView.__str__  s     		4+<+<=>>rt   c                    | j                   S r   )r  r6  s    ru   r   zDtypeView.dtype  s       rt   c                6    | j                   j                         S r   r  r   r6  s    ru   r   zDtypeView.get_size  rN  rt   c                J      j                   j                          fd}|S )Nc                z    t        j                   |       j                  j                  j                        S r   )rT   r  r  r  r   )r   rA  r0  s    ru   r  z%DtypeView.make_loader.<locals>.loader  s*    ''c
D4E4EtyyWWrt   r  r  )r0  r  rA  s   ` @ru   r  zDtypeView.make_loader  s"    		%%'	X rt   Nr  r  r  )ro   rp   rq   r  rr   rx  rR  rH  rw  r  r   r   r  rs   rt   ru   r  r    sE    29 9? H! !$rt   r  c                  .    e Zd Zed        Zedd       Zy)	SliceViewc                T   	
 t         j                  j                  
|j                         |   t	        d ||fD              r!t
        j                  	t
        j                  n
j                  	
j                  	
fd fd} ||dd      } |||      }||fS )zz
        Normalize start and end such that both are in the range
        [0, x.get_size()[dim]] and start <= end.
        c              3  2   K   | ]  }t        |        y wr   r4  r   r   s     ru   r   z0SliceView.normalize_start_end.<locals>.<genexpr>%  s     HA$Q'Hr7  c                    j                  | |      r| n | |      }j                  ||      r|}|S  ||      }|S r   )statically_known_geqr  )r   lowerupperclamped_lowerclamped_fullmax_funcmin_funcr   s        ru   clampz,SliceView.normalize_start_end.<locals>.clamp,  s`    221e<(1eBT 
 00F  
   mU3 
  rt   c                D    | |S j                  |       }  | ||      S r   )r  )rN  r  r  rw  r  rS  dim_sizes       ru   
clamp_wrapz1SliceView.normalize_start_end.<locals>.clamp_wrap7  s.    {++C:CeU++rt   r   )
rV   r   r   r   r  r   MinMaxevaluate_minevaluate_max)rS  r   r  startendr  r  r  r  r  r   s   `     @@@@@ru   normalize_start_endzSliceView.normalize_start_end  s     77##::<$H%h1GHHyyHyyH,,H,,H		 	, 5!Xq1eXx8czrt   c           	        t        j                        t        t         j                        sdkD  sJ 	 dk(  r|dk\  rdk(  r|S t        |j                               |r| j                  ||      \  }t        |z
  dz
  z         <   t        |      r{t        |      \  }}t        |j                        }	|	   z  |	<   t        |j                  |j                  |	|j                  |j                     z  z         }
t!        ||
      S fd}t#        ||      S # t        $ r Y w xY w)Nr   l    r1   r  c                    t        |       t              k(  sJ d|  d        t        |       } |    z  z   | <   | S )Nzwrong ndim r  )r   ry   )r   r  rs  r  steps    ru   r   z!SliceView.create.<locals>.reindexc  sP    u:X.P+eWAhZ0PP.KEsd*U2E#JLrt   r  )r   rB  rx   r   	TypeErrorry   r   r  r.   r   r  r   r  r   r   r  r  r  )rS  r   r  r  r  r  r  r  r  r  r   r   rs  s     `` `      @ru   rR  zSliceView.createA  sJ   ||D!$

+tax77	zcY.419 

%
 00CDJE3 uq!94@ #"7":GZj//0J(o4JsO$!!  !!J$5$5c$:U$BBJ #
CC	 ah@@E  		s   D3 3	D?>D?N)r1   T)ro   rp   rq   rx  r  rR  rs   rt   ru   r  r    s+    " "H (A (Art   r  c                  B    e Zd ZU ded<   ded<   d
dZddZddZddZy	)BaseConstantr  r   r  r   c                     yNrs   rs   r6  s    ru   r   zBaseConstant.get_sizer  s    rt   c                    | j                   S r   rJ  r6  s    ru   r   zBaseConstant.get_deviceu  rK  rt   c                     y r   rs   r6  s    ru   rG  zBaseConstant.get_origin_nodex  rK  rt   c                    t               S r   r+   r6  s    ru   r@  zBaseConstant.get_reads{  r&  rt   Nr  r  r  r  )ro   rp   rq   rr   r   r   rG  r@  rs   rt   ru   r	  r	  m  s"    rt   r	  c                  D    e Zd ZU ded<   ded<   ded<   ddZddZdd	Zy
)Constantr   r   r  r   r  r   c                     d fd}|S )Nc                X    t        j                  j                  j                        S r   )rT   r  r   r   r   r0  s    ru   r  z$Constant.make_loader.<locals>.loader  s    <<

DJJ77rt   r  rs   r  s   ` ru   r  zConstant.make_loader  s    	8 rt   c                     y r   rs   r6  s    ru   r  zConstant.realize  r  rt   c                F    t        | j                  | j                  |      S )N)r   r   r   )r  r   r   r  s     ru   r  zConstant.constant_to_device  s    djj

6JJrt   Nr  r  r  )ro   rp   rq   rr   r  r  r  rs   rt   ru   r  r    s#    JKrt   r  c                  <    e Zd ZU ded<   ded<   ded<   d
dZddZy	)IndexingConstantr   r   r  r   r  r   c                     d fd}|S )Nc                X    t        j                  j                  j                        S r   )rT   rC  r   r   r  s    ru   r  z,IndexingConstant.make_loader.<locals>.loader  s    >>$**djj99rt   r  rs   r  s   ` ru   r  zIndexingConstant.make_loader  s    	: rt   c                F    t        | j                  | j                  |      S )N)r   r   r   )r  r   r   r  s     ru   r  z#IndexingConstant.constant_to_device  s    djj

6RRrt   Nr  r  )ro   rp   rq   rr   r  r  rs   rt   ru   r  r    s    JSrt   r  c           	     b    t        d t        | t        j                  |      |      D              S )Nc              3  <   K   | ]  \  }}}|d k(  xs ||k(    ywr  rs   )r   leftrightr   s       ru   r   z2is_contiguous_strides_for_shape.<locals>.<genexpr>  s.      D% 		"TU]"   )r   r   r   r   )r   r
  s     ru   is_contiguous_strides_for_shaper     s5      !$N55e<e"
  rt   c                <    t         j                  | j                  z  S r   )r2   padding_alignment_bytesitemsizer\  s    ru   get_align_for_dtyper$    s    ))U^^;;rt   c                       e Zd ZdZddZddZy)r   zxAbstract base for Layout, MultiOutputLayout, NoneLayout.
    Represents the memory layout of the output of an Operation.c                >    t        t        |       j                        r   r  r6  s    ru   r   zOutputSpec.get_device  r  rt   c                >    t        t        |       j                        r   r  r6  s    ru   storage_sizezOutputSpec.storage_size  r  rt   Nr  r  )ro   rp   rq   r  r   r(  rs   rt   ru   r   r     s    C77rt   r   c                      e Zd Zd ed      f	 	 	 	 	 	 	 	 	 	 	 ddZddZeZddZddZe		 	 	 	 	 	 dd       Z
ddZdd	Zd
 Ze	d        Zd Zd Zd ZddZddZddZy)rm  Nr   c                    |t         j                  |      }|| _        || _        t	        |      t	        |      k(  sJ d| d|        t        d |D              sJ || _        || _        || _        y )Nr  	, stride=c              3  H   K   | ]  }t        |t        t        f        y wr   )rx   r   r   r  s     ru   r   z"Layout.__init__.<locals>.<genexpr>  s     <!:a$-<    ")	r   r   r   r   r   r   r   r   r  )r0  r   r   r   r   r  s         ru   r  zLayout.__init__  sx     >#66t<F
4yCK'H5ix)HH'<t<<<< $	"("rt   c                X   d}| j                   dk7  rd| j                    }| j                  j                  dnd| j                  j                   }t        |       j                   d| j                  j                   | d| j
                   d| j                   d| j                   | d	S )
NrM  r   z	, offset=:z('z', z, size=r+  r   )r  r   r   r   ro   r   r   r   )r0  r  device_index_strs      ru   rH  zLayout.__str__  s    ;;! .F!%!2!2!:2!DKKDUDUCV@WDz""#2dkk&6&6%78H7ITZZL YII;i}VHA?	
rt   c                    | j                   S r   rJ  r6  s    ru   r   zLayout.get_device  rK  rt   c                B    t        | j                  | j                        S r   )r   r   r   r6  s    ru   r)  zLayout.is_contiguous  s    .t{{DIIFFrt   c                    t        |       }|dvs| d   dk(  ryt        |t        |       |       D ]  \  }}}|dk7  s||k7  s y y)N)r      r1   FT)r   r   r"   )r
  r  ndimr  r  r   s         ru   is_channels_last_contiguousz"Layout.is_channels_last_contiguous  sa     5zvqQ!$3E:E"
 	D% qyTU]		
 rt   c                    t        | j                  t        t        j	                  t        t        | j                                          | j                        D ]  \  }}}|dk7  s||k7  s y y)Nr1   FT)r   r   reversedr   r   ry   r   )r0  r  r  r   s       ru   is_transposedzLayout.is_transposed  sc    !$KK^66tHTYY<O7PQRII"
 	D%
 qyTU]	 rt   c                   t        | j                        t        |      k(  sJ t        | j                        D cg c]5  \  }}t        j
                  j                  j                  |d      dk7  r|7 }}}|D cg c]  }| j                  |    }}|D cg c]  }||   	 }}d } ||      }dgt        |      z  }t        t        |            D ]  }||   |||   <    t        t        |      dz
        D ][  }||   ||dz      kD  }t        |t              s7t        j
                  j                  j                  ||   ||dz      kD  d      }|s[ y yc c}}w c c}w c c}w )	Nr   rp  r1   c                `    t        |       }| D cg c]  }|j                  |       c}S c c}w r   )r  r   )arr
sorted_arrelements      ru   sorted_indicesz0Layout.is_stride_ordered.<locals>.sorted_indices  s*    J=@A'J$$W-AAAs   +r   Trn  F)r   r   r   r   rV   r   r   r   r   rx   rl   
_shape_envrr  )	r0  r   r   r  non_1_indicesr   r?  stride_orderedexprs	            ru   r5  zLayout.is_stride_ordered  s~   4;;3u:---
 $DII.
3ww))#):a? 
 
 +88Q$++a.88#01aq11	B
 u% E
*s5z" 	1A'-ayN58$	1 s5zA~& 	A!!$~a!e'<<DdD)ww))77"1%q1u(==d 8  	 ;
 91s   :E=E!E&c                    dgt        t        t        dt        | j                        dz
                    z   }t        |      g|z   }| j                  |      S Nr   r1   )ry   r8  r   r   r   r5  r  s     ru   is_channels_last_stride_orderedz&Layout.is_channels_last_stride_ordered  sN    d8E!S-=-A$BCDDUu$%%e,,rt   c                   t        |      }t        |       dk(  r| S t        j                  st        j                  ||       r| S t        j                         }t        |d      r|j                  j                  dd      r| S t        d t        j                  | |      D              s| S t        |       }t        |      }t!        t        |             D cg c]  }d }}d||d   <   d}	t#        |dd d      D ]I  \  }
}||
dz
     }||   ||   z  }|t        j$                  kD  r||z  dk7  rt'        ||      |z  }d	}	|||<   K |	s| S t(        xj*                  dz  c_        |S c c}w )
z
        The padding does not change stride order but makes sure all strides larger
        than the threshold are multiple of align.
        r   metadislike_paddingFc              3  \   K   | ]$  }t        |t        t        j                  f       & y wr   )rx   r   r   r   r  s     ru   r   z&Layout._pad_strides.<locals>.<genexpr>>  s(      
 q3./
   *,r1   N)r  T)r$  r   r2   pad_channels_lastrm  r6  rV   get_current_noder  rH  getr   r=  chainr   r   r   r   padding_stride_thresholdrG   r   num_comprehensive_padding)
in_stridesr   r   aligncurrent_fx_noder/  r   r   new_stridespaddedrankr   prev_idxr   s                 ru   _pad_strideszLayout._pad_strides$  s    $E*z?a''F,N,N*-
 ,,.?F+0D0D0H0Hu1
   
__Z6
 
 '
3,\:
"'J"89Qq99 &'JqM"":ab>; 	&ID#!$(+H *T(^;F777FUNa<O /%7%K	&  ))Q.)- :s   	E&c                    t        | t              sJ | j                  J | j                  | j                  | j                  | j
                        | _        y r   )rx   r   r   rY  r   r   r6  s    ru   r(  zLayout.pad_strides_  sD    $///{{&&&''TYY

Krt   c                F    t         j                  xr t        | t              S r   )r2   comprehensive_paddingrx   r   r6  s    ru   r'  zLayout.should_pad_stridesd  s    ++P
40PPrt   c                    t        | t              r| S | j                         r| j                          t        | j                  | j
                  | j                  | j                  | j                        S r   )	rx   r  r'  r(  r   r   r   r   r  r6  s    ru   as_fixedzLayout.as_fixedg  sY    dK(K""$KKJJIIKKKK
 	
rt   c                    t         j                  sJ dt        |       j                   d       | j	                         j                         S )Nzconvert z to FixedLayout first)r   r^  r   ro   r^  r  r6  s    ru   r  zLayout.make_indexeru  sG    ,, 	
tDz**++@A	
, }}++--rt   c                   | j                   |j                   k(  xrj | j                  |j                  k(  xrO | j                  |j                  k(  xr4 | j                  |j                  k(  xr | j                  |j                  k(  S r   r   r   r   r   r  )r0  others     ru   __eq__zLayout.__eq__{  so    KK5<<' ,

ekk),		UZZ', u||+, u||+	
rt   c                X    t        | j                  | j                  | j                        S r   )r   r   r   r  r6  s    ru   r(  zLayout.storage_size  s    .tyy$++t{{SSrt   )r   r  r   r  r   r  r   zOptional[list[Expr]]r  r   r   r   r  r  r  )r
  r  r  r  r   rl   r  r   z
sympy.Expr)ro   rp   rq   r   r  rH  rw  r   r)  r  r6  r9  r5  rF  rY  r(  r'  r^  r  rc  r(  rs   rt   ru   rm  rm    s     (,qz## # 	#
 %# # 
#$	
 HG !,>	 !F- 8 8tL
Q
.
Trt   rm  c                      e Zd ZdZddZy)r  z A Tensor layout we cannot changec                      fd}|S )z1A closure containing math to read a given elementc                   t        |       t        j                        k(  sJ t        |       t        j                        k(  sJ j                  }t	        | j                  j                        D ]  \  }}}|dk7  s|||z  z   } |S rW  )r   r   r   r  r   )r   r  r   r   szr0  s        ru   r  z)FixedLayout.make_indexer.<locals>.indexer  s    u:T[[!1111u:TYY///[[F#&udkk499#E 3VR7#cFl2F3 Mrt   rs   r0  r  s   ` ru   r  zFixedLayout.make_indexer  s    	 rt   Nr  )ro   rp   rq   r  r  rs   rt   ru   r  r    s
    *rt   r  c                       e Zd ZdZdZed        Zed        Zed        Zed        Z	ed        Z
ddZdd	Zd
 Zd Zdd fdZ xZS )r   z(A Tensor layout we are allowed to changeFc                    t        |       dk(  rg S t        j                  j                  g}t	        | dd        D ]  }|j                  ||d   z          t        t	        |            S )Nr   r1   r   )r   r   rX  r  r8  r  ry   )sizesreversed_stridesr   s      ru   r   z!FlexibleLayout.contiguous_strides  sh    u:?I!GGKK=U12Y' 	AD##D+;B+?$?@	AH-.//rt   c                    t        t        t        |                   t        |      k(  s	J | |f       t        j                  j
                  }dgt        |      z  }|D ]  }|||<   || |   z  } |S )z
        Create a stride based on the order the dimensions should be filled in.

        In this format, channels last would be:
            [1, 3, 2, 0]
        N)r,   r   r   r   rX  r  )rm  r   next_strider  r   s        ru   fill_orderedzFlexibleLayout.fill_ordered  sx     %E
+,
50AAQE5>QAggkk&3u:% 	1A$GAJ%a0K	1 rt   c                    t        t        t        |                   t        |      k(  sJ t        |      }t        j                  | |      S )z
        Create a stride based on the sorted order of a permuted range.

        In this format, channels last would be:
            [3, 0, 2, 1]
        )r,   r   r   r   r   rq  )rm  r   r   s      ru   rB  zFlexibleLayout.stride_ordered  sB     %E
+,
50AAAA,U3
**5*==rt   c                >   |t         j                  k(  rt        j                  | t              S |t         j
                  k(  rt        j                  | t              S |t         j                  k(  rt        j                  |       S t        j                  d|       t        )aq  
        Create a stride based on a memory format.

        Memory format is translasted into a stride order,
        so channels_last is the same as:
            FlexibleLayout.stride_ordered(sizes, [3, 0, 2, 1])

        This interface does not support memory_format `torch.preserve_format`
        which should be used to deduce a format from another source
        z>stride_ordered_for_memory_format, unsuppored memory_format: %s)r   channels_lastr   rB  NHWC_STRIDE_ORDERchannels_last_3dNHWDC_STRIDE_ORDERcontiguous_formatr   r  r  r_  )rm  memory_formats     ru    stride_ordered_for_memory_formatz/FlexibleLayout.stride_ordered_for_memory_format  s     E///!008IJJe444!008JKKe555!44U;;IIP &%rt   c                (   t        |       t        |      k(  sJ |D cg c]+  }t        j                  j                  j	                  |      - }}t        t        t        |            |j                        }t        j                  | |      S c c}w )z
        Create a stride that has the same stride order as given stride

        For example, if given stride is [1000, 1, 100, 10],
        the fill order should be [1, 3, 2, 0]
        r  )
r   rV   r   r   r   r  r   __getitem__r   rq  )rm  r   r   r   s       ru   same_orderedzFlexibleLayout.same_ordered  sv     5zS[(((9?@A!''"",,Q/@@E#f+.F4F4FG
**5*== As   0Bc                   | j                  | j                  |      }| j                         r)|r'| j                  || j                  | j                        }t        | j                  | j                  | j                  || j                        S r   )rB  r   r'  rY  r   r  r   r  )r0  r   r  r  s       ru   as_stride_orderzFlexibleLayout.as_stride_order  sn    ((E:
""$**:tyy$**MJKKJJIIKK
 	
rt   c                    |}| j                         r)|r'| j                  || j                  | j                        }t	        | j
                  | j                  | j                  || j                        S r   )r'  rY  r   r   r  r   r  )r0  r  r  r  s       ru   as_exact_strideszFlexibleLayout.as_exact_strides  s]    "
""$**:tyy$**MJKKJJIIKK
 	
rt   c                   | j                  | j                  |      }| j                         r'| j                  || j                  | j                        }t        | j                  | j                  | j                  || j                        S r   )rq  r   r'  rY  r   r  r   r  )r0  r   r  s      ru   as_fill_orderzFlexibleLayout.as_fill_order  sl    &&tyy%8
""$**:tyy$**MJKKJJIIKK
 	
rt   c                   | j                  | j                  |      }| j                         r'| j                  || j                  | j                        }t        | j                  | j                  | j                  || j                        S r   )r}  r   r'  rY  r   r  r   r  )r0  r   r  s      ru   as_same_orderzFlexibleLayout.as_same_order  sl    &&tyy&9
""$**:tyy$**MJKKJJIIKK
 	
rt   c                    |rt         j                  ||      }nt         j                  |      }t        |   ||||       y r   )r   rq  r   rB  r  )r0  r   r   r   r/  r  rD  s         ru   r  zFlexibleLayout.__init__  s;    $11$EG$77=Gg6rt   r  r   r  )ro   rp   rq   r  r^  r  r   rq  rB  rz  r}  r  r  r  r  r  rz  r{  s   @ru   r   r     s    2N 0 0    	> 	> & &0 
> 
>





7 7rt   r   c                  2     e Zd ZdZd fdZddZd Z xZS )NonOwningLayoutz,Is a view into the storage of another tensorc                    |j                         }t        | 	  |j                  |j                  |j
                  |j                         || _        y r   )r   rB  r  r   r   r   r   view)r0  r  r  rD  s      ru   r  zNonOwningLayout.__init__)  sA    "MMLLKKMM		
 	rt   c                >    | j                         j                         S r   )r^  r  r6  s    ru   r  zNonOwningLayout.make_indexer3  s    }}++--rt   c                    | j                   j                         j                  }|dk(  ryddlm} t
        j                  j                  j                  ||      S )Nr   Tr1   )	ALIGNMENT)	r  r   r  utilsr  rV   r   r   statically_known_multiple_of)r0  r  r  s      ru   maybe_guard_alignedz#NonOwningLayout.maybe_guard_aligned6  sD    %%'..Q;$ww<<VYOOrt   )r  zUnion[BaseView, TensorBox]r   r   r  )ro   rp   rq   r  r  r  r  rz  r{  s   @ru   r  r  &  s    6.Prt   r  c                      e Zd ZdZy)CommBufferTypesymm_memN)ro   rp   rq   SYMM_MEMrs   rt   ru   r  r  ?  s    Hrt   r  c                  F     e Zd ZU dZded<   ded<   	 	 	 	 	 	 d fdZ xZS )CommBufferLayoutax  
    A layout that signifies the buffer is a comm buffer.
    In terms of striding, the layout is identical to `FixedLayout`.

    Buffers with this layout do not participate in in-place reuse - it can be
    neither the source nor the target for in-place reuse.

    For detailed motivation and usage of this layout, see
    NOTE [lowering-time collective optimization].
    r  comm_buffer_typer   
group_namec                   t        |t              st        d| d      |j                         }t        |   |j                  |j                  |j                  |j                  |j                         || _        || _        y )NzJA `CommBufferLayout` can only be initialized with a `FlexibleLayout` (got z).ra  )rx   r   r  r^  rB  r  r   r   r   r   r  r  r  )r0  r  r  r  fixedrD  s        ru   r  zCommBufferLayout.__init__R  s     &.1 ++1("6 
 !<<++<<<< 	 	
 !1$rt   )r  r   r  r  r  r   )ro   rp   rq   r  rr   r  rz  r{  s   @ru   r  r  C  s;    	 %$O%% )% 	% %rt   r  c                      e Zd ZU ded<    ej
                  d       Zded<    ej
                  d       Zded<   dd	Zd
 Z	ddZ
y)
NoneLayoutr  r   c                     dgS rx  rs   rs   rt   ru   r  zNoneLayout.<lambda>u  s     rt   default_factoryr  r   c                     dgS rx  rs   rs   rt   ru   r  zNoneLayout.<lambda>v  s    1# rt   r   c                     yrx  rs   r6  s    ru   r(  zNoneLayout.storage_sizex  r  rt   c                    | S r   rs   r6  s    ru   r^  zNoneLayout.as_fixed{      rt   c                    | j                   S r   rJ  r6  s    ru   r   zNoneLayout.get_device~  rK  rt   Nr  r  )ro   rp   rq   rr   r  r  r   r   r(  r^  r   rs   rt   ru   r  r  j  sG     #"'k''DD)D)))+FFIFrt   r  c                       e Zd Zd
 fdZedd       Zej                  dd       ZddZddZd Z	e
dd       Zd Zdd	Z xZS )MutationLayoutSHOULDREMOVEc                   t         |   |j                         |j                         |j	                         d        || _        | j                         j                         }t        j                  j                  |       y r   )rB  r  r  r   r   r|  
get_bufferr  rV   r   mark_buffer_mutated)r0  r|  r   rD  s      ru   r  z#MutationLayoutSHOULDREMOVE.__init__  se    &&(OO		
  ))+	##D)rt   c                6    | j                         j                  S r   )real_layoutr   r6  s    ru   r   z!MutationLayoutSHOULDREMOVE.stride  s    !(((rt   c                     y r   rs   )r0  r   s     ru   r   z!MutationLayoutSHOULDREMOVE.stride  s    rt   c                >    | j                         j                         S r   )r  r(  r6  s    ru   r(  z'MutationLayoutSHOULDREMOVE.storage_size  s    !..00rt   c                d    fd | j                         }t        |t              sJ d       |S )Nc                    t        | t              r | j                        S t        | t              r | j	                               S t        | t
              r | j                        S | S r   )rx   r  r|  r8  r  
MutableBoxr  )r|  unwrap_viewss    ru   r  z;MutationLayoutSHOULDREMOVE.get_buffer.<locals>.unwrap_views  sY    &"<=#FMM22&(+#F$6$6$899&*-#FKK00Mrt   z1MutationLayoutSHOULDREMOVE must refer to a buffer)r|  rx   r2  )r0  r  r  s     @ru   r  z%MutationLayoutSHOULDREMOVE.get_buffer  s9    	 dkk*&&) 	
?	
) rt   c                6    | j                         j                  S r   )r  r  r6  s    ru   r  z&MutationLayoutSHOULDREMOVE.real_layout       '''rt   c                   |j                          t        j                  j                  |j	                                t        |t              r|j                  }|j                          |st        j                  |j                         |j                         |j                         t        |j                         |j                               D cg c]/  \  }}t        j                  j                   j#                  ||      1 c}}      j                  }|j                          t        |j                  j$                  t&              sJ t)        |      |j                  _        |j                  S c c}}w )Nr  )r  rV   r   r  r  rx   rb   r  r  r  rR  r   r   r  r   r   r   r  r  r   r  )rS  srcdstunsafe_aliasr  r  s         ru   realize_intoz'MutationLayoutSHOULDREMOVE.realize_into  s    	
##CLLN3c9%((C 	""~~'mmo* !$CLLNCLLN C1 GG$$11!Q7	 #  d  	#((//>:::4S9xxs   4E6c                    | S r   rs   r6  s    ru   r^  z#MutationLayoutSHOULDREMOVE.as_fixed  r  rt   c                6    | j                   j                         S r   )r|  r  r6  s    ru   r  z'MutationLayoutSHOULDREMOVE.make_indexer  r  rt   )r|  rc   r   r   r   r  )r   r   r   r   re  )r   r2  r  r  )ro   rp   rq   r  r  r   setterr(  r  r  rx  r  r^  r  rz  r{  s   @ru   r  r    sb    	* ) ) ]] 1 (    D*rt   r  c                      e Zd ZU ded<   ded<   d! fdZd"dZd#dZd$dZd%d	Ze	d&d
       Z
d'dZd(dZd)dZd*dZd+dZd Zd Zd,d!dZd!dZd!dZ	 d,	 d!dZd Zd-dZd.d/dZd Zd0dZd0dZd1dZd2dZd2dZd3dZd4d Z  xZ!S )5r2  r  r   r   r  c                F    t         |           | j                  dd        y r  )rB  r7  r2  rC  s    ru   r7  zBuffer.__post_init__  s    t4rt   c                >    | j                         j                         S r   )r   r  r6  s    ru   r  zBuffer.make_indexer  s     --//rt   c                @    | j                   sJ |        | j                   S r   r:  r6  s    ru   r  zBuffer.get_name  s    yy$yyyrt   c                >    | j                         j                         S r   )ri  r   r6  s    ru   r   zBuffer.get_device  s    ##%0022rt   c                     y r   rs   r6  s    ru   rJ  zBuffer.get_defining_op  rK  rt   c                6    | j                         j                  S r   )r   r   r6  s    ru   r   zBuffer.dtype  s     &&&rt   c                :    g | j                         j                  S r   )r   r   r6  s    ru   r   zBuffer.get_size  s    ("''((rt   c                :    g | j                         j                  S r   )r   r   r6  s    ru   r   zBuffer.get_stride  s    *"))**rt   c                6    | j                         j                  S r   )r   r  r6  s    ru   
get_offsetzBuffer.get_offset  r  rt   c                    t        | j                  t              r| j                  S t        t	        | j                        j
                        r   )rx   r  rm  r_  r   ro   r6  s    ru   r   zBuffer.get_layout  s4    dkk6*;;!$t{{"3"<"<==rt   c                    | j                   S r   r  r6  s    ru   ri  zBuffer.get_output_spec  rK  rt   c                "    | j                         S r   )rv  r6  s    ru   r  zBuffer.get_storage_numel  s    ~~rt   c                    t        | j                  t              r;t        | j                  t              s | j                  j	                         | _        y y y r   )rx   r  rm  r  r^  r6  s    ru   r  zBuffer.freeze_layout	  s>    dkk6*:KK4
 ++..0DK4
*rt   c                    t        | j                  t              sJ | j                  j                  ||      | _        y Nr0  )rx   r  r   r  r  s      ru   r  z&Buffer.freeze_layout_with_stride_order  s1    $++~666kk11%}1Urt   c                |    t        | j                  t              sJ | j                  j                  |      | _        y r   )rx   r  r   r  r  s     ru   r  z$Buffer.freeze_layout_with_fill_order  s,    $++~666kk//6rt   c                |    t        | j                  t              sJ | j                  j                  |      | _        y r   )rx   r  r   r  r  s     ru   r  z$Buffer.freeze_layout_with_same_order  s,    $++~666kk//7rt   c                    t        | j                  t              sJ | j                  j                  ||      | _        y r  )rx   r  r   r  r  s      ru   r  z'Buffer.freeze_layout_with_exact_strides  s8     $++~666kk22 3 
rt   c                    t         j                  j                  j                  t	        j
                  | j                         d            S rx  ry  r6  s    ru   r|  zBuffer.is_zero_elements#  r}  rt   c                p      j                         rt        t         j                               S  fd}|S )Nr\  c                x    j                         }t        j                  j                  xs d ||             S r  )r  rT   r  r   r   r  r0  s     ru   r  z"Buffer.make_loader.<locals>.loader+  s/    '')G88DII2GENCCrt   )r|  r
   r  r   r  s   ` ru   r  zBuffer.make_loader&  s0      "=0@AA	D rt   c                "    | j                         S r   r  r  s     ru   r  zBuffer.codegen_reference1  rP  rt   c                     y r   rs   r6  s    ru   r
  zBuffer.decide_layout4  r  rt   c                    t        | j                  t              r%| j                  j                  j	                         gS yr  )rx   r  r  r  r  r6  s    ru   r  z#Buffer.get_inputs_that_alias_output7  s/    dkk?3KK$$--/00rt   c                    t        | j                  t              r%| j                  j                  j	                         gS yr  )rx   r  r  r|  r  r6  s    ru   r  zBuffer.get_mutation_names<  s0    dkk#=>KK&&//122rt   c                6    t        | j                         g      S r   )r,   r  r6  s    ru   rA  zBuffer.get_read_namesA  s    4==?+,,rt   c                    t               S r   r+   r6  s    ru   r  zBuffer.get_unbacked_symbol_usesD  r&  rt   c                    t               S r   r+   r6  s    ru   r%  zBuffer.get_unbacked_symbol_defsG  r&  rt   c                     y r   rs   r6  s    ru   r  zBuffer.realizeJ  r  rt   c                     yr  rs   r6  s    ru   should_allocatezBuffer.should_allocateM  s    rt   r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r,  r  r  )"ro   rp   rq   rr   r7  r  r  r   rJ  r  r   r   r   r  r   ri  r  r  r  r  r  r  r|  r  r  r
  r  r  rA  r  r%  r  r  rz  r{  s   @ru   r2  r2    s     
503 ' ')+(>
 1V78
 ,1
	
W	

-rt   r2  c                  <    e Zd ZddZddZej                  ZddZy)OperationBufferc                    | gS r   rs   r6  s    ru   r#  zOperationBuffer.get_outputsU  s	    vrt   c                    | S r   rs   r6  s    ru   rJ  zOperationBuffer.get_defining_opX  r  rt   c                X    t         j                  |        t        j                  |        y r   )r2  r7  r  r6  s    ru   r7  zOperationBuffer.__post_init__^  s    T"%rt   Nr*  r   r  r  )ro   rp   rq   r#  rJ  r  r  r7  rs   rt   ru   r  r  R  s     #55&rt   r  c                      e Zd ZddZy)InputBufferc                     yrW  rs   r6  s    ru   r  zInputBuffer.num_readsd  r  rt   Nr  )ro   rp   rq   r  rs   rt   ru   r  r  c  s    rt   r  c                      e Zd ZdZy)DonatedBufferaY  
    Represents a donated buffer which is a saved tensor that is not alias to any
    fwd inputs, fwd user outputs, and bwd outputs. We generally cannot inplace
    reuse the input tensor memory during backward since it might be used in another
    function. However, donated buffer can be inplace reused during backward
    to save memory.
    N)ro   rp   rq   r  rs   rt   ru   r  r  h  s    rt   r  c                  ,    e Zd ZU dZded<   ddZddZy)r  Nr  r  c                     d fd}|S )Nc                    j                         j                         }t        j                  t        j
                  j                  j                         j                         ||             S r   )	r   r  rT   r  rV   r   constant_namer  r  r  s     ru   r  z*ConstantBuffer.make_loader.<locals>.loaderv  sP    oo'446G88%%dmmot7K7KL rt   r  rs   r  s   ` ru   r  zConstantBuffer.make_loaderu  s    	 rt   c                    t        t        j                  j                  | j	                         |      | j
                        S N)r   r  )r  rV   r   r  r  r  r  s     ru   r  z!ConstantBuffer.constant_to_device  s/    &&t}}?
 	
rt   r  r  )ro   rp   rq   r  rr   r  r  rs   rt   ru   r  r  r  s    .2O+2
rt   r  c                  6    e Zd ZddZddZd	d
dZddZddZy)NoneAsConstantBufferc                    t               S r   r+   r6  s    ru   r@  zNoneAsConstantBuffer.get_reads  r&  rt   c                    t               S r   r+   r6  s    ru   r  z-NoneAsConstantBuffer.get_unbacked_symbol_uses  r&  rt   Nc                J    t         j                  j                  j                  S r   )rV   r   r  none_strr  s     ru   r  z&NoneAsConstantBuffer.codegen_reference  s    ww##,,,rt   c                    t        d       S NrJ  )r  r6  s    ru   ri  z$NoneAsConstantBuffer.get_output_spec  s    &&rt   c                     yr  rs   r6  s    ru   rn  z&NoneAsConstantBuffer.has_tensor_output  r  rt   r  r,  r   r  r  r  )ro   rp   rq   r@  r  r  ri  rn  rs   rt   ru   r  r    s    -'rt   r  c                  2    e Zd ZU ded<   ddZdd	dZd
dZy)r   r   rC  c                ,    t        | j                        S r   )r&   rC  r6  s    ru   r  z.ShapeAsConstantBuffer.get_unbacked_symbol_uses  s    $TYY//rt   Nc                h    t         j                  j                  j                  | j                        S r   )rV   r   r  codegen_sizevarrC  r  s     ru   r  z'ShapeAsConstantBuffer.codegen_reference  s!    ww##33DII>>rt   c                     yr  rs   r6  s    ru   rn  z'ShapeAsConstantBuffer.has_tensor_output  r  rt   r,  r   r  r  )ro   rp   rq   rr   r  r  rn  rs   rt   ru   r   r     s    
J0?rt   r   c                       e Zd ZU ded<   ddZddZddZddZddZddZ	d fd	Z
dd
ZddZddZe	 	 dd       Z	 	 d 	 	 	 	 	 d!dZe	 d"d       Zd#dZddZd$dZd$dZd%dZ xZS )&r  r.  r  c                    | j                   | j                   S t        | j                  d      r| j                  j                   S y)z
        Returns self.name if it exists, otherwise returns the name of the data node if that exists.
        If neither exist, returns None.
        Nr   )r   r  r  r6  s    ru   get_computed_buffer_namez'ComputedBuffer.get_computed_buffer_name  s7    
 99 99499f%99>>!rt   c                6    | j                   j                         S r   r  r  r6  s    ru   r  zComputedBuffer.num_reads  rb  rt   c                6    | j                   j                         S r   r  r@  r6  s    ru   r@  zComputedBuffer.get_reads  rb  rt   c                6    | j                   j                         S r   rg  r6  s    ru   rA  zComputedBuffer.get_read_names  rh  rt   c                   t        j                  t        dd      5  | j                  j	                         rTt        | j                         | j                  j                         | j                  j                               cd d d        S t        | j                         | j                  j                               cd d d        S # 1 sw Y   y xY wr]  )
r   r   r   r  r  r:   get_store_functionrO  r  r   r6  s    ru   r  zComputedBuffer.get_read_writes  s    \\.*:DA 	yy++-*++-II002II002	 	 +++-II&&(	 	 	s   A%C1CCc                    t        | j                               t        | j                               z  t        | j                               z  | j                  j                         z  S r   )r&   r   r   r  r  r  r6  s    ru   r  z'ComputedBuffer.get_unbacked_symbol_uses  sV    & "$--/2#DOO$567#DOO$567 ii0023	
rt   c                    | j                         sS| j                  t        j                  j                  vr-| j                         dk(  r| j                  j                         S t        |          S rx  )	r  r   rV   r   mutated_buffersr  r  r  rB  rC  s    ru   r  zComputedBuffer.make_loader  sW    '')		!8!88 A% 99((**w"$$rt   c                   | j                         j                         j                         }t        | j                  t
        t        t        f      r+t        | j                  j                  | j                  |      S t        | j                  t              sJ t        | j                  j                  | j                  |      S r   )r   r^  r  rx   r  r  r  r  r
   r  r   r  r  rj  s     ru   r  z!ComputedBuffer.get_store_function  s    //#,,.;;=dii)T4!8949944diiIIdii33349911499gFFrt   c                P   t        | j                  t              r{t        j                  | j
                  j                         | j
                  j                               \  \  }}}| j                         j                  }t        d |D              sJ |D cg c]_  }t        |t        j                        rCt        |j                  |D ci c]#  }|dk7  s	|t        j                  j                   % c}      a }}}|rt        | j
                  t"        t$        f      r| j
                  j'                  ||      }n|}|D cg c],  }t(        j*                  j,                  j/                  ||      . }	}ddlm}
  |
|	| j5                               S yc c}w c c}}w c c}w )al  
        If our layout is still flexible, try to determine the stride order based on stride orders of reads.

        TODO(jansel): A better algorithm here would look at downstream consumers of this
                      value and try to do global graph-level layout optimization.
                      This is also something just begging to be autotuned.
        c              3  p   K   | ].  }t        |t        j                  t        j                  f       0 y wr   )rx   r3   StarDep	MemoryDepr  s     ru   r   z0ComputedBuffer.get_fill_order.<locals>.<genexpr>  s0       1|33\5K5KLMs   46r   r1   pick_loop_orderN)rx   r  r   r3   r   r  rO  r  r  r  r   r  rS   r   r   rX  rY  r  r  r   rV   r   r   r"  	schedulerr  r   )r0  
index_varsr  r   r  rU  vr  rC  stride_lengthsr  s              ru   r   zComputedBuffer.get_fill_order  sj    dkk>2.:.M.M		,,.		0L0L0N/+(Z! ((*00E      a!7!78 177n$WPQUVPVQ_$WXE  dii$6"ii//
NKG(GMR"EIAGG$$11$@" " 7&~t}}GG# %X"s$   3F
FF6	F1F#Fc                    t        | j                  t              r5| j                         }|r| j	                  |       y | j                          y y r   )rx   r  r   r   r  r  r  s     ru   r
  zComputedBuffer.decide_layout  s@    dkk>2'')E2259""$ 3rt   c                z   t        j                  | j                  j                         | j                  j	                         d      \  }}t        j                  t        d| j                               5  t        | j                         | j                         r|n|d d |g| }d d d        g }g }g }g }|j                         D ]^  \  }}	||d   v r'|rJ |j                  |       |j                  |	       4||d   v sJ |j                  |       |j                  |	       ` ||f||ffS # 1 sw Y   xY w)Nqr`   r  r1   r   )r3   r   r  rO  r  r   r   r  r   r<   r  r  itemsr  )
r0  r   
var_rangesrr  r  reduce_vars
index_sizereduce_sizer  r   s
             ru   get_default_sizes_bodyz%ComputedBuffer.get_default_sizes_body%  sI    (::II((*DII,H,H,JSV
j \\.*;T__=NO 	'')002Ra 	D	 
!#
$$& 	&DAqDG|&&!!!$!!!$DG|#|""1%""1%	& K($[0III)	 	s   52D11D:c                     j                         \  \  }}}\  }}|r |||f|||f      \  \  }}}\  }}g |j                  j                         |t        |t              rt        |      dk(  sJ |\  }}	t        |t              sJ t        |	t              sJ t        d |	D              sJ |j                  }
|
|k(  s	J |
|f       |	D cg c]	  }|vs| }	}|	z  g |j                         t        j                  j                   t        j                        sj!                  |j#                                 fd}||z   }t%        t'                      xs t(        j*                   } |||||      \  }}} |||||      \  }}}t-        j.                  ||d      \  \  }}}t1        | ||       ||      g|||      }||f|fS c c}w )an  
        This is a main place where we do loop transformations in a
        backend-agnostic way.

        Here we:
            1) Remove any 1 dimensions
            2) Fuse contiguous dimensions together
            3) Reorder dimensions based on stride orders

        Optional argument extra_indexing_constraints can be used to append additional
        indexing expressions to existing ones derived from buffer's body. This can be useful
        to fuse scheduler nodes with compatible ranges, e.g. (s0*s1*...,) and (s0, s1, s2, ...)
        on CPU by preventing indexing simplifications and obtaining index/reduce ranges for
        the scheduler node compatible with other nodes.
        Optional argument recompute_sizes_body_func can be used to recompute sizes and body
        on the default body. This can be useful to append additional loop transformations.
        r   c              3  <   K   | ]  }t        |t                y wr   )rx   r   )r   fs     ru   r   z6ComputedBuffer.simplify_and_reorder.<locals>.<genexpr>t  s     Hqz!T*Hr  c           	         j                  | ||
      \  }}} ||       } |rGt        j                  j                  j	                  | |t        	| |            \  }}}t        ||      }n|}|||fS r   )_apply_loop_reorderingrV   r   r   _simplify_loopsr6   r   )x_varssupport_varsrm  simplify_loopsreindex0r   r   _pruner   index_formulasmemory_addrsr0  s            ru   simplify_and_reorderzAComputedBuffer.simplify_and_reorder.<locals>.simplify_and_reorder  s    (,(C(Ce\)%E8X f%F*+''*:*:*J*J,^VUK+'x
 *(H="'8++rt   pr`   )r  indexing_exprsr|   rx   rz   r   r{   ry   r   r  get_write_exprsrV   r   r  r4   PREFER_STORE_LOOP_ORDERextendget_read_exprsrN   r   r2   loop_ordering_after_fusionr3   index_vars_no_squeezer<   )r0  extra_indexing_constraintsrecompute_sizes_body_funcr  r  rr  r  r  extra_indexing_rangesextra_indexing_exprexpected_var_rangesr6  r-  r'  should_merge_loopsiter_rangesiter_reindexr   reduce_rangesreduce_reindex	iter_varsr  r+  r,  s   `                     @@ru   r-  z#ComputedBuffer.simplify_and_reorderF  s`   4 '')		
%Z%Z %
 *[)4*k1J	)[)[
 94..5578%15u=23q89 :T6!#63T:::14888H4GHHHH"&//&*?? #%B ? /#!>2I# # 11N0--/0ww""4)O)OP 3 3 56	,$ "K/t,--VV5V5V1V 	 (<	(
$\1 ,@{4F,
(~q
 0</Q/Q0
, K*
 )$n[&AB
 ]+T11w#s   	GGc           
     X   ddl m} |g }	 |D cg c]-  }t        j                  j                  j                  || |      / }}t        |      t        |      k(  rt        |d         t        |       k(  sJ t        t         ||||                  }|D 	cg c]  }	||	   	 }}	|t#        |      t%        |      fS c c}w # t        $ rZ t        j                  r*t        j                  dt        t        | |            |       t        t!        t        |                  }Y w xY wc c}	w )zU
        Shuffle the order of loops around to hopefully improve performance.
        r1   r  r   z%Did not simplify complex index:
%s
%s)r  r  rV   r   r   r"  r   ry   r8  	Exceptionr2   r  r  warningr{   r   r   r   r   )
r  r'  rm  r,  priority_idxr  rC  r  r   r   s
             ru   r$  z%ComputedBuffer._apply_loop_reordering  s'    	/L	, )   --dJMG  w<3|#44WQZCM :   /'5,"OPQE $))aq))l5)?5+AAA#  	,||=Z/0 
 s5z*+E	, *s*   C 2B<AC D'<C A D$#D$c                6    | j                   j                         S r   r  r  r6  s    ru   r  z!ComputedBuffer.get_reduction_size      yy++--rt   c                6    | j                   j                         S r   r  r  r6  s    ru   r  z!ComputedBuffer.get_reduction_type  rG  rt   c                6    | j                   j                         S r   )r  r|  r6  s    ru   r  zComputedBuffer.is_no_op  re  rt   c                     yNTrs   r6  s    ru   r  zComputedBuffer.should_allocate  rK  rt   c                8    | j                   j                  |      S )r  r  r  r  s     ru   r  z!ComputedBuffer.constant_to_device  s    yy++F33rt   r  r  r  r  r  r,  r  )r   zCallable[..., None])r   zOptional[list[int]]r  )r   zetuple[tuple[list[sympy.Expr], list[sympy.Expr]], LoopBody, tuple[list[sympy.Expr], list[sympy.Expr]]]NN)r6  *Optional[tuple[dict[Any, Any], list[Any]]]r7  Optional[Callable[..., Any]]r   z:tuple[tuple[list[sympy.Expr], list[sympy.Expr]], LoopBody]r   r  r  r  )ro   rp   rq   rr   r  r  r@  rA  r  r  r  r  r   r
  rF   r  r-  r  r$  r  r  r  r  r  rz  r{  s   @ru   r  r    s    
K	%%*
2%G%N% J
J JD RVBFq2$Nq2 $@q2 
D	q2f  !B !BF..,4rt   r  c                  n     e Zd ZdZ	 	 	 	 	 	 	 	 d	 fdZd
dZd ZddZddZddZ		 	 d	 	 	 ddZ
 xZS )TemplateBufferzt
    Represents a Triton (in the future other type) of template operator
    that we can fuse an epilogue onto.
    c                    t         |   d |       t        j                  |      | _        || _        t        j                  j                  |       | _	        t        j                  j                  |        y r  )rB  r  InputsKernelunwrap_storageinputsmake_kernel_renderrV   r   register_bufferr   register_operation)r0  r  rW  rX  rD  s       ru   r  zTemplateBuffer.__init__  sY     	d62"11&9"4GG++D1		""4(rt   c                &    | j                  d      S )NT	normalize)r:   r6  s    ru   r  zTemplateBuffer.get_read_writes   s    ''$'77rt   c           	        | j                         | j                         j                         fd}t        j                  || j                         d|      }| j                  D ]f  j                  j                         fd}|xj                  t        j                  |j                         dd      j                  z  c_        h |S )Nc                ^    t        |      dk(  sJ t        j                   |       d      S )Nr   fake)r   rT   r  )r   r  r  r   s     ru   dummyz1TemplateBuffer.extract_read_writes.<locals>.dummy  s,    v;!###99T75>6::rt   rs   r\  c                z    t        |      dk(  sJ t        j                  j                          |              y rx  )r   rT   r  r  )r   r  r  inps     ru   ra  z1TemplateBuffer.extract_read_writes.<locals>.dummy  s-    6{a'''8rt   T)	r  r   r  r3   r:   r   rW  r  r  )r0  r]  ra  depsr  rc  r   s       @@@ru   r:   z"TemplateBuffer.extract_read_writes  s    }}//#002	; //4==?B)
 ;; 		Cjj--/G9 JJ,::s||~rTeJ		 rt   c                6    t         j                  j                  S r   )r   rX  r  r6  s    ru   r  z!TemplateBuffer.get_reduction_size  s    ww{{rt   c                     y r   rs   r6  s    ru   r  z!TemplateBuffer.get_reduction_type  rK  rt   c                     yrL  rs   r6  s    ru   r  zTemplateBuffer.should_allocate"  rK  rt   c                *    | j                         dfd fS r  rt  )r0  r6  r7  s      ru   r-  z#TemplateBuffer.simplify_and_reorder%  s$      
 	
rt   )r  rm  rW  Sequence[IRNode]rX  r/  r   r   r  r  r  r  rO  )r6  rP  r7  rQ  )ro   rp   rq   r  r  r  r:   r  r  r  r-  rz  r{  s   @ru   rS  rS    sn    

)
) !
) /	
)
 

)82
 RVBF
$N
 $@
rt   rS  c                  H     e Zd Z	 	 d	 	 	 	 	 d fdZddZddZd	dZ xZS )
TritonTemplateBufferc           
     *   t         
|   |||       || _        | g| _        |t        j
                  j                  j                  t        j
                  j                  j                  f}t        j                  j                  j                  }||v sJ d| d|        | j                  d   j                         }| xj                  |D 	cg c]  }	t        t!        |      |	|        c}	z  c_        |r|| _        yt#               | _        yc c}	w )a  
        NOTE:[TritonTemplates with multiple outputs]
        We want the ability for TritonTemplates to output multiple tensors. Triton
        kernels have no notion of outputs and this is done by creating tensors that
        are then mutated by the kernel. Currenlty our STORE_OUTPUT codegen doesn't
        support creating multinode outputs for triton templates.
        We work around this by creating an extra input buffer during the lowering
        and we mark them as mutated inputs.
        Nz$Mutated inputs are only allowed for z	 but got r   rJ  )rB  r  mutated_inputsoutputsr   rT   higher_orderflex_attentionflex_attention_backwardrV   r   current_noder|  rW  r   MutationOutputr  r,   allowed_prologue_inps)r0  r  rW  rX  rm  rt  allowed_setrr  r   r  rD  s             ru   r  zTritonTemplateBuffer.__init__4  s   " 	);<,&*V% 		&&55		&&>>K 77//66L;. 6{m9\N[. [[^..0FLL) z8#tD L &;! 	"@J 	"s   Dc                    | j                   S r   )rn  r6  s    ru   r#  z TritonTemplateBuffer.get_outputs\  r  rt   c                    | j                   S r   )rt  r6  s    ru   get_allowed_prologue_inpsz.TritonTemplateBuffer.get_allowed_prologue_inps_  s    )))rt   c                &    d| j                    d}|S )NzTritonTemplateBuffer(layout=r   r  )r0  r   s     ru   rH  zTritonTemplateBuffer.__str__b  s    ,T[[M;
rt   rO  )rm  zOptional[Iterable[IRNode]]rt  zOptional[OrderedSet[str]]r   r   r*  r  r  )ro   rp   rq   r  r#  rx  rH  rz  r{  s   @ru   rk  rk  3  s>     6:;?&

 3&
  9&
 
&
P*rt   rk  c                  n     e Zd ZdZ	 	 	 	 	 	 	 	 	 	 d
 fdZddZddZd ZddZddZ	ddZ
dd	Z xZS )ChoiceCallera.  
    Represents a possible choice used in autotune_process.py.
    During autotuning, self.benchmark() is first called to get benchmark result,
    and if this choice is selected, self.output_node() is called to get the output_node.

    Children classes: TritonTemplateCaller, CUDATemplateCaller.
    c                Z    t         |           || _        || _        || _        || _        y r   )rB  r  r   r  rh   description)r0  r   rh   r  r}  rD  s        ru   r  zChoiceCaller.__init__s  s0     		& 'rt   c               T    | j                         }t        j                  ||d|i      S )Nr   )to_callablerA   	benchmark)r0  r   r   algos       ru   r  zChoiceCaller.benchmark  s)    !$$T4%>>rt   c                    t         r   r  r6  s    ru   	call_namezChoiceCaller.call_name  r  rt   c                    t         r   r  r6  s    ru   r  zChoiceCaller.to_callable  r  rt   c                    t         r   r  r6  s    ru   hash_keyzChoiceCaller.hash_key  r  rt   c                    t         r   r  r6  s    ru   output_nodezChoiceCaller.output_node  r  rt   c                    i S )zRInformation returned here is logged to the autotune log file when that is enabled.rs   r6  s    ru   	info_dictzChoiceCaller.info_dict  s    	rt   c                     y)Nunsupported_choicers   r6  s    ru   autoheuristic_idzChoiceCaller.autoheuristic_id  s    #rt   )
r   r   rh   r+  r  rm  r}  r   r   r   )r   r  r  )r   rb   )r   z<dict[str, Union[PrimitiveInfoType, list[PrimitiveInfoType]]])ro   rp   rq   r  r  r  r  r  r  r  r  r  rz  r{  s   @ru   r{  r{  j  s`    '' "' 	'
 ' 
'?""""$rt   r{  c                      e Zd ZddZy)TritonTemplateCallerBasec                    t         r   r  r6  s    ru   get_make_kernel_renderz/TritonTemplateCallerBase.get_make_kernel_render  r  rt   N)r   r   )ro   rp   rq   r  rs   rt   ru   r  r    s    "rt   r  c                       e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 d fdZed	d       Zed
d       Zej                  dd       Z
ddZddZ xZS )MultiTemplateBufferaG  
    Represents a Buffer with multiple backing implementation choices.

    Choices can be TritonTemplates or ExternKernels. During scheduling if there is a potential
    epilogue we will benchmark each of the choices with the epilogue to determine an implementation.
    Otherwise, the fastest base choice will be chosen.
    c                    t         |   ||d |       || _        d | _        || _        t        d |D              | _        y )N)r  rW  rX  rt  c              3     K   | ]R  }t        |t              xs< t        |t        j                  j                  j
                        xr |j                   T y wr   )rx   r  r   	_inductorselect_algorithmExternKernelCallerhas_out_variant)r   choices     ru   r   z/MultiTemplateBuffer.__init__.<locals>.<genexpr>  sT      %
  v78 65??#C#C#V#VW +**%
s   AA)rB  r  _choice_timings_fn_choice_timingsoriginal_inputsr   _output_plannable)r0  r  rW  choice_timingsunfiltered_choicesrt  rD  s         ru   r  zMultiTemplateBuffer.__init__  sY     	#"7	 	 	
 #1DH%!$ %
 -%
 "
rt   c                    | j                   S )z^
        Are all possible choices TritonTemplates or Extern Kernels with out variants
        )r  r6  s    ru   output_plannablez$MultiTemplateBuffer.output_plannable  s    
 %%%rt   c                \    | j                   | j                         | _         | j                   S r   )r  r  r6  s    ru   r  z"MultiTemplateBuffer.choice_timings  s+    '#'#:#:#<D ###rt   c              #    K   t        |t        j                  j                  j                        sJ | j
                  |j
                  k(  sJ | j                  }|j                         | _        	 d  || _        y # || _        w xY wwr   )rx   r   r  r  TritonTemplateCallerr  rX  r  )r0  callerrenders      ru   swap_as_triton_callerz)MultiTemplateBuffer.swap_as_triton_caller  sp     &%//"B"B"W"WXXX{{fmm+++(("("?"?"A	-&,D#fD#s   A-B0A< 4B<	BBc                2   t        |t        j                  j                  j                        sJ | j                         |j                  j                  k(  sJ | j                         |j                  j                  k(  sJ |j                         | _        y r   )rx   r   r  r  r  r   r  r   r   r   r  rX  )r0  r  s     ru   finalize_as_triton_callerz-MultiTemplateBuffer.finalize_as_triton_caller  sp    &%//"B"B"W"WXXX}}&--"4"4444 FMM$8$8888"("?"?"Art   c                z    t        | j                  | j                  j                        }|| j                  |   fS )Nr  )r  r  rN  )r0  
min_choices     ru   get_min_choicez"MultiTemplateBuffer.get_min_choice  s6    ,,$2E2E2I2IJ
D//
;<<rt   )r  rm  rW  ri   r  z'Callable[[], dict[ChoiceCaller, float]]r  zlist[ChoiceCaller]rt  r  r   r   r  )r   zdict[ChoiceCaller, float])r  r  )r  r  r   r   )r   ztuple[ChoiceCaller, float])ro   rp   rq   r  r  r  r  r  r	  r
  r  r  r  rz  r{  s   @ru   r  r    s    

 
 @	

 /
  /
 

4 & & $ $
 	- 	-B=rt   r  c                  2     e Zd Z	 	 	 	 	 	 d fdZd Z xZS )CUDATemplateBufferc                D    t         |   |||       || _        || _        y r   )rB  r  workspace_sizetemplate)r0  r  rW  rX  r  r  rD  s         ru   r  zCUDATemplateBuffer.__init__  s&     	);<, rt   c                6    | j                   | j                   S dS rx  )r  r6  s    ru   r)  z%CUDATemplateBuffer.get_workspace_size  s    &*&9&9&Et""L1Lrt   )r  r   r  rX   r   r   )ro   rp   rq   r  r)  rz  r{  s   @ru   r  r    s,    !
 ! ! 
!Mrt   r  c                  ,     e Zd Zd fdZd fdZ xZS )CppTemplateBufferc                R    t         |   |||       || _        || _        d | _        y r   )rB  r  r  r  rn  )r0  r  rW  rX  r  r  rD  s         ru   r  zCppTemplateBuffer.__init__  s*    );< /3rt   c                   t        | j                  t              r]t        | j                  t              sJ | j                  d   }t        |t
              sJ |j                  }t        |t              sJ |S t        | !         S rx  )	rx   r  MultiOutputLayoutrn  r   r2  rm  rB  r   )r0  first_outputr  rD  s      ru   r   zCppTemplateBuffer.get_layout  sq    dkk#45dllH555<<?LlF333!((Fff---M7%''rt   r  r  )ro   rp   rq   r  r   rz  r{  s   @ru   r  r    s    4	( 	(rt   r  c                  Z    e Zd ZU ded<   d
dZddZedd       Zed        Z	ddZ
ddZy	)rU  r+  rW  c                   t        t        j                            }t        j                  | j                  D ]c  }t        |t              r|j                  fd|D               .t        |t              r?|j                   |j                                      e t        t        j                     fd| j                         D              }t        j                  ||t                     S )Nc              3  J   K   | ]  } |j                                 y wr   r  )r   r   r  s     ru   r   z/InputsKernel.get_read_writes.<locals>.<genexpr>  s     BqWQZZ\2B    #c              3  J   K   | ]  } |j                                 y wr   r  )r   r  r  s     ru   r   z/InputsKernel.get_read_writes.<locals>.<genexpr>  s!      .
(+GCLLN#.
r  )r  writesindex_exprs)r,   r3   r7   r  rW  rx   ry   updater   r  r  r#  
ReadWrites)r0  r  inputr  r  s       @ru   r  zInputsKernel.get_read_writes  s    <++,.&&[[ 	5E%&BEBBE#89		'%.."234	5 L,,- .
/3/?/?/A.
 
 &&"
 	
rt   c                6    | j                         j                  S r   r  r6  s    ru   r@  zInputsKernel.get_reads%  r  rt   c                   t        |t              r|j                  }t        |t              r|j                  }t        |t              r%t        |t
              st        j                  |      }t        |t              r| j                  |      S t        |t              r|S t        |t        t
        f      sJ |       |S r   )rx   rb   r  r1  r8  r  r  realize_inputunwrap_storage_for_inputTorchBindObjectr2  rS  r   s     ru   r  z%InputsKernel.unwrap_storage_for_input(  s    a#Aa$Aa":a+I**1-Aa#
 //22a)H!fo67::7rt   c                    g }| D ][  }t        |t              r#|D cg c]  }t        j                  |       }}nt        j                  |      }|j	                  |       ] |S c c}w r   )rx   ry   rU  r  r  )rW  
inputs_newr   r   s       ru   rV  zInputsKernel.unwrap_storage;  sj    
 	!A!T"GHI!\::1=II 99!<a 	! 	 Js   A%c                     yrL  rs   r6  s    ru   r  zInputsKernel.is_externF  rK  rt   c                     yrW  rs   r6  s    ru   r  zInputsKernel.num_readsI  r  rt   Nr  r  )r   rc   r   rc   r  r  )ro   rp   rq   rr   r  r@  rx  r  r  rV  r  r  rs   rt   ru   rU  rU    sD    
,,  $  rt   rU  c                      e Zd ZddZddZy)	NopKernelc                     yrL  rs   r6  s    ru   r  zNopKernel.is_no_opN  rK  rt   c                    t               S r   r+   r6  s    ru   r@  zNopKernel.get_readsQ  r&  rt   Nr  r  )ro   rp   rq   r  r@  rs   rt   ru   r  r  M  s    rt   r  c                  J    e Zd ZdZed        Zedd       Zed        ZddZy)	ConcatKernelzn
    There isn't actually a real kernel for concat, we just change the
    storage for the upstream data.
    c                ,	   |d   j                         }|d   j                         }t        |d   j                               }dg}||   g}d|cxk  rt	        |      k  sJ  J t        dt	        |            D ]  }||   j                         }	|j                  ||          t	        |	      t	        |      k(  sJ ||   j                         |k(  sJ ||   j                         |k(  sJ t        t	        |            D ]I  }
|
|k(  r||
   |	|
   z   ||
<   t        j                  j                  j                  ||
   |	|
         ||
<   K |j                  ||           t        j                  |      }t        j                  r$t        j!                  |||d   j"                        }t        t	        |            D ]k  }||   }t%        |      s|j'                         }t)        |t*              s5t        j-                  |j.                  |j0                        s`t3        |      } n t5        d |D              }t        j                  j6                  j8                  d   }t)        |t              sJ |du rt5        d |D              rt3        |      }t;        d t+        ||||      g       }t=        |      }g }t        t	        |            D ]  }| j?                  ||   t@        jC                  ||||   ||   d            }|jD                  j                  |       t)        ||   jF                  tH              r||   jF                  jK                         }n||   jF                  }|jM                         stO        ||   j                         jP                        stS        |      r|j                  |jU                                 t	        |      dkD  rMt        j                  jW                  |tX        jZ                        rt        j                  j]                  |       t        j                  j_                  |      |_0        | jc                  |jD                        |_"        t        j                  je                  |       |S )	Nr   r1   c              3  2   K   | ]  }t        |        y wr   )r   r  s     ru   r   z&ConcatKernel.create.<locals>.<genexpr>  s     -W1.CA.F-Wr7  Fc              3     K   | ]p  }d |j                   v xr\ |j                   d    j                  t        j                        xs- |j                   d    j                  t        j                         r yw)rN  ry  N)rH  r)  r   rt  rv  )r   args     ru   r   z&ConcatKernel.create.<locals>.<genexpr>  sq      <
  SXX --E<O<O-P W88E?00u?U?U0V<
s   A6A8)r   r   r   r   r   r  rW  )r  )3r   r   ry   r   r   r   r  rV   r   r   r  r   r   r2   r\  rm  rY  r   r   r   rx   r  r6  r   r   r"   r  rr  r   r  r1  r  r  rR  rW  r  r8  r  is_input_bufferrN   r   rM   r  r  r4   FOREACHregister_operation_listrY  r   rV  rZ  )rS  rW  r  r   r   rs  offsets_startoffsets_endr   
input_sizer4  output_strider   r  any_input_is_storage_and_layoutfx_node_argsconcat_kernelkernelop_namesinput_bufferinput_unwrappeds                        ru   rR  zConcatKernel.create[  s   %%'q	##%q	**,-}oC'#h-'''''q#f+& 	.A++-J  #/z?c(m333!9&&(E111!9'')V3333x=) 8"*1+
1"=HQK"#''"2"2"?"? Z]#HQK	 x}-	. '99(C''"//xM
 s6{# 		Aq	A$Q'K88fmmT$B8$LM		 +.-WPV-W*W'ww++003,---*e3 <
 $<
 9
 ;8DM$$	 	
 M*s6{# 	CA++q	  Cq!1;q> ! L   ''5&)..(3"().."<"<">"()..  //16!9//1667"<0 ? ? AB'	C* x=1!4!4V^=S=S!TGG++H5WW44]C"11-2F2FG	""=1rt   Nc                d   t        |t              r| j                  |j                  |      S t        |j                  t              rt        |j                  j
                  t              r|j                  j                  sy|yt        |j                               t        |j                               k(  syt        d t        |j                         |j                               D              S t        |j                  j
                  t              xr t        |j                  t               S )NFTc              3  v   K   | ]1  \  }}t         j                  j                  j                  ||       3 y wr   r  r  s      ru   r   z=ConcatKernel.can_realize_into_without_copy.<locals>.<genexpr>  s3      B   88R@r  )rx   rb   can_realize_into_without_copyr  r  r  r  r  r   r   r   r   r   ExternKernelAlloc)rS  r  r  s      ru   r  z*ConcatKernel.can_realize_into_without_copy  s    c9%44SXXsCCchh 34sxx<xx00 { s~~'(C0@,AA !#.."2CNN4DE  
 #((//>: 
:HH'D
 @
 	
rt   c                L   t        |t              s&t        |      rt        |      \  }}t        ||      }t        |t              sJ |       t        |t              r| j                  |j                  |      S t        |t              r`|j                          t        |j                  d      sJ | j                  ||      r&t        |      |j                  _        |j                  S t        j                  |j                         |j!                         |j#                         t%        |j'                         |j'                               D cg c]/  \  }}t(        j*                  j,                  j/                  ||      1 c}}      }| j                  ||      S c c}}w )Nr  r  r  )rx   r  r   r  rb   r  r  r1  r  r  r  r  r  r  rR  r   r   r  r   r   rV   r   r   r  )rS  r  r  r  r  r  r  pws           ru   r  zConcatKernel.realize_into  sH   
 #/$S)"7"<%76B#/44/c9%##CHHc22c:&KKM388X...00c:"1#"6xx>>#--/__&  ?Aq   --a3	  
 C((s   4F c                     yrL  rs   r6  s    ru   r  zConcatKernel.should_allocate  rK  rt   r   r  )	ro   rp   rq   r  rx  rR  r  r  r  rs   rt   ru   r  r  U  sL    
 ^ ^@ 
 
< ) )@rt   r  c                      e Zd ZU dZded<    ej                  e      Zded<   dZ	ded	<   dZ
d
ed<   dZd
ed<    ej                  e      Zded<   dZded<   dZded<   dZded<    ej                  e      Zded<    ej                  e      Zded<   	 	 	 	 	 	 	 d9	 d: fdZd;dZd<dZd Zd Zd:dZd Zd=d>d Zd?d!Zd" Zed#        Ze	 	 d@d$       Z ed%        Z!ed&        Z"ed'        Z#e	 	 	 dA	 	 	 dBd(       Z$edCd)       Z%edCd*       Z&ed+        Z'ed,        Z(ed-        Z)d:d.Z*d/ Z+d=dDd0Z,d1 Z-d2 Z.dCd3Z/d:d4Z0d5 Z1d6 Z2d<d7Z3dEd8Z4e4Z5 xZ6S )Fr  rs   ztuple[Any, ...]constant_argsr  zdict[str, Any]r   NzOptional[ReinterpretView]output_viewr  python_kernel_namecpp_kernel_namezIterable[str]ordered_kwargs_for_cpp_kernelzFOptional[Union[torch._ops.OpOverload, torch._ops.HigherOrderOperator]]op_overloadzOptional[list[dict[str, Any]]]arg_propertiesz#Optional[dict[str, dict[str, Any]]]kwarg_propertiesz"dict[sympy.Symbol, pytree.KeyPath]unbacked_bindingszlist[MutationOutput]mutation_outputsc                6   t         |   |||       || _        |r|ni | _        || _        |
| _        | j                  |       | j                  |       |	| _        | j                          i | _
        g | _        t        j                  j                  | _        y Nr  )rB  r  r  r   r  r  set_cpp_kernel_nameset_python_kernel_namer  collect_arg_kwarg_propertiesr  r  rV   r   rr  fx_node)r0  r   r  rW  r  r   r  r  r  r  r  rD  s              ru   r  zExternKernel.__init__  s     	 	 	

 + &fB&&  1##$67-J*))+!# "ww++rt   c                     | g| j                   S r   )r  r6  s    ru   r#  zExternKernel.get_outputs4  s    -t,,--rt   c                    t               S r   r+   r6  s    ru   r%  z%ExternKernel.get_unbacked_symbol_defs7  r&  rt   c                @   t        | j                  t        j                  j                        r\| j                  j
                  j                  D cg c]2  }|j                  s$|j                  |j                  |j                  d4 c}n+t        t        | j                              D cg c]  }i  c}| _        t        | j                  t        j                  j                        rP| j                  j
                  j                  D ci c]&  }|j                  |j                  |j                  d( c}ni | _        t        | j                  t        j                  j                        r| j                   sJ| j                  j
                  j                  D cg c]  }|j                  s|j                   c}| _        | j                  j
                  j                  D cg c]  }|j                  s| c}| _        y y c c}w c c}w c c}w c c}w c c}w )N)r   r   rh  )r   rh  )rx   r  r   _ops
OpOverload_schema	arguments
kwarg_onlyr   	real_typerh  r   r   rW  r  allarg_propertiesr  schema_kwargs)r0  r   r   s      ru   r  z)ExternKernel.collect_arg_kwarg_properties:  s    $**EJJ,A,AB ))11;; || FFKK%&__ $C$456"6 	$ $**EJJ,A,AB ))11;; qOO
  	 d&&

(=(=>55$($4$4$<$<$F$F6 !,,AFF62  ++33==""D ?- 76"s*   7H+	H+HH/H'H9Hc                z    t        | j                  t              r!| j                          | j	                          y y r   )rx   r  r   apply_constraintr  r6  s    ru   r
  zExternKernel.decide_layout]  s-    dkk>2!!#  3rt   c                J    t        | |      \  }}|r|j                  |       y y r   )rK   r  )r0  wrapper
origin_str_detailed_origin_strs       ru   codegen_commentzExternKernel.codegen_commentb  s*    +>tW+M(
(j) rt   c                    t         r   r  r0  r  s     ru   codegenzExternKernel.codegeng  r  rt   c                   || _         t        j                  j                  r.t	        | j
                  t        j                  j                        sy | j
                  }| j                   |j                  dk(  rU|j                  dk(  r|j                  j                  d      d   n|j                  j                  dd      }d| d| _         y |j                  j                  | _         y y )Natenrw  .r   r   z
at::_ops::z::call)r  rV   r   cpp_wrapperrx   r  r   r  r  	namespace_overloadnamero   r-  replacer  r   )r0  r  r  opnames       ru   r  z ExternKernel.set_cpp_kernel_namej  s    .ww""*ejj33+
 !!'6) ++y8 OO))#.q100c: 
 *4F86'B$'-~~':':$ (rt   c                   || _         |y | j                  }|y t        |t        j                  j
                        rd|j                   | _         y |j                  j                  dd       d|j                   | _         y )Nztorch.ops.higher_order.z._ops.z.ops.r  )	r  r  rx   r   r  HigherOrderOperatorro   rp   r  )r0  r  r  s      ru   r  z#ExternKernel.set_python_kernel_name  s    "4)!!>

 > >?(??P&QD# $$,,Xw?@&//ARS #rt   c                &   | j                         x}r|j                  nt        j                  j                  }t        j                  j
                  r4t        j                  j                  j                  | j                  |      S | j                  S r   )
r   r   rV   r   device_typer  r  get_c_shim_func_namer  r  )r0  dr   s      ru   get_kernel_namezExternKernel.get_kernel_name  sn    !%!22A29L9L ww"" GG  55d6J6JFS	
 ((	
rt   c           	        t         j                  | j                         | j                         | j	                         | j                         | j                         | j                               }|j                          |S )N)r   r   r0  r1  r*  r(  )	r  rR  r   r   r  r   rG  rD  r  )r   r  s     ru   
copy_inputzExternKernel.copy_input  sa    <<>++-]]_::<))+oo'  
 	

	rt   c                H   ||d}t        j                  |      \  }g g }g }|D ]  }j                  t        |t              xr t        |t
                      d   r|j                  |       Lt        |t        j                        r5t        j                  j                  j                  j                  |d       }|j                  |        fd}	|D 
cg c]  }
| j                  |
       }}
|D ]  }
t        |
      st        |
d        g }|D ]  }
t        |
t               se|
j#                         t        j                  j$                  v r;|j                  t        j                  j$                  |
j#                                   yt        |
t               se|
j#                         t        j                  j&                  v r;|j                  t        j                  j&                  |
j#                                   t        |
t(              r!|j                  |
j+                                t        |
t,        j.                  j0                  j
                        ro|
j2                  j4                  }|
j2                  j6                  dk(  r|J |j                  t,        j8                  j:                  |   j=                                |j                  t?        |
d               |	||      \  }} ||i |}d }t        j@                  j                  x}rOtC        |t        jD                  |       tG        ||t        jD                  jH                  jK                  d	            }t        |tL        tN        f      s|gn|}|D ]~  }t        |t,        jP                        s|jR                  s+d
}t        j                  jD                  jH                  jK                  dd       x}r| d| }|t        j                  _*         ||||	|fS c c}
w )N)r   r   r   )r]  c                $   g }t        |       }t        |      }D ]9  }|r|j                  t        |              |j                  t        |             ; t        j                  |      }|j                  dg       |j                  di       fS )Nr   r   )iterr  nextpytreetree_unflattenrN  )	new_tensor_argsnew_non_tensor_argsr  
it_tensorsit_non_tensors	is_tensorrU  	args_specis_arg_tensors	          ru   unflatten_argsz3ExternKernel.process_kernel.<locals>.unflatten_args  s    Fo.J!"56N* 8	MM$z"23MM$~"67	8
 %%fi8A55$aeeHb&999rt   Tr#  r   )r   rN  zEsparsity not handled. Please file issue for sparse inference weights.stack_tracez Found from : 
 )+r*  tree_flattenr  rx   rc   GeneratorStater   r   rV   r   r   r   create_symintnoder  r   r  r8  r  	constantstorchbind_constantsr  get_real_objr   r  irr   r   r   r   default_generatorsclone_stater   	fake_moder'   rr  r%   rH  rN  ry   rz   Tensor	is_sparsedisable_cudagraphs_reason)rS  r  r   r   binded_args	args_flattensor_argsnon_tensor_argsr  r3  r   example_argsdevice_indexnew_args
new_kwargsexample_outputr  r   example_out_lir   msgr4  r1  r2  s                         @@ru   process_kernelzExternKernel.process_kernel  s{     $v6%22;?	9%' 		,C  3'O
30O,O R ""3'c5::.''**44FFsQUFVC&&s+		,
	: 6AAs((+AA  	6A$Q'%a5	6  	  	LA a*qzz|qww?P?P/P##AGG$5$5ajjl$CDq(+JJLAGG$?$??##AGG$?$?

$MNA/##ANN$45Au11@@A xx~~xx}}.<3KKK##JJ11,?KKM ##$5aT$JK'	L*  .lOL*8Z8JN---9-Iq~~~F 9>1>>+>+>+B+B5+I! ntUm<  	
   	8A!U\\*q{{]"#''"6"6";";"?"?t"TT;T E!2;-@C471	8 
 	
{ Bs    Pc           
        t        |t              sJ t        |t              r|S |j                         }t        j
                  j                  |j                               }|J |j                         }|d|j                  v rt        |j                  t              r|j                  d   j                  t        j                        s-|j                  d   j                  t        j                        r)|j!                  t#        |j%                                      n|j'                          t)        j*                  |j%                         d      \  }}|d   } |j-                         |      }t        j
                  j.                  j1                  ||      }t        j
                  j.                  j3                  ||      }	t        j
                  j.                  j5                  ||      }
t7        ||	      |
z   }||k7  rt8        j;                  d|	|
|       t<        t        |j>                  tA        |jC                         |jE                         |j%                         |	|
            S )	z
        In order to pass this to an extern kernel we need a
        ReinterpretView not a View.  This allows us to avoid some
        unneeded copies.
        rN  r  rU  r`   r   z@convert_to_reinterpret_view failed: stride=%s offset=%s index=%sra  r  )#rx   r8  r  r  rV   r   r  r  rG  rH  r  r   r)  r   rt  rv  r  r"   r   r  r3   r   r  r   r!  stride_vars
offset_varrO   r  r  r_  r  r  r  r   )rS  r   x_unwrap_viewr  x_unwrap_view_fx_node
index_argsr  r  r   r  r  expecteds               ru   convert_to_reinterpret_viewz(ExternKernel.convert_to_reinterpret_view  s    !X&&&a)H gg  !7!7!9: # 3 3 5 "-.333=//@%**51??"'"5"5 @  )--e4BB"'"8"8 C 
 77.}/E/E/GH '')!-!@!@JJL"

J  ]
  ,  55eZH''""..ujA!!,,UJ?Z1F:HIIR	 &%,,.kkmZZ\	
 		
rt   c                   |
t               S t        |t        j                  t        j                  j
                  j                  t        f      rt        |      S t        |t              r[t        j                  j                  t        j                  |j                  |j!                         |j#                                     S t        |t$              r|S t        |t&              r| j)                  |j*                        S t        |t,              r4t-        | j)                  |j*                        |j/                               S t        |t0              r;|j3                          t5        |j7                               r	 | j9                  |      S t        |t<              r|j3                          |S t        |t>        t        f      r|S | jA                  |      S # t:        $ r Y Vw xY w)N)rC  )r   r   r  )!r  rx   r   r   r   r   r   r   r   r  rV   r   add_tensor_constantr   r  r   r   r   r  rb   r  r  r  r   r8  r  r   r  rU  r_  r1  NonTensorObjr%  r  s     ru   r  zExternKernel.realize_inputY  sm   9'))a%**ekk&9&9&A&A3GH(a00a"77..QWWAKKM!,,.Q  a(Ha#$$QVV,,a)"&&qvv.q||~  a"IIK$Q]]_5::1== a$IIKHa,(=>?H~~a   + s   G 	G*)G*c                    t        |      r<t        |j                               dk(  r|S |j                         D ]  }|dk(  s	|c S  | j                  |      S rE  )r   r   r   r%  )rS  r   r   s      ru   require_stride1zExternKernel.require_stride1z  sT     #1<<>"a',,. Q;H ~~a  rt   c                	   ||J |j                         dv r|s|S t        |      rt        |j                         t              r}|rht        |ddt        ||      rJt        t        j                  j                  j                  |j                         j                              n||       |S t        |ddd ||       |S t        |j                         t        t        f      rf|r|j                         j                  |      s5|rCt!        ||j                         j                  |j#                               r|t%        ||      S |S t        |j                         t&              rt        |j                         j)                         t              rt+        d      t        |j                         j)                         t              rt|r-|j                         j)                         j                  |      sC|rCt!        ||j                         j)                         j                  |j#                               r|S t        |t,              rX|r|j                         j                  |      s5|r5t!        ||j                         j                  |j#                               r|S t        |t.              rt        |j0                  t2              rt        |j0                  t4              st        |j7                               rvt        |j7                         j0                  t8              sN	 | j;                  |j0                        |_        |r| j=                  |||      S |r| j?                  |||      S 	 d }|j#                         }|t        j                  j                  }tC        tE        |j#                                     D cg c]<  }|jG                  ||   d      r%|jI                  |j#                         |   d	      r|> }}|D ].  }	tJ        jL                  jN                  jQ                  ||	dd
      }0 | jS                  |      }t        |dd|||       |rt        ||      sJ |S |r<||J tJ        jL                  jN                  jU                  ||      }t%        ||      S |S # t@        $ r Y Hw xY wc c}w )N)r   r1   TF)r$  r.  r/  r  r-  zHthe MutationLayoutSHOULDREMOVE's real layout shouldn't be FlexibleLayoutr0  r   r   r1   )+rv  r   rx   r   r   r  r6  r   rV   r   r   
size_hintsr   r  r  r5  r  r   r!  r  r  r  r  rb   r  r8  r  r  r  rU  require_stride_orderr  r_  r   r   r  r  r   r  loweringslice_r%  rB  )
rS  r   r   r  r  expanded_dims	orig_sizer   r   r  s
             ru   require_strideszExternKernel.require_strides  s>     M$===;;=F"=H !#!,,..9 *#(-
  B!UK - ! 0 0 ; ;ALLN<Q<Q R "'&3 H *#(-%)&3&3 HALLN[/,JK1<<>;;EB!1%q||~'<'<ajjl %0 4A}E 
 ALLN,FGalln88:NK(b    : : <kJq||~99;MMeT%5)LLN668??JJL H a%q||~77>-!1<<>#8#8!**,
 Hq)$1668,qvv7%ammo6q}}335FG88@335 4   #44= 5   # .2JJL	$ww''H s1::<0133M!4DaH11!**,q/1E M  % BOO,,33AsAqAB
 NN1!''	
 5a???  (]-FFF((//9=A21mDDW ' s   15R: 'R: AS
:	SSc                *    | j                  |||      S )N)r  r  rb  )rS  r   r  r  s       ru   r  z"ExternKernel.require_exact_strides!  s!    ""]- # 
 	
rt   c                *    | j                  |||      S )N)r   r  rd  )rS  r   r   r  s       ru   r]  z!ExternKernel.require_stride_order'  s    ""1E"OOrt   c                .    | j                  |t              S r   )r]  ru  r  s     ru   require_channels_lastz"ExternKernel.require_channels_last+  s    ''+<==rt   c                .    | j                  |t              S r   )r]  rw  r  s     ru   require_channels_last_3dz%ExternKernel.require_channels_last_3d/  s    ''+=>>rt   c                    | j                  |t        t        t        t	        |j                                                       S r   )r]  ry   r8  r   r   r   r  s     ru   require_contiguouszExternKernel.require_contiguous3  s/    ''4s1::<?P9Q0R+STTrt   c                     y r   rs   r6  s    ru   r  zExternKernel.apply_constraint7  r  rt   c                   t        |t        t        f      sJ t        |t              rt        |      }| j                  sJ d       t	        |      }t	        | j                        }||k  rqt
        j                  d| j                  ||z
         t        ||      D ]>  }| j                  |   d   }|j                  ||v r||   n| j                  |   d          @ |S )Nz/ExternKernel.arg_properties should not be emptyzv%s has %d unprovided positional arguments. Will check if they are in the keyword arguments or will use default values.r   rh  )
rx   ry   rz   r  r   r  r  r  r   r  )r0  r   r   n_args
n_pos_argsr   arg_names          ru   fill_non_provided_argsz#ExternKernel.fill_non_provided_args:  s     $u...dE":D""U$UU"T,,-
 JII^  V#	 6:. ..q1&96) 8$,,Q/@ rt   c                8   t         j                  j                  rCg }d }|r]| j                  rQt	        | j
                        t	        |      k(  sJ d       | j                  D ci c]  }|j                  d      | }}t        | j
                        D ]  \  }}|*|j                  ||         }|r|j                  d      nd }n\t	        | j                        |z   }	| j                  r6|	t	        | j                        k  r| j                  |	   j                  d      nd }|j                  t         j                  j                  j                  ||              |S t        t         j                  j                  j                  | j
                        S c c}w )NzDnames passed to codegen_const_args does not match self.constant_argsr   r   )rV   r   r  r  r   r  rN  r   rW  r  r  val_to_arg_strrT  )
r0  r?  r  name_to_arg_propertiesr  r   r   proptype_r   s
             ru   codegen_const_argszExternKernel.codegen_const_args\  ss   77F
 &*",,4--.#e*< Z< 594G4G*-0CGGFOS(*& * "$"4"45 M1)5155eAh?D04DHHV,$Edkk*Q.C  ..3T=P=P9Q3Q ++C044V<! 
 agg22AA!UKLM Mqww++::D<N<NOO%*s   $Fc                   t         j                  j                  rC| j                  7| j	                  g | j
                  | j                  | j                        }d}n| j
                  }d}g }t        |      D ]  \  }}t         j                  j                  r| j                  r|t        | j                        k  sJ d       | j                  |   j                  d      }|j                  t         j                  j                  j                  ||             |j                  t         j                  j                  j                  |              |r|j                  | j!                                |S )NFTz-Invalid access to ExternKernel.arg_propertiesr   )rV   r   r  r  rq  rW  r  r   r   r  r   rN  r  r  rs  r2  rw  )r0  rW  need_codegen_constant_argsr   r   r   rv  s          ru   codegen_argszExternKernel.codegen_args|  s5   774#3#3#?003$++3 2 23T[[F */&[[F)-&f% 	DDAqww""**q3t7J7J3K/K CK ++A.226:AGG00??5IJAGG00??BC	D &KK//12rt   c                "   ||v r|j                  |      S || j                  v r| j                  j                  |      S | j                  r8|| j                  v r*| j                  j                  |      j                  d      S t        | d      )zGiven an argument name, queries for values in (in order):
        1. any provided kwargs for this function.
        2. the class self.kwargs member.
        3. any available default arguments in self.allarg_properties.rh  z not in self.allarg_properties)rN  r   r	  r  )r0  rp  r   s      ru   get_kwargs_valuezExternKernel.get_kwargs_value  s    
 v::h''t{{";;??8,,!!h$2H2H&H))--h7;;OLLz)GHIIrt   c           	        t         j                  j                  r| j                  t	        | j
                        dk(  rg S g }| j                  D ]  }|r|dk(  r| j                  |      }t        |t        j                        r|j                  |       H| j                  r8|| j                  v r*| j                  j                  |      j                  d      nd }|j                  t         j                  j                  j                  ||              |S | j                   j#                         D cg c]3  \  }}| dt         j                  j                  j                  |       5 }}}|S c c}}w )Nr   r   r   r<  )rV   r   r  r  r   r
  r  r|  rx   r   r   r  r	  rN  r  rs  r   r  )r0  skip_outr   rp  r  rv  ks          ru   codegen_kwargszExternKernel.codegen_kwargs  sO   77+D4F4F0G10L	F >> QE 1))(3a,MM!$  11h$BXBX6X ..228<@@H! 
 MM!''"6"6"E"Ea"OPQ(  !KK--/Aq #Qqww++::1=>?F  	s   78E4c           	        t         j                  rt        j                  j                  st        | j                               dk(  ry t        j                  j                  j                  | j                               }t        j                  j                  j                  | j                               }|j                  d| j                          d| d| d       y y y )Nr   zassert_size_stride(r  r   )r2   size_assertsrV   r   r  rR   r   r  codegen_shape_tupler   r  r  )r0  r  r   r   s       ru   codegen_size_assertsz!ExternKernel.codegen_size_asserts  s    qww':':T]]_-277'';;DMMOLDWW))==doo>OPF%dmmo%6bbJ (;rt   c                N    | j                         }| j                         }|g g|fS )zD
        get output sizes and strides, for template_codegen
        )r   r   )r0  _size_strides      ru   get_group_stridezExternKernel.get_group_stride  s*     //#r{G##rt   c                   t         j                  j                  }| j                         }| j	                         }|D cg c]  }|j                  |       }}t        t        |            D cg c]  }t        d|        }}t        t        t        |            |j                  d      }t        |      D 	ci c]  \  }}	|	|
 }
}}	t        t        |
            D cg c]  }|
|   	 }}|D cg c]  }||   	 }}| j                         } ||      }t         j                  j                  j                  |||g      \  }}}t        d      \  }}t        t!        | ||D cg c]
  } ||       c}                  }t#        t%        j&                  |      |      }|t)        |      fS c c}w c c}w c c}	}w c c}w c c}w c c}w )zC
        Manually get canonicalization of the output index
        r"  T)r   r  c)rV   r   r   r   r   r   r   r   rP   r  r|  r   r  r%  r;   r{   r   rS   r   rB  rz   )r0  r   rm  r  r   r   r  index_orderr   r   r   r   r  r   	new_sizesr   r*  r   add_varreplacements                       ru   canonicalizezExternKernel.canonicalize  s   
 77##//#29:Q8%%a(::;@U;LMa(1QC1M
MU3w<0g6I6ISWX+4[+ABxsC#s(BB$)#f+$67q77-23jm3
3##%
#%&WW%5%5%E%Ew&
"	7F !%
73z7	3R1GAJ3R+STU5<<.<eI&&&+ ;M C73 4Ss#   F5/F:>F?$G6G
+Gc                    t        t        j                            }| j                  D ]  }|t	        |      z  } | j
                  j                         D ]  }|t	        |      z  } |S r   )r,   r   r   r  maybe_free_unbacked_symbolsr   r|   )r0  rU  r  s      ru   r  z%ExternKernel.get_unbacked_symbol_uses  sk     u||$&%% 	2C,S11A	2;;%%' 	2C,S11A	2rt   c           
     "   t        | dd       }d|g}|t        j                  |       D cg c]'  }|j                   dt        | |j                         ) c}z  }|j	                  d| j
                         | j                  |      S c c}w )Nr  zpython_kernel_name=r<  r=  )r   r  fieldsr   r  r*  rZ  )r0  kernel_namerW  r  s       ru   rH  zExternKernel.__str__  s    d$8$?!+1
 	$++D1
 zzl!GD%**567
 	
 	|D$4$4#789u%%
s   ,Brs   NNNNrs   Nr  r*  r,  r   r  r  r   r   )r  r  r   r   )r   zituple[Any, list[Any], list[Any], Callable[[Any, Any], Any], Optional[dict[sympy.Symbol, pytree.KeyPath]]])NNF)r   zOptional[Sequence[int]]r  r  r  )r?  r'  r  )7ro   rp   rq   r  rr   r  r  r{   r   r  r  r  ry   r  r  r  r  r  r  r  r#  r%  r  r
  r  r  r  r  r#  r  r%  rx  rM  rU  r  rZ  rb  r  r]  rg  ri  rk  r  rq  rw  rz  r|  r  r  r  r  r  rH  rw  rz  r{  s   @ru   r  r     sv   %'M?'.[..tDFND-1K*1(,,%)O]) 4E;3D3D4!= 
 	    6:N29<@9@<MK<M<M=9  .?[->->t-T*T &(, 
,<.!F!
*
";0
 
 
 k

k
 k
Z B
 B
H ! !@ ! !  *.6:Z 'Z 4	Z Zx 
 

 P P > > ? ? U U DP@4J:	$'>
& Hrt   r  c                  B     e Zd ZddZ	 	 	 	 	 	 	 d	 d fdZddZ xZS )ExternKernelOutc                   | j                  |       g | j                         | j                  d      }| j                         }t        j
                  j                  r| j                  dk(  rd}n| j                         }| j                         x}r|j                  nt        j
                  j                  }|j                  || j                         | j                  r| j                  j                         nd ||       y )NT)r~  ztorch::inductor::_mm_plus_mmaoti_torch__mm_plus_mm_out)r  rz  r  r#  rV   r   r  r  r   r   r   generate_extern_kernel_outr  r  )r0  r  r   r  r"  r   s         ru   r  zExternKernelOut.codegen  s    W%J""$Jt':':D':'IJ**,GG$$(FF 7K..0K!%!22A29L9L**""$484D4DD..0$	
rt   c
                    t         
|   d || j                  |      ||xs i d ||||	
       t        j                  j                  |       | _        t        j                  j                  |        y r   )rB  r  rV  rV   r   rY  r   rZ  )r0  r  rW  r  r   r  r  r  r  r  rD  s             ru   r  zExternKernelOut.__init__#  si     	'Lb)	
 GG++D1		""4(rt   c                     yrL  rs   r6  s    ru   r  zExternKernelOut.should_allocate>  rK  rt   r  r  r  )ro   rp   rq   r  r  r  rz  r{  s   @ru   r  r    s3    
2 &() 
)6rt   r  c                        e Zd Zd fdZ xZS )RandomSeedsc                   t        j                  t         j                        }t        |   t        |t         j                  |g      g |j                  |j                  |ggddt        j                  j                         y )Nr  zaten.randint.low_outzat::_ops::randint_low_out::call)r  rW  r  r  r  r  )r   rb  rD  rB  r  r  r  rj  r  randintlow_out)r0  countr   limitsrD  s       ru   r  zRandomSeeds.__init__C  sl    U[[)kkW
 !::vzzE7;5 >,, 	 	
rt   )r  r   r   r  r   r   ro   rp   rq   r  rz  r{  s   @ru   r  r  B  s    
 
rt   r  c                  F     e Zd ZddZ	 	 	 	 	 	 d	 d fdZddZd Z xZS )r  c                   | j                  |       g | j                         | j                         }t        j                  j
                  j                  | |       t        | j                  t              r| j                  |       y y r   )r  rz  r  rV   r   r  generate_extern_kernel_allocrx   r  rm  r  r0  r  r   s      ru   r  zExternKernelAlloc.codegenW  sl    W%=""$=t':':'<=	99$Edkk6*%%g. +rt   c	                    t         	|   d || j                  |      ||xs i d ||||
       g | _        t        j
                  j                  |       | _        t        j
                  j                  |        y r   )	rB  r  rV  rn  rV   r   rY  r   rZ  )
r0  r  rW  r  r   r  r  r  r  rD  s
            ru   r  zExternKernelAlloc.__init__^  sp     	'Lb)	
 ')GG++D1		""4(rt   c                     yr  rs   r6  s    ru   r  z!ExternKernelAlloc.should_allocate|  r  rt   c                    t         r   r  r6  s    ru   r  z"ExternKernelAlloc.apply_constraint  r  rt   r  )rs   NNNrs   Nr  )ro   rp   rq   r  r  r  r  rz  r{  s   @ru   r  r  V  s5    / &() 
)<"rt   r  c                  :     e Zd ZdZd fdZddZd ZddZ xZS )	rs  zP
    An output buffer that represents the mutation of a pre-existing buffer
    c                    t         |   d |       |j                         }t        j                  j                  |       |g| _        || _        t        j                  j                  |       | _	        y r  )
rB  r  r  rV   r   r  mutation_namesmutating_noderY  r   )r0  r  mutated_noder  mutated_node_namerD  s        ru   r  zMutationOutput.__init__  s`    d62(113	##$5601(5GG++D1	rt   c                    | j                   S r   )r  r6  s    ru   rJ  zMutationOutput.get_defining_op  s    !!!rt   c                    | j                   S r   )r  r6  s    ru   r  z!MutationOutput.get_mutation_names  r  rt   c                     yr  rs   r6  s    ru   r  zMutationOutput.should_allocate  r  rt   )r  r  r   r   r  r  )	ro   rp   rq   r  r  rJ  r  r  rz  r{  s   @ru   rs  rs    s    2"#rt   rs  c                  v     e Zd ZU dZi Zded<   e	 d	 	 	 	 	 	 	 dd       Z	 d	 	 	 	 	 	 	 	 	 d	 fdZd
dZ	 xZ
S )TMADescriptora$  
    An IR node representing a host-side TMA descriptor in the Triton API
    (the ones obtained via create_{1d,2d}_tma_descriptor calls). Mostly
    useful for user-defined Triton kernels relying on host-side TMA; but
    can, in principle, be used for Inductor's Triton templates, too.
    zdict[Any, TMADescriptor]_CACHEc                    t        |      |||f}|| j                  vrt        ||||      | j                  |<   | j                  |   S r   )idr  r  )rS  r  r  
block_dimselement_sizer   s         ru   rR  zTMADescriptor.create  sH     &z4\:cjj +FD*lSCJJsOzz#rt   c           
     N   t        |      dv sJ t        |      t        |      k(  sJ ||j                         j                  }|| _        || _        || _        || _        t        | j                        | _        |g}g | j                  | j
                  | j                  }t        | %  d t        t        ||j                                     |t        |      d        t        j                  j!                  |       | _        t        j                  j%                  |        y )N)r1   r   r  )r   r   r#  r  r  r  r  rW  rB  r  r  r  r   rz   rV   r   rY  r   rZ  )r0  r  r  r  r  rW  r  rD  s          ru   r  zTMADescriptor.__init__  s    4yF"""4yC
O+++!++-66L	$(		N	
YY
__
 
 	 !,,. - 	
  GG++D1		""4(rt   c                &    |j                  |        y r   )generate_tma_descriptorr  s     ru   r  zTMADescriptor.codegen      ''-rt   r   )r  rc   r  list[Union[int, torch.SymInt]]r  r  r  r  )
r  rc   r  r  r  r  r  r  r   r   r  )ro   rp   rq   r  r  rr   rx  rR  r  r  rz  r{  s   @ru   r  r    s     (*F$) '+

 -
 3	

 $
 
" '++)+) -+) 3	+)
 $+) 
+)Z.rt   r  c                  V     e Zd Zd ZddZd	 fdZd	dZ	 	 d fdZd
dZddZ	 xZ
S )UserDefinedTritonKernelc                D   ddl m} ddlm} |j	                  | j
                        g }g }g }t        |      rt        d      r%|j                  fdj                  D               n)t        d      sJ |j                  j                         t        d      r:j                  D ]*  }|j                  j                  j                  |          , n)t        d      sJ |j                  j                         j                   }j                  |||fS )	Nr   )	Autotuner)kernel_side_tablerestore_idxc              3  P   K   | ]  }j                   j                  |     y wr   )r   	arg_names)r   r   r  s     ru   r   zBUserDefinedTritonKernel.get_kernel_and_metadata.<locals>.<genexpr>  s%      */0FII''**s   #&restore_value	reset_idxreset_to_zero)triton.runtime.autotunerr  *torch._higher_order_ops.triton_kernel_wrapr  
get_kernel
kernel_idxrx   r  r2  r  r  r  r  r   r  r  configs)r0  r  r  r  restore_value_argsreset_to_zero_argsr   r  s          @ru   get_kernel_and_metadataz/UserDefinedTritonKernel.get_kernel_and_metadata  s   6P"--doo>(*(*fi( v}-")) *4:4F4F*  v777"))&*>*>?v{+)) FA&--fii.A.A!.DEF v777"))&*>*>?nnGYYFw 24FFFrt   c           
        ddl m} | j                         \  }}}}|j                  ||| j                  ||| j
                        \  }}}	| j                  D 
ci c]  }
|
| j                  |
       }}
t        |j                  D cg c]  }|j                  |    c}      }g }g }g }t        j                  |j                         t        t        j                  d      |	            D ]C  \  }}|j!                  |       t#        |t$              r?|j!                  |j'                                |j!                  |j)                                gt#        |t*        t,        t.        t0        j2                  f      r,|j!                  |       |j!                  t5        |             ||v r'|j!                  d       |j!                  t*               |B	  |       r(|j!                  d       |j!                  t*               |j7                          ,t9        dt5        |       d|        | j;                  |       |j=                  |||||d| j?                                y c c}
w c c}w )	Nr   )triton_version_uses_attrs_dictrM  r   zUnsupported arg type: r   T)	arg_typesraw_argstriton_metar   r   ) torch._inductor.utilsr  r  !define_user_defined_triton_kernelr   gridr  r|  r,   
constexprsr  r=  rO  r  r   repeatr  rx   rc   r  r   r   r  rl   r   r   r   rQ  r_  r  generate_kernel_callr   )r0  r  r  r  r  r  r  new_namer  extra_launch_argsr  
named_argsr   constexpr_namesr   r  raw_args_filteredr   r  s                      ru   r  zUserDefinedTritonKernel.codegen  sA   H ((*	
 55KKII
		
 261S1S
,-At$$Q''

 
 %6CTCT%Uaf&6&6q&9%UV!	')"I$4$4R$8:K L
 	WID# $$S)#v&C1134  1C#udEJJ!?@C   c+( B  % 23KKO$$S)%))+),B49+RPSu*UVV?	WB 	W%$$&#??$ 	% 	
U
 &Vs   I)I.c                L    t         |          t        | j                        z  S r   )rB  r  r&   r  rC  s    ru   r  z0UserDefinedTritonKernel.get_unbacked_symbol_usesO  s"     w/14I$))4TTTrt   c                    t               S r   r+   r6  s    ru   r%  z0UserDefinedTritonKernel.get_unbacked_symbol_defsT  r&  rt   c                  g }i }g }|j                         D ]  \  }}	t        |	t              rXt        j	                  | j                  |	            }
||v rt        j                  |
g||    }
|j                  |
       |
||<   n|j                  |	       |	||<    t        |      dk7  sJ |d   j                         | _        t        | 5  d t        | j                        |t        |      |       || _        || _        | j%                         \  }}}}|j&                  D cg c]	  }||v s| c}| _        ddlm} t        |      dkD  r|d   j.                  ni } ||i ||      D cg c]  }||   	 c}| _        | j0                  D cg c]#  }t3        t        | j                        ||       % c}| _        t6        j8                  j;                  |        y c c}w c c}w c c}w )Nr   rJ  )identify_mutated_tensors)r  rx   rb   rU  r  r  r  rR  r  r   r   r   rB  r  r  rz   r  r  r  r  r  r  r  r   mutable_argsrs  r  rV   r   rZ  )r0  r  r  tma_descriptor_metadatakernel_argsrW  r   r  r  r  r   r  r  r   r  r  autotuned_kwargsr   r  rD  s                      ru   r  z UserDefinedTritonKernel.__init__W  s    %%' 		DAq!Y' 99$:L:LQ:OP//%,,QL1H1KLAa q	$$Q'q			 6{aQi**,dkk*- 	
 %	 $ < < >A "++.
sk/AC.
* 	X03Gq0@71:,,b 0;;;*:;
 
 ((!
 :T[[93E!
 	
""4(%.

!
s   '	G#1G#1G((G-c                ,    t        | j                        S r   )ry   r  r6  s    ru   r#  z#UserDefinedTritonKernel.get_outputs  s    D))**rt   c                    | j                   S r   rJ  r6  s    ru   r   z"UserDefinedTritonKernel.get_device  rK  rt   r  r,  r*  r  )ro   rp   rq   r  r  r  r%  r  r#  r   rz  r{  s   @ru   r  r    s3    G>I
VU
3)	3)j+rt   r  c                  B     e Zd ZdZddZddZd Zd	dZd fdZ xZ	S )
InplaceBernoulliFallbackE
    This needs to be a custom class to handle mutation properly
    c                   d | j                   D        \  }t        j                  j                  r\|j	                  | j                          d| ddj                  t        t        | j                               d|j                          y |j	                  | j                          d| ddj                  t        t        | j                               d|j                          y )Nc              3  <   K   | ]  }|j                           y wr   r  r   r   s     ru   r   z3InplaceBernoulliFallback.codegen.<locals>.<genexpr>  s     ;!##%;r  rS  r  z, NULL)r   )rW  rV   r   r  r  r#  rV  rT  reprr  ending)r0  r  r   s      ru   r  z InplaceBernoulliFallback.codegen  s    ;t{{;77 '')*!A3b3tTEWEW;X1Y0ZZabibpbpaqr '')*!A3b3tTEWEW;X1Y0ZZ[\c\j\j[klrt   c                     yr  rs   r6  s    ru   r  z(InplaceBernoulliFallback.should_allocate  r  rt   c                >    | j                   d   j                         gS rx  rW  r  r6  s    ru   r  z+InplaceBernoulliFallback.get_mutation_names      A'')**rt   c                    t               S r   r+   r6  s    ru   r%  z1InplaceBernoulliFallback.get_unbacked_symbol_defs  r&  rt   c                ^   t         |   d t        |j                               | j	                  |g      ||       t
        j                  j                  |j                                t
        j                  j                  |       | _
        t
        j                  j                  |        y )NrJ  r  )rB  r  r  r   rV  rV   r   r  r  rY  r   rZ  )r0  r  r   r  rD  s       ru   r  z!InplaceBernoulliFallback.__init__  s~    alln-$# 	 	
 	
##AJJL1GG++D1		""4(rt   r  r  r,  
ro   rp   rq   r  r  r  r  r%  r  rz  r{  s   @ru   r  r    s&    +
) 
)rt   r  c                  Z     e Zd ZdZddZd	dZd Zd
dZ	 	 d fdZe	ddd       Z
 xZS )InplaceCopyFallbackr  c                R    | j                         \  }}}|j                  |||       y r   )rz  codegen_device_copy)r0  r  r  r  non_blockings        ru   r  zInplaceCopyFallback.codegen  s)    #'#4#4#6 c<##Cl;rt   c                     yr  rs   r6  s    ru   r  z#InplaceCopyFallback.should_allocate  r  rt   c                >    | j                   d   j                         gS rx  r  r6  s    ru   r  z&InplaceCopyFallback.get_mutation_names  r  rt   c                    t               S r   r+   r6  s    ru   r%  z,InplaceCopyFallback.get_unbacked_symbol_defs  r&  rt   c                   t         |   d |||dd       t        j                  j	                  |d   j                                t        j                  j                  |       | _        t        j                  j                  |        y )Nz
aten.copy_aoti_torch_copy_)r  r  r   )	rB  r  rV   r   r  r  rY  r   rZ  )r0  r  rW  r  rD  s       ru   r  zInplaceCopyFallback.__init__  sr     	+. 	 	
 	
##F1I$6$6$89GG++D1		""4(rt   c                    ||fD cg c]  }| j                  |       }}|f}t        t        |j                               ||      }|S c c}w r  )r  r  r  r   )rS  r  r  r  r   rW  r  r  s           ru   rR  zInplaceCopyFallback.create  sV    14c
;1###A&;;%$cnn./

  <s   Ar  r  r,  r  )r  rl   )ro   rp   rq   r  r  r  r  r%  r  rx  rR  rz  r{  s   @ru   r  r    s?    <+)
 
)$  rt   r  c                  6    e Zd ZdZddZd	dZd Zd
dZd	dZy)MutatingFirstArgExternKernelr  c                    g d | j                   D        t        t        | j                        }|j	                  | j                          ddj                  |       d|j                          y )Nc              3  <   K   | ]  }|j                           y wr   r  r  s     ru   r   z7MutatingFirstArgExternKernel.codegen.<locals>.<genexpr>  s     9a!!#9r  rS  r  r   )rW  rT  r  r  r  r#  rV  r  )r0  r  argrefss      ru   r  z$MutatingFirstArgExternKernel.codegen  sl    
9T[[9
t))*
 	##%&a		'(:';1W^^<LM	
rt   c                     yr  rs   r6  s    ru   r  z,MutatingFirstArgExternKernel.should_allocate  r  rt   c                >    | j                   d   j                         gS rx  r  r6  s    ru   r  z/MutatingFirstArgExternKernel.get_mutation_names  r  rt   c                    t               S r   r+   r6  s    ru   r%  z5MutatingFirstArgExternKernel.get_unbacked_symbol_defs  r&  rt   c                     yrL  rs   r6  s    ru   has_side_effectsz-MutatingFirstArgExternKernel.has_side_effects  rK  rt   Nr  r  r,  )	ro   rp   rq   r  r  r  r  r%  r  rs   rt   ru   r  r    s     
+rt   r  c                        e Zd Zd fdZ xZS )ResizeStorageBytesc                *   t        |t              sJ d       t        |   d t	        |j                               | j                  |g      |f       t        j                  j                  |j                                t        j                  j                  |       | _        t        j                  j                  |        d| _        d| _        t        j                  j                   j#                  |j$                  j                                y )NzTODO: dynamic shapesrJ  )r  z"inductor_ops.resize_storage_bytes_z&torch::inductor::resize_storage_bytes_)rx   r   rB  r  r  r   rV  rV   r   r  r  rY  r   rZ  r  r  never_reuse_buffersr  r  )r0  variablers  rD  s      ru   r  zResizeStorageBytes.__init__	  s    (C(@*@@(h1134
+#+	 	 	
 	
##H$5$5$78GG++D1		""4("FG	##''(>(>(@Art   r  r  r{  s   @ru   r  r    s    B Brt   r  c                  (     e Zd Zd fdZddZ xZS )SetSourceTensorKernelc                   |j                          t        | 	  |j                         ||gdt        j
                  j                  j                  j                         t        j                  j                  j                  |j                  j                                t        j                  j                  j                  |j                                t        j                  j                  j                  | j                                |j                         }t!        t#        |      ||       t!        t#        |      ||       g| _        y )Nz!torch.ops.aten.set_.source_Tensor)r  r  rJ  )r  rB  r  r   r   rT   r  set_source_TensorrV   r   r  r  r  r  r   rs  r  r  )r0  self_tensorstorage_tensorr   rD  s       ru   r  zSetSourceTensorKernel.__init__  s    $$&%%'.)B		++99	 	 	
 	
##''(8(8(A(A(CD	##''(?(?(AB	##''8**,:V4k4H:V4ndK!
rt   c                v    | j                   d   j                         | j                   d   j                         gS rE  r  r6  s    ru   r  z2SetSourceTensorKernel.get_inputs_that_alias_output+  s/    A'')4;;q>+B+B+DEErt   r  r  )ro   rp   rq   r  r  rz  r{  s   @ru   r  r    s    
"Frt   r  c                  X     e Zd ZdZd
dZddZd ZddZddd	 	 	 	 	 	 	 d fd	Z xZ	S )ScatterFallbackz
    This needs to be a custom class to handle mutation properly.
    This class handles both aten.scatter_ and aten.scatter_reduce_.
    It also handle the case `src` being a scalar properly.
    c           
        | j                   d   }t        j                  j                  rddd}||v r||   }| j                  rd | j
                  D        \  }}}n%d | j
                  D        \  }}| j                  d   }|j                  ||| j                  d   ||g| j                  | j                  | j                  || j                                y )	Nr<  r  r  )r  multiplyc              3  <   K   | ]  }|j                           y wr   r  r  s     ru   r   z*ScatterFallback.codegen.<locals>.<genexpr>?  s     Jq224Jr  c              3  <   K   | ]  }|j                           y wr   r  r  s     ru   r   z*ScatterFallback.codegen.<locals>.<genexpr>A  s     EA!--/Er  r1   r   )r   rV   r   r  src_is_tensorrW  r  generate_scatter_fallbackr  r  r  )r0  r  r<  get_operator_enumr   r   r  s          ru   r  zScatterFallback.codegen6  s    X&77(-6 B***62JdkkJOQsEEJQ$$Q'C))""1%uc2  ##!	
rt   c                     yr  rs   r6  s    ru   r  zScatterFallback.should_allocateM  r  rt   c                >    | j                   d   j                         gS rx  r  r6  s    ru   r  z"ScatterFallback.get_mutation_namesP  r  rt   c                    t               S r   r+   r6  s    ru   r%  z(ScatterFallback.get_unbacked_symbol_defsS  r&  rt   NTr<  include_selfc          
     f   t        |t              | _        | j                  r%|||fD cg c]  }| j                  |       }	}|f}
n$||fD cg c]  }| j                  |       }	}||f}
t        |   d t        |j                               | j                  |	      |
||dt        |      ddg|       t        j                  j                  |j                                t        j                  j                  |       | _        t        j                  j!                  |        y c c}w c c}w )NrJ  r.  r<  r/  )r  r  r  )rx   rb   r(  r  rB  r  r  r   rV  r   rV   r   r  r  rY  r   rZ  )r0  r  r   r  r   r  r<  r/  r   tensorsr  rD  s              ru   r  zScatterFallback.__init__V  s    (Y7 78%oFt))!,FGF FM78%jAt))!,AGA #JMalln-(|<";/+3^*D# 	 		
 	
##AJJL1GG++D1		""4(% G Bs   D)D.r  r  r,  )r  r   r<  r  r/  rl   r   r   r  r{  s   @ru   r#  r#  /  sV    
.+ !%!!) 	!) !) !) 
!) !)rt   r#  c                  B     e Zd ZdZddZddZd Zd	dZd fdZ xZ	S )
IndexPutFallbackzQ
    This needs to be a custom class to handle mutation and indices properly
    c                   d | j                   D        ^}}}g }t        |      }t        | j                        D ]b  \  }}| j                  |   |j	                  t        |             0|j	                  t        j                  j                  j                         d  |j                  | j                         |||g| j                           y )Nc              3  <   K   | ]  }|j                           y wr   r  r  s     ru   r   z+IndexPutFallback.codegen.<locals>.<genexpr>  s     &Rq':':'<&Rr  )rW  r(  r   r  r  r)  rV   r   r  r  generate_index_put_fallbackr#  rw  )	r0  r  r   r|   valid_indicesr  iter_valid_indicesr   r   s	            ru   r  zIndexPutFallback.codegen  s    &Rdkk&R#F]!-0dll+ 	>DAq||A*t$678qww33<<=		> 	,++  "Aw	
9=9P9P9R	
rt   c                     yr  rs   r6  s    ru   r  z IndexPutFallback.should_allocate  r  rt   c                >    | j                   d   j                         gS rx  r  r6  s    ru   r  z#IndexPutFallback.get_mutation_names  r  rt   c                    t               S r   r+   r6  s    ru   r%  z)IndexPutFallback.get_unbacked_symbol_defs  r&  rt   c           	     
   || _         |D cg c]  }||	 }}||g|D cg c]  }| j                  |       }}d}	t        
|   d t	        j                               | j                  |      |fd|	|       t        j                  j                  | j                  d   j                                t        j                  j                  |       | _        t        j                  j                  |        y c c}w c c}w )Naoti_torch_index_put_outrJ  zaten.index_put_)r  r  r  r   )r  r  rB  r  r  r   rV  rV   r   r  rW  r  rY  r   rZ  )r0  r  r   r  r|   
accumulater   r7  r1  r  rD  s             ru   r  zIndexPutFallback.__init__  s    $+=qq}==34f2M}2MNQ4%%a(NN4alln-(M0+# 	 	
 	
##DKKN$;$;$=>GG++D1		""4( >Ns   C;C;D r  r  r,  r  r{  s   @ru   r3  r3  z  s&    
+) )rt   r3  c                  $    e Zd Zed        ZddZy)
DeviceCopyc                   |j                         sKt        d |j                         D              r+t        j                  j
                  s|j                  |      S t        j                  j                  |       t        j                  j                  |j                                t        d       |f}t        t        ||j                         |j                               | j!                  |      g|      S )Nc              3  T   K   | ]   }|t         j                  j                  v  " y wr   )rV   r   r8  r  s     ru   r   z$DeviceCopy.create.<locals>.<genexpr>  s     GqA***Gs   &(zDeviceCopy in input programr  )r  r   rA  r2   aot_inductoruse_runtime_constant_foldingr  rV   r   add_device_infor   rJ   r@  r   r   r   r  )rS  r   r   r  r  s        ru   rR  zDeviceCopy.create  s     GA4D4D4FGG''DD''//	'	/78%kkmZZ\
 q!"
 	
rt   c                   | j                         }t        |      dk(  sJ | j                  r2|j                  |d   | j                  j	                         |d          y |j                  |d   | j	                         |d          y )Nr   r   r1   )rz  r   r  r  r  r  s      ru   r  zDeviceCopy.codegen  s{      "4yA~~''Q));;=tAw ''Q1G1G1I4PQ7Srt   Nr  )ro   rp   rq   rx  rR  r  rs   rt   ru   r@  r@    s    
 
.Trt   r@  c                  D     e Zd ZdZddZddZd	 fdZd
dZd	dZ xZ	S )r~   z;
    The result of a call to aten._local_scalar_dense.
    c                    t               S r   r+   r6  s    ru   r@  zDynamicScalar.get_reads  r&  rt   c                     yr  rs   r6  s    ru   r  zDynamicScalar.should_allocate  r  rt   c                    |j                          t        | 	  d t        t	        j
                  d            | j                  |g             || _        || _        y Nr   rJ  )	r  rB  r  r  r   r   rV  symkeypath)r0  rL  rM  r  rD  s       ru   r  zDynamicScalar.__init__  sK    *ELL$78$:M:Mtf:U	
 rt   c                .    t        | j                  g      S r   )r,   rL  r6  s    ru   r%  z&DynamicScalar.get_unbacked_symbol_defs  s    488*%%rt   c                &    |j                  |        y r   )codegen_dynamic_scalarr  s     ru   r  zDynamicScalar.codegen  s    &&t,rt   r  r  r  r,  )
ro   rp   rq   r  r@  r  r  r%  r  rz  r{  s   @ru   r~   r~     s!    &-rt   r~   c                  J     e Zd ZdZddZd	dZd
 fdZd	dZd Zd
dZ	 xZ
S )r   z5
    The result of a call to aten._assert_scalar
    c                    t               S r   r+   r6  s    ru   r@  zAssertScalar.get_reads  r&  rt   c                     yr  rs   r6  s    ru   r  zAssertScalar.should_allocate  r  rt   c                ~    t         |   d t        t        j                  d            g        || _        || _        y rK  )rB  r  r  r   r   scalarrL  )r0  rU  rL  rD  s      ru   r  zAssertScalar.__init__  s7    ell512	
 rt   c                     yrL  rs   r6  s    ru   r  zAssertScalar.has_side_effects  rK  rt   c                ,    t        | j                        S r   )r&   rU  r6  s    ru   r  z%AssertScalar.get_unbacked_symbol_uses   s    $T[[11rt   c           	        t         j                  sy t        t        | j	                                     }t
        j                  j                  rad| d}t
        j                  j                  j                  | j                  d      }|j                  d| d| j                   d| d       y t
        j                  j                  j                  | j                  d      }|j                  d	| d
       |j                  dt        | j                         d       |j                  | j                          d       y )Nzstd::to_string(r   F)rY  zif (!(z()) { throw std::runtime_error("Expected z but received " + z); }zif not r/  z    raise RuntimeError(z = None)r2   scalar_assertsr)  r(  r  rV   r   r  r  codegen_cpp_sizevarrU  r  rL  codegen_python_sizevarr  r  )r0  r  symbol
symbol_strsizevars        ru   r  zAssertScalar.codegen  s   $$ d488:;<77*6(!4Jgg**>>e ? G 	!J488*Tfgqfrrwx gg**AAe B G y23 7TXX7GqIJ  19:rt   r  r  r  )ro   rp   rq   r  r@  r  r  r  r  r  rz  r{  s   @ru   r   r     s&    	2;rt   r   c                  "    e Zd ZU ded<   ded<   y)ExternKernelNoder   r   zexport_schema.Noder   Nrn   rs   rt   ru   r`  r`     s    
I
rt   r`  c                       e Zd Z	 ddd	 d fdZddZddZd Zed        Zd Z	d	 Z
d
 Zd ZddZedd       Zed        Z fdZ xZS )FallbackKernelNr  c               \    t            |t        |      t        |      |       d _        | _        t        |t        j                  j                  t        j                  j                  f      sJ d| dt        |       d       | _        | _        |i n| _        t        j                  j!                   j"                         g  _        g  _        t         j                  t        j                  j                        ry d j                  j)                         v ry  j                  j*                  }t        j,                  j.                  j1                   j                        r- j&                  j3                  |d   j5                                y |j6                  rt9        |      st;        d|        j                   j<                   j>                        \  }	}d
 fd	}
t        j,                  j.                  jA                  ||	|      D ]  \  }} |
||        y )Nr  Fz#Fails to create FallbackKernel for r   z not supported_c10d_functionalr   z'NYI: Can't generate FallbackKernel for c                    t         j                  t        j                        rt        |t        t
        f      sJ t        j                   j                        rt        |t
        t        f      rJ |y  j                  y d fd}t        j                   j                        r||D ]
  } ||        y y t        j                   j                        sJ  ||       y )Nc                   j                   j                  | j                                j                  j                  r?j
                  j                  t        t        | j                               |              y y r  )	alias_namesr  r  
alias_infois_writer  rs  r  r   )r   infor0  s    ru   	add_aliaszPFallbackKernel.__init__.<locals>.handle_aliasing_and_mutation.<locals>.add_alias  sZ      ''

5??++))00&z'H!TR ,rt   r  )
rx   r   r   ListTypery   rz   library_utilsis_tensor_like_typeri  is_tensorlist_like_type)rk  r  rl  optional_tensor_argr0  s   `   ru   handle_aliasing_and_mutationz=FallbackKernel.__init__.<locals>.handle_aliasing_and_mutations  s    $))U^^4!#e}55500; &cE4=999{& 44TYY??/2 7+!"567 # %88CCC#rt   r  )!rB  r  rz   use_runtime_dispatchr  rx   r   r  r  r  r   r  r3  r   rV   r   warn_fallbackr  rh  r  r   r  _libraryr  mutates_and_returns_first_argr  r  
is_mutabler   r_  rW  r  
zip_schema)r0  r  r  rD  nontensor_argsr3  r   r  schemar   rr  rk  r  rD  s   `            ru   r  zFallbackKernel.__init__'  s    	+.!	 	 	
 %*!!2

%%

..
 	X 14<.W	X 
 ","Nb	d556 '))+d&&

(F(FG !1!1!6!6!88
 !!)) >>==d>N>NO&&{1~'>'>'@A%;F%C%9&B  **4;;8J8JKf	: --88vN 	4ID#(s3	4rt   c           	     n    |j                  | j                         | j                  t        | dd             S Nr  )(codegen_unbacked_symbol_defs_for_outputsr  rn  r   r  s     ru   codegen_unbacked_symbol_defsz+FallbackKernel.codegen_unbacked_symbol_defs  s0    ??MMOT\\749Ld+S
 	
rt   c                    t        | dd       x}rBt        t        j                  j                  j
                  |      }|J |j                         S t               S r|  r   r(   rV   r   r   r   r#  r,   r0  r  resolveds      ru   r%  z'FallbackKernel.get_unbacked_symbol_defs  Z     '.A4 HHH0  **,=H '''==?"<rt   c                ,   t         j                   G d d             }| j                  D cg c]  } ||j                                }}| j	                  || j
                        \  }}t        j                  j                  rt        | j                  t        j                  j                        r| j                  ||      }t        | j                  j                   j"                  |      D cg c]9  \  }}t        j                  j$                  j'                  ||j(                        ; }}}n6|D cg c]+  }t        j                  j$                  j'                  |      - }}| j*                  j-                  |       |S c c}w c c}}w c c}w )Nc                       e Zd ZU ded<   ddZy))FallbackKernel.codegen_args.<locals>.Shimr   refc                    | j                   S r   )r  r6  s    ru   rw  z2FallbackKernel.codegen_args.<locals>.Shim.__repr__  s    xxrt   Nr  )ro   rp   rq   rr   rw  rs   rt   ru   Shimr    s    H rt   r  )r  	dataclassrW  r  r3  r  rV   r   r  rx   r  r   r  r  rq  r   r  r  r  rs  r  r   r  )r0  r  r   rD  r   r   params          ru   rz  zFallbackKernel.codegen_args  sH   				  	  
	  =AKKHqtA//12HH**;8J8JKf77:d.>.>

@U@U#V..tV<D !$D$4$4$<$<$F$F ME1 $$33AuGD 
 EIIqAGG((77:IDI 	6" I
 Js   F1>F70Fc                   | r3| D cg c]#  }|j                         s|j                         % }}|d   S t        |t        j                        r|j                  S t        |t
        t        f      r\t        d |D              }|D cg c]  }|s|	 }}t        |      dk(  r|d   S |D ]  }t        |j                        s|c S  |d   S y c c}w c c}w )Nr   c              3  H   K   | ]  }t         j                  d |        y wr   )rb  find_devicer  s     ru   r   z-FallbackKernel.find_device.<locals>.<genexpr>  s"      $89**43$r-  r1   )r   rx   r   r?  r   ry   rz   r,   r   rN   r   )rD  rJ  r  devices
device_setr   s         ru   r  zFallbackKernel.find_device  s    3>SC#..BRs~~'SGS1:nell3!(((ntUm4# $=K$ J -7A&&vAGA7|q qz!! "&++&!M" 1:! T Bs   CCCCc                    t        | j                  t        j                  j                        ryt        | j                        j                         S r  )rx   r  r   r  r  r$   rw  r6  s    ru   r  zFallbackKernel.has_side_effects  s9    d&&

(F(FGt//0;;==rt   c                    | j                   S r   )rh  r6  s    ru   r  z+FallbackKernel.get_inputs_that_alias_output  rH  rt   c                N    t        | j                        dk  sJ | j                  S rW  )r   r  r6  s    ru   r  z!FallbackKernel.get_mutation_names  s'    4&&'1,,,"""rt   c           	        t         j                  d| j                         | j                         t	        | t
              sJ | j                  | j                  | j                        \  }}| j                  ||      }| j                  D cg c]  } | j                  |fi | }}| j                  }t        j                  j                  sg ||S t        d d       }|j!                  |||      }d }t	        |t"        j$                  j&                  j(                        r#|j+                  |d   |d         j,                  }	n|j.                  j,                  }	t1        |	      dk(  r>| j2                  r| j2                  n| j4                  }
|	d   j6                  } |||
      g}n9t9        |	| j2                        D cg c]  \  }} ||j6                  |       }}}t;        | j                         t=        j>                  | j                  jA                         ||i             }t        j                  jB                  jE                  |       g ||S c c}w c c}}w )Nz4Extern kernel node added for node %s with target %s.c           	     p   t        | t        j                        ro|}t        |t        t        f      rt        |      dk(  sJ |d   }t        j                  j                  t        j                  |j                                     S t        | t        j                        rxt        | j                         t        j                        rPt        j                  j                  |D cg c]&  }t        j                  |j                               ( c}      S t        dt        |              c c}w )Nr1   r   r:  )	as_tensor)
as_tensorszUnsupported return type )rx   r   
TensorTypery   rz   r   export_schemaArgumentrR  TensorArgumentr  rm  getElementTypeRuntimeErrorr   )return_typeoutputr   s      ru   handle_single_outputzFFallbackKernel.export_extern_kernel_node.<locals>.handle_single_output  s   +u'7'78ftUm4v;!+++ )C$--44+::O 5   K8Z**,e.>.>> %--44 $*  &44#,,.I  5   #%=d;>O=P#QRR s   )+D3r   r1   )r|  rW  rn  metadata)r   r   )#r  r  r  r  rx   rb  r3  rW  r  rq  r  r|  rV   r   aot_moder   serialize_inputsr   _higher_order_ops	torchbindCallTorchBindrz  returnsr  r   rn  r  r  r   r`  r  rW   r   extern_kernel_nodesr  )r0  r   r   r   ordered_kwargsr|  
serializernamed_argumentsr  r  rn  r  output_argumentsreturn_schemar  r   s                   ru   export_extern_kernel_nodez(FallbackKernel.export_extern_kernel_node  s-   		BMMO	
 $///**4;;8J8JKf**48 99
 "D!!#00
 
 !!ww+T+N++*46
$55fdFK	S. fe55??MMNmmDGT!W5==Gnn,,Gw<1 '+lldll8M8MG!!*..K 4[' JK .1$,,-G )M6 %]%<%<fE   
  ##'',,.&(	
 	
##**40''''G
d s   IIc                .     j                   }|j                  dk(  rqt        |t        j                  j
                        sJ t        j                  j                  rddl	m
} t        |      |vrtt        j                  d|       d _        nV|j                  dk(  r&t        |t        j                  j
                        s#J t        j                  j                  rd _        d
 fd}dd j                          j                  r |        ng  j!                          j#                         }t        j                  j                  r^t        |t        j                  j
                        r:t%        fd	t'        ||j(                  j*                        D              r |        nUt        j                  j,                  j/                   |       t         j0                  t2              r j5                          j7                         y )Nr  r   )inductor_fallback_opszG%s is missing a c-shim implementation, using proxy executor as fallbackT
_quantizedc            	         d } j                         }j                  j                         j                  j                  | j
                  |j                  rj                         y j                         y r   )r  ,generate_fallback_kernel_with_runtime_lookupr  r  r  r  rn  r  )r   exported_argsr0  r  s     ru   do_runtime_dispatchz3FallbackKernel.codegen.<locals>.do_runtime_dispatchF  sk    D ::<M@@''$$   $	 372G2G	rt   c                    t        | t        j                        xsD t        | t        j                        xr( t        | j	                         t        j                        S r   )rx   r   
NumberTypeOptionalTyper  )r   s    ru   	is_numberz)FallbackKernel.codegen.<locals>.is_numberU  sK    a!1!12 1e001 Eq//153C3CDrt   c              3  T   K   | ]  \  }}d |v xr  |j                          ! yw)zc10::complexN)r  )r   arg_strop_argr  s      ru   r   z)FallbackKernel.codegen.<locals>.<genexpr>c  s5      ' #g-M)F<L<L2MMr  r  )r   ztorch.JitTyper   rl   )r  r  rx   r   r  r  rV   r   r  torchgen.aoti.fallback_opsr  r   r  rC  rs  r  rz  r  r  r   r  r  r  generate_fallback_kernelr  rm  r  r~  )r0  r  r  r  r  r   r  s   ``    @ru   r  zFallbackKernel.codegen/  s   !!v%fejj&;&;<<<ww""Lv;&;; KKa 15D--fejj&;&;<<<WW  (,D%		 	W%$$!AT&&(A4+>+>+@AD##vuzz'<'<= +.tV^^5M5M+N  $%$$==dDIdkk62--g6))'2rt   c           	         t        | j                  | j                  t        | j	                               t        | j                                     S r   )r  r   r   rH   r   r   )r  s    ru   tensor_to_layoutzFallbackKernel.tensor_to_layoutu  s9    MMLL%fkkm4%fmmo6	
 	
rt   c                    t         j                  f}||vrt        j                  j                  n	t               }|5    j                  |g|i |\  }}}}	}
d d d         j                        }|  t        |      ||	
      n!|sJ d         t        |      ||	
       fd |g       }t        |t        t        t        f      r	|_        |S |g_        |S # 1 sw Y   xY w)NrJ  rc  z"Not sure where to find device infoc                N    t         t        t        f      r. t                fdt	        t                     D              S t         t              r: j                         D ci c]  \  }}| |t               |fgz           c}}S t         t        j                        rt        j                               S t         t              r S t         t        j                        r j                  j                  S  J dt                d       y c c}}w )Nc              3  T   K   | ]  } |   t              |fgz          ! y wr   )r   )r   r   generate_outputr  r  s     ru   r   zAFallbackKernel.create.<locals>.generate_output.<locals>.<genexpr>  s5      $ $F1Iw4<:K9L/LM$r  zFallbackKernel output type z is not supported)rx   ry   rz   r   r   r   r{   r  r   r?  MultiOutputr  r   SymIntr   rC  )r  r  r   rN  rS  r  packeds   ``  ru   r  z.FallbackKernel.create.<locals>.generate_output  s   &4-0#tF| $"3v;/$   FD) %+LLN S g$v,9L8M.MNN  FELL1"((0 
 FC(FELL1{{'''~ 1$v,?PQ~ %s   +#D!)r  *_fused_moving_avg_obs_fq_helper_functionalrV   r   r>  r   rM  r  r  r  rx   ry   rz   r{   rn  )rS  r  r   r   fake_incorrect_kernelscontextrJ  rD  rE  r3  r  r   rn  r  r  s   `            @@ru   rR  zFallbackKernel.create~  s'   "&"Q"Q!S!'/E!EAGG;= 	  	< #""6;D;F;!	< n=!&)"3F ???6!0"3F	6 "."5geT23$FN  &YFN	< 	<s   C00C9c                     t         |          S r   )rB  r  rC  s    ru   r  zFallbackKernel.apply_constraint  s    w'))rt   r   r  r,  )r  torch.Tensor)ro   rp   rq   r  r~  r%  rz  r  r  r  r  r  r  r  r  rx  rR  r  rz  r{  s   @ru   rb  rb  &  s     j4 j4 
j4X

 .  (>
 #M(^D3L 
 
 D DL* *rt   rb  c                  <     e Zd ZdZddZddZdd	 d	 fdZ xZS )
ComplexViewz9View a complex number as two dtyped numbers or vice versac                     yr  rs   r6  s    ru   r  zComplexView.should_allocate  r  rt   c                >    | j                   d   j                         gS rx  r  r6  s    ru   r  z(ComplexView.get_inputs_that_alias_output  s    A'')**rt   Nrc  c               0    t         |   ||||||       y )Nrc  )rB  r  )r0  r  r  rD  ry  r3  r  rD  s          ru   r  zComplexView.__init__  s)     	/ 	 	
rt   r  r  r  )ro   rp   rq   r  r  r  r  rz  r{  s   @ru   r  r    s)    C+ 
 

 
rt   r  c                       e Zd ZU ded<   ddZy)r  r  r   c                    | j                   S r   rJ  r6  s    ru   r   zMultiOutputLayout.get_device  rK  rt   Nr  )ro   rp   rq   rr   r   rs   rt   ru   r  r    s    rt   r  c                  R     e Zd Zd ZddZ	 	 	 	 	 	 d fdZd	dZd
dZddZ xZ	S )r  c                   t        |      dkD  r|d   \  }}t        |t              r| j                  | d| d|dd        S t        |t              rWt
        j                  j                  j                  || j                         t        |            }| j                  ||dd        S t        |t              r| j                  | d| d|dd        S t        d|      |S )Nr   []r1   z['z']znon supported index type: )r   r   ry   codegen_list_tuple_accessrz   rV   r   r  codegen_tuple_accessr  r   r{   r  )r0  basenamer  ityper   tuple_accesss         ru   r  z%MultiOutput.codegen_list_tuple_access  s    w<!qzHE1%&55
!A3a6H'RSRT+VVE5) ww33HHdmmos1v  55lGABKPPE4(55
"QCr6JGTUTVKXX$%A5IIOrt   c                    |j                  | j                         | j                  | j                  d   j                         | j                               | j                  |       y rx  )codegen_multi_outputr  r  rW  r  r  r  s     ru   r  zMultiOutput.codegen  sN    $$MMO**4;;q>+B+B+DdllS	
 	!!'*rt   c                    t         |   d ||gd       t        j                  j	                  |       | _        t        j                  j                  |        || _        y r  )rB  r  rV   r   rY  r   rZ  r  )r0  r  r  r  rD  s       ru   r  zMultiOutput.__init__  sI     	vw3GG++D1		""4(rt   c                <    | j                   d   j                         S rx  )rW  r  r6  s    ru   r  z$MultiOutput.get_unbacked_symbol_uses  s    {{1~6688rt   c                p    t        | j                        dk(  rt        | j                  d   t              ryy)Nr1   r   TF)r   rW  rx   r  r6  s    ru   r  zMultiOutput.should_allocate  s,    t{{q t{{1~'89rt   c                    | j                   D cg c]>  }t        |t              r,t        |j	                               dkD  r|j                         @ c}S c c}w rx  )rW  rx   rb  r   r  r  )r0  rc  s     ru   r  z(MultiOutput.get_inputs_that_alias_output"  sN     {{
#~.C4467!; LLN
 	
 
s   AAr  )r  r   r  zlist[tuple[Any, ...]]r   r   r,  r  r  )
ro   rp   rq   r  r  r  r  r  r  rz  r{  s   @ru   r  r    s=    $+		 '		
 
	9
rt   r  c                     e Zd ZU dZded<   d+dZd,dZd-dZd.dZd/dZ	d0d	Z
d1d2dZd3dZd4dZd5dZd4dZ	 d6	 	 	 	 	 d7dZd8dZd9dZ	 d6	 	 	 	 	 d:dZd;dZd<dZd=dZd>dZd?dZd@dZd+dZd+dZdAdZdBdZd0dZdBdZd?d Z dCd!Z!dDd"Z"dEd#Z#d1dFd$Z$e%dGd%       Z&dHd&Z'dGd'Z(dId(Z)e%d)        Z*d0d*Z+e+Z,y
)Jr  zC
    TensorBox / StorageBox allow in-place mutation of Tensors
    rc   r  c                6    | j                   j                         S r   rU  r6  s    ru   r  z!MutableBox.has_exceeded_max_reads5  rV  rt   c                6    | j                   j                         S r   rJ  r6  s    ru   r   zMutableBox.get_device8  rH  rt   c                6    | j                   j                         S r   r  r6  s    ru   r  zMutableBox.make_loader;      yy$$&&rt   c                6    | j                   j                         S r   )r  r  r6  s    ru   r  zMutableBox.make_indexer>  r\  rt   c                6    | j                   j                         S r   )r  r   r6  s    ru   r   zMutableBox.get_strideA  rH  rt   c                6    | j                   j                         S r   rM  r6  s    ru   r  zMutableBox.get_nameD  rN  rt   Nc                8    | j                   j                  |      S r   )r  r  r  s     ru   r  zMutableBox.has_large_inner_fnG  s    yy++I66rt   c                8    | j                   j                  |      S r   rR  r  s     ru   r  zMutableBox.mark_reuseJ  rS  rt   c                6    | j                   j                         S r   r[  r6  s    ru   r  zMutableBox.realize_hintM  r\  rt   c                6    | j                   j                         S r   )r  r  r6  s    ru   r  zMutableBox.unwrap_viewP  r  rt   c                6    | j                   j                         S r   )r  r  r6  s    ru   r  zMutableBox.freeze_layoutS  s    yy&&((rt   c                :    | j                   j                  ||      S r   )r  r  r  s      ru   r  z*MutableBox.freeze_layout_with_stride_orderV  s     yy88NNrt   c                8    | j                   j                  |      S r   )r  r  r  s     ru   r  z(MutableBox.freeze_layout_with_fill_order[  s    yy66u==rt   c                8    | j                   j                  |      S r   )r  r  r  s     ru   r  z(MutableBox.freeze_layout_with_same_order^  s    yy66v>>rt   c                :    | j                   j                  ||      S r   )r  r  r  s      ru   r  z+MutableBox.freeze_layout_with_exact_stridesa  s     yy99-WWrt   c                6    | j                   j                         S r   )r  r  r6  s    ru   r  zMutableBox.get_read_writesf      yy((**rt   c                6    | j                   j                         S r   r  r6  s    ru   r@  zMutableBox.get_readsi  rb  rt   c                6    | j                   j                         S r   r  r6  s    ru   r  zMutableBox.num_readsl  rb  rt   c                6    | j                   j                         S r   r^  r6  s    ru   r  zMutableBox.get_storage_numelo  r_  rt   c                6    | j                   j                         S r   rI  r6  s    ru   r  zMutableBox.get_reduction_typer  rG  rt   c                6    | j                   j                         S r   rF  r6  s    ru   r  zMutableBox.get_reduction_sizeu  rG  rt   c                6    | j                   j                         S r   ra  r6  s    ru   r  zMutableBox.is_externx  rb  rt   c                6    | j                   j                         S r   )r  r  r6  s    ru   r  zMutableBox.is_no_op{  rN  rt   c                8    | j                   j                  |      S r   rN  r  s     ru   r  zMutableBox.constant_to_device~  s    yy++F33rt   c                6    | j                   j                         S r   )r  r  r6  s    ru   r  zMutableBox.get_mutation_names  rG  rt   c                6    | j                   j                         S r   )r  r  r6  s    ru   r  zMutableBox.get_operation_name  rG  rt   c                6    | j                   j                         S r   )r  r  r6  s    ru   r  z'MutableBox.get_inputs_that_alias_output  s    yy5577rt   c                6    | j                   j                         S r   rX  r6  s    ru   r  zMutableBox.realize  rY  rt   c                6    | j                   j                         S r   r:  r6  s    ru   r  z#MutableBox.get_unbacked_symbol_uses  r;  rt   c                6    | j                   j                         S r   rg  r6  s    ru   rA  zMutableBox.get_read_names  rh  rt   c                6    | j                   j                         S r   )r  rJ  r6  s    ru   rJ  zMutableBox.get_defining_op  r  rt   c                8    | j                   j                  |      S r   )r  r  r  s     ru   r  zMutableBox.codegen_reference  s    yy**622rt   c                6    | j                   j                         S r   r  ri  r6  s    ru   r  zMutableBox.layout  s     yy((**rt   c                6    | j                   j                         S r   rG  r6  s    ru   r   zMutableBox.get_layout  rH  rt   c                6    | j                   j                         S r   r	  r6  s    ru   ri  zMutableBox.get_output_spec  r  rt   c                6    | j                   j                         S r   r  r6  s    ru   r   zMutableBox.get_size  rN  rt   c                .    | j                   j                  S r   )r  r   r6  s    ru   r   zMutableBox.dtype  s    yyrt   c                t   t        | j                  t              rQt        |       j                   dt        | j                        j                   d}d}| j                  j                  }n&t        |       j                   d}| j                  }d}|t        t        |            |g}dj                  |      S )NrS  z))r   
)rx   r  r  r   ro   rU  r   rV  )r0  line0endlrA  rW  s        ru   rH  zMutableBox.__str__  s    dii,Dz**+1T$))_-E-E,FaHEDIINNEDz**+1-EIIED 3u:

 yyrt   r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r,  r  r  r  r  r  r  )-ro   rp   rq   r  rr   r  r   r  r  r   r  r  r  r  r  r  r  r  r  r  r  r@  r  r  r  r  r  r  r  r  r  r  r  r  rA  rJ  r  r  r  r   ri  r   r   rH  rw  rs   rt   ru   r  r  -  s5    L2&'(&$7+(') 7<OO/3O	O
>? DIX+X<@X	X
+%%-..%$4..8#4*+3 + +&+$   " Hrt   r  c                      e Zd Zed        Zy)rb   c                N    t        | t              r| S t        t        |             S r   )rx   r   rb   r1  )r  s    ru   rR  zTensorBox.create  s"    d12KD)**rt   N)ro   rp   rq   r  rR  rs   rt   ru   rb   rb     s    + +rt   c                  D    e Zd Zd Zd Zd
dZddZddZd ZddZ	d Z
y	)r1  c                    t        | j                  t        t        f      r4| j                  j	                         t
        j                  j                  v S yr  )rx   r  r  r  r  rV   r   graph_inputsr6  s    ru   r  zStorageBox.is_input_buffer  s:    dii+!?@99%%'177+?+???rt   c                    t        | j                  t              xr4 | j                  j                         t        j
                  j                  v S r   )rx   r  r  r  rV   r   r8  r6  s    ru   rd  zStorageBox.is_module_buffer  s9    tyy>3 :		""$(9(99	
rt   c           	        t        | j                  t        t        t        t
        t        f      r| j                  j                         S t        | j                  t        t        t        t        f      sJ t        | j                               | j                  j                         }| j                  j                         }t        d t        | j                  j!                         | j                  j#                         | j                  j%                               | j                        | _        t&        j(                  j+                  | j                        | j                  _        t&        j(                  j/                  | j                         | j0                  | j                  _        || j                  _        || j                  _        | j                  j,                  S )Nr  r  )rx   r  r  rU  r  r  rS  r  r  r  r  r  r   rG  rD  r   r   r   r   rV   r   rY  r   rZ  r&  r*  r(  )r0  r*  r(  s      ru   r  zStorageBox.realize  sO   II	
 99%%''$))iD$%GH 	
$IIK
 	
H ii//1II++-	"!yy++-ii))+YY'')
 
	 00;			""499- LL		 +		'		yy~~rt   c                    t        | j                  t        t        f      r9| j                  j	                         j
                  dkD  r| j                          yyy)zL
        Called on buffers we expect to be forced to realize later.
        r1   N)rx   r  r  r  rd  nontrivial_read_countr  r6  s    ru   r  zStorageBox.realize_hint  sF    
 tyy9i"89		**,BBQFLLN G :rt   c                    t        | j                  t              xr3 | j                         t        j
                  kD  xs | j                         S r   )rx   r  r  r  r2   realize_acc_reads_thresholdr  r6  s    ru   r  z!StorageBox.has_exceeded_max_reads  s@    $))Y/ 
NNvAAA )&&(	
rt   c                F   |dkD  rt        | j                  t        t        f      r{t	        | j                        r3| j                  j                         ddg}t        fd|D              ry| j                         t        j                  kD  xs | j                         S y)zj
        A heuristic to decide if we should realize a tensor
        that is used multiple times.
        r1   expsigmoidc              3  :   K   | ]  }|j                   v   y wr   )used_ops)r   r   opcounts     ru   r   z5StorageBox.should_realize_on_reuse.<locals>.<genexpr>  s     @qG,,,@s   TF)rx   r  r  r  r   rd  r  r  r2   realize_reads_thresholdr  )r0  r  	heavy_opsr 	  s      @ru   should_realize_on_reusez"StorageBox.should_realize_on_reuse  s    
 19DII	9/EFdii ))446"I.	@i@@ 6#A#AA -**, rt   c                H    | j                  |      r| j                          y y r   )r#	  r  r  s     ru   r  zStorageBox.mark_reuse  s    ''.LLN /rt   c                6    | j                   j                         S r   r  r6  s    ru   r  zStorageBox.num_reads  rb  rt   Nr  r  r  r  )ro   rp   rq   r  rd  r  r  r  r#	  r  r  rs   rt   ru   r1  r1    s+    

B
$%rt   r1  c                  0    e Zd ZU ded<   ded<   dZded<   y)Subgraphr   r   ztorch.fx.GraphModulegraph_moduleNzOptional[GraphLowering]r   )ro   rp   rq   rr   r   rs   rt   ru   r'	  r'	    s    
I&&%)E")rt   r'	  c                    | D cg c]$  }t        |t              r|j                         n|& } }t        t	        d | D                    t        |       k  S c c}w )Nc              3  2   K   | ]  }t        |        y wr   )r  )r   r3  s     ru   r   z'_has_aliased_buffers.<locals>.<genexpr>+  s     ;"V*;r7  )rx   r  r  r   r,   )buffersr3  s     ru   _has_aliased_buffersr,	  %  s^      !+6? COG 
 z;7;;<s7|KKs   )Ac                  v     e Zd ZU dZded<   dZded<   dZded<   	 	 	 	 	 	 	 	 d fdZedd	       Z	dd
Z
 xZS )InvokeSubgraphNOptional[Subgraph]subgraphzOptional[list[TensorBox]]operandsOptional[list[MultiOutput]]rn  c                    t         |   d ||       || _        t        j                  j                  |       | _        t        j                  j                  |        y r  )rB  r  r0	  rV   r   rY  r   rZ  )r0  r0	  r1	  r  rD  s       ru   r  zInvokeSubgraph.__init__4  sQ     	 	 	

 !GG++D1		""4(rt   c                   t         j                  j                  j                  d   }|D cg c]  }|j                  d    }}|D cg c]  }| j                  |       }}d }g }t        |      D ]a  \  }}	t        |	t              r|j                  |	       ( |||   j                               }
|j                  | j                  |	|
             c |}|j                  |t         j                  j                  |j                  ||j                        |_        t        j                  |j                        5   |j                  j                   |  d d d        |j                  j"                  }d }|D ]$  }	t        |	t              r|	j%                         } n |J t'        ||t)        |            dfd}t        |      D cg c]  \  }} |||       }}}|_        |S c c}w c c}w # 1 sw Y   xY wc c}}w )	Nr   rN  c                    | D cg c]4  }t        |t        j                        r|j                  j                  n|6 c}S c c}w r   )rx   r   r  r   rC  )r   r   s     ru   handle_sym_exprz.InvokeSubgraph.create.<locals>.handle_sym_exprK  s0    OUV!:a#>AFFKKAEVVVs   9Agmexample_inputssubgraph_namerJ  )r0	  r1	  r  c           
        t        | t        t        f      r| S t        t	        | j                         | j                         | j                         | j                         | j                         j                        t        |fg      S )Nra  )rx   r   r  r  r  r   r   r   r   r   r  ry   )r  indinvoke_subgraphs     ru   create_outputz,InvokeSubgraph.create.<locals>.create_outputs  s|    &#8:N"OP"%002$..0#__.%002%00299 $C[M
 
rt   )r  rc   r<	  r   )rV   r   rr  r   rH  r  r   rx   r   r  r   r  make_subgraphr(	  r   set_graph_handlerrungraph_outputsr   r.	  r  rn  )rS  r0	  r1	  fx_operandsr   fake_operandsr6	  new_operandsr   operandexample_stridern  r   r>	  r   r  r=	  s                   @ru   rR  zInvokeSubgraph.create@  s    gg**//30;<1<<
 3;;QC%%a(;;	W %h/ 	XLC'#89##G,!0s1C1J1J1L!M##C$=$=g~$VW	X  >>!WW22((,&mm 3 HN
 $$X^^4 3"""M23 ....  	Gg'<= ++-	 !!!($F3
	  >Gw=OP	6=+PP")C =
 <,3 3H Qs   G1	G6?G;H;Hc                &    |j                  |        y r   )codegen_invoke_subgraphr  s     ru   r  zInvokeSubgraph.codegen  r  rt   )r0	  r'	  r1	  zlist[TensorBox]r  r  r   r   )r0	  r'	  r  )ro   rp   rq   r0	  rr   r1	  rn  r  rx  rR  r  rz  r{  s   @ru   r.	  r.	  .  se    #'H '*.H'.+/G(/
) 
),;
)EV
)	
) D DL.rt   r.	  c                       e Zd ZU dZded<   dZded<   dZded<   dZded<   dZd	ed
<   	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ	e
	 	 	 	 	 	 	 	 dd       ZddZddZ xZS )ConditionalNr  	predicate7Optional[list[Union[TensorBox, ShapeAsConstantBuffer]]]r1	  r/	  true_subgraphfalse_subgraphr2	  rn  c                    || _         || _        || _        || _        t	        |g|z         \  }}t
        	|   d |||       ||| _        t        j                  j                  |       | _        t        j                  j                  |        y N)r   r  rW  r  )rL	  r1	  rN	  rO	  _split_by_sym_typerB  r  r  rV   r   rY  r   rZ  )
r0  rL	  r1	  rN	  rO	  r  r  sym_argsrD  rD  s
            ru   r  zConditional.__init__  s     # *, 2I;3I J+"	 	 	
 (%6D"GG++D1		""4(rt   c                   | j                  |      }|D cg c]  }| j                  |       }}t        j                  j                  j                  d   }|D cg c]  }|j
                  d    }}||fD ]  }|j                  t        j                  j                  |j                  ||j                        |_        t        j                  |j                        5   |j                  j                  |  d d d         |j                  j                  }	|j                  j                  }
d|	fd|
ffD ]!  \  }}t        |	      st        d| d|        t        |	      t        |
      k(  s	J |	|
f       t        t!        |	|
            D ]  \  }\  }}|j#                         |j#                         k(  s
J |||f       |j%                         |j%                         k(  s
J |||f       |j'                         j(                  |j'                         j(                  k(  rJ |||f        t+        d |g|z   D              }t-        t        j                  j.                  j0                  t        j                  j                  j
                  j3                  d	d             }|J d
       t5        ||||t7        |      |      }dd}t        t!        |	t        j                  j                  j
                  d               D cg c]  \  }\  }}t9        t;        |j#                         |j%                         |j=                         D cg c]
  } ||       c}|j?                         D cg c]
  } ||       c}|j'                         j(                        |t@        |fg       }}}}}||_!        |S c c}w c c}w # 1 sw Y   axY wc c}w c c}w c c}}}}w )Nr   rN  r7	  true_fnfalse_fnzVOutput aliasing is currently not supported in compiled torch.cond. The outputs of the z% subgraph of torch.cond are aliased: c              3  \   K   | ]$  }t        |t              s|j                          & y wr   )rx   r   r   )r   os     ru   r   z%Conditional.create.<locals>.<genexpr>  s)      
a!67 LLN
rK  r  zcannot determine devicerJ  )rL	  r1	  rN	  rO	  r  r  c                R    t        | t              r| S | j                  j                  S r   )rx   r   r   rC  )r   s    ru   _maybe_exprz'Conditional.create.<locals>._maybe_expr  s    !S!66;;rt   ra  )r   zUnion[int, torch.SymInt]r   zUnion[int, sympy.expr])"r  rV   r   rr  r   rH  r?	  r(	  r   r@	  rA	  rB	  r,	  r  r   r   r   r   r   r   r  r)  r(   r   r   rN  rK	  r  r  r  r   r   ry   rn  )rS  rL	  rU	  rV	  r1	  r   rC	  rD	  r0	  true_outputsfalse_outputsr   rn  r   tofor   r  conditionalrZ	  r  merged_outputri  s                          ru   rR  zConditional.create  s    %%i0	2:;QC%%a(;;gg**//30;<1<< (+ 		7H~~%!"!6!6,,#0"*-- "7 "
 ((8 7&HNN&&67 7		7 }}22 44(,7*m9TU 	MD'#L1$**./TU\T]_ 	 < C$66U}8UU6$S}%EF 	QKAxB==?bmmo5B2r{B5<<>R\\^3@aR[@3==?))R]]_-C-CCPaR[PC	Q
  
[8+
 

 6GG&&GG  %%))*=tD
 !<#<<!!!#$F3/
	& /8L!''"6"6";";E"BC/
 
 +*FM !,,. **,4A4F4F4HIb+b/I6C6J6J6LMKOM!,,.55 

 
& &Y <<7 7b JM
s<   OO	!OAO%
O#O%
7O 0O%
O	
O%
c           	         |j                  |        |j                  | j                         | j                  t	        | di              y r|  )codegen_conditionalr}  r  rn  r   r  s     ru   r  zConditional.codegen  s9    ##D)88MMOT\\749Lb+Q	
rt   c                    t        | dd       x}rBt        t        j                  j                  j
                  |      }|J |j                         S t               S r|  r  r  s      ru   r%  z$Conditional.get_unbacked_symbol_defs  r  rt   )rL	  rc   r1	  -list[Union[TensorBox, ShapeAsConstantBuffer]]rN	  r'	  rO	  r'	  r  r  r  z,Optional[dict[sympy.Symbol, pytree.KeyPath]]r   r   )rL	  rb   rU	  r'	  rV	  r'	  r1	  rd	  r  r,  )ro   rp   rq   rL	  rr   r1	  rN	  rO	  rn  r  rx  rR  r  r%  rz  r{  s   @ru   rK	  rK	    s    "&I&HLHEL(,M%,)-N&-+/G(/)) @)  	)
 !) ") H) 
)8 TT T 	T
 @T Tl
 rt   rK	  c                    g }g }| D ]?  }t        |t              r|j                  |j                         /|j                  |       A ||fS r   )rx   r   r  rC  )r   non_sym_argsrS	  r  s       ru   rR	  rR	    sS     LH %c01OOCHH%$	% \!!rt   c                       e Zd ZU dZded<   dZded<   dZded<   dZded<   dZded	<   	 	 	 	 	 	 	 	 	 	 	 	 d fd
Z	e
	 	 	 	 	 	 	 	 dd       ZddZ xZS )	WhileLoopNrM	  carried_inputsadditional_inputsr/	  cond_subgraphbody_subgraphr2	  rn  c                   || _         || _        || _        || _        t	        ||z         \  }}t
        |   d |||       t        j                  j                  |       | _
        t        j                  j                  |        y rQ	  )ri	  rj	  rk	  rl	  rR	  rB  r  rV   r   rY  r   rZ  )	r0  ri	  rj	  rk	  rl	  r  rS	  rD  rD  s	           ru   r  zWhileLoop.__init__-  s     -!2** 2>DU3U V+"	 	 	
 GG++D1		""4(rt   c                &	   |D cg c]  }| j                  |       }}|D cg c]  }| j                  |       }}||z   }t        j                  j                  j                  d   t        j                  j                  j                  d   z   }|D cg c]  }|j
                  d    }}||fD ]  }	|	j                  t        j                  j                  |	j                  ||	j                        |	_        t        j                  |	j                        5   |	j                  j                  |  d d d         |j                  j                  }
|j                  j                  }t        |      rt        d|       t        |
      dk(  sJ |
       |
d   }t        |t               sK|j#                         t$        j&                  k(  sJ |       t        |j)                               dk(  sJ |       t        |      dkD  sJ d       |d   j+                         }t        |      t        |      k(  s	J ||f       t-        t/        ||            D ]  \  }\  }}	 	 	 	 	 	 dd	} ||j)                         |j)                                 ||j1                         |j1                                |j+                         |j+                         k(  sJ ||||f       |j#                         |j#                         k(  s
J |||f       |j3                         j4                  |j3                         j4                  k(  rJ |||f        t7        ||||t9        |
            }t-        |      D cg c]w  \  }}t;        t=        |j+                         |j#                         |j)                         |j1                         |j3                         j4                        |t>        |fg      y }}}t/        ||      D ]g  \  }}|jA                         t        j                  jB                  v s1t        j                  jD                  jG                  |jA                                i ||_$        |S c c}w c c}w c c}w # 1 sw Y   xY wc c}}w )Nr   rN  r7	  zOutput aliasing is currently not supported in compiled torch.while_loop. The outputs of the body_fn subgraph of torch.while_loop are aliased: r1   r   z9torch.while_loop is assumed to have at least one operand.c                    t        | |      D ]/  \  }}t        j                  j                  j	                  ||       1 y r   )r   rV   r   r   r  )	lhs_exprs	rhs_exprslhsrhss       ru   _guard_list_equalsz,WhileLoop.create.<locals>._guard_list_equalsy  s8     !$Iy 9 <HCGG$$11#s;<rt   rJ  )ri	  rj	  rk	  rl	  r  ra  )rq	  list[Union[int, sympy.expr]]rr	  rv	  r   r   )%r  rV   r   rr  r   rH  r?	  r(	  r   r@	  rA	  rB	  r,	  r  r   rx   r   r   r   rl   r   r   r   r   r   r   r  rh	  r  r  r  ry   r  r	  r  r  rn  )rS  cond_fnbody_fnri	  rj	  r   
all_inputsfx_all_inputsfake_all_inputsr0	  cond_outputsbody_outputsr.  r   r   opboru	  
while_loopr  rn  rc  r   s                          ru   rR  zWhileLoop.createE  s@    9GG1#++A.GG;LMaS..q1MM#&77
,,11"58L8L8Q8QRT8UU2?@Q166%=@@ '* 		9H~~%!"!6!6,,#0"*-- "7 "
 ((8 9&HNN&&89 9		9 }}22}}22- XXdWeg  < A%3|3%O!23;;=EJJ.11.qzz|$),1,):" 	
G	
" A))+ >"c,&77W.,9WW7$S%FG 	QKAxB<7<7< < r{{}bkkm<r}}@ ==?bmmo5J2r67JJ5<<>R\\^3@aR[@3==?))R]]_-C-CCPaR[PC	Q" )/!!$F3

* '|4
 6 !,,. **,*!,,.!,,.55 

 
 NG4 	@HC||~!5!55 ++//?	@ %
{ HM A9 9j
s#   Q1Q6Q;R 3A<R R
	c                &    |j                  |        y r   )codegen_while_loopr  s     ru   r  zWhileLoop.codegen  s    ""4(rt   )ri	  rd	  rj	  rd	  rk	  r'	  rl	  r'	  r  r  r   r   )rw	  r'	  rx	  r'	  ri	  rd	  rj	  rd	  r  )ro   rp   rq   ri	  rr   rj	  rk	  rl	  rn  r  rx  rR  r  rz  r{  s   @ru   rh	  rh	  %  s    NRNKRQUNU(,M%,(,M%,+/G(/)E) I)  	)
  ) ") 
)0 dd d F	d
 Id dL)rt   rh	  c                  @     e Zd Z	 ddd	 d fdZd fdZd	dZ xZS )
r   Nrc  c          	     j   t         |   |||||d |       ddlm} |D 	cg c]   }	t	        |	t
              r|	j                  n|	" }
}	 ||g ||
|      }|J || _        t        j                  j                  j                  |d       | _        | t        j                  j                  |<   y c c}	w )N)r   r  r   )get_effect_key)rB  r  torch._higher_order_ops.effectsr	  rx   r  r   effect_typerV   r   effectful_opsrN  prev_effect_buffer)r0  r  r  rD  ry  r3  r   r  r	  r  uncovered_argsr	  rD  s               ru   r  zEffectfulKernel.__init__  s     	/ 	 	
 	C GR
ABz!_5AGG1<
 
 %V-O~-O-OQWX&&&&"#''"7"7";";K"N-1k*
s   %B0c                    t         |          }| j                  F|j                  j	                  t        j                  | j                  j                                      |S r   )rB  r  r	  r  r  r3   r  r  )r0  r  rD  s     ru   r  zEffectfulKernel.get_read_writes  sU    g-/"".!!$$T%<%<%E%E%GH rt   c                     yrL  rs   r6  s    ru   r  z EffectfulKernel.has_side_effects  rK  rt   r   r  r  r  )ro   rp   rq   r  r  r  rz  r{  s   @ru   r   r     s,     2 2 
2@rt   r   c                      e Zd Zy)rX  Nr  rs   rt   ru   rX  rX    s    rt   rX  c                  V    e Zd ZU ddlmZ ded<   ded<   d Zddd	Zdd
ZddZ	ddZ
y)r  r   )FakeScriptObjectr   r   +Union[FakeScriptObject, torch.ScriptObject]r   c                    | j                   S r   r:  r6  s    ru   r  zTorchBindObject.get_name  r  rt   Nc                    | j                   S r   r:  r  s     ru   r  z!TorchBindObject.codegen_reference  r  rt   c                    | j                   S r   r   r6  s    ru   	get_valuezTorchBindObject.get_value  r]  rt   c                    t        | j                  t        j                        r| j                  S | j                  j                  S r   )rx   r   r   ScriptObjectreal_objr6  s    ru   r:  zTorchBindObject.get_real_obj  s0    djj%"4"45::::&&&rt   c                N   | j                         }t        |j                               }t        j                  |      d   }|D cg c]=  }t        |t        j                        r!|j                         |j                         z  ? }}t        j                  d |d      S c c}w )Nr   c                    | |z   S r   rs   )r   ys     ru   r  z/TorchBindObject.get_buf_bytes.<locals>.<lambda>  s
    QU rt   )r:  r{   __obj_flatten__r*  r5  rx   r   r?  r  numelr  r<  )r0  real_script_obj	flat_dict
flat_elemsr   
flat_sizess         ru   get_buf_byteszTorchBindObject.get_buf_bytes  s    ++-88:;	((3A6
  
!U\\* NNqwwy(

 

  2JBB
s   AB"r   r  )r   r	  )r   ztorch.ScriptObjectr  )ro   rp   rq   "torch._library.fake_class_registryr	  rr   r  r  r	  r:  r	  rs   rt   ru   r  r    s*    C
I66'
Crt   r  c                  2    e Zd ZU ded<   ded<   d Zdd	dZy)
r6  r   r   r  r   c                    | j                   S r   r:  r6  s    ru   r  zGeneratorState.get_name  r  rt   Nc                    | j                   S r   r:  r  s     ru   r  z GeneratorState.codegen_reference  r  rt   r   r  )ro   rp   rq   rr   r  r  rs   rt   ru   r6  r6    s    
Irt   r6  c                  V    e Zd ZddZddZdd	dZe	 	 	 	 d
d       Ze	 	 dd       Zy)_CollectiveKernelc                     yr  rs   r6  s    ru   r  z!_CollectiveKernel.should_allocate  r  rt   c                     yrL  rs   r6  s    ru   r  z"_CollectiveKernel.has_side_effects  rK  rt   Nc                H   t        | j                        t        j                  j                  u sJ d       | j                  }|j
                  j                  | _        |j
                  j                  D cg c]  }|j                  s|j                   c}| _
        y c c}w )Nz,Setting cpp kernel needs a valid op_overload)r   r  r   r  r  r  r   r  r  r  r  )r0  r  r  r   s       ru   r  z%_CollectiveKernel.set_cpp_kernel_name  s    D$$%)>)>> 	
:	
> !!%~~22 #NN44.
AFF.
* .
s   6BBc           
     *   t         j                  j                  5   | j                  ||g|i |\  }}}}}	d d d        	rJ | d|	        D ]  }
|
j	                           |d   j                         } | t        |      ||      }t        j                  |      }|j                  j                  |D cg c]  }t        t        |      ||       c}       |j                  j                  |D cg c]  }|j                          c}       d|v r`|j                  j                  t        t        |      |d   |             |j                  j                  |d   j                                y y # 1 sw Y   GxY wc c}w c c}w )Nr  r   rJ  r   )rV   r   r>  rM  r  r   r  r*  tree_leavesr  r2  rs  rh  r  r  )rS  r  rW  r   r   _example_outputrD  rE  r3  r  
tensor_argr   r  inpsr  rc  s                   ru   create_inplacez _CollectiveKernel.create_inplace,  s    WW 	D #""66CDCFC!	D %E2C1D&EE$% 	!J 	! Q**,f%
 !!&)&&OST^Jf5sFCT	

 	!!T"Bc3<<>"BCF?##**z8&-P %%fUm&<&<&>? 9	D 	D. U #Cs   E>=F;F>Fc           
     J   t         j                  j                  5   | j                  ||g|i |\  }}}}}	d d d        	rJ | d|	        D ]  }
|
j	                           t        t              rx| j                  ||      } | t        |      ||      }t        |      D cg c](  \  }}t        | j                  |      |t        |fg      * c}}|_        |j                  S  | | j                  |      ||      }|g|_        |S # 1 sw Y   xY wc c}}w )Nr  rJ  )rV   r   r>  rM  r  rx   ry   r  r  r   r  r  rn  )rS  r  rW  r   r   rJ  rD  rE  r3  r  r	  r   r  r   r  s                  ru   create_out_of_placez%_CollectiveKernel.create_out_of_placei  sY    WW 	D #""66CDCFC!	D %F3D2E&FF$% 	!J 	! nd+__[.AF!0F "+>!: Av ((0AYKFN >>!$$^4F %XFNMO	D 	D*s   D,-DDr  r   r  )rW  !Union[TensorBox, list[TensorBox]]r   r   )rW  r	  )	ro   rp   rq   r  r  r  rx  r	  r	  rs   rt   ru   r	  r	    sV    
	
" $@>$@	$@ $@x *>* *rt   r	  c                  8     e Zd Zd Zedd       Zd fdZ xZS )_WaitKernelc                
   | j                   d   }t        |t              r|j                   d   gS t        |t              rC|j                   d   }t        |t              r"|j                  d   \  }}|j                   |   gS g S g S rx  )rW  rx   r	  r  r  )r0  rc  collr   r   s        ru   get_volatile_readsz_WaitKernel.get_volatile_reads  s}    kk!nc,-JJqM?"[) ::a=D$ 12Q3C())I Irt   c                r   t         j                  j                  5  | j                  ||      \  }}}}}d d d        rJ | d|         | t	        |j                               |      }|j                  j                  t        t	        |j                               ||             y # 1 sw Y   zxY w)Nr  rJ  )	rV   r   r>  rM  r  r   r  r  rs  )	rS  r  rc  r	  rD  rE  r3  r  r  s	            ru   create_waitz_WaitKernel.create_wait  s    WW 	0 ""63/!	0 %E2C1D&EE$cnn./
 	&&:S^^-=>VL	
!	0 	0s   B--B6c                    t         |          }| j                         }|D ]>  }|j                  j	                  t        j                  |j                                      @ |S r   )rB  r  r	  r  r  r3   r  r  )r0  r  volatile_readsvrrD  s       ru   r  z_WaitKernel.get_read_writes  sZ    g-/002  	GB!!,"6"6r{{}"EF	Grt   )rc  rb   r   r   r  )ro   rp   rq   r	  rx  r	  r  rz  r{  s   @ru   r	  r	    s&    * 
 
* rt   r	  c                2   t        | t        t        f      rt        |       S t        | t        t
        f      r2t        t        j                            }| D ]  }|t        |      z  } |S t        | t        j                        rt        |       S t               S r   )rx   r*   r   r&   rz   ry   r,   r   r   r  r   r?  )r   rU  r   s      ru   r  r    s    !h%&$Q''	At}	%u||$& 	0A,Q//A	0	Au||	$$Q''|rt   )r   r   r   r   )r   r   r   r  )r   Sequence[int]r   z&Callable[[Sequence[_T]], Sequence[_T]])r   z&Callable[[Sequence[_U]], Sequence[_V]]r   z&Callable[[Sequence[_T]], Sequence[_U]]r   z&Callable[[Sequence[_T]], Sequence[_V]]r   )r   z(Sequence[Union[int, torch.SymInt, Expr]]r   zOptional[ShapeEnv]r   r	  )r   Sequence[Union[int, Integer]]r   r	  r  )r   zLiteral[None]r   rl   r   r   )r   rc   r   rl   r   r  )r   r  r   rl   r   zOptional[torch.Tensor])r   zOptional[Sequence[_T]]r   z Optional[Sequence[Optional[_T]]])r   z2Union[IRNode, OutputSpec, torch.device, None, str]r   r  )r   z&Union[IRNode, torch.device, None, str]r   rl   )r   zUnion[Buffer, TensorBox]r   r   r   rl   )r  r  r	  r  r
  r  r   rl   )r  Union[TensorBox, BaseView]r  z"Sequence[Union[int, torch.SymInt]]r   r	  )r   zUnion[Expr, Sequence[Expr]]r   r  r   rU   )r  r   r   r  r  rl   r   r  )r   rc   r   rl   )TFNFN)r   rc   r$  rl   r.  rl   r/  'Optional[Sequence[Union[int, Integer]]]r  rl   r  r	  r   ztuple[StorageBox, Layout])r   rc   r/  r	  r   rl   )r   r  r
  r  r   rl   )r   r  r   r   )r+	  ri  r   rl   )r   z	list[Any]r   z-tuple[list[ShapeAsConstantBuffer], list[Any]])r   r   r   r  (  
__future__r   r	  r  r  r=  loggingtextwrapr(  rK  collections.abcr   r   r   r   r   enumr	   r
   r   r   r   r   r   r   r   r   r   typing_extensionsr   r   r   unittest.mockr   r   r   r   r   torch._export.serde.schema_exportserderz  r  torch._library.utilsru  r  rn  torch._loggingr   torch.fxtorch.utils._pytree_pytreer*  torch._dynamo.utilsr   torch._export.serde.serializer   *torch._higher_order_ops.auto_functionalizer   torch._inductorr   torch._prims_commonr   r    r!   r"   r#   torch._subclasses.fake_tensorr$   %torch.fx.experimental.symbolic_shapesr%   r&   r'   r(   r)   r*   torch.utils._ordered_setr,   torch.utils._sympy.functionsr-   r.   r/   torch.utils._sympy.symbolr0   rM  r2   r3   codegen.commonr4   r5   r6   r7   r8   r9   r:   r;   	loop_bodyr<   ops_handlerr=   r>   r?   r@   runtime.benchmarkingrA   runtime.hintsrB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   virtualizedrT   rU   rV   torch.fx.noderW   codegen.cuda.cuda_templaterX   r   rY   rZ   r   rr   r   __version__r  r  ImportErrorr[   r\   r]   r   r^   r  r_   	getLoggerro   r  rU  r  r{   r   rd   r  rg   r   r   r   r   r   ru  rw  r   r   r   r   r   r   r   r   r  r  r!  rc   r  r.  r  r  r  r  r  r  INNER_FN_TYr  r  r  r  r  r  r   r+  r  r6  r8  r}   r~  r  r  r{  r  r  r  r	  r  r  r   r$  r   rm  r  r   r  r  r  r  r  r2  r  r  r  r  r  r   r  rS  rk  rl   ry   PrimitiveInfoTyper{  r  r  r  r  rU  r  r  r  r  r  r  rs  r  r  r  r  r  r  r  r#  r3  r@  r~   r   r`  rb  r  r  r  r  rb   r1  r'	  r,	  r.	  rK	  rR	  rh	  r   rX  r  r6  r	  r	  r  rs   rt   ru   <module>r	     sL   "         9 9 :  
 
 
 = <   ' ' 2 2 , ,   $ $ ( ? M #  :  0 L L * " 
    N N - :    $ * ) "8$% %L)$''NJ T]T]T]CI&) &C,-) -g!			8??4	8yy~~'T  k	sDk!12K8STU	i 	 d#  $$D44 , ! $  TX	1>P	 TX
	1
>P

 
 N 
 N 
 O 
 O .2&*8!%	>9	>	>;('0     
	.#G&#G/#G  #GLp, p,f UA A AH ~
F ~
 ~
B& 
 
 
@ 
i 
 
F |$y!y!u=)< 8  JN<N<N +<NBF<N<N~ o

 o

 o

d 7AB7S9 7St#1 #L[
+ [
| @
5 @
 @
H 	 	 	 Q5 Q Qh	 !<@=A999 9 :	9
 9 ;9 9x:	 \
v \
 \
~ K K K\ (( ( (V 79( 79 79t (  : U; U Up Ph P Pf % % %POA OAd 6  " K| K K$ S| S S'9	<7 7 HTZ HT HTV& $I7V I7XPf P2T $%{ $%N   .Q* Q*h UxV x xv U&fi & & & 
K 
[ 
& 6  " 
F 
 
 UE4_ E4 E4P
B
_ B
J1> 1h #udCeCeT<Q6R1SST -$ -$`"| "
D=. D=NM M$( (& U>? > >B h9 hV UH< H HV U2l 2 2j
/ 
(*" *"ZV .G.L G.Tll l^&)| &)T-, -`< 6B5 B"F- F,H)l H)V,)| ,)^!T !TH-L -46;< 6;r U  
`*& `*F U
. 
 
< 
  8
, 8
z N N Nb+
 +T% T%n U*v * *L UY.\ Y. Y.x UH , H  H V"
"2" UG) G) G)T,n ,^	6 	 Cl C CD \  B BJ2# 2pyr  NJs   b6 6	cc